/*
 * Copyright (c) 1991, 1992, 1993, 1994, 1996, 1997, 1998
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */

#ifndef lint
static const char copyright[] =
    "@(#) Copyright (c) 1991, 1992, 1993, 1994, 1996, 1997, 1998\n\
The Regents of the University of California.  All rights reserved.\n";
static const char rcsid[] =
    "@(#) $Header: histtrim.c,v 1.32 98/05/12 13:28:29 leres Exp $ (LBL)";
#endif

/*
 * histtrim - trim the history file
 */
#include <sys/types.h>
#include <sys/file.h>
#include <sys/time.h>

#include <ctype.h>
#include <errno.h>
#ifdef HAVE_MEMORY_H
#include <memory.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef TIME_WITH_SYS_TIME
#include <time.h>
#endif
#include <unistd.h>

#include "gnuc.h"
#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
#endif

#include <err.h>

#include "dexpire.h"
#include "hash.h"
#include "util.h"

#define LINESIZE 8192

/* Private data */
static char *dfeedback = DFEEDBACK;	/* feedback file read by readfeedback() */
static char *history = HISTORY;		/* history file to read (-H) */
static char *nhistory = NHISTORY;	/* new history file to write (-N) */
static char *expires = EXPIRES;		/* final name of the explicit expires file */
static char *nexpires = NEXPIRES;	/* explicit expires file as written (-u) */
static char *spool_dir = SPOOL_DIR;	/* passed to artpath() by artexists() */

static time_t currenttime;	/* current timestamp */
/*
 * Set from -m as a number of seconds; main() then converts it to an
 * absolute timestamp.  histtrim() only applies it to history entries
 * that have no newsgroups.
 */
static time_t mintime = 0;	/* keep all history entries newer than this */

/* Public data */
#ifdef HAVE__PROGNAME
extern char *__progname;
#else
char *__progname;		/* set from argv[0] in main() */
#endif

int debug = 0;			/* debugging modes */
int verbose = 0;		/* verbose information to stdout */
int doexpires = 0;		/* update the explicit expires file */

/* Statistics */
int bad = 0;			/* number of history lines that failed to parse */
int newnogroup = 0;		/* number of newly created "no groups" */
int oldnogroup = 0;		/* number of "no groups" omitted */
int nogroup = 0;		/* number of "no groups" in new history */
int nofeedback = 0;		/* number not in the feedback file */
int neverexpired = 0;		/* number for which hashfind() returned 0 */
int emitted = 0;		/* number of lines written to the new history */
int numread = 0;		/* number of old history lines read */

/* Forwards */
int readfeedback(FILE *, int (*)(char *, time_t));
int main(int, char **);
int histtrim(FILE *, FILE *, FILE *);
int artexists(char *);

/* External data */
extern char *optarg;
extern int optind;
extern int opterr;

/*
 * histtrim entry point.
 *
 * Parses the command line, loads the feedback file into the hash table
 * (via readfeedback() and hashadd()), then runs histtrim() to copy the
 * history file to the new history file and, with -u, to the new
 * explicit expires file.
 *
 * Options:
 *	-d		increment the debug level
 *	-u		also write the explicit expires file
 *	-v		report times and statistics
 *	-m days		minimum time (1 to 365 days) applied by histtrim()
 *			to history entries without newsgroups
 *	-H history	history file to read
 *	-N newhistory	history file to write
 *
 * Exits with histtrim()'s return value, with 1 or'ed in if a stream
 * error was detected on either history file or the new history file
 * could not be closed.  Fatal errors exit with status 1 via
 * err()/errx().  Problems with the explicit expires file only produce
 * warnings.
 */
int
main(int argc, char **argv)
{
	register int op;
#ifndef HAVE__PROGNAME
	register char *cp;
#endif
	register FILE *fin, *fout, *fexp;
	int status;
	int expok;		/* new expires file written and closed ok */
	char *usage = "usage: %s [-duv] [-m days] [-H history] [-N newhistory]";

#ifndef HAVE__PROGNAME
	if ((cp = strrchr(argv[0], '/')) != NULL)
		__progname = cp + 1;
	else
		__progname = argv[0];
#endif

	/* Process arguments (getopt() returns -1 when done) */
	while ((op = getopt(argc, argv, "duvm:H:N:")) != -1)
		switch (op) {

		case 'd':
			++debug;
			break;

		case 'm':
			mintime = atoi(optarg);
			if (mintime <= 0 || mintime > 365)
				errx(1, "\"%s\" invalid argument to -m",
				    optarg);
			/* Convert days to seconds */
			mintime *= 24 * 60 * 60;
			break;

		case 'u':
			++doexpires;
			break;

		case 'v':
			++verbose;
			break;

		case 'H':
			history = optarg;
			break;

		case 'N':
			nhistory = optarg;
			break;

		default:
			errx(1, usage, __progname);
		}

	if (optind != argc)
		errx(1, usage, __progname);

	/* Fetch current time (used in various calculations) */
	currenttime = time(0);

	/* Convert from delta to timestamp */
	mintime = currenttime - mintime;

	/* Report various times */
	if (verbose) {
		msg("Current time: %s", fmtdate(currenttime));
		if (mintime != currenttime)
			msg("Minimum time: %s", fmtdate(mintime));
		else
			msg("No minimum time");
		(void) fflush(stdout);
	}

	/* Load in the feedback file */
	if ((fin = fopen(dfeedback, "r")) == NULL)
		err(1, "fopen(): %s", dfeedback);
	if (!readfeedback(fin, hashadd))
		exit(1);
	(void)fclose(fin);

	/* Dump hash table if debugging */
	if (debug > 2)
		hashdump();

	/* Open the current history file */
	if ((fin = fopen(history, "r")) == NULL)
		err(1, "fopen(): %s", history);

	/* Create the new history file */
	if ((fout = fopen(nhistory, "w")) == NULL)
		err(1, "fopen(): %s", nhistory);

	/* If requested, create the explicit expires file (not fatal) */
	fexp = NULL;
	if (doexpires && (fexp = fopen(nexpires, "w")) == NULL)
		warn("fopen(): %s", nexpires);

	/* Process the history file */
	status = histtrim(fin, fout, fexp);

	/* Check for errors and close the files */
	if (ferror(fin)) {
		warn("ferror(): %s", history);
		status |= 1;
	}
	if (ferror(fout)) {
		warn("ferror(): %s", nhistory);
		status |= 1;
	}

	/*
	 * Track the outcome for the expires file in a flag; the FILE
	 * pointer itself must not be examined once it has been closed.
	 */
	expok = 0;
	if (fexp != NULL) {
		if (ferror(fexp)) {
			warn("ferror(): %s", nexpires);
			(void)fclose(fexp);
		} else if (fclose(fexp) == EOF)
			warn("fclose(): %s", nexpires);
		else
			expok = 1;
	}

	(void)fclose(fin);
	/* Buffered output is flushed here, so a failure means lost data */
	if (fclose(fout) == EOF) {
		warn("fclose(): %s", nhistory);
		status |= 1;
	}

	/*
	 * Rename the explicit expires file.  If the rename fails, remove
	 * the old file and try once more (presumably for systems where
	 * rename() will not replace an existing file); only complain if
	 * the retry fails too.
	 */
	if (expok && rename(nexpires, expires) < 0) {
		(void)unlink(expires);
		if (rename(nexpires, expires) < 0)
			warn("rename(): %s", nexpires);
	}

	if (verbose) {
		(void) fflush(stderr);
		msg("%d bad", bad);
		msg("%d newly created \"no groups\"", newnogroup);
		msg("%d old \"no groups\"", oldnogroup);
		/*
		 * NOTE(review): nogroup counts "no groups" entries that
		 * histtrim() kept in the new history, so the "omitted"
		 * label looks wrong; left unchanged in case something
		 * parses this output.
		 */
		msg("%d \"no groups\" omitted", nogroup);
		msg("%d not in the dfeedback file", nofeedback);
		msg("%d neverexpired", neverexpired);
		msg("%d numread", numread);
		msg("%d emitted", emitted);
		msg("Took %s", fmtdelta(time(0) - currenttime));
	}

	exit(status);
}

/*
 * Copy the history file "fin" to "fout", removing newsgroup references
 * that are no longer wanted and history entries that are too old.
 *
 * Each input line is parsed as
 *
 *	msgid \t arrival ~ expires ~ date [\t group/artnum [group/artnum ...]] \n
 *
 * where arrival and date are runs of digits and expires is either a
 * run of digits or a single '-'.
 *
 * For each group/artnum reference the group is looked up with
 * hashgroup()/hashfind():
 *	- lookup result < 0 (not in the feedback file) or == 0 ("never
 *	  expired"): the reference is kept only if artexists() says the
 *	  article is there;
 *	- otherwise the reference is kept if the arrival time is not
 *	  older than the lookup result.
 * References that are not kept are cut out of the line.
 *
 * An entry that has (or ends up with) no newsgroups is written only
 * if its arrival time is not older than mintime.  Lines that fail to
 * parse are counted in "bad", reported with warnx() and dropped.
 *
 * If "fexp" is not NULL, emitted lines that carry an explicit expires
 * timestamp and still have at least one newsgroup are also written to
 * it.
 *
 * Updates the global statistics counters and sets "emitted" to the
 * number of lines written to "fout".  Always returns 0; the caller is
 * expected to check the streams with ferror().
 */
int
histtrim(register FILE *fin, register FILE *fout, register FILE *fexp)
{
	/* XXX not registers: the address of cp2 is passed to hashgroup() */
	char *cp, *cp2;
	register int h;
	register int numkept;
	time_t t, ht;
	char line[LINESIZE], line2[LINESIZE];
	register int n;
	register int hasexpires;
	int keep;
	char *e;

	e = NULL;
	n = 1;

	while (fgets(line, sizeof(line), fin)) {
		/* Save a copy (line is edited in place below) */
		strcpy(line2, line);

		/* Statistics */
		++numread;

		/* Flag the fact that we haven't counted these yet */
		numkept = -1;

		/* Step over message id */
		/* XXX should really check for legal message-id characters */
		cp = strchr(line, '\t');
		if (cp == NULL) {
			e = "missing timestamps";
			goto eatline;
		}
		++cp;

		/* Arrival timestamp */
		t = atol(cp);

		/*
		 * The <ctype.h> functions need a value representable as
		 * unsigned char, hence the casts here and below.
		 */
		if (!isdigit((unsigned char)*cp)) {
			e = "missing arrival timestamp";
			goto eatline;
		}
		++cp;
		while (isdigit((unsigned char)*cp))
			++cp;

		/* Look for separator */
		if (*cp != '~') {
			e = "missing arrival/expires timestamp seperator";
			goto eatline;
		}

		/* Step over '~' */
		++cp;

		/* Look for explicit expires time and date */
		if (*cp == '-') {
			/* Step over dash */
			++cp;
			hasexpires = 0;
		} else if (isdigit((unsigned char)*cp)) {
			++cp;
			/* Step over expires timestamp */
			while (isdigit((unsigned char)*cp))
				++cp;
			hasexpires = 1;
		} else {
			e = "missing expires timestamp";
			goto eatline;
		}

		if (*cp != '~') {
			e = "missing expires/date-header timestamp seperator";
			goto eatline;
		}

		/* Step over '~' */
		++cp;

		if (!isdigit((unsigned char)*cp)) {
			e = "missing date-header timestamp";
			goto eatline;
		}
		++cp;
		while (isdigit((unsigned char)*cp))
			++cp;

		/* Look for newsgroup separator */
		if (*cp == '\t') {
			/* Step over tab and point to start of newsgroups */
			++cp;

			/* Check for newsgroup(s) */
			if (*cp == '\n')
				cp = NULL;
		} else if (*cp != '\n') {
			e = "garbage following date-header";
			goto eatline;
		} else
			cp = NULL;

		/* Check for newsgroups */
		if (cp == NULL) {
			/* No newsgroups */
			if (t < mintime) {
				/* History entry is too old; suppress */
				++oldnogroup;
				continue;
			}
			/* Keep history entry */
			++nogroup;
			goto emit;
		}

		/* Save pointer to start of first newsgroup */
		cp2 = cp;

		/*
		 * Loop through newsgroup(s).  At the top of each pass cp
		 * and cp2 both point at the start of the reference being
		 * examined.
		 */
		numkept = 0;
		for (;;) {
			keep = 0;
			/*
			 * Hash groupname and get group time.
			 * NOTE(review): hashgroup() is expected to leave
			 * cp2 at the '/' that follows the group name;
			 * confirm against its definition.
			 */
			h = hashgroup(&cp2);
			ht = hashfind(h, cp);

			if (ht < 0) {
				/* Not in feedback file */
				++nofeedback;

				/* Keep history entry if article exists */
				if (artexists(cp))
					++keep;
			} else if (ht == 0) {
				/* "Never expired" newsgroup */
				++neverexpired;

				/* Keep history entry if article exists */
				if (artexists(cp))
					++keep;
			} else if (t >= ht) {
				/* Keep if not too old */
				++keep;
			}

			/* Update number of groups kept */
			numkept += keep;

			/* Step over article number */
			if (*cp2 != '/') {
				e = "bad article name format";
				goto eatline;
			}
			++cp2;
			while (isdigit((unsigned char)*cp2))
				++cp2;

			if (*cp2 == '\n') {
				/* End of line; all done */
				if (!keep) {
					/* If multiple articles, eat space */
					if (cp[-1] == ' ')
						--cp;
					*cp++ = '\n';
					*cp = '\0';
				}
				break;
			}
			if (*cp2 != ' ') {
				e = "trailing junk";
				goto eatline;
			}
			/* Step over space */
			++cp2;
			if (!keep) {
				/*
				 * Remove newsgroup by sliding the rest of
				 * the line down over it.  Source and
				 * destination overlap, so this has to be
				 * memmove() and not memcpy().
				 */
				memmove(cp, cp2, strlen(cp2) + 1);
				cp2 = cp;
			} else
				cp = cp2;
		}

		if (numkept == 0) {
			/* History entry now has "no groups" */
			++newnogroup;

			/* Suppress if too old */
			if (t < mintime) {
				++oldnogroup;
				continue;
			}
			++nogroup;
		}
emit:
		/* Output this (possibly modified) history line */
		fputs(line, fout);
		++n;

		/*
		 * Only add to expires file if there are newsgroups.
		 * numkept is -1 when the entry had none to begin with and
		 * 0 when they were all removed, so only a positive count
		 * qualifies.
		 */
		if (fexp != NULL && hasexpires && numkept > 0)
			fputs(line, fexp);
		continue;
eatline:
		++bad;
		/*
		 * Report the input line number; n only counts lines that
		 * were written out and so falls behind as soon as any
		 * line has been dropped.
		 */
		warnx("parse error (%s) on line %d:", e, numread);
		warnx(" %s", line2);
	}
	emitted = n - 1;
	return (0);
}

/*
 * Report whether an article is present.
 *
 * The pathname is built by artpath() from spool_dir and "art" and is
 * probed with access(F_OK).  Returns 1 if the probe succeeds and 0 if
 * it fails.  At debug levels above 1 the pathname and the outcome are
 * reported with warnx().
 */
int
artexists(register char *art)
{
	register char *file;
	register int found;

	file = artpath(spool_dir, art);
	found = (access(file, F_OK) >= 0);
	if (debug > 1)
		warnx("checking (%s): %s", file, found ? "succeeds" : "fails");
	return (found);
}

/*
 * Read and parse the feedback file.
 *
 * Each line must contain a tab.  The number at the start of the line
 * is taken as a timestamp and the text following the first tab
 * (presumably a newsgroup name) has its '/'s turned into '.'s; the
 * two are then handed to "fn" as fn(text, timestamp).  The return
 * value of "fn" is ignored.
 *
 * Returns 1 on success.  Returns 0, after printing a warning, on the
 * first line that has no tab or if a read error occurred on "f".
 */
int
readfeedback(register FILE *f, int (*fn)(char *, time_t))
{
	register int n;
	register char *cp, *cp2;
	char line[LINESIZE];
	size_t len;
	time_t t;

	n = 0;
	while (fgets(line, sizeof(line), f)) {
		++n;
		cp = line;
		/*
		 * Strip trailing newline.  Check the length first so we
		 * never look in front of the buffer on an empty string.
		 */
		len = strlen(cp);
		if (len > 0 && cp[len - 1] == '\n')
			cp[len - 1] = '\0';
		/*
		 * Use strtol() rather than atoi() so the timestamp is not
		 * limited to the range of an int.
		 */
		t = (time_t)strtol(cp, NULL, 10);
		if ((cp = strchr(cp, '\t')) == NULL) {
			warnx("feedback file syntax error #1 line %d", n);
			return (0);
		}
		++cp;
		/* Convert '/'s to '.'s a la the history file */
		for (cp2 = cp; *cp2 != '\0'; ++cp2)
			if (*cp2 == '/')
				*cp2 = '.';
		(void)(*fn)(cp, t);
	}

	/* Paranoid safety check */
	if (ferror(f)) {
		warn("readfeedback()");
		return (0);
	}

	return (1);
}
