/*** analog 3.0      http://www.statslab.cam.ac.uk/~sret1/analog/  ***/

/*** input.c; parsing the logfiles */

#include "analhea2.h"

extern char *block_start, *block_end, *block_bell, *record_start, *pos;
extern logical termchar[];
extern unsigned int year, month, date, hr, min, code;
extern double bytes;
extern char am;
static logical stdin_used = FALSE;

/* Open the logfile or cache file described by p for reading.
   filetype is only used in messages (e.g. "logfile").
   If p -> name denotes stdin, stdin is handed out, but only once per run.
   Otherwise the file is opened normally; unless NOPIPES is defined, a name
   matching an entry in the uncompresshead list is instead read through a
   pipe running that entry's command, and p -> ispipe is set.
   Returns the open stream, or NULL if nothing could be opened. */
FILE *my_lfopen(Filelist *p, char *filetype)
{             /* open for reading */
#ifndef NOPIPES
  extern Alias *uncompresshead;
  char *cmd;
  Alias *up;
#endif

  FILE *f = NULL;  /* must stay NULL on every failure path */

  if (IS_STDIN(p -> name)) {
    if (stdin_used)
      warn('F', "stdin already used; cannot use it as %s", filetype);
    else {
      f = stdin;
      stdin_used = TRUE;
      debug('F', "Opening stdin as %s", filetype);
    }
  }
  else {
#ifdef VMS
    f = fopen(p -> name, "r", "shr=upd");
#else
    f = fopen(p -> name, "r");
#endif
    if (f == NULL)
      warn('F', "Failed to open %s %s: ignoring it", filetype, p -> name);
    else {
      debug('F', "Opening %s as %s", p -> name, filetype);
#ifndef NOPIPES
      /* the !(p -> ispipe) test stops the search after the first match */
      for (up = uncompresshead; up != NULL && !(p -> ispipe); TO_NEXT(up)) {
        if (wildmatch(p -> name, up -> from)) {
          (void)fclose(f);
          /* room for "<command> <name>" plus the terminating NUL */
          cmd = (char *)xmalloc(strlen(up -> to) + strlen(p -> name) + 2);
          (void)sprintf(cmd, "%s %s", up -> to, p -> name);
          f = popen(cmd, "r");
          free((void *)cmd);
          p -> ispipe = TRUE;
          if (f == NULL)
            warn('F', "Failed to open %s %s: ignoring it", filetype,
                 p -> name);
          else
            debug('F', "  Using %s to uncompress it", up -> to);
        }
      }
#endif
    }
  }
  return(f);
}

/* Open the file called name for reading; filetype is only used in messages.
   If name denotes stdin, stdin is handed out, but only once per run.
   Returns the open stream, or NULL if nothing could be opened. */
FILE *my_fopen(char *name, char *filetype)
{             /* open for reading */
  FILE *f = NULL;  /* must stay NULL when stdin has already been used */

  if (IS_STDIN(name)) {
    if (stdin_used)
      warn('F', "stdin already used; cannot use it as %s", filetype);
    else {
      f = stdin;
      stdin_used = TRUE;
      debug('F', "Opening stdin as %s", filetype);
    }
  }
  else {
#ifdef VMS
    f = fopen(name, "r", "shr=upd");
#else
    f = fopen(name, "r");
#endif
    if (f == NULL)
      warn('F', "Failed to open %s %s: ignoring it", filetype, name);
    else
      debug('F', "Opening %s as %s", name, filetype);
  }
  return(f);
}

/* Date format used by my_lfclose() when it prints the earliest and latest
   entry of a file in the debugging output. */
#define LFCLOSE_DATEFMT "%d/%m/%y:%H%n"
/* Finish with the logfile or cache file p, which is open on f.
   Prints the per-file statistics as debugging output, adds the file's
   counters into totdata[], *bys and *bys7, widens the time range held in
   dman, warns if the file's time range overlaps that of a file earlier in
   the lists allc/allp, and finally closes the stream.
   allp == NULL signals that p is a cache file rather than a logfile.
   filetype is only used in messages; monthname and monthlen are passed on
   to the date formatting routines.
   Returns the value returned by fclose() or pclose(). */
int my_lfclose(FILE *f, Filelist *p, Filelist *allc, Filelist *allp,
	       char *filetype, Dateman *dman, unsigned long *totdata,
	       double *bys, double *bys7, char **monthname,
	       unsigned int monthlen)
{
  unsigned long *data = p -> data;
  char *datestr;
  Filelist *ap, *nextap;
  Inputformatlist *fmt;
  choice count[INPUT_NUMBER];
  logical done;
  int i, rc;

  debug('F', "Closing %s %s", filetype,
	IS_STDIN(p -> name)?"stdin":(p -> name));
  debug('S', "Successful requests: %lu", data[LOGDATA_SUCC]);
  debug('S', "Redirected requests: %lu", data[LOGDATA_REDIR]);
  debug('S', "Failed requests: %lu", data[LOGDATA_FAIL]);
  debug('S', "Requests returning informational status code: %lu",
	data[LOGDATA_INFO]);
  debug('S', "Status code not given: %lu", data[LOGDATA_UNKNOWN]); 
  if (allp != NULL) {  /* i.e. logfile not cache file */
   debug('S', "Unwanted lines: %lu", data[LOGDATA_UNWANTED]);
    debug('S', "Corrupt lines: %lu", data[LOGDATA_CORRUPT]);
    /* warn if more than 10 lines were corrupt and they outnumber a tenth of
       all the other lines */
    if (data[LOGDATA_CORRUPT] >
	(data[LOGDATA_SUCC] + data[LOGDATA_REDIR] + data[LOGDATA_FAIL] +
	 data[LOGDATA_INFO] + data[LOGDATA_UNKNOWN] +
	 data[LOGDATA_UNWANTED]) / 10 && data[LOGDATA_CORRUPT] > 10) {
      warn('L', "Large number of corrupt lines in %s %s: "
	   "try different LOGFORMAT", filetype,
	   IS_STDIN(p -> name)?"stdin":(p -> name));
      report_logformat(stderr, p -> format, TRUE);
    }
  }
  /* accumulate this file's figures into the running totals */
  for (i = 0; i < LOGDATA_NUMBER; i++)
    totdata[i] += data[i];
  *bys += p -> bytes;
  *bys7 += p -> bytes7;
  if (p -> from <= p -> to) {
    dman -> firsttime = MIN(dman -> firsttime, p -> from);
    dman -> lasttime = MAX(dman -> lasttime, p -> to);
    if (p -> tz > 0)
      debug('S', "Times in %s offset by +%d minutes", filetype, p -> tz);
    else if (p -> tz < 0)
      debug('S', "Times in %s offset by %d minutes", filetype, p -> tz);
    /* one buffer is reused for both formatted dates */
    datestr = (char *)xmalloc((size_t)datefmtlen(LFCLOSE_DATEFMT, monthlen, 0,
						 NULL) + 1);
    debug('S', "Earliest entry in %s: %s", filetype,
	  datesprintf(datestr, LFCLOSE_DATEFMT, p -> from / 1440,
		      (p -> from % 1440) / 60, p -> from % 60, 0, 0, monthname,
		      NULL, 0, 0, NULL));
    debug('S', "Latest entry in %s: %s", filetype,
	  datesprintf(datestr, LFCLOSE_DATEFMT, p -> to / 1440,
		      (p -> to % 1440) / 60, p -> to % 60, 0, 0, monthname,
		      NULL, 0, 0, NULL));
    free((void *)datestr);
    if (allp == NULL) {
      p -> format -> count[ITEM_FILE] = 2;  /* good enough for date check */
      count[ITEM_FILE] = 2;
      p -> from += 4;  /* avoid false alerts */
    }
    else {
      /* count[i] becomes the largest count[i] over all formats of p */
      for (i = 0; i < INPUT_NUMBER; i++)
	count[i] = 0;
      for (fmt = p -> format; fmt != NULL; TO_NEXT(fmt)) {
	for (i = 0; i < INPUT_NUMBER; i++)
	  count[i] = MAX(count[i], fmt -> count[i]);
      }
    }
    /* compare p with every file that precedes it in the lists; one warning
       at most is given for each earlier file */
    for (ap = (allc == NULL)?allp:allc; ap != p; ap = nextap) {
      if (ap -> from < p -> to && p -> from < ap -> to) {
	for (done = FALSE, fmt = ap -> format; fmt != NULL && !done;
	     TO_NEXT(fmt)) {
	  for (i = 0; i < INPUT_NUMBER && !done; i++) {
	    if (fmt -> count[i] == 2 && count[i] == 2) {
	      warn('L', "%ss %s and %s overlap: possible double counting",
		   filetype, ap -> name, p -> name);
	      done = TRUE;
	    }
	  }
	}
      }
      nextap = ap -> next;
      if (nextap == NULL)
	nextap = allp;  /* run through allc then through allp */
    }
  }
  else if (data[LOGDATA_SUCC] + data[LOGDATA_REDIR] + data[LOGDATA_FAIL] +
	   data[LOGDATA_INFO] + data[LOGDATA_UNKNOWN] > 0)
    debug('S', "No times in %s", filetype);

  /* NOTE(review): if NOPIPES is defined and p -> ispipe is set, rc is
     returned without ever having been assigned -- confirm that ispipe can
     never be set in that configuration */
  if (!(p -> ispipe))
    rc = fclose(f);    /* Not much can go wrong with fclose. I hope. */
#ifndef NOPIPES
  else if (feof(f)) {
    if ((rc = pclose(f)) != 0)
      warn('F', "Problems uncompressing %s %s", filetype, p -> name);
  }
  else
    rc = pclose(f);  /* not reached EOF: pclose will return broken pipe */
#endif
  return(rc);
}

/* Close f, first noting the fact in the debugging output. name and filetype
   are used only for that message. Returns whatever fclose() returns. */
int my_fclose(FILE *f, char *name, char *filetype)
{
  char *shown = name;

  if (IS_STDIN(name))
    shown = "stdin";
  debug('F', "Closing %s %s", filetype, shown);
  return(fclose(f));
}

/* Prepare to read the logfile logfilep: reset the parser's global state and
   open the file. A logfile with no format attached is skipped and NULL is
   returned; otherwise the result of my_lfopen() is returned. */
FILE *logfile_init(Filelist *logfilep) {
  if (logfilep -> format != NULL) {
    code = 0;
    bytes = 0;
    year = 0;
    pos = NULL;
    return(my_lfopen(logfilep, "logfile"));
  }
  debug('F', "Ignoring logfile %s, which contains no items being analysed",
        logfilep -> name);
  return(NULL);
}

/* Read up to length bytes from f into the buffer at start, and record the
   new end of data in block_end (with block_bell BLOCK_EPSILON before it).
   Returns EOF if nothing at all was read, otherwise OK. */
choice getmoredata(FILE *f, char *start, size_t length) {
  size_t got = fread((void *)start, 1, length, f);

  block_end = start + got;
  block_bell = block_end - BLOCK_EPSILON;  /* cached for later comparisons */
  return((got == 0)?EOF:OK);
}

/* Read the next non-empty line of f into the buffer at start, without its
   line terminator and always NUL-terminated.
   Not very efficient: only for use during initialisation.
   Leading '\r' and '\n' characters are skipped first. At most BLOCKSIZE - 1
   characters are stored.
   Returns EOF if the file ends before any character of a line is found,
   FALSE if the buffer filled up before the line ended, and TRUE otherwise
   (including when the file ends in the middle of a line). */
choice getnextline(FILE *f, char *start) {
  char *s = start;
  int ch;  /* int, not char, so that a 0xFF data byte is distinct from EOF */

  ch = getc(f);
  while (ch == '\r' || ch == '\n')
    ch = getc(f);    /* run past any new line */
  if (ch == EOF)
    return(EOF);
  *s = (char)ch;
  for (;;) {         /* read in to next new line */
    ch = getc(f);
    s++;
    if (ch == '\r' || ch == '\n' || ch == EOF || s >= start + BLOCKSIZE - 1)
      break;         /* the character just read is dropped */
    *s = (char)ch;
  }
  *s = '\0';
  if (s == start + BLOCKSIZE - 1)
    return(FALSE);
  return(TRUE);
}

/* Make room in the input block: slide the current record (everything from
   record_start to block_end) down to block_start, refill the space after it
   from f, and adjust pos and record_start to follow the moved data. */
void shunt_data(FILE *f) {
  size_t kept = (size_t)(block_end - record_start);  /* bytes to preserve */

  (void)memmove((void *)block_start, (void *)record_start, kept);
  (void)getmoredata(f, block_start + kept, (size_t)BLOCKSIZE - kept);
  pos -= record_start - block_start;
  record_start = block_start;
}

/* Convert a three-letter month abbreviation (either case) at m to a month
   number, 0 for January up to 11 for December. Only as many letters as are
   needed to tell the months apart are examined, and the string need not end
   after the abbreviation. Returns ERR if no month is recognised. */
int strtomonth(char *m)  /* convert 3 letter month abbrev. to int */
{
  int monthno = ERR;

  switch (m[0]) {
  case 'A':
  case 'a':
    switch (m[1]) {
    case 'p':
    case 'P':
      monthno = 3;
      break;
    case 'u':
    case 'U':
      monthno = 7;
      break;
    }
    break;
  case 'D':
  case 'd':
    monthno = 11;
    break;
  case 'F':
  case 'f':
    monthno = 1;
    break;
  case 'J':
  case 'j':
    switch (m[1]) {
    case 'a':
    case 'A':
      monthno = 0;
      break;
    case 'u':
    case 'U':
      switch (m[2]) {
      case 'l':
      case 'L':
        monthno = 6;
        break;
      case 'n':
      case 'N':
        monthno = 5;
        break;
      }
      break;
    }
    break;
  case 'M':
  case 'm':
    /* Mar and May differ only in the third letter; don't look at it if the
       string has already ended after the first */
    if (m[1] != '\0') {
      switch (m[2]) {
      case 'r':
      case 'R':
        monthno = 2;
        break;
      case 'y':
      case 'Y':
        monthno = 4;
        break;
      }
    }
    break;
  case 'N':
  case 'n':
    monthno = 10;
    break;
  case 'O':
  case 'o':
    monthno = 9;
    break;
  case 'S':
  case 's':
    monthno = 8;
    break;
  }
  return(monthno);
}

/* Input function: read a month abbreviation at pos into the unsigned int
   that arg points to, using strtomonth(). On success pos moves on three
   characters and TRUE is returned; otherwise FALSE. f and c are unused. */
choice parsemonth(FILE *f, void *arg, char c) {
  unsigned int *m = (unsigned int *)arg;

  *block_end = '\0';
  *m = (unsigned int)strtomonth(pos);
  if (*m != (unsigned int)ERR) {
    pos += 3;
    return(TRUE);
  }
  return(FALSE);
}

/* Input function: read a number of one or two decimal digits at pos into
   the unsigned int that arg points to, moving pos past the digits used.
   Returns FALSE if pos is not at a digit, otherwise TRUE.
   f and c are unused. */
choice parseuint2(FILE *f, void *arg, char c) {
  unsigned int *x = (unsigned int *)arg;

  *block_end = '\0';
  /* cast: isdigit() is undefined for negative char values */
  if (!isdigit((unsigned char)*pos))
    return(FALSE);
  *x = *pos - '0';
  pos++;

  if (!isdigit((unsigned char)*pos))
    return(TRUE);
  *x *= 10;
  *x += *pos - '0';
  pos++;
  return(TRUE);
}

/* Input function: read a numerical month of one or two digits at pos and
   store it, less one (the internal representation of months starts at 0),
   in the unsigned int that arg points to. pos moves past the digits used.
   Returns FALSE if pos is not at a digit or the number read is 0, otherwise
   TRUE. f and c are unused. */
choice parsenmonth(FILE *f, void *arg, char c) {  /* nearly same as uint2 */
  unsigned int *x = (unsigned int *)arg;

  *block_end = '\0';
  /* cast: isdigit() is undefined for negative char values */
  if (!isdigit((unsigned char)*pos))
    return(FALSE);
  *x = *pos - '0';
  pos++;

  if (isdigit((unsigned char)*pos)) {
    *x *= 10;
    *x += *pos - '0';
    pos++;
  }
  if (*x == 0)
    return(FALSE);
  (*x)--;   /* to convert to internal month representation */
  return(TRUE);
}

/* Input function: read exactly two decimal digits at pos into the unsigned
   int that arg points to, moving pos past them. Returns FALSE as soon as a
   non-digit is met (pos then stays at that character), otherwise TRUE.
   f and c are unused. */
choice parseuint2strict(FILE *f, void *arg, char c) {
  unsigned int *x = (unsigned int *)arg;

  *block_end = '\0';
  /* cast: isdigit() is undefined for negative char values */
  if (!isdigit((unsigned char)*pos))
    return(FALSE);
  *x = 10 * (*pos - '0');
  pos++;

  if (!isdigit((unsigned char)*pos))
    return(FALSE);
  *x += *pos - '0';
  pos++;
  return(TRUE);
}

/* Input function: read exactly three decimal digits at pos into the
   unsigned int that arg points to, moving pos past them. A single '-' is
   accepted instead and stored as 200. Returns FALSE as soon as an
   unexpected character is met, otherwise TRUE. f and c are unused. */
choice parseuint3strict(FILE *f, void *arg, char c) {
  unsigned int *x = (unsigned int *)arg;

  *block_end = '\0';
  /* cast: isdigit() is undefined for negative char values */
  if (!isdigit((unsigned char)*pos)) {
    if (*pos == '-') {   /* NB uint3strict only used for status codes */
      pos++;
      *x = 200;
      return(TRUE);
    }
    else
      return(FALSE);
  }
  *x = 100 * (*pos - '0');
  pos++;

  if (!isdigit((unsigned char)*pos))
    return(FALSE);
  *x += 10 * (*pos - '0');
  pos++;

  if (!isdigit((unsigned char)*pos))
    return(FALSE);
  *x += *pos - '0';
  pos++;
  return(TRUE);
}

/* Input function: read exactly four decimal digits at pos into the unsigned
   int that arg points to, moving pos past them. Returns FALSE as soon as a
   non-digit is met (pos then stays at that character), otherwise TRUE.
   f and c are unused. */
choice parseuint4strict(FILE *f, void *arg, char c) {
  unsigned int *x = (unsigned int *)arg;

  *block_end = '\0';
  /* cast: isdigit() is undefined for negative char values */
  if (!isdigit((unsigned char)*pos))
    return(FALSE);
  *x = 1000 * (*pos - '0');
  pos++;

  if (!isdigit((unsigned char)*pos))
    return(FALSE);
  *x += 100 * (*pos - '0');
  pos++;

  if (!isdigit((unsigned char)*pos))
    return(FALSE);
  *x += 10 * (*pos - '0');
  pos++;

  if (!isdigit((unsigned char)*pos))
    return(FALSE);
  *x += *pos - '0';
  pos++;
  return(TRUE);
}

/* Input function: read an unsigned decimal integer of any length at pos
   into the double that arg points to, moving pos past it. A single '-' is
   accepted instead and stored as 0. Returns FALSE if pos is at neither a
   digit nor '-', otherwise TRUE. f and c are unused. */
choice parseudint(FILE *f, void *arg, char c) {
  double *x = (double *)arg;

  *block_end = '\0';
  if (*pos == '-') {
    *x = 0.0;  /* because used for bytes (only) */
    pos++;
    return(TRUE);
  }
  /* cast: isdigit() is undefined for negative char values */
  if (!isdigit((unsigned char)*pos))
    return(FALSE);
  *x = *pos - '0';
  pos++;

  while (isdigit((unsigned char)*pos)) {
    *x *= 10;
    *x += *pos - '0';
    pos++;
  }
  return(TRUE);
}

choice parseyear(FILE *f, void *arg, char c) {
  unsigned int *y = (unsigned int *)arg;
  logical rc;

  *block_end = '\0';
  rc = parseuint2strict(f, y, c);
  if (*y >= 70)
    *y += 1900;
  else
    *y += 2000;
  return(rc);
}

/* Input function: read an am/pm marker. The character at pos is stored in
   lower case in the char that arg points to; if it is 'a' or 'p', pos moves
   past it and TRUE is returned, otherwise FALSE. f and c are unused. */
choice parseam(FILE *f, void *arg, char c) {
  char *d = (char *)arg;

  *block_end = '\0';
  /* cast: tolower() is undefined for negative char values */
  *d = (char)tolower((unsigned char)*pos);
  if (*d == 'a' || *d == 'p') {
    pos++;
    return(TRUE);
  }
  else
    return(FALSE);
}

choice parsecode(FILE *f, void *arg, char c) {
  unsigned int *x = (unsigned int *)arg;
  char *d = pos, e;

  *block_end = '\0';
  if (parsejunk(f, NULL, c) == FALSE || pos == d)
    return(FALSE);
  e = *(pos - 1);
  *(pos - 1) = '\0';
  *x = IGNORE_CODE;
  if (d[0] == 'O' && d[1] == 'K')
    *x = 200;
  else if (STREQ(d, "ERR!"))
    *x = 404;
  else if (STREQ(d, "PRIV"))
    *x = 401;
  else if (*(d++) == 'g' && *(d++) == 'e' && *(d++) == 't' && *(d++) == ' ' &&
	   *(d++) == 'f') {
    if (STREQ(d, "ile"))
      *x = 200;
    else if (STREQ(d, "ailed"))
      *x = 499;
  }
  *(pos - 1) = e;
  return(TRUE);
}

choice parsejunk(FILE *f, void *arg, char c) { /* NB allows empty strings */

  *block_end = c;
  if (c == WHITESPACE) {
    termchar[(unsigned char)' '] = TRUE;
    termchar[(unsigned char)'\t'] = TRUE;
  }
  else
    termchar[(unsigned char)c] = TRUE;
  while (!termchar[(unsigned char)(*pos)])
    pos++;

  if (pos == block_end) {
    if (record_start == block_start) {
      if (c == WHITESPACE) {
	termchar[(unsigned char)' '] = FALSE;
	termchar[(unsigned char)'\t'] = FALSE;
      }
      else if (c != '\r' && c != '\n' && c != '\0')
	termchar[(unsigned char)c] = FALSE;
      return(FALSE);
    }
    shunt_data(f);
    *block_end = c;
    while (!termchar[(unsigned char)(*pos)])
      pos++;
    if (pos == block_end) {
      if (c == WHITESPACE) {
	termchar[(unsigned char)' '] = FALSE;
	termchar[(unsigned char)'\t'] = FALSE;
      }
      else if (c != '\r' && c != '\n' && c != '\0')
	termchar[(unsigned char)c] = FALSE;
      return(FALSE);
    }
  }
  if (c == WHITESPACE) {
    termchar[(unsigned char)' '] = FALSE;
    termchar[(unsigned char)'\t'] = FALSE;
  }
  else if (c != '\r' && c != '\n' && c != '\0')
    termchar[(unsigned char)c] = FALSE;
  if (*pos != c && !(c == '\n' && *pos == '\r') &&
      !(c == WHITESPACE && (*pos == ' ' || *pos == '\t')))
    return(FALSE);
  if (c == '\n' || c == '\r') {
    while ((*pos == '\n' || *pos == '\r') && pos < block_end)
      pos++;
  }
  else if (c != WHITESPACE)
    pos++;
  return(TRUE);
}

/* Input function: skip a run of one or more spaces and tabs at pos.
   If the end of the block is reached and the record is not already at the
   start of the block, the record is shunted there and the scan continues.
   Returns FALSE if pos is not at a space or tab to begin with, otherwise
   TRUE. arg and c are unused. */
choice parsespace(FILE *f, void *arg, char c) {

  *block_end = '\0';   /* sentinel: stops the scans below at end of data */
  if (pos == block_end && record_start != block_start) {
    shunt_data(f);
    *block_end = '\0'; /* shunt_data() moved block_end: renew the sentinel */
  }
  if (*pos != '\t' && *pos != ' ')
    return(FALSE);
  while (*pos == '\t' || *pos == ' ')
    pos++;
  if (pos == block_end && record_start != block_start) {
    shunt_data(f);
    *block_end = '\0';
    while (*pos == '\t' || *pos == ' ')
      pos++;
  }
  return(TRUE);
}

/* Input function: skip a run of zero or more spaces and tabs at pos.
   If the end of the block is reached and the record is not already at the
   start of the block, the record is shunted there and the scan continues.
   Always returns TRUE. arg and c are unused. */
choice parseoptspace(FILE *f, void *arg, char c) {

  *block_end = '\0';   /* sentinel: stops the scans below at end of data */
  if (pos == block_end && record_start != block_start) {
    shunt_data(f);
    *block_end = '\0'; /* shunt_data() moved block_end: renew the sentinel */
  }
  while (*pos == '\t' || *pos == ' ')
    pos++;
  if (pos == block_end && record_start != block_start) {
    shunt_data(f);
    *block_end = '\0';
    while (*pos == '\t' || *pos == ' ')
      pos++;
  }
  return(TRUE);
}

/* Input function: read the end of a line, which may be preceded by spaces
   and tabs, and move pos past all the terminating characters that follow.
   '\0' is temporarily not counted as a terminating character while doing
   so. The record is shunted to the start of the block if the end of the
   block gets in the way. Returns FALSE if something other than white space
   and a line end is found, otherwise TRUE. arg and c are unused. */
choice parsenewline(FILE *f, void *arg, char c) {
  /* 'a' is the sentinel at the end of the block: it stops every scan here */
  *block_end = 'a';
  switch (*pos) {
  case '\n':
  case '\r':
  case ' ':
  case '\t':
    break;
  default:
    return(FALSE);
  }
  for ( ; *pos == ' ' || *pos == '\t'; pos++)
    ;
  if (pos == block_end && record_start != block_start) {
    shunt_data(f);
    *block_end = 'a';
    for ( ; *pos == ' ' || *pos == '\t'; pos++)
      ;
  }

  if (!(*pos == '\n' || *pos == '\r'))
    return(FALSE);
  termchar[(unsigned char)'\0'] = FALSE;
  for ( ; termchar[(unsigned char)(*pos)]; pos++)
    ;
  if (pos == block_end && record_start != block_start) {
    shunt_data(f);
    *block_end = 'a';
    for ( ; termchar[(unsigned char)(*pos)]; pos++)
      ;
  }
  termchar[(unsigned char)'\0'] = TRUE;
  return(TRUE);
}

/* Move pos forward to the next terminating character, not counting '\0' as
   one for the duration. If the end of the block is reached and the record
   is not already at the start of the block, the record is shunted there and
   the scan continues. */
void parsenonnewline(FILE *f) {

  *block_end = '\n';   /* sentinel, so that the scan must stop */
  termchar[(unsigned char)'\0'] = FALSE;
  for ( ; !termchar[(unsigned char)(*pos)]; pos++)
    ;

  if (pos == block_end && record_start != block_start) {
    shunt_data(f);
    *block_end = '\n';
    for ( ; !termchar[(unsigned char)(*pos)]; pos++)
      ;
  }
  termchar[(unsigned char)'\0'] = TRUE;
}

/* Input function: read a field up to delimiter c and store a NUL-terminated
   copy of it in space obtained from the Memman that arg points to.
   The scanning follows the same pattern as parsejunk(): c (or space and tab
   for WHITESPACE) is temporarily marked as a terminating character, and if
   the end of the block is reached the record is shunted to the start of the
   block and the scan continues.
   Returns FALSE, without storing anything, if no terminating character is
   found within the block or the one found is not the delimiter asked for;
   otherwise TRUE, with pos moved past the delimiter (past a whole run of
   '\r' and '\n' when c is one of those; not moved past WHITESPACE). */
choice parsestring(FILE *f, void *arg, char c) { /* NB allows empty strings */
  Memman *m = (Memman *)arg;
  size_t length = 0;

  char *string_start;
  string_start = pos;

  *block_end = c;  /* sentinel, so that the scan must stop */
  if (c == WHITESPACE) {
    termchar[(unsigned char)' '] = TRUE;
    termchar[(unsigned char)'\t'] = TRUE;
  }
  else
    termchar[(unsigned char)c] = TRUE;
  while (!termchar[(unsigned char)(*pos)]) {
    pos++;
    length++;
  }

  if (pos == block_end) {
    if (record_start == block_start) {  /* no room to shunt: give up */
      if (c == WHITESPACE) {
	termchar[(unsigned char)' '] = FALSE;
	termchar[(unsigned char)'\t'] = FALSE;
      }
      else if (c != '\r' && c != '\n' && c != '\0')
	termchar[(unsigned char)c] = FALSE;
      return(FALSE);
    }
    /* shunt_data() is about to move the record down to block_start, so move
       our pointer into it by the same amount */
    string_start -= record_start - block_start;
    shunt_data(f);
    *block_end = c;
    while (!termchar[(unsigned char)(*pos)]) {
      pos++;
      length++;
    }
    if (pos == block_end) {
      if (c == WHITESPACE) {
	termchar[(unsigned char)' '] = FALSE;
	termchar[(unsigned char)'\t'] = FALSE;
      }
      else if (c != '\r' && c != '\n' && c != '\0')
	termchar[(unsigned char)c] = FALSE;
      return(FALSE);
    }
  }

  /* undo the temporary marking; '\r', '\n' and '\0' are left marked */
  if (c == WHITESPACE) {
    termchar[(unsigned char)' '] = FALSE;
    termchar[(unsigned char)'\t'] = FALSE;
  }
  else if (c != '\r' && c != '\n' && c != '\0')
    termchar[(unsigned char)c] = FALSE;
  /* we stopped at some terminating character: check it is the right one */
  if (*pos != c && !(c == '\n' && *pos == '\r') &&
      !(c == WHITESPACE && (*pos == ' ' || *pos == '\t')))
    return(FALSE);

  (void)memcpy(submalloc(m, length + 1), (void *)string_start, length);
  *((char *)(m -> next_pos) - 1) = '\0'; /* = curr_pos + length */
  if (c == '\n' || c == '\r') {
    while ((*pos == '\n' || *pos == '\r') && pos < block_end)
      pos++;
  }
  else if (c != WHITESPACE)
    pos++;
  return(TRUE);
}

/* Input function: read a filename with parsestring() and convert it in
   place: each ':' before any '?' becomes '/', and a '/' is put in front if
   the name doesn't already start with one.
   Returns FALSE if parsestring() fails, otherwise TRUE. */
choice parsemacfile(FILE *f, void *arg, char c) {
  Memman *m = (Memman *)arg;
  char *d;
  size_t len;

  if (parsestring(f, arg, c) == FALSE)
    return(FALSE);
  for (d = m -> curr_pos; *d != '\0' && *d != '?'; d++) {
    if (*d == ':')
      *d = '/';
  }
  if (*((char *)(m -> curr_pos)) != '/') {   /* insert initial slash */
    d = m -> curr_pos;
    m -> next_pos = m -> curr_pos;  /* give the string's space back ... */
    len = strlen(d);
    /* ... and ask for one byte more. The new space may begin where the old
       string still lies, so the copy to one byte further on can overlap its
       source: memmove, not memcpy. */
    (void)memmove((void *)((char *)submalloc(m, len + 2) + 1), (void *)d,
                  len + 1);
    *((char *)(m -> curr_pos)) = '/';
    *((char *)(m -> next_pos) - 1) = '\0';
  }
  return(TRUE);
}

/* Input function for the referrer in old referrer logs. As parsestring(),
   except that delimiter c (presumably space: not \n or \r) only ends the
   field when it is immediately followed by "->". The field is copied,
   NUL-terminated, into space obtained from the Memman that arg points to.
   Returns FALSE, without storing anything, if the field's end is not found
   within the block; otherwise TRUE, with pos moved past the delimiter. */
choice parseref(FILE *f, void *arg, char c) {
  Memman *m = (Memman *)arg;
  size_t length = 0;

  char *string_start;
  string_start = pos;

  /* The sentinel must be a terminating character other than c: the scan
     steps over a c which isn't followed by "->", so a sentinel of c would
     let it run on past the end of the block. */
  *block_end = '\r';
  if (c == WHITESPACE) {
    termchar[(unsigned char)' '] = TRUE;
    termchar[(unsigned char)'\t'] = TRUE;
  }
  else
    termchar[(unsigned char)c] = TRUE;
  while (!termchar[(unsigned char)(*pos)] ||
         (*pos == c && (*(pos + 1) != '-' || *(pos + 2) != '>'))) {
    pos++;
    length++;
  }

  if (pos == block_end) {
    if (record_start == block_start) {  /* no room to shunt: give up */
      if (c == WHITESPACE) {
        termchar[(unsigned char)' '] = FALSE;
        termchar[(unsigned char)'\t'] = FALSE;
      }
      else if (c != '\r' && c != '\n' && c != '\0')
        termchar[(unsigned char)c] = FALSE;
      return(FALSE);
    }
    /* shunt_data() is about to move the record down to block_start, so move
       our pointer into it by the same amount */
    string_start -= record_start - block_start;
    shunt_data(f);
    *block_end = '\r';  /* same sentinel as on the first scan */
    while (!termchar[(unsigned char)(*pos)] ||
           (*pos == c && (*(pos + 1) != '-' || *(pos + 2) != '>'))) {
      pos++;
      length++;
    }
    if (pos == block_end) {
      if (c == WHITESPACE) {
        termchar[(unsigned char)' '] = FALSE;
        termchar[(unsigned char)'\t'] = FALSE;
      }
      else if (c != '\r' && c != '\n' && c != '\0')
        termchar[(unsigned char)c] = FALSE;
      return(FALSE);
    }
  }

  /* undo the temporary marking; '\r', '\n' and '\0' are left marked */
  if (c == WHITESPACE) {
    termchar[(unsigned char)' '] = FALSE;
    termchar[(unsigned char)'\t'] = FALSE;
  }
  else if (c != '\r' && c != '\n' && c != '\0')
    termchar[(unsigned char)c] = FALSE;
  if (*pos != c)
    return(FALSE);

  (void)memcpy(submalloc(m, length + 1), (void *)string_start, length);
  *((char *)(m -> next_pos) - 1) = '\0';
  pos++;

  return(TRUE);
}

/* Input function: test whether the character at pos is c. pos moves on one
   character whatever the answer. f and arg are unused. */
choice checkchar(FILE *f, void *arg, char c) {
  char got = *pos;

  pos++;
  return(got == c);
}

/* Input function: work out the format of the logfile from the line at pos,
   and install it by calling configlogfmt() on the global logformat.
   c says what kind of line this is:
     '0', '1'  first line of a logfile whose format is to be detected
     '2'       list of field names from a WebSTAR header
     '3'       list of field names from a W3 extended header
     '4'       format string from a Netscape header
   For '2' to '4' a format string is built up in workspace, and pos is left
   at the start of the next line.
   Returns NEWLOGFMT on success; BADLOGFMT if the format can't be detected,
   the dates are ambiguous, or a Netscape '%' is not matched; FALSE if the
   end of the line is not found within the block. arg is unused. */
choice parselogfmt(FILE *f, void *arg, char c) {
  extern Inputformatlist *logformat;
  extern char *workspace;  /* assume large enough, as usual */
  char *d, *e;
  unsigned int x, y;

  logformat -> used = TRUE;  /* so as to start afresh */
  /* find the end of the line (d), shunting the record if necessary, and
     terminate the line there so that the string functions can be used */
  *block_end = '\n';
  d = strpbrk(pos, "\r\n\0");
  if (d == block_end) {
    shunt_data(f);
    *block_end = '\n';
    d = strpbrk(pos, "\r\n\0");
    if (d == block_end)
      return(FALSE);
  }
  *d = '\0';
  switch (c) {
  case '0':  /* DEFAULT format. These are caught and translated earlier: this
		is just in case we have specified DEFAULTLOGFORMAT DEFAULT */
  case '1':  /* AUTO format */
    /* every way out of this case puts the '\n' back at d */
    if (*pos == '!' && *(pos + 1) == '!') {
      debug('F', "  Detect that it's in WebSTAR format");
      configlogfmt((void *)&logformat, NULL, "WEBSTAR", NULL, -1);
    }
    else if (chrn(pos, ',') == 15) {
      /* read the first two numbers of the date, two characters after the
	 second comma, to decide which is the day and which the month */
      e = strchr(strchr(pos, ',') + 1, ',') + 2;
      x = (*(e++) - '0');
      if (*e != '/')
	x = x * 10 + (*(e++) - '0');
      y = (*(++e) - '0');
      if (*(++e) != '/')
	y = y * 10 + (*e - '0');
      if (x <= 12 && y > 12) {
	debug('F',
	      "  Detect that it's in Microsoft format (North American dates)");
	configlogfmt((void *)&logformat, NULL, "MICROSOFT-NA", NULL, -1);
      }
      else if (x > 12 && y <= 12) {
	debug('F',
	      "  Detect that it's in Microsoft format (international dates)");
	configlogfmt((void *)&logformat, NULL, "MICROSOFT-INT", NULL, -1);
      }
      else {
	warn('F', "Microsoft logfile with ambiguous dates: use LOGFORMAT MICROSOFT-NA or LOGFORMAT MICROSOFT-INT");
	*d = '\n';
	return(BADLOGFMT);
      }
    }
    else if (*pos == 'f' && *(pos + 1) == 'o' && *(pos + 2) == 'r' &&
	     *(pos + 3) == 'm' && *(pos + 4) == 'a' && *(pos + 5) == 't' &&
	     *(pos + 6) == '=') {
      debug('F', "  Reading it in Netscape format");
      configlogfmt((void *)&logformat, NULL, "NETSCAPE", NULL, -1);
    }
    else if (isdigit(*pos) && isdigit(*(pos + 3)) && isdigit(*(pos + 9)) &&
	     chrn(pos, '\t') >= 4) {
      /* same date test as above, on the field after the first tab */
      e = strchr(pos, '\t') + 1;
      x = (*(e++) - '0');
      if (*e != '/')
	x = x * 10 + (*(e++) - '0');
      y = (*(++e) - '0');
      if (*(++e) != '/')
	y = y * 10 + (*e - '0');
      if (x <= 12 && y > 12) {
	debug('F', "  Detect that it's in Netpresenz format (North American dates)");
	configlogfmt((void *)&logformat, NULL, "NETPRESENZ-NA", NULL, -1);
      }
      else if (x > 12 && y <= 12) {
	debug('F', "  Detect that it's in Netpresenz format (international dates)");
	configlogfmt((void *)&logformat, NULL, "NETPRESENZ-INT", NULL, -1);
      }
      else {
	warn('F', "Netpresenz logfile with ambiguous dates: use LOGFORMAT NETPRESENZ-NA or LOGFORMAT NETPRESENZ-INT");
	*d = '\n';
	return(BADLOGFMT);
      }
    }
    else if (strstr(pos, " -> ") != NULL) {
      debug('F', "  Detect that it's in referrer log format");
      configlogfmt((void *)&logformat, NULL, "REFERRER", NULL, -1);
    }
    else if (*pos == '[' && (*(pos + 21) == ']' || *(pos + 27) == ']')) {
      debug('F', "  Detect that it's in browser log format");
      configlogfmt((void *)&logformat, NULL, "BROWSER", NULL, -1);
    }
    else if (*pos == '#') {
      debug('F', "  Detect that it's in W3 extended format");
      configlogfmt((void *)&logformat, NULL, "EXTENDED", NULL, -1);
    }
    else if ((e = strchr(pos + 6, '[')) != NULL && *(e + 27) == ']' &&
	     strchr(pos, '"') == e + 29) {
      /* the result of chrn(e + 23, '"') tells common from combined format */
      x = chrn(e + 23, '"');
      if (x == 2) {
	debug('F', "  Detect that it's in common log format");
	configlogfmt((void *)&logformat, NULL, "COMMON", NULL, -1);
      }
      else if (x == 6) {
	debug('F', "  Detect that it's in NCSA combined format");
	configlogfmt((void *)&logformat, NULL, "COMBINED", NULL, -1);
      }
      else {
	*d = '\n';
	return(BADLOGFMT);
      }
    }
    else {
      *d = '\n';
      return(BADLOGFMT);
    }
    *d = '\n';
    break;
  case '2':  /* WebSTAR format */
    /* pos is already start of format proper */
    /* translate each field name in turn, separating the results by tabs;
       unrecognised names become %j */
    workspace[0] = '\0';
    pos = strtok(pos, " \t");
    while (pos != NULL) {
      if (!IS_EMPTY_STRING(workspace))
	(void)strcat(workspace, "\t");
      if (strcaseeq(pos, "COMMON_LOG_FORMAT"))
	(void)strcpy(workspace, "COMMON");
      else if (strcaseeq(pos, "DATE"))
	(void)strcat(workspace, "%m/%d/%y");
      else if (strcaseeq(pos, "TIME"))
	(void)strcat(workspace, "%W%h:%n:%j");
      else if (strcaseeq(pos, "RESULT"))
	(void)strcat(workspace, "%C");
      else if (strcaseeq(pos, "SC-STATUS") || strcaseeq(pos, "CS-STATUS"))
	(void)strcat(workspace, "%c");
      else if (strcaseeq(pos, "URL") || strcaseeq(pos, "CS-URI-STEM") ||
	       strcaseeq(pos, "CS-URI"))
	(void)strcat(workspace, "%R");
      else if (strcaseeq(pos, "BYTES") || strcaseeq(pos, "BYTES_SENT"))
	(void)strcat(workspace, "%b");
      else if (strcaseeq(pos, "HOSTNAME") || strcaseeq(pos, "CS-HOST") ||
	       strcaseeq(pos, "CS-IP") || strcaseeq(pos, "C-IP") ||
	       strcaseeq(pos, "C-DNS") || strcaseeq(pos, "CS-DNS"))
	(void)strcat(workspace, "%S");
      else if (strcaseeq(pos, "REFERER"))
	(void)strcat(workspace, "%f");
      else if (strcaseeq(pos, "CS(REFERER)"))
	(void)strcat(workspace, "\"%f\"");
      else if (strcaseeq(pos, "AGENT"))
	(void)strcat(workspace, "%B");
      else if (strcaseeq(pos, "CS(USER-AGENT)"))
	(void)strcat(workspace, "\"%B\"");
      else if (strcaseeq(pos, "CS(HOST)"))
	(void)strcat(workspace, "\"%v\"");
      else if (strcaseeq(pos, "SEARCH_ARGS") || strcaseeq(pos, "CS-URI-QUERY"))
	(void)strcat(workspace, "%q");
      else if (strcaseeq(pos, "USER"))
	(void)strcat(workspace, "%u");
      else
	(void)strcat(workspace, "%j");
      pos = strtok((char *)NULL, " \t");
    }
    configlogfmt((void *)&logformat, NULL, "WEBSTAR", NULL, -1);
    configlogfmt((void *)&logformat, NULL, workspace, NULL, -3);
    pos = d + 1;  /* start at next line */
    (void)parsenewline(f, NULL, '\0');
    break;
  case '3':  /* W3 extended format */
    /* as for WebSTAR, but the translated fields are separated by %w */
    workspace[0] = '\0';
    pos = strtok(pos, " \t");
    while (pos != NULL) {
      if (!IS_EMPTY_STRING(workspace))
	(void)strcat(workspace, "%w");
      if (strcaseeq(pos, "DATE"))
	(void)strcat(workspace, "%Y-%m-%d");
      else if (strcaseeq(pos, "TIME"))
	(void)strcat(workspace, "%h:%n:%j");
      else if (strcaseeq(pos, "BYTES") || strcaseeq(pos, "SC-BYTES"))
	(void)strcat(workspace, "%b");
      else if (strcaseeq(pos, "SC-STATUS"))
	(void)strcat(workspace, "%c");
      else if (strcaseeq(pos, "CS-IP") || strcaseeq(pos, "C-IP") ||
	       strcaseeq(pos, "C-DNS") || strcaseeq(pos, "CS-DNS"))
	(void)strcat(workspace, "%S");
      else if (strcaseeq(pos, "CS-URI-STEM") || strcaseeq(pos, "CS-URI"))
	(void)strcat(workspace, "%R");
      else if (strcaseeq(pos, "CS(REFERER)"))
	(void)strcat(workspace, "\"%f\"");
      else if (strcaseeq(pos, "CS(USER-AGENT)"))
	(void)strcat(workspace, "\"%B\"");
      else if (strcaseeq(pos, "CS(HOST)"))
	(void)strcat(workspace, "\"%v\"");
      else if (strcaseeq(pos, "CS-HOST") || strcaseeq(pos, "S-IP") ||
	       strcaseeq(pos, "S-DNS"))
	(void)strcat(workspace, "%v");
      else if (strcaseeq(pos, "CS-URI-QUERY"))
	(void)strcat(workspace, "%q");
      else if (strcaseeq(pos, "CS(FROM)"))
	(void)strcat(workspace, "\"%u\"");
      else if (strcaseeq(pos, "CS-USERNAME"))
	(void)strcat(workspace, "%u");
      else
	(void)strcat(workspace, "%j");
      pos = strtok((char *)NULL, " \t");
    }
    configlogfmt((void *)&logformat, NULL, "EXTENDED", NULL, -1);
    configlogfmt((void *)&logformat, NULL, workspace, NULL, -3);
    pos = d + 1;
    (void)parsenewline(f, NULL, '\0');
    break;
  case '4':  /* Netscape format */
    /* copy the line into workspace, replacing each %name% by our own code
       for that field; unrecognised names become %j */
    workspace[0] = '\0';
    while (*pos != '\0') {
      if (*pos != '%') {
	/* ordinary character: append it to workspace */
	e = strchr(workspace, '\0');
	*e = *(pos++);
	*(e + 1) = '\0';
      }
      else {
	/* NOTE(review): unlike the other BADLOGFMT returns, this one does
	   not put the '\n' back at d -- confirm that is harmless */
	if ((e = strchr(++pos, '%')) == NULL)
	  return(BADLOGFMT);
	else *e = '\0';
	if (STREQ(pos, "Ses->client.ip"))
	  (void)strcat(workspace, "%S");
	else if (STREQ(pos, "Req->vars.auth-user"))
	  (void)strcat(workspace, "%u");
	else if (STREQ(pos, "SYSDATE"))
	  (void)strcat(workspace, "%d/%M/%Y:%h:%n:%j");
	else if (STREQ(pos, "Req->reqpb.clf-request"))
	  (void)strcat(workspace, "%j %r %j");
	else if (STREQ(pos, "Req->srvhdrs.clf-status"))
	  (void)strcat(workspace, "%c");
	else if (STREQ(pos, "Req->srvhdrs.content-length"))
	  (void)strcat(workspace, "%b");
	else if (STREQ(pos, "Req->headers.referer"))
	  (void)strcat(workspace, "%f");
	else if (STREQ(pos, "Req->headers.user-agent"))
	  (void)strcat(workspace, "%B");
	else
	  (void)strcat(workspace, "%j");
	pos = e + 1;
      }
    }
    configlogfmt((void *)&logformat, NULL, "NETSCAPE", NULL, -1);
    configlogfmt((void *)&logformat, NULL, workspace, NULL, -3);
    pos = d + 1;
    (void)parsenewline(f, NULL, '\0');
    break;
  }
  return(NEWLOGFMT);
}

/* Read one record from f according to format, a list of input functions
   ending at an entry whose inpfns is NULL. Each function is called in turn,
   starting from record_start. The first block of data is read in if this
   has not yet been done (pos == NULL).
   Returns TRUE if every function returned TRUE; EOF if there is no more
   data; otherwise the value returned by the function which did not return
   TRUE, after discarding any strings which earlier functions stored. */
choice parsenextrecord(FILE *f, Inputformat *format) {
  Inputformat *item, *done;
  logical rc;

  if (pos == NULL) {   /* nothing read yet */
    if (getmoredata(f, block_start, BLOCKSIZE) == EOF)
      return(EOF);
    record_start = block_start;
  }

  pos = record_start;

  item = format;
  while (item -> inpfns != NULL) {

    if (pos > block_bell) {   /* getting close to the end of the block */
      if (record_start != block_start)
        shunt_data(f);
      if (pos == block_end)
        return(EOF);
    }

    rc = item -> inpfns -> fn(f, item -> inpfns -> opt, item -> sep);
    if (rc == TRUE) {
      TO_NEXT(item);
      continue;
    }

    /* reset strings; NB item returned !TRUE so didn't allocate */
    for (done = format; done != item; TO_NEXT(done)) {
      if (done -> inpfns -> fn == &parsestring ||
          done -> inpfns -> fn == &parsemacfile ||
          done -> inpfns -> fn == &parseref)
        ((Memman *)(done -> inpfns -> opt)) -> next_pos =
          ((Memman *)(done -> inpfns -> opt)) -> curr_pos;
    }
    return(rc);
  }
  return(TRUE);
}

/* Helper for parseconfline(): move *cp past spaces and tabs. Returns
   non-zero if what follows is the end of the line or a '#' comment. */
static int confline_skip_ws(char **cp) {
  char *c = *cp;

  while (*c == ' ' || *c == '\t')
    c++;
  *cp = c;
  return(*c == '\0' || *c == '#');
}

/* Helper for parseconfline(): take one word starting at *cp, NUL-terminate
   it in place and make *tok point to it. If quotable is non-zero, a word
   starting with ', " or ( runs to the matching ', " or ) (or to the end of
   the line) and excludes the quotes; any other word runs to the next space,
   tab or '#'. *cp is left after the word. Returns non-zero if the word was
   ended by the end of the line or by a '#'. */
static int confline_take(char **cp, char **tok, int quotable) {
  char *c = *cp;
  char closer;
  int last;

  if (quotable && (*c == '\'' || *c == '"' || *c == '(')) {
    closer = (*c == '(')?')':(*c);
    c++;
    *tok = c;
    while (*c != closer && *c != '\0')
      c++;
  }
  else {
    *tok = c;
    while (*c != ' ' && *c != '\t' && *c != '\0' && *c != '#')
      c++;
  }
  last = (*c == '\0' || *c == '#');
  *c = '\0';
  *cp = last?c:(c + 1);
  return(last);
}

/* Split the configuration line s, in place, into a command and up to two
   arguments. *cmd, *arg1 and *arg2 are set to point into s for as many of
   them as are present; the others are not touched. Arguments, but not the
   command, may be quoted with '', "" or (). A '#' outside quotes starts a
   comment.
   Returns -1 for a blank or comment-only line, otherwise the number of
   arguments found, 0 to 2, or 3 if there is anything after the second. */
int parseconfline(char *s, char **cmd, char **arg1, char **arg2) {
  char **slot[3];
  char *cur = s;
  int n;

  slot[0] = cmd;
  slot[1] = arg1;
  slot[2] = arg2;

  if (confline_skip_ws(&cur))
    return(-1);
  for (n = 0; n < 3; n++) {
    if (confline_take(&cur, slot[n], n > 0))
      return(n);
    if (confline_skip_ws(&cur))
      return(n);
  }
  return(3);
}

/* Read the next configuration command from f into block_start and split it
   with parseconfline(). Blank lines and comments are skipped; lines of 255
   characters or more are skipped with a warning.
   *cmd, *arg1 and *arg2 are NULL unless set by parseconfline().
   Returns EOF at the end of the file, otherwise parseconfline()'s result. */
int nextconfline(FILE *f, char **cmd, char **arg1, char **arg2) {
  /* if 255 increased, so must u[] be in confline() */
  int rc;

  *arg2 = NULL;
  *arg1 = NULL;
  *cmd = NULL;
  for (;;) {
    if (getnextline(f, block_start) == EOF)
      return(EOF);
    if (strlen(block_start) < 255) {
      rc = parseconfline(block_start, cmd, arg1, arg2);
      if (rc != -1)
        return(rc);
      /* o/wise line was blank or a comment: go round again */
    }
    else {
      *(block_start + 70) = '\0';   /* only quote the start of the line */
      warn('C', "Ignoring long configuration line starting\n%s", block_start);
    }
  }
}

char *nextlngstr(FILE *f, char *name) {  /* similar logic to nextconfline() */

  while (TRUE) {
    if (getnextline(f, block_start) == EOF)
      error("language file %s too short", name);
    if (strlen(block_start) >= 255)
      error("language file %s contains excessively long lines", name);
    if (block_start[0] != '#' || block_start[1] != '#')
      return(block_start);
  }
}

/* Read the next line of the DNS file f into block_start and split it into
   its three fields, separated by single spaces: a time, which must be a
   non-zero number and is stored in *timec, a name and an alias. *name and
   *alias are set to point into block_start.
   Returns EOF at the end of the file, FALSE (after a warning) if the line
   is too long or not of this form, otherwise TRUE. */
choice nextdnsline(FILE *f, timecode_t *timec, char **name, char **alias) {
  char *timestr = NULL;

  *name = NULL;
  *alias = NULL;
  if (getnextline(f, block_start) == EOF)
    return(EOF);
  if (strlen(block_start) >= 255) {
    *(block_start + 70) = '\0';   /* only quote the start of the line */
    warn('C', "Ignoring long line in DNS file starting\n%s", block_start);
    return(FALSE);
  }
  /* cast: isdigit() is undefined for negative char values */
  if ((timestr = strtok(block_start, " ")) == NULL ||
      !isdigit((unsigned char)*timestr) ||
      (*name = strtok((char *)NULL, " ")) == NULL ||
      (*alias = strtok((char *)NULL, " ")) == NULL ||
      strtok((char *)NULL, " ") != NULL ||
      (*timec = strtoul(timestr, (char **)NULL, 10)) == 0) {
    /* NB strtok() has by now written NULs over the separators it found, so
       the line as printed stops at the first of those */
    warn('C', "Ignoring corrupt line in DNS file looking like\n%s",
         block_start);
    return(FALSE);
  }
  return(TRUE);
}

/* Read the domains file f to its end. Each line consists of a domain,
   white space, and a description which may itself contain white space.
   For each line, confline() is called with the command DOMOUTPUTALIAS and
   the arguments "<domain>" and ".<domain> (<description>)". Lines of 250
   characters or more, and lines without a description, are skipped with a
   warning. */
void process_domainsfile(FILE *f, Options *op) {
  /* size of v is bounded because u in confline is */
  char *s, *t, *u, v[256];

  while (TRUE) {
    if (getnextline(f, block_start) == EOF)
      return;
    if (strlen(block_start) >= 250) {
      *(block_start + 70) = '\0';   /* only quote the start of the line */
      warn('C', "Ignoring long line in domains file starting\n%s",
           block_start);
    }
    else {
      for (s = block_start; *s == ' ' || *s == '\t'; s++)
        ;   /* s = start of domain */
      for (t = s; *t != ' ' && *t != '\t' && *t != '\0'; t++)
        ;   /* t = end of domain */
      if (*t == '\0')
        warn('C', "Ignoring incomplete line in domains file\n%s", block_start);
      else {
        *t = '\0';
        for (t++; *t == ' ' || *t == '\t'; t++)
          ;   /* t = start of description */
        /* Strip trailing white space from the description. The search for
           the end of the line must start from t: starting from block_start
           would only find the NUL just written after the domain. */
        for (u = strchr(t, '\0');
             u > t && (*(u - 1) == ' ' || *(u - 1) == '\t'); u--)
          ;
        *u = '\0';
        /* the line has under 250 characters, so this fits in v */
        (void)sprintf(v, ".%s (%s)", s, t);
        confline(op, "DOMOUTPUTALIAS", s, v, -1);
      }
    }
  }
}
