ftp.nice.ch/pub/next/unix/network/www/analog.NIHS.bs.gnutar.gz#/analog/sscanf.c

This is sscanf.c in view mode; [Download] [Up]

/*** analog 1.9beta ***/
/* Please read Readme.html, or http://www.statslab.cam.ac.uk/~sret1/analog/  */

/*** sscanf.c; functions to replace sscanf(), which is far too slow, in
     certain specific cases ***/

#include "analhea2.h"

/*** Now the scanning routines ***/

int sscanf_date(char *inputline, int *date, int *monthno, int *year, int *hr,
		int *min)
{    /* Scan a date of the form "dd/Mon/yyyy:hh:mm:ss" from the start of
        inputline (common/agent/referer log format).  A two digit year
        followed directly by ':' is accepted as well.

        On success returns 5, having stored the day in *date, the value
        returned by strtomonth() in *monthno, the year in *year and the
        hour and minute in *hr and *min.  The seconds are checked for form
        but not stored.  Otherwise the return value shows how far the scan
        got (outputs scanned before that point have already been written):
          0  day of month is not two digits
          1  no '/' after the day, the line ends within or right after the
             three month characters, or strtomonth() returned ERR
          2  no '/' after the month, or a malformed year
          3  four digit year not followed by ':', or hour is not two digits
          4  minute or second field malformed

        Every character is cast to unsigned char before it is handed to
        isdigit(): log lines may contain bytes with the top bit set, and
        passing a negative char value to isdigit() is undefined. */

  extern int strtomonth();          /* in utils.c */

  register char *cin = inputline;
  char *cout;
  char month[4];
  int i;

  /* read in day of month */
  if (!isdigit((unsigned char)*cin))
    return(0);
  *date = 10 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(0);
  *date += (*cin - '0');

  /* read in month: exactly three characters, translated by strtomonth() */
  cin++;
  if (*cin != '/')
    return(1);
  cin++;
  cout = month;
  for (i = 0; i < 3 && *cin != '\0'; i++) {
    *cout = *cin;
    cout++;
    cin++;
  }
  if (*cin == '\0')
    return(1);
  *cout = '\0';
  if ((*monthno = strtomonth(month)) == ERR)
    return(1);

  /* read in year */
  if (*cin != '/')
    return(2);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(2);
  *year = 1000 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(2);
  *year += 100 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin)) {
    if (*cin != ':')
      return(2);
    /* allow two digit years for Spyglass server: the two digits were
       accumulated as thousands and hundreds, so scale them back down and
       place them in 1970-2069 */
    *year /= 100;
    *year += 1900;
    if (*year < 1970)
      *year += 100;
  }
  else {
    *year += 10 * (*cin - '0');
    cin++;
    if (!isdigit((unsigned char)*cin))
      return(2);
    *year += (*cin - '0');
    cin++;
    if (*cin != ':')
      return(3);
  }

  /* read in hour; cin is on the ':' after the year in both cases */
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(3);
  *hr = 10 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(3);
  *hr += (*cin - '0');

  /* read in minute */
  cin++;
  if (*cin != ':')
    return(4);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  *min = 10 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  *min += (*cin - '0');

  /* don't read in second, but check it for correct form */
  cin++;
  if (*cin != ':')
    return(4);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);

  return(5);
}

int sscanf_olddate(char *inputline, int *date, int *monthno, int *year,
		   int *hr, int *min)
{    /* The same thing as sscanf_date() for NCSA old-style and error logs,
        whose dates look like "Day Mon dd hh:mm:ss yyyy".  The day of the
        week is skipped (whatever its length); the day of the month may be
        space padded (" 1"); the seconds are checked for form only.

        Returns 5 on success.  Otherwise the return value shows how far
        the scan got (earlier outputs have already been written):
          0  the line is empty, has no space after the day of week, or
             ends within or right after the three month characters
          1  strtomonth() returned ERR, no space after the month, or a
             malformed day of month
          2  no space after the day of month, or malformed hour
          3  no ':' after the hour, or malformed minute
          4  malformed seconds, no space after them, or year not 4 digits

        Characters are cast to unsigned char before being passed to
        isdigit(); passing a negative char value is undefined. */

  extern int strtomonth();          /* in utils.c */

  register char *cin = inputline;
  char *cout;
  char month[4];
  int i;

  /* The scan below steps over the first character unconditionally, so an
     empty line must be rejected here or we would read past its end. */
  if (*cin == '\0')
    return(0);

  /* ignore day of week, so scan until next ' ' */
  for (cin++; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(0);

  /* read in month */
  cin++;
  cout = month;
  for (i = 0; i < 3 && *cin != '\0'; i++) {
    *cout = *cin;
    cout++;
    cin++;
  }
  if (*cin == '\0')
    return(0);
  *cout = '\0';
  if ((*monthno = strtomonth(month)) == ERR)
    return(1);

  /* read in date; the tens digit may be a space */
  if (*cin != ' ')
    return(1);
  cin++;
  if (*cin == ' ')
    *date = 0;
  else if (isdigit((unsigned char)*cin))
    *date = 10 * (*cin - '0');
  else
    return(1);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(1);
  *date += (*cin - '0');

  /* read in hour */
  cin++;
  if (*cin != ' ')
    return(2);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(2);
  *hr = 10 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(2);
  *hr += (*cin - '0');

  /* read in minute */
  cin++;
  if (*cin != ':')
    return(3);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(3);
  *min = 10 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(3);
  *min += (*cin - '0');

  /* ignore second (but check format) */
  cin++;
  if (*cin != ':')
    return(4);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  cin++;
  if (*cin != ' ')
    return(4);

  /* read year (always four digits here) */
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  *year = 1000 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  *year += 100 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  *year += 10 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  *year += (*cin - '0');
  return(5);
}

int sscanf_common(char *inputline, char hostn[MAXSTRINGLENGTH], int *date,
		  int *monthno, int *year, int *hr, int *min,
		  char filename[MAXSTRINGLENGTH],
		  char referer[MAXSTRINGLENGTH], char agent[MAXSTRINGLENGTH],
		  int *code, char bytestr[16], size_t preflength)
{     /* Scan one 'common' format logfile entry, optionally followed by the
         quoted referer and agent fields of the NCSA combined format:

           host ... [dd/Mon/yyyy:hh:mm:ss zone] "METHOD file ..." code bytes

         The return value shows how far the scan got; outputs belonging to
         later stages are not written:
           0   no space-terminated hostname
           1   no '[' found, or sscanf_date() rejected the date
           6   no opening '"', no space after the method, or the filename
               did not end on ' ', '"' or '?' (e.g. it was truncated)
           7   no closing '"', or status code not " ddd" / " - "
           8   no space after the status code
           9   bytestr read; referer absent, unterminated or not wanted
               (none of fq, bq, Bq is set)
           10  referer read; agent absent or unterminated
           11  everything read

         The filename keeps its "?args" part only when included(filename,
         noexpandhead) is false.  preflength is subtracted from the room
         available for the filename (presumably space for a prefix the
         caller adds later -- confirm against the callers). */
  extern flag included();           /* in alias.c */

  extern flag bq, Bq, fq;
  extern struct include *noexpandhead;

  register char *cin = inputline;      /* the character we are reading */
  register char *cout;                 /* where we are putting it */
  int i;

  /* read in hostname */
  i = 0;
  for (cout = hostn; *cin != ' ' && *cin != '\0' && i < MAXSTRINGLENGTH - 1;
       cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';       /* terminate even when we are about to give up */
  if (*cin != ' ')
    return(0);

  /* scan until next '[' */
  for (cin++; *cin != '[' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(1);

  /* read in date */
  cin++;
  if (sscanf_date(cin, date, monthno, year, hr, min) < 5)
    return(1);
  /* Step over exactly what sscanf_date() accepted.  With a two digit year
     the ':' before the hour is at offset 9 and the date is 18 characters
     long; with a four digit year offset 9 holds a digit and the date is 20
     characters long.  Always skipping 20 could run past the end of a short
     line or jump over the opening '"'. */
  cin += (cin[9] == ':') ? 18 : 20;

  /* ignore timezone; so scan to next '"' */
  for ( ; *cin != '"' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(6);

  /* ignore method; so read to next ' ' */
  for (cin++; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(6);

  /* read in filename */
  cin++;
  i = 0;
  for (cout = filename; *cin != ' ' && *cin != '\0' && *cin != '"' &&
       *cin != '?' && i < MAXSTRINGLENGTH - 1 - preflength; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';
  if (*cin == '?' && !included(filename, noexpandhead)) {  /* read in args */
    for ( ; *cin != ' ' && *cin != '\0' && *cin != '"' &&
	 i < MAXSTRINGLENGTH - 1 - preflength; cin++) {
      *cout = *cin;
      cout++;
      i++;
    }
    *cout = '\0';
  }
  if (*cin != ' ' && *cin != '"' && *cin != '?')
    return(6);

  /* scan to next " */
  for ( ; *cin != '"' && *cin != '\0' ; cin++)
    ;
  if (*cin == '\0')
    return(7);

  /* read in return code; always 3 digits, or a - (successes; call them 299) */
  cin++;
  if (*cin != ' ')
    return(7);
  cin++;
  if (!isdigit((unsigned char)*cin)) {
    if (*cin == '-' && *(cin + 1) == ' ')
      *code = 299;
    else
      return(7);
  }
  else {
    *code = 100 * (*cin - '0');
    cin++;
    if (!isdigit((unsigned char)*cin))
      return(7);
    *code += 10 * (*cin - '0');
    cin++;
    if (!isdigit((unsigned char)*cin))
      return(7);
    *code += (*cin - '0');
  }

  /* read in bytestr; it is declared char[16], so copy at most 15
     characters and keep the last slot for the terminator */
  cin++;
  if (*cin != ' ')
    return (8);
  cin++;
  i = 0;
  for (cout = bytestr; *cin != ' ' && *cin != '\n' && *cin != '\0' && i < 15;
       cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';

  /* Finally, try and read in referer and agent of NCSA combined format */
  if (*cin != ' ' || (!fq && !bq && !Bq))
    return(9);
  if (*(++cin) != '"')
    return(9);
  i = 0;
  cin++;
  for (cout = referer; *cin != '\0' && *cin != '"' && i < MAXSTRINGLENGTH - 1;
       cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';
  if (*cin != '"')
    return(9);
  if (*(++cin) != ' ')
    return(10);
  if (*(++cin) != '"')
    return(10);
  i = 0;
  cin++;
  for (cout = agent; *cin != '\0' && *cin != '"' && i < MAXSTRINGLENGTH - 1;
       cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';
  if (*cin != '"')
    return(10);
  else
    return(11);

}

int sscanf_ncsaold(char *inputline, char hostn[MAXSTRINGLENGTH], int *monthno,
		   int *date, int *hr, int *min, int *year,
		   char filename[MAXSTRINGLENGTH], size_t preflength)
{    /* Scan one NCSA old-style logfile entry:

          host ... [Day Mon dd hh:mm:ss yyyy] METHOD filename

        The return value shows how far the scan got:
          0  no space-terminated hostname
          1  no '[' found, or sscanf_olddate() rejected the date
          6  the line ended before the filename was reached
          7  filename read

        The filename keeps its "?args" part only when included(filename,
        noexpandhead) is false.  preflength is subtracted from the room
        available for the filename (presumably space for a prefix the
        caller adds later -- confirm against the callers). */
  extern flag included();           /* in alias.c */

  extern struct include *noexpandhead;

  register char *cin = inputline;   /* the character we are reading */
  register char *cout;              /* where we are putting it */
  int i;

  /* read in hostname */
  i = 0;
  for (cout = hostn; *cin != ' ' && *cin != '\0' && i < MAXSTRINGLENGTH - 1;
       cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';       /* terminate even when we are about to give up */
  if (*cin != ' ')
    return(0);

  /* scan until next '[' */
  for (cin++; *cin != '[' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(1);

  /* read in date */
  cin++;
  if (sscanf_olddate(cin, date, monthno, year, hr, min) < 5)
    return(1);

  /* Step over exactly what sscanf_olddate() accepted.  It skips a day of
     week of any length (first character unconditionally, then up to the
     next space), so repeat that scan rather than assuming three letters;
     what follows, " Mon dd hh:mm:ss yyyy", is always 21 characters. */
  for (cin++; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(1);
  cin += 21;

  /* ignore method, so skip to second space */
  for ( ; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(6);

  for (cin++; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(6);

  /* finally, read in the filename */
  cin++;
  i = 0;
  for (cout = filename; *cin != ' ' && *cin != '\n' && *cin != '?' &&
       *cin != '\0' && i < MAXSTRINGLENGTH - 1 - preflength; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';
  if (*cin == '?' && !included(filename, noexpandhead)) {  /* read in args */
    /* stop at end of line as the filename loop does, so that a trailing
       newline is not copied into the name */
    for ( ; *cin != ' ' && *cin != '\n' && *cin != '\0' && *cin != '"' &&
	 i < MAXSTRINGLENGTH - 1 - preflength; cin++) {
      *cout = *cin;
      cout++;
      i++;
    }
    *cout = '\0';
  }
  return (7);

}

int sscanf_domains(char *inputline, char string1[MAXSTRINGLENGTH],
		   char string2[MAXSTRINGLENGTH])
{   /* Scan one line of the domains file into up to two strings.

       string1 is the first whitespace-delimited word, lower-cased.
       string2 is the rest of the line after the following white space, up
       to (not including) a '#', newline or end of string; it may contain
       spaces.  Each string is limited to MAXSTRINGLENGTH - 1 characters
       and is always NUL-terminated when it has been written.

       Returns the number of strings found:
         0  blank line or comment; nothing written
         1  only string1 present (or string1 filled its buffer)
         2  both strings read */
  register char *cin = inputline;
  register char *cout;
  int i;

  /* run past any white space */
  while (*cin == ' ' || *cin == '\t')
    cin++;

  /* if no strings on this line, return 0 */
  if (*cin == '#' || *cin == '\n' || *cin == '\0')
    return(0);

  /* otherwise fill up string 1; coerce domains to lower case.  The cast
     keeps tolower() defined for bytes with the top bit set. */
  i = 0;
  for (cout = string1; *cin != ' ' && *cin != '\t' && *cin != '#' &&
       *cin != '\0' && *cin != '\n' && i < MAXSTRINGLENGTH - 1; cin++) {
    *cout = tolower((unsigned char)*cin);
    cout++;
    i++;
  }

  /* terminate before any return so the caller never sees an open string */
  *cout = '\0';

  /* is that the end of the line (maybe after some white space)? */
  if (*cin == '#' || *cin == '\0' || *cin == '\n' || i == MAXSTRINGLENGTH - 1)
    return(1);

  cin++;

  while (*cin == ' ' || *cin == '\t')
    cin++;

  if (*cin == '#' || *cin == '\n' || *cin == '\0')
    return(1);

  /* otherwise fill up string 2; it has a buffer of its own, so the count
     starts again from zero */
  i = 0;
  for (cout = string2; *cin != '#' && *cin != '\n' && *cin != '\0' &&
       i < MAXSTRINGLENGTH - 1; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }

  *cout = '\0';
  return(2);

}

/* Copy one config-file argument starting at *cinp into dest (a buffer of
   MAXSTRINGLENGTH chars) and NUL-terminate it.  An argument that opens with
   ' or " runs to the matching quote (or end of line); any other argument
   runs to the next space, tab, '#' or end of line.  On return *cinp is on
   the character that stopped the copy.  Returns the number of characters
   copied, which is MAXSTRINGLENGTH - 1 when the buffer filled up. */
static int config_read_arg(char **cinp, char *dest)
{
  char *cin = *cinp;
  int n = 0;

  if (*cin == '\'' || *cin == '"') {
    char quote = *cin;

    for (cin++; *cin != '\n' && *cin != '\0' && *cin != quote &&
	 n < MAXSTRINGLENGTH - 1; cin++) {
      *dest = *cin;
      dest++;
      n++;
    }
  }
  else {
    for ( ; *cin != '#' && *cin != '\n' && *cin != '\0' &&
	 *cin != ' ' && *cin != '\t' && n < MAXSTRINGLENGTH - 1; cin++) {
      *dest = *cin;
      dest++;
      n++;
    }
  }

  *dest = '\0';
  *cinp = cin;
  return(n);
}

int sscanf_config(char *inputline, char string1[MAXSTRINGLENGTH],
		  char string2[MAXSTRINGLENGTH],
		  char string3[MAXSTRINGLENGTH])
{   /* Scan one line of the config file into up to three strings.

       string1 is the first whitespace-delimited word, copied verbatim (no
       case conversion is done here).  string2 and string3 may each be
       quoted with ' or ", in which case they may contain spaces and '#'.
       A '#' outside quotes starts a comment.  Each string has its own
       limit of MAXSTRINGLENGTH - 1 characters; scanning stops after a
       string that fills its buffer.

       Returns the number of strings found: 0 for a blank or comment line,
       1 to 3 for that many strings, and 4 if there is something after the
       third string (it is not read; the value exists for error checking). */
  char *cin = inputline;
  register char *cout;
  int i;

  /* run past any white space */
  while (*cin == ' ' || *cin == '\t')
    cin++;

  /* if no strings on this line, return 0 */
  if (*cin == '#' || *cin == '\n' || *cin == '\0')
    return(0);

  /* otherwise fill up string 1 */
  i = 0;
  for (cout = string1; *cin != ' ' && *cin != '\t' && *cin != '#' &&
       *cin != '\0' && *cin != '\n' && i < MAXSTRINGLENGTH - 1; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }

  *cout = '\0';

  /* is that the end of the line (maybe after some white space)? */
  if (*cin == '#' || *cin == '\0' || *cin == '\n' || i == MAXSTRINGLENGTH - 1)
    return(1);

  cin++;

  while (*cin == ' ' || *cin == '\t')
    cin++;

  if (*cin == '#' || *cin == '\n' || *cin == '\0')
    return(1);

  /* string 2: quoted or bare; counted from zero as it has its own buffer */
  i = config_read_arg(&cin, string2);

  /* is that the end of the line (maybe after some white space)? */
  if (*cin == '#' || *cin == '\0' || *cin == '\n' || i == MAXSTRINGLENGTH - 1)
    return(2);

  cin++;       /* step over the closing quote or the delimiter */

  while (*cin == ' ' || *cin == '\t')
    cin++;

  if (*cin == '#' || *cin == '\n' || *cin == '\0')
    return(2);

  /* string 3, same rules as string 2 */
  i = config_read_arg(&cin, string3);

  /* is that the end of the line (maybe after some white space)? */
  if (*cin == '#' || *cin == '\0' || *cin == '\n' || i == MAXSTRINGLENGTH - 1)
    return(3);

  cin++;

  while (*cin == ' ' || *cin == '\t')
    cin++;

  if (*cin == '#' || *cin == '\n' || *cin == '\0')
    return(3);

  return(4);   /* we don't ever want to read a fourth string; just know if
		  there is one for error checking */

}


int sscanf_referer(char *inputline, int *date, int *monthno, int *year,
		   int *hr, int *min, char from[MAXSTRINGLENGTH],
		   char to[MAXSTRINGLENGTH])
{   /* Scan one line of the referer log.
       The format is "[date] from -> to". The [date] is optional; when it
       is absent *date is set to 0 as a marker and the other date outputs
       are left untouched.

       from is cut at '#'; its "?args" part is kept only when
       included(from, refexpandhead) is true.  to keeps its "?args" part
       only when included(to, noexpandhead) is false.

       The return value shows how far the scan got:
         0  '[' present but sscanf_date() rejected the date
         5  date not followed by "] ", or from not followed by a space
         6  " -> " malformed, or to not followed directly by a newline
         7  whole line read */

  extern flag included();           /* in alias.c */

  extern struct include *refexpandhead, *noexpandhead;

  register char *cin = inputline;
  register char *cout;
  int i;

  /* scan the date */
  if (*cin == '[') {
    cin++;
    if (sscanf_date(cin, date, monthno, year, hr, min) < 5)
      return(0);
    /* Step over exactly what sscanf_date() accepted: 18 characters when
       the year had two digits (the ':' before the hour is then at offset
       9), otherwise 20.  Always skipping 20 would miss the ']' after a
       two digit year and could run past the end of the line. */
    cin += (cin[9] == ':') ? 18 : 20;
    if (*cin != ']')
      return(5);
    if (*(++cin) != ' ')
      return(5);
    cin++;
  }
  else
    *date = 0;   /* as marker */

  /* now fill up the from string */

  i = 0;
  for (cout = from; *cin != ' ' && *cin != '\0' && *cin != '#' &&
       *cin != '?' && i < MAXSTRINGLENGTH - 1; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';
  if (*cin == '?' && included(from, refexpandhead)) {  /* read in args */
    for ( ; *cin != ' ' && *cin != '\0' && *cin != '#' &&
	 i < MAXSTRINGLENGTH - 1; cin++) {
      *cout = *cin;
      cout++;
      i++;
    }
    *cout = '\0';
  }

  /* check at this point that the line syntax is ok; first throw away any
     fragment or unwanted args */

  if (*cin == '#' || *cin == '?') {
    while (*cin != ' ' && *cin != '\0')
      cin++;
  }

  if (*cin != ' ')
    return(5);
  cin++;
  if (*cin != '-')
    return(6);
  cin++;
  if (*cin != '>')
    return(6);
  cin++;
  if (*cin != ' ')
    return(6);
  cin++;

  /* and the to string; stop at '?' so that the args can be dropped for
     files on the noexpand list */

  i = 0;
  for (cout = to; *cin != ' ' && *cin != '\0' && *cin != '\n' &&
       *cin != '?' && i < MAXSTRINGLENGTH - 1; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';
  if (*cin == '?') {
    if (!included(to, noexpandhead)) {  /* read in args */
      for ( ; *cin != ' ' && *cin != '\0' && *cin != '\n' &&
	   i < MAXSTRINGLENGTH - 1; cin++) {
	*cout = *cin;
	cout++;
	i++;
      }
      *cout = '\0';
    }
    else {                              /* args not wanted; skip them */
      while (*cin != ' ' && *cin != '\0' && *cin != '\n')
	cin++;
    }
  }

  if (*cin != '\n')
    return(6);
  else
    return(7);

}

These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.