This is sscanf.c in view mode; [Download] [Up]
/*** analog 1.9beta ***/
/* Please read Readme.html, or http://www.statslab.cam.ac.uk/~sret1/analog/ */
/*** sscanf.c; functions to replace sscanf(), which is far too slow, in
certain specific cases ***/
#include "analhea2.h"
/*** Now the scanning routines ***/
/* Read two decimal digits starting at p into *value.
   Returns 1 on success and 0 if either character is not a digit. As in the
   hand-written code this replaces, *value already holds the tens part if
   only the second character is bad. The second character is looked at only
   when the first is a digit, so the terminating '\0' is never passed. */
static int date_two_digits(const char *p, int *value)
{
  /* <ctype.h> functions need an unsigned char value; a plain char holding
     a byte above 0x7f may be negative, which is undefined behaviour */
  if (!isdigit((unsigned char)p[0]))
    return(0);
  *value = 10 * (p[0] - '0');
  if (!isdigit((unsigned char)p[1]))
    return(0);
  *value += (p[1] - '0');
  return(1);
}

/* Scan a date of the form dd/Mon/yyyy:hh:mm:ss (or dd/Mon/yy:hh:mm:ss)
   from the start of inputline, as found in common/agent/referer logs.

   On success *date, *monthno, *year, *hr and *min are filled in; the seconds
   are checked for form but not returned. The month number is whatever
   strtomonth() returns for the three month characters.

   The return value says how far the scan got:
     0  the day of month was not two digits
     1  the month was missing, cut short, or rejected by strtomonth()
     2  the year was malformed
     3  the ':' after a four digit year, or the hour, was malformed
     4  the minute or second was malformed
     5  the whole date was read successfully
   Output arguments belonging to fields after the failure are not written. */
int sscanf_date(char *inputline, int *date, int *monthno, int *year, int *hr,
                int *min)
{
  extern int strtomonth();  /* in utils.c */
  register char *cin = inputline;
  char month[4];
  int secs;   /* seconds are validated but deliberately not returned */
  int i;

  /* day of month */
  if (!date_two_digits(cin, date))
    return(0);
  cin += 2;

  /* month: exactly three characters after the first '/' */
  if (*cin != '/')
    return(1);
  cin++;
  for (i = 0; i < 3 && *cin != '\0'; i++, cin++)
    month[i] = *cin;
  if (*cin == '\0')   /* the line ended inside or right after the month */
    return(1);
  month[3] = '\0';
  if ((*monthno = strtomonth(month)) == ERR)
    return(1);

  /* year: the first two digits are compulsory */
  if (*cin != '/')
    return(2);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(2);
  *year = 1000 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(2);
  *year += 100 * (*cin - '0');
  cin++;
  if (isdigit((unsigned char)*cin)) {   /* four digit year */
    *year += 10 * (*cin - '0');
    cin++;
    if (!isdigit((unsigned char)*cin))
      return(2);
    *year += (*cin - '0');
    cin++;
    if (*cin != ':')
      return(3);
  }
  else if (*cin == ':') {  /* allow two digit years for Spyglass server */
    *year /= 100;          /* the two digits were stored as yy00 */
    *year += 1900;
    if (*year < 1970)      /* 00-69 are taken to mean 2000-2069 */
      *year += 100;
  }
  else
    return(2);

  /* hour; cin is on the ':' that ends the year */
  cin++;
  if (!date_two_digits(cin, hr))
    return(3);
  cin += 2;

  /* minute */
  if (*cin != ':')
    return(4);
  cin++;
  if (!date_two_digits(cin, min))
    return(4);
  cin += 2;

  /* don't return the second, but check it for correct form */
  if (*cin != ':')
    return(4);
  cin++;
  if (!date_two_digits(cin, &secs))
    return(4);

  return(5);
}
/* Read two decimal digits starting at p into *value.
   Returns 1 on success and 0 if either character is not a digit; *value
   holds the tens part if only the second character is bad. The second
   character is read only when the first is a digit, so the terminating
   '\0' is never passed. */
static int olddate_two_digits(const char *p, int *value)
{
  /* <ctype.h> functions need an unsigned char value; a negative plain char
     is undefined behaviour */
  if (!isdigit((unsigned char)p[0]))
    return(0);
  *value = 10 * (p[0] - '0');
  if (!isdigit((unsigned char)p[1]))
    return(0);
  *value += (p[1] - '0');
  return(1);
}

/* Scan a date of the form "Day Mon dd hh:mm:ss yyyy" from the start of
   inputline, as used in NCSA old-style and error logs. The day of the week
   is skipped without being checked, and the first digit of dd may be a
   space.

   On success *date, *monthno, *year, *hr and *min are filled in; the seconds
   are checked for form but not returned.

   The return value says how far the scan got:
     0  the line ended before the month had been read
     1  the month was rejected by strtomonth(), or the date was malformed
     2  the hour was malformed
     3  the minute was malformed
     4  the second or the year was malformed
     5  the whole date was read successfully */
int sscanf_olddate(char *inputline, int *date, int *monthno, int *year,
                   int *hr, int *min)
{
  extern int strtomonth();  /* in utils.c */
  register char *cin = inputline;
  char month[4];
  int secs;   /* seconds are validated but deliberately not returned */
  int i;

  /* An empty line has no day of the week to skip; stepping over its
     terminator would read beyond the end of the string. */
  if (*cin == '\0')
    return(0);

  /* ignore day of week, so scan until next ' ' (the first character is
     always skipped, whatever it is) */
  for (cin++; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(0);

  /* read in month: exactly three characters */
  cin++;
  for (i = 0; i < 3 && *cin != '\0'; i++, cin++)
    month[i] = *cin;
  if (*cin == '\0')
    return(0);
  month[3] = '\0';
  if ((*monthno = strtomonth(month)) == ERR)
    return(1);

  /* read in date; a leading space stands for a zero tens digit */
  if (*cin != ' ')
    return(1);
  cin++;
  if (*cin == ' ')
    *date = 0;
  else if (isdigit((unsigned char)*cin))
    *date = 10 * (*cin - '0');
  else
    return(1);
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(1);
  *date += (*cin - '0');

  /* read in hour */
  cin++;
  if (*cin != ' ')
    return(2);
  cin++;
  if (!olddate_two_digits(cin, hr))
    return(2);
  cin += 2;

  /* read in minute */
  if (*cin != ':')
    return(3);
  cin++;
  if (!olddate_two_digits(cin, min))
    return(3);
  cin += 2;

  /* ignore second (but check format) */
  if (*cin != ':')
    return(4);
  cin++;
  if (!olddate_two_digits(cin, &secs))
    return(4);
  cin += 2;
  if (*cin != ' ')
    return(4);

  /* read year: always four digits here */
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  *year = 1000 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  *year += 100 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  *year += 10 * (*cin - '0');
  cin++;
  if (!isdigit((unsigned char)*cin))
    return(4);
  *year += (*cin - '0');

  return(5);
}
/* Scan one 'common' format logfile entry, optionally followed by the
   quoted referer and agent fields of the NCSA combined format.

   inputline   the log line
   hostn       receives the first space-delimited field
   date..min   receive the date found inside [...] (see sscanf_date())
   filename    receives the requested name; the "?args" part is kept only
               when included(filename, noexpandhead) is false
   referer     receives the first quoted field after the byte count
   agent       receives the second quoted field after the byte count
   code        receives the three digit status, or 299 if the field is "-"
   bytestr     receives the byte count as text (at most 15 characters)
   preflength  space to leave unused at the end of filename

   The return value says how far the scan got:
     0      no space-terminated host name fitted in hostn
     1      no '[' was found, or the date was malformed
     6      the request (opening quote, method, filename) was malformed
     7      the closing quote or the status code was malformed
     8      no space followed the status code
     9      status and bytes were read; no referer was read (none present,
            malformed, or none of the flags fq, bq, Bq is set)
     10     the referer was read but the agent was not
     11     everything including the agent was read */
int sscanf_common(char *inputline, char hostn[MAXSTRINGLENGTH], int *date,
                  int *monthno, int *year, int *hr, int *min,
                  char filename[MAXSTRINGLENGTH],
                  char referer[MAXSTRINGLENGTH], char agent[MAXSTRINGLENGTH],
                  int *code, char bytestr[16], size_t preflength)
{
  extern flag included();   /* in alias.c */
  extern flag bq, Bq, fq;
  extern struct include *noexpandhead;
  register char *cin = inputline;  /* the character we are reading */
  register char *cout;             /* where we are putting it */
  int i;

  /* read in hostname */
  i = 0;
  for (cout = hostn; *cin != ' ' && *cin != '\0' && i < MAXSTRINGLENGTH - 1;
       cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';   /* terminate even when we are about to give up */
  if (*cin != ' ')
    return(0);

  /* scan until next '[' */
  for (cin++; *cin != '[' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(1);

  /* read in date */
  cin++;
  if (sscanf_date(cin, date, monthno, year, hr, min) < 5)
    return(1);
  /* Step over exactly what sscanf_date() accepted. A date with a two digit
     year (it has ':' at offset 9) is 18 characters long, not 20, and a
     fixed step of 20 could carry us past the end of a short line. */
  cin += (cin[9] == ':') ? 18 : 20;

  /* ignore timezone; so scan to next '"' */
  for ( ; *cin != '"' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(6);

  /* ignore method; so read to next ' ' */
  for (cin++; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(6);

  /* read in filename, up to any '?' */
  cin++;
  i = 0;
  for (cout = filename; *cin != ' ' && *cin != '\0' && *cin != '"' &&
       *cin != '?' && i < MAXSTRINGLENGTH - 1 - preflength; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';
  if (*cin == '?' && !included(filename, noexpandhead)) {  /* read in args */
    for ( ; *cin != ' ' && *cin != '\0' && *cin != '"' &&
         i < MAXSTRINGLENGTH - 1 - preflength; cin++) {
      *cout = *cin;
      cout++;
      i++;
    }
    *cout = '\0';
  }
  /* anything else here means the name was cut short by the length limit
     or by the end of the line */
  if (*cin != ' ' && *cin != '"' && *cin != '?')
    return(6);

  /* scan to next " */
  for ( ; *cin != '"' && *cin != '\0' ; cin++)
    ;
  if (*cin == '\0')
    return(7);

  /* read in return code; always 3 digits, or a - (successes; call them 299) */
  cin++;
  if (*cin != ' ')
    return(7);
  cin++;
  /* <ctype.h> functions need an unsigned char value */
  if (isdigit((unsigned char)*cin)) {
    *code = 100 * (*cin - '0');
    cin++;
    if (!isdigit((unsigned char)*cin))
      return(7);
    *code += 10 * (*cin - '0');
    cin++;
    if (!isdigit((unsigned char)*cin))
      return(7);
    *code += (*cin - '0');
  }
  else if (*cin == '-' && *(cin + 1) == ' ')
    *code = 299;
  else
    return(7);

  /* read in bytestr; bytestr has 16 elements, so copy at most 15
     characters and keep the last element for the terminator */
  cin++;
  if (*cin != ' ')
    return (8);
  cin++;
  i = 0;
  for (cout = bytestr; *cin != ' ' && *cin != '\n' && *cin != '\0' &&
       i < 16 - 1; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';

  /* Finally, try and read in referer and agent of NCSA combined format */
  if (*cin != ' ' || (!fq && !bq && !Bq))
    return(9);
  if (*(++cin) != '"')
    return(9);
  i = 0;
  cin++;
  for (cout = referer; *cin != '\0' && *cin != '"' && i < MAXSTRINGLENGTH - 1;
       cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';
  if (*cin != '"')
    return(9);

  if (*(++cin) != ' ')
    return(10);
  if (*(++cin) != '"')
    return(10);
  i = 0;
  cin++;
  for (cout = agent; *cin != '\0' && *cin != '"' && i < MAXSTRINGLENGTH - 1;
       cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';
  if (*cin != '"')
    return(10);
  else
    return(11);
}
/* Scan one NCSA old-style logfile entry.

   inputline   the log line
   hostn       receives the first space-delimited field
   monthno, date, hr, min, year
               receive the date found after '[' (see sscanf_olddate())
   filename    receives the requested name; the "?args" part is kept only
               when included(filename, noexpandhead) is false
   preflength  space to leave unused at the end of filename

   The return value says how far the scan got:
     0  no space-terminated host name fitted in hostn
     1  no '[' was found, or the date was malformed
     6  the line ended before the filename was reached
     7  the filename was read */
int sscanf_ncsaold(char *inputline, char hostn[MAXSTRINGLENGTH], int *monthno,
                   int *date, int *hr, int *min, int *year,
                   char filename[MAXSTRINGLENGTH], size_t preflength)
{
  extern flag included();   /* in alias.c */
  extern struct include *noexpandhead;
  register char *cin = inputline;  /* the character we are reading */
  register char *cout;             /* where we are putting it */
  int i;

  /* read in hostname */
  i = 0;
  for (cout = hostn; *cin != ' ' && *cin != '\0' && i < MAXSTRINGLENGTH - 1;
       cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';   /* terminate even when we are about to give up */
  if (*cin != ' ')
    return(0);

  /* scan until next '[' */
  for (cin++; *cin != '[' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(1);

  /* read in date; a line that stops right after the '[' has none */
  cin++;
  if (*cin == '\0')
    return(1);
  if (sscanf_olddate(cin, date, monthno, year, hr, min) < 5)
    return(1);
  /* Step over the date: 24 characters when the day of the week has three
     letters. sscanf_olddate() does not check the length of the day name,
     so a shorter name makes the date shorter; never step over the
     terminator. */
  for (i = 0; i < 24 && *cin != '\0'; i++)
    cin++;

  /* ignore method, so skip to second space */
  for ( ; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(6);
  for (cin++; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(6);

  /* finally, read in the filename, up to any '?' */
  cin++;
  i = 0;
  for (cout = filename; *cin != ' ' && *cin != '\n' && *cin != '?' &&
       *cin != '\0' && i < MAXSTRINGLENGTH - 1 - preflength; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';
  if (*cin == '?' && !included(filename, noexpandhead)) {  /* read in args */
    /* stop at '\n' just as the filename loop does, so that the end of the
       line is not copied into the name */
    for ( ; *cin != ' ' && *cin != '\n' && *cin != '\0' && *cin != '"' &&
         i < MAXSTRINGLENGTH - 1 - preflength; cin++) {
      *cout = *cin;
      cout++;
      i++;
    }
    *cout = '\0';
  }
  return (7);
}
/* Scan one line of the domains file into up to two strings.

   string1 receives the first blank-delimited word, converted to lower case.
   string2 receives the rest of the line after the following blanks, up to a
   '#', a newline or the end of the string (embedded blanks are kept).

   Returns 0 if the line is blank or a comment, 1 if only string1 was found
   (or it reached the length limit), and 2 if both strings were read.
   string1 is always terminated when 1 or 2 is returned.

   Note that the length counter is not reset between the two strings, so
   together they hold at most MAXSTRINGLENGTH - 1 characters. */
int sscanf_domains(char *inputline, char string1[MAXSTRINGLENGTH],
                   char string2[MAXSTRINGLENGTH])
{
  register char *cin = inputline;
  register char *cout;
  int i;

  /* run past any white space */
  while (*cin == ' ' || *cin == '\t')
    cin++;

  /* if no strings on this line, return 0 */
  if (*cin == '#' || *cin == '\n' || *cin == '\0')
    return(0);

  /* otherwise fill up string 1; coerce domains to lower case */
  i = 0;
  for (cout = string1; *cin != ' ' && *cin != '\t' && *cin != '#' &&
       *cin != '\0' && *cin != '\n' && i < MAXSTRINGLENGTH - 1; cin++) {
    /* tolower() needs an unsigned char value; a negative plain char is
       undefined behaviour */
    *cout = (char)tolower((unsigned char)*cin);
    cout++;
    i++;
  }
  /* terminate before any return, so that the caller never sees an
     unterminated string1 */
  *cout = '\0';

  /* is that the end of the line (maybe after some white space)? */
  if (*cin == '#' || *cin == '\0' || *cin == '\n' || i == MAXSTRINGLENGTH - 1)
    return(1);
  cin++;
  while (*cin == ' ' || *cin == '\t')
    cin++;
  if (*cin == '#' || *cin == '\n' || *cin == '\0')
    return(1);

  /* otherwise fill up string 2 */
  for (cout = string2; *cin != '#' && *cin != '\n' && *cin != '\0' &&
       i < MAXSTRINGLENGTH - 1; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';
  return(2);
}
/* Non-zero if c ends the useful part of a config line: a comment start,
   a newline or the end of the string. */
static int config_at_end(char c)
{
  return(c == '#' || c == '\n' || c == '\0');
}

/* Return the first position at or after p that is not a space or tab. */
static char *config_skip_blanks(char *p)
{
  while (*p == ' ' || *p == '\t')
    p++;
  return(p);
}

/* Copy one value from src to dest and terminate it. A value that starts
   with ' or " runs to the matching quote (or newline / end of string);
   any other value runs to the next blank, '#', newline or end of string.
   *used counts every character copied so far on this line and copying
   stops when it reaches MAXSTRINGLENGTH - 1. Returns the position at which
   copying stopped (the closing quote itself, for a quoted value). */
static char *config_copy_value(char *src, char *dest, int *used)
{
  if (*src == '\'' || *src == '"') {
    char quote = *src;

    src++;
    while (*src != '\n' && *src != '\0' && *src != quote &&
           *used < MAXSTRINGLENGTH - 1) {
      *dest++ = *src++;
      (*used)++;
    }
  }
  else {
    while (!config_at_end(*src) && *src != ' ' && *src != '\t' &&
           *used < MAXSTRINGLENGTH - 1) {
      *dest++ = *src++;
      (*used)++;
    }
  }
  *dest = '\0';
  return(src);
}

/* Scan one line of the config file into up to three strings.

   string1 is the first blank-delimited word, copied unchanged. string2 and
   string3 may each be enclosed in single or double quotes, in which case
   they may contain blanks and '#'.

   Returns the number of strings found, from 0 (blank or comment line) to
   3; 4 means that something follows the third string. A fourth string is
   never stored; the value exists so that callers can detect it. The scan
   also stops, returning the number of strings read so far, once
   MAXSTRINGLENGTH - 1 characters have been copied in total: the counter is
   shared by all three strings. */
int sscanf_config(char *inputline, char string1[MAXSTRINGLENGTH],
                  char string2[MAXSTRINGLENGTH],
                  char string3[MAXSTRINGLENGTH])
{
  char *in = config_skip_blanks(inputline);
  char *out = string1;
  int used = 0;

  if (config_at_end(*in))
    return(0);

  /* first string: never quoted */
  while (*in != ' ' && *in != '\t' && !config_at_end(*in) &&
         used < MAXSTRINGLENGTH - 1) {
    *out++ = *in++;
    used++;
  }
  *out = '\0';
  if (config_at_end(*in) || used == MAXSTRINGLENGTH - 1)
    return(1);
  in = config_skip_blanks(in + 1);
  if (config_at_end(*in))
    return(1);

  /* second string */
  in = config_copy_value(in, string2, &used);
  if (config_at_end(*in) || used == MAXSTRINGLENGTH - 1)
    return(2);
  in = config_skip_blanks(in + 1);   /* in + 1 steps over blank or quote */
  if (config_at_end(*in))
    return(2);

  /* third string */
  in = config_copy_value(in, string3, &used);
  if (config_at_end(*in) || used == MAXSTRINGLENGTH - 1)
    return(3);
  in = config_skip_blanks(in + 1);
  if (config_at_end(*in))
    return(3);

  return(4);
}
/* Scan one line of the referer log.
   The format is "[date] from -> to" followed by a newline. The [date] is
   optional; when it is absent *date is set to 0 as a marker.

   from  receives the referring URL. Text from '#' onwards is dropped, and
         the "?args" part is kept only when included(from, refexpandhead)
         is true.
   to    receives the URL referred to. The "?args" part is kept only when
         included(to, noexpandhead) is false; otherwise it is skipped.

   The return value says how far the scan got:
     0  a '[' was present but the date was malformed
     5  the date was not closed by "] ", or no space followed the from URL
     6  the " -> " separator was malformed, or the to URL was not ended by
        a newline (which includes a to URL cut short by the length limit)
     7  the whole line was read */
int sscanf_referer(char *inputline, int *date, int *monthno, int *year,
                   int *hr, int *min, char from[MAXSTRINGLENGTH],
                   char to[MAXSTRINGLENGTH])
{
  extern flag included();   /* in alias.c */
  extern struct include *refexpandhead, *noexpandhead;
  register char *cin = inputline;
  register char *cout;
  int i;

  /* scan the date */
  if (*cin == '[') {
    cin++;
    if (sscanf_date(cin, date, monthno, year, hr, min) < 5)
      return(0);
    /* Step over exactly what sscanf_date() accepted. A date with a two
       digit year (it has ':' at offset 9) is 18 characters long, not 20,
       and a fixed step of 20 could carry us past the end of the line. */
    cin += (cin[9] == ':') ? 18 : 20;
    if (*cin != ']')
      return(5);
    if (*(++cin) != ' ')
      return(5);
    cin++;
  }
  else
    *date = 0;   /* as marker */

  /* now fill up the from string */
  i = 0;
  for (cout = from; *cin != ' ' && *cin != '\0' && *cin != '#' &&
       *cin != '?' && i < MAXSTRINGLENGTH - 1; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';
  if (*cin == '?' && included(from, refexpandhead)) {  /* read in args */
    for ( ; *cin != ' ' && *cin != '\0' && *cin != '#' &&
         i < MAXSTRINGLENGTH - 1; cin++) {
      *cout = *cin;
      cout++;
      i++;
    }
    *cout = '\0';
  }

  /* check at this point that the line syntax is ok; first step over any
     fragment or args we chose not to keep */
  if (*cin == '#' || *cin == '?') {
    while (*cin != ' ' && *cin != '\0')
      cin++;
  }
  if (*cin != ' ')
    return(5);
  cin++;
  if (*cin != '-')
    return(6);
  cin++;
  if (*cin != '>')
    return(6);
  cin++;
  if (*cin != ' ')
    return(6);
  cin++;

  /* and the to string; stop at '?' so that the args can be treated
     separately (without this the args test below could never succeed) */
  i = 0;
  for (cout = to; *cin != ' ' && *cin != '\0' && *cin != '\n' &&
       *cin != '?' && i < MAXSTRINGLENGTH - 1; cin++) {
    *cout = *cin;
    cout++;
    i++;
  }
  *cout = '\0';
  if (*cin == '?') {
    if (!included(to, noexpandhead)) {  /* read in args */
      for ( ; *cin != ' ' && *cin != '\0' && *cin != '\n' &&
           i < MAXSTRINGLENGTH - 1; cin++) {
        *cout = *cin;
        cout++;
        i++;
      }
      *cout = '\0';
    }
    else {  /* args not wanted for this file: step over them */
      while (*cin != ' ' && *cin != '\0' && *cin != '\n')
        cin++;
    }
  }
  if (*cin != '\n')
    return(6);
  else
    return(7);
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.