This is sscanf.c in view mode; [Download] [Up]
/*** analog 1.9beta ***/
/* Please read Readme.html, or http://www.statslab.cam.ac.uk/~sret1/analog/ */

/*** sscanf.c; functions to replace sscanf(), which is far too slow, in
     certain specific cases ***/

#include "analhea2.h"

/*** Private helpers ***/

/* The <ctype.h> classifiers are only defined for values representable as
   unsigned char (or EOF); log lines may contain bytes >= 0x80, which are
   negative where plain char is signed. */
#define SS_ISDIGIT(c) (isdigit((unsigned char)(c)))

#define SS_DATE_LEN 20     /* "dd/Mon/yyyy:hh:mm:ss" */
#define SS_OLDDATE_LEN 24  /* "Www Mon dd hh:mm:ss yyyy" */
#define SS_BYTESTR_SIZE 16 /* size of the bytestr[] argument of sscanf_common */

/* Read two decimal digits at s into *value.  Returns 1 on success and 0
   otherwise.  s[1] is only examined when s[0] is a digit, so this never
   reads past a terminating NUL.  As in the open-coded original, *value
   holds the tens part if only the first character was a digit. */
static int ss_two_digits(const char *s, int *value)
{
  if (!SS_ISDIGIT(s[0]))
    return(0);
  *value = 10 * (s[0] - '0');
  if (!SS_ISDIGIT(s[1]))
    return(0);
  *value += (s[1] - '0');
  return(1);
}

/* Step forward at most n characters, stopping early at the terminating NUL
   so that a shorter-than-usual date cannot carry the pointer off the end
   of the string. */
static char *ss_advance(char *s, int n)
{
  while (n-- > 0 && *s != '\0')
    s++;
  return(s);
}

/* Skip spaces and tabs. */
static char *ss_skip_blanks(char *s)
{
  while (*s == ' ' || *s == '\t')
    s++;
  return(s);
}

/* True if c ends the useful part of a config/domains line. */
static int ss_line_end(int c)
{
  return(c == '#' || c == '\n' || c == '\0');
}

/* Number of filename characters that may be stored when preflength bytes
   of the MAXSTRINGLENGTH buffer are reserved.  Clamped at zero so that an
   oversized preflength cannot wrap round to a huge unsigned limit. */
static size_t ss_room(size_t preflength)
{
  size_t cap = (size_t)(MAXSTRINGLENGTH - 1);

  return((preflength < cap) ? (cap - preflength) : 0);
}

/* Copy a space-terminated hostname from cin into hostn (at most
   MAXSTRINGLENGTH - 1 characters, always NUL-terminated).  Returns the
   position at which copying stopped. */
static char *ss_copy_host(char *cin, char *hostn)
{
  char *cout = hostn;
  int i = 0;

  while (*cin != ' ' && *cin != '\0' && i < MAXSTRINGLENGTH - 1) {
    *cout++ = *cin++;
    i++;
  }
  *cout = '\0';
  return(cin);
}

/* Copy characters up to (not including) the next '"' or NUL into out (at
   most MAXSTRINGLENGTH - 1 characters, always NUL-terminated).  Returns
   the position at which copying stopped. */
static char *ss_copy_quoted(char *cin, char *out)
{
  char *cout = out;
  int i = 0;

  while (*cin != '\0' && *cin != '"' && i < MAXSTRINGLENGTH - 1) {
    *cout++ = *cin++;
    i++;
  }
  *cout = '\0';
  return(cin);
}

/* Copy one config-file token starting at cin into out.  A token that opens
   with ' or " runs to the matching quote (or end of line); any other token
   runs to the next blank, '#' or end of line.  *used is the running count
   of characters copied from this line so far; it is updated, and copying
   stops once it reaches MAXSTRINGLENGTH - 1.  Returns the position at
   which copying stopped (on the closing quote for a quoted token). */
static char *ss_config_token(char *cin, char *out, int *used)
{
  char *cout = out;
  int i = *used;

  if (*cin == '\'' || *cin == '"') {
    char quote = *cin;

    cin++;
    while (*cin != '\n' && *cin != '\0' && *cin != quote &&
           i < MAXSTRINGLENGTH - 1) {
      *cout++ = *cin++;
      i++;
    }
  }
  else {
    while (!ss_line_end(*cin) && *cin != ' ' && *cin != '\t' &&
           i < MAXSTRINGLENGTH - 1) {
      *cout++ = *cin++;
      i++;
    }
  }
  *cout = '\0';
  *used = i;
  return(cin);
}

/*** Now the scanning routines ***/

/* Scan a date of the form "dd/Mon/yyyy:hh:mm:ss" (common, agent and
   referer logs).  A two-digit year followed by ':' is also accepted (for
   the Spyglass server) and mapped into 1970-2069.
   Returns 5 on success.  Otherwise returns how far it got: 0 = bad day,
   1 = bad month, 2 = bad year, 3 = bad hour (or no ':' after a four-digit
   year), 4 = bad minute or second.  The seconds are checked for form but
   not stored. */
int sscanf_date(char *inputline, int *date, int *monthno, int *year, int *hr,
                int *min)
{
  extern int strtomonth();   /* in utils.c */

  char *cin = inputline;
  char month[4];
  int seconds;               /* validated, then discarded */
  int i;

  if (!ss_two_digits(cin, date))
    return(0);
  cin += 2;

  /* read in month */
  if (*cin != '/')
    return(1);
  cin++;
  for (i = 0; i < 3 && *cin != '\0'; i++)
    month[i] = *cin++;
  if (*cin == '\0')          /* covers both a short month and end of line */
    return(1);
  month[i] = '\0';
  if ((*monthno = strtomonth(month)) == ERR)
    return(1);

  /* read in year */
  if (*cin != '/')
    return(2);
  cin++;
  if (!SS_ISDIGIT(*cin))
    return(2);
  *year = 1000 * (*cin - '0');
  cin++;
  if (!SS_ISDIGIT(*cin))
    return(2);
  *year += 100 * (*cin - '0');
  cin++;
  if (SS_ISDIGIT(*cin)) {
    *year += 10 * (*cin - '0');
    cin++;
    if (!SS_ISDIGIT(*cin))
      return(2);
    *year += (*cin - '0');
    cin++;
    if (*cin != ':')
      return(3);
  }
  else {
    if (*cin != ':')
      return(2);
    /* allow two digit years for Spyglass server */
    *year /= 100;
    *year += 1900;
    if (*year < 1970)
      *year += 100;
  }
  cin++;

  /* read in hour */
  if (!ss_two_digits(cin, hr))
    return(3);
  cin += 2;

  /* read in minute */
  if (*cin != ':')
    return(4);
  cin++;
  if (!ss_two_digits(cin, min))
    return(4);
  cin += 2;

  /* don't read in second, but check it for correct form */
  if (*cin != ':')
    return(4);
  cin++;
  if (!ss_two_digits(cin, &seconds))
    return(4);

  return(5);
}

/* The same thing for NCSA old-style and error logs, whose dates look like
   "Www Mon dd hh:mm:ss yyyy" (the day of month may be space-padded).
   Returns 5 on success.  Otherwise: 0 = line ended before the month was
   read, 1 = bad month or day, 2 = bad hour, 3 = bad minute, 4 = bad
   second or year. */
int sscanf_olddate(char *inputline, int *date, int *monthno, int *year,
                   int *hr, int *min)
{
  extern int strtomonth();   /* in utils.c */

  char *cin = inputline;
  char month[4];
  int seconds;               /* validated, then discarded */
  int scale;
  int i;

  /* the scan below starts one character in, so an empty string must be
     rejected first or we would step over its terminator */
  if (*cin == '\0')
    return(0);

  /* ignore day of week, so scan until next ' ' */
  for (cin++; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(0);

  /* read in month */
  cin++;
  for (i = 0; i < 3 && *cin != '\0'; i++)
    month[i] = *cin++;
  if (*cin == '\0')
    return(0);
  month[i] = '\0';
  if ((*monthno = strtomonth(month)) == ERR)
    return(1);

  /* read in date; the tens digit may be a space */
  if (*cin != ' ')
    return(1);
  cin++;
  if (SS_ISDIGIT(*cin))
    *date = 10 * (*cin - '0');
  else if (*cin == ' ')
    *date = 0;
  else
    return(1);
  cin++;
  if (!SS_ISDIGIT(*cin))
    return(1);
  *date += (*cin - '0');
  cin++;

  /* read in hour */
  if (*cin != ' ')
    return(2);
  cin++;
  if (!ss_two_digits(cin, hr))
    return(2);
  cin += 2;

  /* read in minute */
  if (*cin != ':')
    return(3);
  cin++;
  if (!ss_two_digits(cin, min))
    return(3);
  cin += 2;

  /* ignore second (but check format) */
  if (*cin != ':')
    return(4);
  cin++;
  if (!ss_two_digits(cin, &seconds))
    return(4);
  cin += 2;
  if (*cin != ' ')
    return(4);
  cin++;

  /* read year: exactly four digits */
  for (i = 0, scale = 1000; i < 4; i++, scale /= 10) {
    if (!SS_ISDIGIT(cin[i]))
      return(4);
    if (i == 0)
      *year = 0;
    *year += scale * (cin[i] - '0');
  }

  return(5);
}

/* Scan a 'common' format logfile entry, optionally followed by the quoted
   referer and agent of the NCSA combined format.  At most
   MAXSTRINGLENGTH - 1 - preflength characters are stored in filename; the
   query string ('?' onwards) is kept only if included(filename,
   noexpandhead) is false.  A status of "-" is stored as 299.
   The return value says how far the scan got:
     0      no hostname
     1      no '[' or unparsable date
     6      problem in the request (no opening quote, no space after the
            method, or filename ended at end of line / buffer limit)
     7      no closing quote, or bad status code
     8      no space after the status code
     9      everything up to the byte count read; no referer wanted
            (fq, bq and Bq all off) or none found
     10     referer read, agent missing or malformed
     11     referer and agent both read */
int sscanf_common(char *inputline, char hostn[MAXSTRINGLENGTH], int *date,
                  int *monthno, int *year, int *hr, int *min,
                  char filename[MAXSTRINGLENGTH],
                  char referer[MAXSTRINGLENGTH], char agent[MAXSTRINGLENGTH],
                  int *code, char bytestr[16], size_t preflength)
{
  extern flag included();    /* in alias.c */
  extern flag bq, Bq, fq;
  extern struct include *noexpandhead;

  char *cin = inputline;     /* the character we are reading */
  char *cout;                /* where we are putting it */
  size_t room = ss_room(preflength);
  size_t used;
  int i;

  /* read in hostname */
  cin = ss_copy_host(cin, hostn);
  if (*cin != ' ')
    return(0);

  /* scan until next '[' */
  for (cin++; *cin != '[' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(1);

  /* read in date */
  cin++;
  if (sscanf_date(cin, date, monthno, year, hr, min) < 5)
    return(1);
  /* a two-digit-year date is shorter than SS_DATE_LEN, so advance with a
     NUL check rather than by blind pointer arithmetic */
  cin = ss_advance(cin, SS_DATE_LEN);

  /* ignore timezone; so scan to next '"' */
  for ( ; *cin != '"' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(6);

  /* ignore method; so read to next ' ' */
  for (cin++; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(6);

  /* read in filename */
  cin++;
  cout = filename;
  used = 0;
  while (*cin != ' ' && *cin != '\0' && *cin != '"' && *cin != '?' &&
         used < room) {
    *cout++ = *cin++;
    used++;
  }
  *cout = '\0';
  if (*cin == '?' && !included(filename, noexpandhead)) {
    /* read in args */
    while (*cin != ' ' && *cin != '\0' && *cin != '"' && used < room) {
      *cout++ = *cin++;
      used++;
    }
    *cout = '\0';
  }
  if (*cin != ' ' && *cin != '"' && *cin != '?')
    return(6);

  /* scan to next " */
  for ( ; *cin != '"' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(7);

  /* read in return code; always 3 digits, or a - (successes; call them
     299) */
  cin++;
  if (*cin != ' ')
    return(7);
  cin++;
  if (SS_ISDIGIT(*cin)) {
    *code = 100 * (*cin - '0');
    cin++;
    if (!SS_ISDIGIT(*cin))
      return(7);
    *code += 10 * (*cin - '0');
    cin++;
    if (!SS_ISDIGIT(*cin))
      return(7);
    *code += (*cin - '0');
  }
  else if (*cin == '-' && *(cin + 1) == ' ')
    *code = 299;
  else
    return(7);

  /* read in bytestr; leave room for the terminator in the 16-byte buffer */
  cin++;
  if (*cin != ' ')
    return(8);
  cin++;
  cout = bytestr;
  for (i = 0; *cin != ' ' && *cin != '\n' && *cin != '\0' &&
              i < SS_BYTESTR_SIZE - 1; i++)
    *cout++ = *cin++;
  *cout = '\0';

  /* Finally, try and read in referer and agent of NCSA combined format */
  if (*cin != ' ' || (!fq && !bq && !Bq))
    return(9);
  cin++;
  if (*cin != '"')
    return(9);
  cin = ss_copy_quoted(cin + 1, referer);
  if (*cin != '"')
    return(9);

  cin++;
  if (*cin != ' ')
    return(10);
  cin++;
  if (*cin != '"')
    return(10);
  cin = ss_copy_quoted(cin + 1, agent);
  if (*cin != '"')
    return(10);

  return(11);
}

/* Scan an NCSA old-style logfile entry: hostname, "[old-style date" and
   then the filename, which is the third space-separated word after the
   date.  At most MAXSTRINGLENGTH - 1 - preflength characters are stored in
   filename; the query string is kept only if included(filename,
   noexpandhead) is false.
   Returns 0 = no hostname, 1 = no '[' or unparsable date, 6 = line ended
   before the filename, 7 = filename read. */
int sscanf_ncsaold(char *inputline, char hostn[MAXSTRINGLENGTH], int *monthno,
                   int *date, int *hr, int *min, int *year,
                   char filename[MAXSTRINGLENGTH], size_t preflength)
{
  extern flag included();    /* in alias.c */
  extern struct include *noexpandhead;

  char *cin = inputline;     /* the character we are reading */
  char *cout;                /* where we are putting it */
  size_t room = ss_room(preflength);
  size_t used;

  /* read in hostname */
  cin = ss_copy_host(cin, hostn);
  if (*cin != ' ')
    return(0);

  /* scan until next '[' */
  for (cin++; *cin != '[' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(1);

  /* read in date */
  cin++;
  if (sscanf_olddate(cin, date, monthno, year, hr, min) < 5)
    return(1);
  /* the day name need not be three letters, so the date may be shorter
     than SS_OLDDATE_LEN: advance with a NUL check */
  cin = ss_advance(cin, SS_OLDDATE_LEN);

  /* ignore method, so skip to second space */
  for ( ; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(6);
  for (cin++; *cin != ' ' && *cin != '\0'; cin++)
    ;
  if (*cin == '\0')
    return(6);

  /* finally, read in the filename */
  cin++;
  cout = filename;
  used = 0;
  while (*cin != ' ' && *cin != '\n' && *cin != '?' && *cin != '\0' &&
         used < room) {
    *cout++ = *cin++;
    used++;
  }
  *cout = '\0';
  if (*cin == '?' && !included(filename, noexpandhead)) {
    /* read in args */
    while (*cin != ' ' && *cin != '\0' && *cin != '"' && used < room) {
      *cout++ = *cin++;
      used++;
    }
    *cout = '\0';
  }

  return(7);
}

/* Scan one line of the domains file: a blank-delimited first word, stored
   lower-cased in string1, then the rest of the line (up to '#', newline or
   end of string) stored verbatim in string2.
   Returns the number of strings filled in: 0 for a blank or comment line,
   1 or 2.  The character counter is not reset between the two strings, so
   MAXSTRINGLENGTH - 1 bounds their combined length. */
int sscanf_domains(char *inputline, char string1[MAXSTRINGLENGTH],
                   char string2[MAXSTRINGLENGTH])
{
  char *cin = ss_skip_blanks(inputline);   /* run past any white space */
  char *cout;
  int i = 0;

  /* if no strings on this line, return 0 */
  if (ss_line_end(*cin))
    return(0);

  /* otherwise fill up string 1; coerce domains to lower case */
  for (cout = string1; *cin != ' ' && *cin != '\t' && !ss_line_end(*cin) &&
                       i < MAXSTRINGLENGTH - 1; cin++) {
    *cout++ = (char)tolower((unsigned char)*cin);
    i++;
  }
  /* terminate before any return, so the caller always gets a valid string */
  *cout = '\0';

  /* is that the end of the line (maybe after some white space)? */
  if (ss_line_end(*cin) || i == MAXSTRINGLENGTH - 1)
    return(1);
  cin = ss_skip_blanks(cin + 1);
  if (ss_line_end(*cin))
    return(1);

  /* otherwise fill up string 2 */
  for (cout = string2; !ss_line_end(*cin) && i < MAXSTRINGLENGTH - 1;
       cin++) {
    *cout++ = *cin;
    i++;
  }
  *cout = '\0';

  return(2);
}

/* Scan one line of the config file into up to three strings.  string1 is a
   blank-delimited word copied verbatim (no case conversion is done here).
   string2 and string3 may each be enclosed in '...' or "...", in which
   case they run to the closing quote; otherwise they run to the next
   blank.
   Returns the number of strings found: 0 for a blank or comment line, up
   to 4.  A fourth string is never read; 4 only signals that one is
   present, for error checking.  The character counter is shared by all
   the strings, so MAXSTRINGLENGTH - 1 bounds their combined length. */
int sscanf_config(char *inputline, char string1[MAXSTRINGLENGTH],
                  char string2[MAXSTRINGLENGTH],
                  char string3[MAXSTRINGLENGTH])
{
  char *cin = ss_skip_blanks(inputline);   /* run past any white space */
  char *cout;
  int i = 0;

  /* if no strings on this line, return 0 */
  if (ss_line_end(*cin))
    return(0);

  /* otherwise fill up string 1 */
  for (cout = string1; *cin != ' ' && *cin != '\t' && !ss_line_end(*cin) &&
                       i < MAXSTRINGLENGTH - 1; cin++) {
    *cout++ = *cin;
    i++;
  }
  *cout = '\0';

  /* is that the end of the line (maybe after some white space)? */
  if (ss_line_end(*cin) || i == MAXSTRINGLENGTH - 1)
    return(1);
  cin = ss_skip_blanks(cin + 1);
  if (ss_line_end(*cin))
    return(1);

  /* string 2: quoted, or up to the next space */
  cin = ss_config_token(cin, string2, &i);
  if (ss_line_end(*cin) || i == MAXSTRINGLENGTH - 1)
    return(2);
  cin = ss_skip_blanks(cin + 1);   /* the + 1 steps over quote or blank */
  if (ss_line_end(*cin))
    return(2);

  /* string 3: likewise */
  cin = ss_config_token(cin, string3, &i);
  if (ss_line_end(*cin) || i == MAXSTRINGLENGTH - 1)
    return(3);
  cin = ss_skip_blanks(cin + 1);
  if (ss_line_end(*cin))
    return(3);

  return(4);   /* we don't ever want to read a fourth string; just know if
                  there is one for error checking */
}

/* Scan a line of the referer log.  The format is "[date] from -> to"; the
   [date] is optional, and *date is set to 0 as a marker when it is absent.
   The query string of 'from' is kept only if included(from, refexpandhead)
   is true; a '#' fragment is always dropped.
   Returns 0 = unparsable date, 5 = problem before the arrow, 6 = problem
   in " -> to" (including a line that does not end in a newline straight
   after 'to'), 7 = success. */
int sscanf_referer(char *inputline, int *date, int *monthno, int *year,
                   int *hr, int *min, char from[MAXSTRINGLENGTH],
                   char to[MAXSTRINGLENGTH])
{
  extern flag included();    /* in alias.c */
  extern struct include *refexpandhead, *noexpandhead;

  char *cin = inputline;
  char *cout;
  int i;

  /* scan the date */
  if (*cin == '[') {
    cin++;
    if (sscanf_date(cin, date, monthno, year, hr, min) < 5)
      return(0);
    /* NUL-checked advance: a two-digit-year date is shorter than this */
    cin = ss_advance(cin, SS_DATE_LEN);
    if (*cin != ']')
      return(5);
    cin++;
    if (*cin != ' ')
      return(5);
    cin++;
  }
  else
    *date = 0;   /* as marker */

  /* now fill up the from string */
  i = 0;
  for (cout = from; *cin != ' ' && *cin != '\0' && *cin != '#' &&
                    *cin != '?' && i < MAXSTRINGLENGTH - 1; cin++) {
    *cout++ = *cin;
    i++;
  }
  *cout = '\0';
  if (*cin == '?' && included(from, refexpandhead)) {
    /* read in args */
    for ( ; *cin != ' ' && *cin != '\0' && *cin != '#' &&
            i < MAXSTRINGLENGTH - 1; cin++) {
      *cout++ = *cin;
      i++;
    }
    *cout = '\0';
  }

  /* skip whatever is left of an unwanted query string or fragment, then
     check at this point that the line syntax is ok */
  if (*cin == '#' || *cin == '?') {
    while (*cin != ' ' && *cin != '\0')
      cin++;
  }
  if (*cin != ' ')
    return(5);
  cin++;
  if (*cin != '-')
    return(6);
  cin++;
  if (*cin != '>')
    return(6);
  cin++;
  if (*cin != ' ')
    return(6);
  cin++;

  /* and the to string */
  i = 0;
  for (cout = to; *cin != ' ' && *cin != '\0' && *cin != '\n' &&
                  i < MAXSTRINGLENGTH - 1; cin++) {
    *cout++ = *cin;
    i++;
  }
  *cout = '\0';
  /* NOTE(review): the loop above does not stop at '?', so any query string
     is already part of 'to' and this branch can only be entered when the
     buffer filled up exactly on a '?', in which case it copies nothing.
     It looks as if the loop was meant to stop at '?' as the filename loop
     in sscanf_common does; confirm the intended behaviour before changing
     it.  Kept as it was. */
  if (*cin == '?' && !included(to, noexpandhead)) {
    /* read in args */
    for ( ; *cin != ' ' && *cin != '\0' && i < MAXSTRINGLENGTH - 1; cin++) {
      *cout++ = *cin;
      i++;
    }
    *cout = '\0';
  }

  if (*cin != '\n')
    return(6);

  return(7);
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.