This is msql_lex.c in view mode; [Download] [Up]
/* ** msql_lex.c - ** ** ** Copyright (c) 1993-95 David J. Hughes ** Copyright (c) 1995 Hughes Technologies ** ** Permission to use, copy, and distribute for non-commercial purposes, ** is hereby granted without fee, providing that the above copyright ** notice appear in all copies and that both the copyright notice and this ** permission notice appear in supporting documentation. ** ** This software is provided "as is" without any expressed or implied warranty. ** ** ID = "$Id:" ** */ /* ** This is a hand crafted scanner that looks and smells like a lex ** generated scanner. I've kept the same interface so that unmodified ** yacc parsers can run with this. ** ** This scanner uses a state machine to translate the input data into ** tokens. Failed matches cause a fallback to the start of the token ** scan and a possible transition to a known alternate state. The ** state structure is defined in doc/scanner.doc ** ** NOTE : Because the scanner must revert back to the start of the ** token on a failure it can only work from an input buffer. It ** cannot work from a file or anything else. */ #include <stdio.h> #include <ctype.h> #include <sys/types.h> #include <sys/socket.h> #include <netinet/in.h> #include <arpa/inet.h> #include <common/portability.h> #include "msql_priv.h" #include "y.tab.h" #define REG register #define NUM_HASH 16 #ifndef DEBUG # define malloc(s) Malloc(s,__FILE__,__LINE__) # define free(a) Free(a,__FILE__,__LINE__) #endif /* ** Macros for handling the scanner's internal pointers */ #define yyGet() (*tokPtr++); yytoklen++ #define yyUnget() tokPtr--; yytoklen-- #define yySkip() (*tokPtr++); tokStart++ #define yyRevert() {tokPtr=tokStart; yytoklen=0;} #define yyReturn(t) {tokStart=tokPtr; return(t);} /* ** Macros for matching character classes. These are in addition to ** those provided in <ctypes.h> */ #ifdef iswhite # undef iswhite #endif #define iswhite(c) (c == ' ' || c == '\t' || c == '\n') #ifdef iscompop # undef iscompop #endif #define iscompop(c) (c == '<' || c == '>' || c == '=') /* ** Debugging macros. */ /* #define DEBUG_STATE /* Define this to watch the state transitions */ #ifdef DEBUG # define token(x) (int) "x" #else # define token(x) x #endif /* DEBUG */ #ifdef DEBUG_STATE # define CASE(x) case x: if (x) printf("%c -> state %d\n",c,x); \ else printf("Scanner starting at state 0\n"); #else # define CASE(x) case x: #endif u_char *yytext = NULL; u_int yytoklen = 0; int yylineno = 1; static u_char *tokPtr, *tokStart; static int state; #ifdef DEBUG YYSTYPE yylval; #else extern YYSTYPE yylval; #endif typedef struct symtab_s { char *name; int tok; } symtab_t; static symtab_t symtab[16][16] = { { /* 0 */ { "select", token(SELECT)}, { "values", token(VALUES)}, { 0, 0} }, { /* 1 */ { "or", token(OR)}, { "not", token(NOT)}, { 0, 0} }, { /* 2 */ { "distinct", token(DISTINCT)}, { 0, 0} }, { /* 3 */ { "and", token(AND)}, { "delete", token(DELETE)}, { "update", token(UPDATE)}, { 0, 0} }, { /* 4 */ { "from", token(FROM)}, { "create", token(CREATE)}, { "primary", token(PRIMARY)}, { "smallint", token(INT)}, { "real", token(REAL)}, { "as", token(AS)}, { 0, 0} }, { /* 5 */ { "drop", token(DROP)}, { "insert", token(INSERT)}, { "like", token(LIKE)}, { 0, 0} }, { /* 6 */ { "NULL", token(NULLSYM)}, { 0, 0} }, { /* 7 */ { "asc", token(ASC)}, { 0, 0} }, { /* 8 */ { "table", token(TABLE)}, { 0, 0} }, { /* 9 */ { "<=", token(LE)}, { "all", token(ALL)}, { "key", token(KEY)}, { 0, 0} }, { /* 10 */ { "<>", token(NE)}, { "into", token(INTO)}, { 0, 0} }, { /* 11 */ { "where", token(WHERE)}, { ">=", token(GE)}, { "by", token(BY)}, { "null", token(NULLSYM)}, { "int", token(INT)}, { 0, 0} }, { /* 12 */ { "<", token(LT)}, { "order", token(ORDER)}, { "set", token(SET)}, { 0, 0} }, { /* 13 */ { "=", token(EQ)}, { 0, 0} }, { /* 14 */ { ">", token(GT)}, { "integer", token(INT)}, { "char", token(CHAR)}, { 0, 0} }, { /* 15 */ { "desc", token(DESC)}, { "limit", token(LIMIT)}, { 0, 0} } }; msqlInitScanner(buf) u_char *buf; { tokStart = buf; state = 0; yylineno = 1; } int findKeyword(tok,len) char *tok; int len; { REG char *cp1, *cp2, tmp; REG symtab_t *stab; int found; REG int hash=0, index=0; cp1 = tok; while(*cp1 && index++ < len) { hash += *cp1++; } hash = hash & (NUM_HASH - 1); stab = symtab[hash]; while(stab->name) { cp1 = stab->name; cp2 = tok; found = 1; while(cp2 - tok < len) { if (!(*cp1)) { found = 0; break; } /* if (tolower(*cp2++) != *cp1++) */ tmp = *cp2++; if (tmp >64 && tmp<91) tmp+=32; if (tmp != *cp1++) { found = 0; break; } } if (*cp1) { found = 0; } if (found) { yytext = (u_char *)stab->name; yylval = (YYSTYPE)stab->tok; return(stab->tok); } stab++; } return(0); } u_char *tokenDup(tok,len) u_char *tok; int len; { u_char *new; new = (u_char *)malloc(len+1); (void)bcopy(tok,new,len); *(new + len) = 0; return(new); } u_char *readTextLiteral(tok) u_char *tok; { REG u_char c; int bail; bail = 0; while(!bail) { c = yyGet(); switch(c) { case 0: return(NULL); case '\\': c = yyGet(); if (!c) return(NULL); break; case '\'': bail=1; break; } } return(tokenDup(tok,yytoklen)); } int yylex() { REG u_char c; REG u_char t; int tokval; static u_char dummyBuf[2]; /* ** Handle the end of input. We return an EOI token when we hit ** the end and then return a 0 on the next call to yylex. This ** allows the parser to do the right thing with trailing garbage ** in the expression. */ if (state == 1000) { return(0); } state = 0; /* ** Dive into the state machine */ while(1) { switch(state) { /* State 0 : Start of token */ CASE(0) tokPtr = tokStart; yytext = NULL; yytoklen = 0; c = yyGet(); while (iswhite(c)) { if (c == '\n') yylineno++; c = yySkip(); } if (c == '\'') { state = 12; break; } if (isalpha(c)) { state = 1; break; } if (isdigit(c)) { state = 5; break; } if (c == '.') { t = yyGet(); if ( isdigit(t) ) { yyUnget(); state = 7; break; } else yyUnget(); } if (c == '-' || c == '+') { state = 9; break; } if (iscompop(c)) { state = 10; break; } if (c == '#') { state = 14; break; } if (c == 0) { state = 1000; break; } state = 999; break; /* State 1 : Incomplete keyword or ident */ CASE(1) c = yyGet(); if (isalpha(c)) { state = 1; break; } if (isdigit(c) || c == '_') { state = 3; break; } state = 2; break; /* State 2 : Complete keyword or ident */ CASE(2) yyUnget(); tokval = findKeyword(tokStart,yytoklen); if (tokval) { yyReturn(tokval); } else { yytext = tokenDup(tokStart,yytoklen); yylval = (YYSTYPE) yytext; yyReturn(token(IDENT)); } break; /* State 3 : Incomplete ident */ CASE(3) c = yyGet(); if (isalnum(c) || c == '_') { state = 3; break; } state = 4; break; /* State 4: Complete ident */ CASE(4) yyUnget(); yytext = tokenDup(tokStart,yytoklen); yylval = (YYSTYPE) yytext; yyReturn(token(IDENT)); /* State 5: Incomplete real or int number */ CASE(5) c = yyGet(); if (isdigit(c)) { state = 5; break; } if (c == '.') { state = 7; break; } state = 6; break; /* State 6: Complete integer number */ CASE(6) yyUnget(); yytext = tokenDup(tokStart,yytoklen); yylval = (YYSTYPE) yytext; yyReturn(token(NUM)); break; /* State 7: Incomplete real number */ CASE(7) c = yyGet(); /* Analogy Start */ if(c == 'e' || c == 'E') { state = 15; break; } /* Analogy End */ if (isdigit(c)) { state = 7; break; } state = 8; break; /* State 8: Complete real number */ CASE(8) yyUnget(); yytext = tokenDup(tokStart,yytoklen); yylval = (YYSTYPE) yytext; yyReturn(token(REAL_NUM)); /* State 9: Incomplete signed number */ CASE(9) c = yyGet(); if (isdigit(c)) { state = 5; break; } if (c == '.') { state = 7; break; } state = 999; break; /* State 10: Incomplete comparison operator */ CASE(10) c = yyGet(); if (iscompop(c)) { state = 10; break; } state = 11; break; /* State 11: Complete comparison operator */ CASE(11) yyUnget(); tokval = findKeyword(tokStart,yytoklen); if (tokval) { yyReturn(tokval); } state = 999; break; /* State 12: Incomplete text string */ CASE(12) yytext = readTextLiteral(tokStart); yylval = (YYSTYPE) yytext; if (yytext) { state = 13; break; } state = 999; break; /* State 13: Complete text string */ CASE(13) yyReturn(token(TEXT)); break; /* State 14: Comment */ CASE(14) c = yySkip(); if (c == '\n') { state = 0; } else { state = 14; } break; /* Analogy Start */ /* State 15: Exponent Sign in Scientific Notation */ CASE(15) c = yyGet(); if(c == '-' || c == '+') { state = 16; break; } state = 999; break; /* State 16: Exponent Value-first digit in Scientific ** Notation */ CASE(16) c = yyGet(); if (isdigit(c)) { state = 17; break; } state = 999; /* if no digit, then token ** is unknown */ break; /* State 17: Exponent Value in Scientific Notation */ CASE(17) c = yyGet(); if (isdigit(c)) { state = 17; break; } state = 8; /* At least 1 exponent ** digit was required */ break; /* Analogy End */ /* State 999 : Unknown token. Revert to single char */ CASE(999) yyRevert(); c = yyGet(); *dummyBuf = c; *(dummyBuf+1) = 0; yytext = dummyBuf; yylval = (YYSTYPE) yytext; yyReturn(token(yytext[0])); /* State 1000 : End Of Input */ CASE(1000) yyReturn(token(END_OF_INPUT)); } } } #ifdef DEBUG main() { char *p, tmpBuf[4 * 1024]; (void)bzero(tmpBuf,sizeof(tmpBuf)); read(fileno(stdin),tmpBuf,sizeof(tmpBuf)); msqlInitScanner(tmpBuf); while(p = (char *) yylex()) { printf("%-15.15s of length %u is \"%s\"\n", p, yytoklen, yytext?yytext:(u_char *)"(null)"); } } #endif
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.