/* This is msql_lex.c in view mode; [Download] [Up] */
/*
** msql_lex.c -
**
**
** Copyright (c) 1993-95 David J. Hughes
** Copyright (c) 1995 Hughes Technologies
**
** Permission to use, copy, and distribute for non-commercial purposes,
** is hereby granted without fee, providing that the above copyright
** notice appear in all copies and that both the copyright notice and this
** permission notice appear in supporting documentation.
**
** This software is provided "as is" without any expressed or implied warranty.
**
** ID = "$Id:"
**
*/
/*
** This is a hand crafted scanner that looks and smells like a lex
** generated scanner. I've kept the same interface so that unmodified
** yacc parsers can run with this.
**
** This scanner uses a state machine to translate the input data into
** tokens. Failed matches cause a fallback to the start of the token
** scan and a possible transition to a known alternate state. The
** state structure is defined in doc/scanner.doc
**
** NOTE : Because the scanner must revert back to the start of the
** token on a failure it can only work from an input buffer. It
** cannot work from a file or anything else.
*/
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <common/portability.h>
#include "msql_priv.h"
#include "y.tab.h"
/* Shorthand for the register storage class used on the scanner's locals */
#define REG register
/*
** Number of buckets in the keyword hash table. findKeyword() reduces
** its hash with "& (NUM_HASH - 1)", so this must stay a power of two.
*/
#define NUM_HASH 16
/*
** Outside of DEBUG builds every malloc()/free() in this file is routed
** through Malloc()/Free() together with the calling file and line.
*/
#ifndef DEBUG
# define malloc(s) Malloc(s,__FILE__,__LINE__)
# define free(a) Free(a,__FILE__,__LINE__)
#endif
/*
** Macros for handling the scanner's internal pointers
**
**	yyGet()		yields the character at tokPtr, advances tokPtr and
**			counts the character in yytoklen
**	yyUnget()	pushes the last character back (undoes one yyGet)
**	yySkip()	yields the character at tokPtr and advances both
**			tokPtr and tokStart, leaving yytoklen untouched
**	yyRevert()	rewinds tokPtr to the start of the token
**	yyReturn(t)	makes the end of this token the start of the next
**			one and returns t from the enclosing function
**
** Each macro expands to a single expression or a single statement so
** that it is safe as the body of an unbraced if / else.
*/
#define yyGet()		(yytoklen++, *tokPtr++)
#define yyUnget()	((void)(tokPtr--, yytoklen--))
#define yySkip()	(tokStart++, *tokPtr++)
#define yyRevert()	do { tokPtr = tokStart; yytoklen = 0; } while (0)
#define yyReturn(t)	do { tokStart = tokPtr; return (t); } while (0)
/*
** Macros for matching character classes. These are in addition to
** those provided in <ctype.h>
**
** The argument is evaluated more than once, so it must not have side
** effects. It is parenthesised so that any expression may be passed.
*/
#ifdef iswhite
# undef iswhite
#endif
#define iswhite(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
#ifdef iscompop
# undef iscompop
#endif
#define iscompop(c) ((c) == '<' || (c) == '>' || (c) == '=')
/*
** Debugging macros.
**
** token(x)	wraps every token value handed back to the parser. In a
**		normal build it is just x. In a DEBUG build it is a
**		string cast to int so the test driver can print it.
** CASE(x)	introduces a state in yylex(). With DEBUG_STATE defined
**		it also prints the character that led to the state.
*/
/* #define DEBUG_STATE /* Define this to watch the state transitions */
#ifdef DEBUG
/*
** NOTE(review): an ISO C preprocessor does not substitute a macro
** parameter inside a string literal, so this presumably relies on a
** traditional (K&R) preprocessor to yield the token name - confirm.
*/
# define token(x) (int) "x"
#else
# define token(x) x
#endif /* DEBUG */
#ifdef DEBUG_STATE
# define CASE(x) case x: if (x) printf("%c -> state %d\n",c,x); \
else printf("Scanner starting at state 0\n");
#else
# define CASE(x) case x:
#endif
/*
** Scanner results shared with the parser.
*/
u_char		*yytext = NULL;		/* text of the current token */
u_int		yytoklen = 0;		/* characters read for the token */
int		yylineno = 1;		/* bumped for each newline skipped */

/*
** Private scanner state.
*/
static u_char	*tokPtr;		/* next character to be read */
static u_char	*tokStart;		/* where the current token began */
static int	state;			/* current state of the machine */

/*
** The DEBUG build has no parser linked in, so it must provide the
** storage for yylval itself.
*/
#ifdef DEBUG
YYSTYPE		yylval;
#else
extern YYSTYPE	yylval;
#endif
/*
** One entry of the keyword table: the spelling of a keyword or
** comparison operator and the token value returned for it.
*/
struct symtab_s {
	char	*name;
	int	tok;
};

typedef struct symtab_s	symtab_t;
/*
** Keyword hash table searched by findKeyword().
**
** The first index is the bucket: the sum of the characters of the
** word masked with (NUM_HASH - 1). Each bucket is a list of entries
** terminated by { 0, 0 }. findKeyword() folds 'A'-'Z' in the input to
** lower case before comparing, so names are stored in lower case. An
** entry placed in the wrong bucket will never be found.
**
** NOTE(review): the "NULL" entry in bucket 6 looks unreachable - its
** characters sum to bucket 11 and the input is folded to lower case
** before the comparison. "null" in bucket 11 handles both spellings.
*/
static symtab_t symtab[16][16] = {
{ /* 0 */
{ "select", token(SELECT)},
{ "values", token(VALUES)},
{ 0, 0}
},
{ /* 1 */
{ "or", token(OR)},
{ "not", token(NOT)},
{ 0, 0}
},
{ /* 2 */
{ "distinct", token(DISTINCT)},
{ 0, 0}
},
{ /* 3 */
{ "and", token(AND)},
{ "delete", token(DELETE)},
{ "update", token(UPDATE)},
{ 0, 0}
},
{ /* 4 */
{ "from", token(FROM)},
{ "create", token(CREATE)},
{ "primary", token(PRIMARY)},
{ "smallint", token(INT)},
{ "real", token(REAL)},
{ "as", token(AS)},
{ 0, 0}
},
{ /* 5 */
{ "drop", token(DROP)},
{ "insert", token(INSERT)},
{ "like", token(LIKE)},
{ 0, 0}
},
{ /* 6 */
{ "NULL", token(NULLSYM)},
{ 0, 0}
},
{ /* 7 */
{ "asc", token(ASC)},
{ 0, 0}
},
{ /* 8 */
{ "table", token(TABLE)},
{ 0, 0}
},
{ /* 9 */
{ "<=", token(LE)},
{ "all", token(ALL)},
{ "key", token(KEY)},
{ 0, 0}
},
{ /* 10 */
{ "<>", token(NE)},
{ "into", token(INTO)},
{ 0, 0}
},
{ /* 11 */
{ "where", token(WHERE)},
{ ">=", token(GE)},
{ "by", token(BY)},
{ "null", token(NULLSYM)},
{ "int", token(INT)},
{ 0, 0}
},
{ /* 12 */
{ "<", token(LT)},
{ "order", token(ORDER)},
{ "set", token(SET)},
{ 0, 0}
},
{ /* 13 */
{ "=", token(EQ)},
{ 0, 0}
},
{ /* 14 */
{ ">", token(GT)},
{ "integer", token(INT)},
{ "char", token(CHAR)},
{ 0, 0}
},
{ /* 15 */
{ "desc", token(DESC)},
{ "limit", token(LIMIT)},
{ 0, 0}
}
};
/*
** msqlInitScanner - point the scanner at a new input buffer.
**
** buf is the NUL terminated text to be scanned. The scanner keeps
** pointers into it, so it must stay valid for as long as yylex() is
** being called. The state machine and the line counter are reset.
**
** Always returns 0. The return type was previously implicit int with
** no value returned; it is now explicit so the result is defined.
*/
int msqlInitScanner(buf)
	u_char	*buf;
{
	tokStart = buf;
	state = 0;
	yylineno = 1;
	return(0);
}
int findKeyword(tok,len)
char *tok;
int len;
{
REG char *cp1,
*cp2,
tmp;
REG symtab_t *stab;
int found;
REG int hash=0,
index=0;
cp1 = tok;
while(*cp1 && index++ < len)
{
hash += *cp1++;
}
hash = hash & (NUM_HASH - 1);
stab = symtab[hash];
while(stab->name)
{
cp1 = stab->name;
cp2 = tok;
found = 1;
while(cp2 - tok < len)
{
if (!(*cp1))
{
found = 0;
break;
}
/*
if (tolower(*cp2++) != *cp1++)
*/
tmp = *cp2++;
if (tmp >64 && tmp<91)
tmp+=32;
if (tmp != *cp1++)
{
found = 0;
break;
}
}
if (*cp1)
{
found = 0;
}
if (found)
{
yytext = (u_char *)stab->name;
yylval = (YYSTYPE)stab->tok;
return(stab->tok);
}
stab++;
}
return(0);
}
/*
** tokenDup - make a NUL terminated copy of a token.
**
** Copies the first len bytes at tok into freshly allocated memory and
** appends a terminating NUL. The caller owns the returned buffer.
**
** Returns NULL if tok is NULL, if len is negative or if the
** allocation fails.
*/
u_char *tokenDup(tok,len)
	u_char	*tok;
	int	len;
{
	u_char	*new;

	if (tok == NULL || len < 0)
	{
		return(NULL);
	}
	new = (u_char *)malloc((size_t)len + 1);
	if (new == NULL)
	{
		return(NULL);
	}
	(void)memcpy(new,tok,(size_t)len);
	new[len] = 0;
	return(new);
}
u_char *readTextLiteral(tok)
u_char *tok;
{
REG u_char c;
int bail;
bail = 0;
while(!bail)
{
c = yyGet();
switch(c)
{
case 0:
return(NULL);
case '\\':
c = yyGet();
if (!c)
return(NULL);
break;
case '\'':
bail=1;
break;
}
}
return(tokenDup(tok,yytoklen));
}
/*
** yylex - hand the parser the next token from the scanner buffer.
**
** Scanning starts at tokStart, which is set by msqlInitScanner() and
** moved past each token by yyReturn(). White space and '#' comments
** are skipped. On return yytext, yylval and yytoklen describe the
** token. The value returned is one of:
**
**	the token found by findKeyword() for a keyword or a comparison
**		operator (yytext points at the keyword table)
**	IDENT, NUM, REAL_NUM or TEXT (yytext is a tokenDup() copy)
**	the character itself for anything that is not recognised
**		(yytext points at a static two byte buffer)
**	END_OF_INPUT when the terminating NUL is reached
**	0 on every call after END_OF_INPUT has been returned
*/
int yylex()
{
	REG u_char	c;
	REG u_char	t;
	int		tokval;
	static u_char	dummyBuf[2];

	/*
	** Handle the end of input. We return an EOI token when we hit
	** the end and then return a 0 on the next call to yylex. This
	** allows the parser to do the right thing with trailing garbage
	** in the expression.
	*/
	if (state == 1000)
	{
		return(0);
	}
	state = 0;

	/*
	** Dive into the state machine
	*/
	while(1)
	{
		switch(state)
		{
			/* State 0 : Start of token */
			CASE(0)
				tokPtr = tokStart;
				yytext = NULL;
				yytoklen = 0;
				c = yyGet();
				while (iswhite(c))
				{
					if (c == '\n')
						yylineno++;
					c = yySkip();
				}
				if (c == '\'')
				{
					state = 12;
					break;
				}
				if (isalpha(c))
				{
					state = 1;
					break;
				}
				if (isdigit(c))
				{
					state = 5;
					break;
				}
				if (c == '.')
				{
					/*
					** Peek at the next character: a
					** '.' only starts a number if a
					** digit follows it.
					*/
					t = yyGet();
					if ( isdigit(t) )
					{
						yyUnget();
						state = 7;
						break;
					}
					else
					{
						yyUnget();
					}
				}
				if (c == '-' || c == '+')
				{
					state = 9;
					break;
				}
				if (iscompop(c))
				{
					state = 10;
					break;
				}
				if (c == '#')
				{
					state = 14;
					break;
				}
				if (c == 0)
				{
					state = 1000;
					break;
				}
				state = 999;
				break;

			/* State 1 : Incomplete keyword or ident */
			CASE(1)
				c = yyGet();
				if (isalpha(c))
				{
					state = 1;
					break;
				}
				if (isdigit(c) || c == '_')
				{
					state = 3;
					break;
				}
				state = 2;
				break;

			/* State 2 : Complete keyword or ident */
			CASE(2)
				yyUnget();
				tokval = findKeyword(tokStart,yytoklen);
				if (tokval)
				{
					yyReturn(tokval);
				}
				else
				{
					yytext = tokenDup(tokStart,yytoklen);
					yylval = (YYSTYPE) yytext;
					yyReturn(token(IDENT));
				}
				break;

			/* State 3 : Incomplete ident */
			CASE(3)
				c = yyGet();
				if (isalnum(c) || c == '_')
				{
					state = 3;
					break;
				}
				state = 4;
				break;

			/* State 4: Complete ident */
			CASE(4)
				yyUnget();
				yytext = tokenDup(tokStart,yytoklen);
				yylval = (YYSTYPE) yytext;
				yyReturn(token(IDENT));

			/* State 5: Incomplete real or int number */
			CASE(5)
				c = yyGet();
				if (isdigit(c))
				{
					state = 5;
					break;
				}
				if (c == '.')
				{
					state = 7;
					break;
				}
				state = 6;
				break;

			/* State 6: Complete integer number */
			CASE(6)
				yyUnget();
				yytext = tokenDup(tokStart,yytoklen);
				yylval = (YYSTYPE) yytext;
				yyReturn(token(NUM));
				break;

			/* State 7: Incomplete real number */
			CASE(7)
				c = yyGet();
				/* Analogy Start */
				if(c == 'e' || c == 'E')
				{
					state = 15;
					break;
				}
				/* Analogy End */
				if (isdigit(c))
				{
					state = 7;
					break;
				}
				state = 8;
				break;

			/* State 8: Complete real number */
			CASE(8)
				yyUnget();
				yytext = tokenDup(tokStart,yytoklen);
				yylval = (YYSTYPE) yytext;
				yyReturn(token(REAL_NUM));

			/* State 9: Incomplete signed number */
			CASE(9)
				c = yyGet();
				if (isdigit(c))
				{
					state = 5;
					break;
				}
				if (c == '.')
				{
					state = 7;
					break;
				}
				state = 999;
				break;

			/* State 10: Incomplete comparison operator */
			CASE(10)
				c = yyGet();
				if (iscompop(c))
				{
					state = 10;
					break;
				}
				state = 11;
				break;

			/* State 11: Complete comparison operator */
			CASE(11)
				yyUnget();
				tokval = findKeyword(tokStart,yytoklen);
				if (tokval)
				{
					yyReturn(tokval);
				}
				state = 999;
				break;

			/* State 12: Incomplete text string */
			CASE(12)
				yytext = readTextLiteral(tokStart);
				yylval = (YYSTYPE) yytext;
				if (yytext)
				{
					state = 13;
					break;
				}
				state = 999;
				break;

			/* State 13: Complete text string */
			CASE(13)
				yyReturn(token(TEXT));
				break;

			/* State 14: Comment */
			CASE(14)
				c = yySkip();
				/*
				** A comment ends at the newline or at the
				** end of the input. Without the NUL test a
				** comment on the last line would be scanned
				** past the end of the buffer. Either way
				** tokStart is left on the terminator so that
				** state 0 sees it.
				*/
				if (c == '\n' || c == 0)
				{
					state = 0;
				}
				else
				{
					state = 14;
				}
				break;

			/* Analogy Start */
			/* State 15: Exponent Sign in Scientific Notation */
			CASE(15)
				c = yyGet();
				if(c == '-' || c == '+')
				{
					state = 16;
					break;
				}
				state = 999;
				break;

			/* State 16: Exponent Value-first digit in Scientific
			** Notation */
			CASE(16)
				c = yyGet();
				if (isdigit(c))
				{
					state = 17;
					break;
				}
				state = 999;	/* if no digit, then token
						** is unknown */
				break;

			/* State 17: Exponent Value in Scientific Notation */
			CASE(17)
				c = yyGet();
				if (isdigit(c))
				{
					state = 17;
					break;
				}
				state = 8;	/* At least 1 exponent
						** digit was required */
				break;
			/* Analogy End */

			/* State 999 : Unknown token. Revert to single char */
			CASE(999)
				yyRevert();
				c = yyGet();
				*dummyBuf = c;
				*(dummyBuf+1) = 0;
				yytext = dummyBuf;
				yylval = (YYSTYPE) yytext;
				yyReturn(token(yytext[0]));

			/* State 1000 : End Of Input */
			CASE(1000)
				yyReturn(token(END_OF_INPUT));
		}
	}
}
#ifdef DEBUG
/*
** Stand-alone test driver, built only when DEBUG is defined.
**
** Reads one buffer of text from stdin, scans it and prints every token
** together with its length and text. Returns 0, or 1 if stdin could
** not be read.
**
** NOTE(review): the loop treats the int returned by yylex() as a
** string pointer, which matches the DEBUG definition of token() -
** confirm this still holds where int and pointers differ in size.
*/
int main()
{
	char	*p,
		tmpBuf[4 * 1024];
	int	numBytes;

	(void)bzero(tmpBuf,sizeof(tmpBuf));

	/*
	** Read one byte less than the buffer holds so that the zero
	** fill above always leaves the NUL the scanner stops on.
	*/
	numBytes = read(fileno(stdin),tmpBuf,sizeof(tmpBuf) - 1);
	if (numBytes < 0)
	{
		perror("read");
		return(1);
	}
	msqlInitScanner((u_char *)tmpBuf);
	while((p = (char *) yylex()) != NULL)
	{
		printf("%-15.15s of length %u is \"%s\"\n", p, yytoklen,
			yytext?yytext:(u_char *)"(null)");
	}
	return(0);
}
#endif
/* These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch. */