This is scanner.h in view mode; [Download] [Up]
/****************************************************************************
**
*A  scanner.h                   GAP source                   Martin Schoenert
**
*A  @(#)$Id: scanner.h,v 3.4 1993/05/05 11:10:12 fceller Rel $
**
*Y  Copyright 1990-1992,  Lehrstuhl D fuer Mathematik,  RWTH Aachen,  Germany
**
**  This file declares the functions of the scanner, which is responsible for
**  all input and output processing.
**
**  The scanner exports two very important abstractions.  The first is the
**  concept that an input file is a stream of symbols; such nasty things as
**  <space>, <tab>, <newline> characters or comments (they are the worst :-),
**  characters making up identifiers or digits that make up integers are
**  hidden from the rest of GAP.
**
**  The second is the concept of a current input and output file.  In the
**  main module they are opened and closed with the 'OpenInput' and
**  'CloseInput' respectively 'OpenOutput' and 'CloseOutput' calls.  All the
**  other modules just read from the current input and write to the current
**  output file.
**
**  The scanner relies on the functions provided by the operating system
**  dependent module 'system.c' for the low level input/output.
**
*H  $Log: scanner.h,v $
*H  Revision 3.4  1993/05/05  11:10:12  fceller
*H  added 'LogInputTo'
*H
*H  Revision 3.3  1992/12/16  19:46:21  martin
*H  added character constants
*H
*H  Revision 3.2  1992/12/08  11:50:26  martin
*H  added '<list>{<positions>}'
*H
*H  Revision 3.1  1991/04/30  16:12:45  martin
*H  initial revision under RCS
*H
*/


/****************************************************************************
**
*V  Symbol  . . . . . . . . . . . . . . . . . current symbol read from input
**
**  The variable 'Symbol' contains the current symbol read from the input.
**  It is represented as an unsigned long integer.
** ** The possible values for 'Symbol' are defined in the definition file of ** this package as follows: */ #define S_ILLEGAL (0L) #define S_IDENT ((1L<< 3)) #define S_INT ((1L<< 4)) #define S_CHAR ((1L<< 5)+0) #define S_STRING ((1L<< 5)+1) #define S_DOT ((1L<< 6)) #define S_LBRACK ((1L<< 7)+0) #define S_RBRACK ((1L<< 8)+0) #define S_LBRACE ((1L<< 7)+1) #define S_RBRACE ((1L<< 8)+1) #define S_LPAREN ((1L<< 9)) #define S_RPAREN ((1L<<10)) #define S_COMMA ((1L<<11)+0) #define S_DOTDOT ((1L<<11)+1) #define S_IF ((1L<<12)+0) #define S_THEN ((1L<<13)) #define S_ELIF ((1L<<14)+0) #define S_ELSE ((1L<<14)+1) #define S_FI ((1L<<15)) #define S_FOR ((1L<<12)+1) #define S_DO ((1L<<16)) #define S_OD ((1L<<17)) #define S_REPEAT ((1L<<12)+2) #define S_UNTIL ((1L<<18)) #define S_WHILE ((1L<<12)+3) #define S_ASSIGN ((1L<<19)) #define S_SEMICOLON ((1L<<20)) #define S_FUNCTION ((1L<<21)) #define S_LOCAL ((1L<<22)) #define S_END ((1L<<23)) #define S_RETURN ((1L<<12)+4) #define S_MAPTO ((1L<<24)) #define S_NOT ((1L<<25)+0) #define S_AND ((1L<<25)+1) #define S_OR ((1L<<25)+2) #define S_EQ ((1L<<26)+0) #define S_LT ((1L<<26)+1) #define S_GT ((1L<<26)+2) #define S_NE ((1L<<26)+3) #define S_LE ((1L<<26)+4) #define S_GE ((1L<<26)+5) #define S_IN ((1L<<26)+6) #define S_PLUS ((1L<<27)+0) #define S_MINUS ((1L<<27)+1) #define S_MULT ((1L<<28)+0) #define S_DIV ((1L<<28)+1) #define S_MOD ((1L<<28)+2) #define S_POW ((1L<<28)+3) #define S_QUIT ((1L<<29)) #define S_EOF ((1L<<30)) extern unsigned long Symbol; /**************************************************************************** ** *T TypSymbolSet . . . . . . . . . . . . . . . . . . type of sets of symbols ** ** 'TypSymbolSet' is the type of sets of symbols. Sets of symbols are used ** in the error recovery of the parser to specify that 'Match' should skip ** all symbols until finding one in a specified set. ** ** If there were less than 32 different symbols things would be very easy. 
** We could simply assign the symbolic constants that are the possible ** values for 'Symbol' values 1, 2, 4, 8, 16, ... and so on. Then making a ** set would simply mean or-ing the values, as in 'S_INT|S_STRING', and ** checking whether a symbol is in a set would be '(<symbol> & <set>) != 0'. ** ** There are however more than 32 different symbols, so we must be more ** clever. We group some symbols that are syntactically equivalent like ** '*', '/' in a class. We use the least significant 3 bits to differentiate ** between members in one class. And now every symbol class, many of which ** contain just one symbol, has exactely one of the remaining most ** significant 29 bits set. Thus sets of symbols are represented as ** unsigned long integers, which is typedef-ed to 'TypSymbolSet'. ** ** The classes are as follows, all other symbols are in a class themself: ** if, for, repeat, while, return ** elif, else ** not, and, or ** =, <>, <, >=, <=, >, in ** +, - ** *, /, mod, ^ ** ** 'TypSymbolSet' is defined in the definition file of this package as ** follows: */ typedef unsigned long TypSymbolSet; /**************************************************************************** ** *F IS_IN( <symbol>, <set> ) . . . . . . . . is a symbol in a set of symbols ** ** 'IS_IN' returns 1 if the symbol <symbol> is in the symbol set <set> and 0 ** otherwise. Due to the grouping into classes some symbol sets may contain ** more than mentioned, for example 'IS_IN(S_POW,S_MULT|S_DIV|S_MOD)' is 1. ** ** 'IS_IN' is defined in the definition file of this package as follows: */ #define IS_IN(SYMBOL,SET) ((SYMBOL) & ((SET) & ~7)) /**************************************************************************** ** *V EXPRBEGIN . . . . . . . . . . . . set of symbols that start an expression *V STATBEGIN . . . . . . . . . . . . . set of symbols that start a statement ** ** 'EXPRBEGIN' is the set of symbols that might start an expression. 
** 'STATBEGIN' is the set of symbols that might start a stament, this is a ** superset of 'EXPRBEGIN', since expressions are themselfs statments. ** ** 'EXPRBEGIN' and 'STATBEGIN' are defined in the definition file of this ** package as follows: */ #define EXPRBEGIN (S_IDENT|S_INT|S_STRING|S_LPAREN|S_FUNCTION) #define STATBEGIN (EXPRBEGIN|S_IF|S_FOR|S_WHILE|S_REPEAT|S_RETURN) /**************************************************************************** ** *V Value . . . . . . . . . . . . value of the identifier, integer or string ** ** If 'Symbol' is 'S_IDENT', 'S_INT' or 'S_TRING' the variable 'Value' holds ** the name of the identifier, the digits of the integer or the value of the ** string constant. ** ** Note that the size of 'Value' limits the maximal number of significant ** characters of an identifier, the maximal size of an integer and the ** maximal length of a string. 'GetIdent', 'GetInt' and 'GetStr' truncate ** identifier, integers or strings after that many characters. */ extern char Value [1024]; /**************************************************************************** ** *V NrError . . . . . . . . . . . . . . . . number of errors in current expr *V NrErrLine . . . . . . . . . . . . . . . number of errors on current line ** ** 'NrError' is an integer whose value is the number of errors already found ** in the current expression. It is set to 0 at the beginning of 'Read' and ** incremented with each 'SyntaxError' call, including those from 'Match'. ** ** If 'NrError' is greater than zero the parser functions will not create ** new bags. This prevents the parser from creating new bags after an error ** occured. ** ** 'NrErrLine' is an integer whose value is the number of errors found on ** the current line. It is set to 0 in 'GetLine' and incremented with each ** 'SyntaxError' call, including those from 'Match'. ** ** If 'NrErrLine' is greater than zero 'SyntaxError' will not print an ** error message. 
This prevents the printing of multiple error messages for ** one line, since they probabely just reflect the fact that the parser ** has not resynchronized yet. */ extern long NrError; extern long NrErrLine; /**************************************************************************** ** *V Prompt . . . . . . . . . . . . . . . . . . . . . . prompt to be printed ** ** 'Prompt' holds the string that is to be printed if a new line is read ** from the interactive files '*stdin*' or '*errin*'. ** ** It is set to 'gap> ' or 'brk> ' in the read-eval-print loops and changed ** to the partial prompt '> ' in 'Read' after the first symbol is read. */ extern char * Prompt; /**************************************************************************** ** *F SyntaxError( <msg> ) . . . . . . . . . . . . . . . raise a syntax error ** ** 'SyntaxError' prints the current line, followed by the error message: ** ** ^ syntax error, <msg> in <current file name> ** ** with the '^' pointing to the current symbol on the current line. If the ** <current file name> is '*stdin*' it is not printed. ** ** 'SyntaxError' is called from the parser to print error messages for those ** errors that are not cought by 'Match', for example if the left hand side ** of an assignment is not a variable, a list element or a record component, ** or if two formal arguments of a function have the same identifier. It is ** also called for warnings, for example if a statement has no effect. ** ** 'SyntaxError' first increments 'NrError' by 1. If 'NrError' is greater ** than zero the parser functions will not create new bags. This prevents ** the parser from creating new bags after an error occured. ** ** 'SyntaxError' also increments 'NrErrLine' by 1. If 'NrErrLine' is ** greater than zero 'SyntaxError' will not print an error message. This ** prevents the printing of multiple error messages for one line, since they ** probabely just reflect the fact that the parser has not resynchronized ** yet. 
**  'NrErrLine' is reset to 0 if a new line is read in 'GetLine'.
*/
void            SyntaxError P(( char * msg ));


/****************************************************************************
**
*F  Match( <symbol>, <msg>, <skipto> )  . match current symbol and fetch next
**
**  'Match' is the main interface between the scanner and the parser.  It
**  performs the 4 most common actions in the scanner with just one call.
**  First it checks that the current symbol stored in the variable 'Symbol'
**  is the expected symbol as passed in the argument <symbol>.  If it is,
**  'Match' reads the next symbol from input and returns.  Otherwise 'Match'
**  first prints the current input line followed by the syntax error message:
**  '^ syntax error, <msg> expected' with '^' pointing to the current symbol.
**  It then skips symbols up to one in the resynchronisation set <skipto>.
**  Actually 'Match' calls 'SyntaxError' so its comments apply here too.
**
**  One kind of typical 'Match' call has the form
**
**      'Match( Symbol, "", 0L );'.
**
**  This is used if the parser knows that the current symbol is correct, for
**  example in 'RdReturn' the first symbol must be 'S_RETURN', otherwise
**  'RdReturn' would not have been called.  Called this way 'Match' will of
**  course never raise a syntax error, therefore <msg> and <skipto> are of
**  no concern, they are passed nevertheless to please lint.  The effect of
**  this call is merely to read the next symbol from input.
**
**  Another typical 'Match' call is in 'RdIf' after we read the if symbol and
**  the condition following, and now expect to see the 'then' symbol:
**
**      Match( S_THEN, "then", STATBEGIN|S_ELIF|S_ELSE|S_FI|follow );
**
**  If the current symbol is 'S_THEN' it is matched and the next symbol is
**  read.  Otherwise 'Match' prints the current line followed by the error
**  message: '^ syntax error, then expected'.
Then 'Match' skips all symbols ** until finding either a symbol that can begin a statment, an 'elif' or ** 'else' or 'fi' symbol, or a symbol that is contained in the set <follow> ** which is passed to 'RdIf' and contains all symbols allowing one of the ** calling functions to resynchronize, for example 'S_OD' if 'RdIf' has been ** called from 'RdFor'. <follow> always contain 'S_EOF', which 'Read' uses ** to resynchronise. ** ** If 'Match' needs to read a new line from '*stdin*' or '*errin*' to get ** the next symbol it prints the string pointed to by 'Prompt'. */ void Match P(( unsigned long symbol, char * msg, TypSymbolSet skipto )); /**************************************************************************** ** *F Pr( <format>, <arg1>, <arg2> ) . . . . . . . . . print formatted output ** ** 'Pr' is the output function. The first argument is a 'printf' like format ** string containing up to 2 '%' format fields, specifing how the ** corresponding arguments are to be printed. The two arguments are passed ** as 'long' integers. This is possible since every C object ('int', ** 'char', pointers) except 'float' or 'double', which are not used in GAP, ** can be converted to a 'long' without loss of information. ** ** The function 'Pr' currently support the following '%' format fields: ** '%c' the corresponding argument represents a character, usually it is ** its ASCII or EBCDIC code, and this character is printed. ** '%s' the corresponding argument is the address of a null terminated ** character string which is printed. ** '%d' the corresponding argument is a signed integer, which is printed. ** Between the '%' and the 'd' an integer might be used to specify ** the width of a field in which the integer is right justified. If ** the first character is '0' 'Pr' pads with '0' instead of <space>. ** '%>' increment the indentation level. ** '%<' decrement the indentation level. ** '%%' can be used to print a single '%' character. No argument is used. 
** ** You must always cast the arguments to '(long)' to avoid problems with ** those compilers with a default integer size of 16 instead of 32 bit. You ** must pass 0L if you don't make use of an argument to please lint. */ void Pr P(( char * format, long arg1, long arg2 )); /**************************************************************************** ** *F OpenInput( <filename> ) . . . . . . . . . . open a file as current input ** ** 'OpenInput' opens the file with the name <filename> as current input. ** All subsequent input will be taken from that file, until it is closed ** again with 'CloseInput' or another file is opened with 'OpenInput'. ** 'OpenInput' will not close the current file, i.e., if <filename> is ** closed again, input will again be taken from the current input file. ** ** 'OpenInput' returns 1 if it could successfully open <filename> for ** reading and 0 to indicate failure. 'OpenInput' will fail if the file ** does not exist or if you do not have permissions to read it. 'OpenInput' ** may also fail if you have too many files open at once. It is system ** dependent how many are too many, but 16 files should work everywhere. ** ** Directely after the 'OpenInput' call the variable 'Symbol' has the value ** 'S_ILLEGAL' to indicate that no symbol has yet been read from this file. ** The first symbol is read by 'Read' in the first call to 'Match' call. ** ** You can open '*stdin*' to read from the standard input file, which is ** usually the terminal, or '*errin*' to read from the standard error file, ** which is the terminal even if '*stdin*' is redirected from a file. ** 'OpenInput' passes those file names to 'SyFopen' like any other name, ** they are just a convention between the main and the system package. ** 'SyFopen' and thus 'OpenInput' will fail to open '*errin*' if the file ** 'stderr' (Unix file descriptor 2) is not a terminal, because of a ** redirection say, to avoid that break loops take their input from a file. 
** ** It is not neccessary to open the initial input file, 'InitScanner' opens ** '*stdin*' for that purpose. This file on the other hand can not be ** closed by 'CloseInput'. */ long OpenInput P(( char * filename )); /**************************************************************************** ** *F CloseInput() . . . . . . . . . . . . . . . . . close current input file ** ** 'CloseInput' will close the current input file. Subsequent input will ** again be taken from the previous input file. 'CloseInput' will return 1 ** to indicate success. ** ** 'CloseInput' will not close the initial input file '*stdin*', and returns ** 0 if such an attempt is made. This is used in 'Error' which calls ** 'CloseInput' until it returns 0, therebye closing all open input files. ** ** Calling 'CloseInput' if the corresponding 'OpenInput' call failed will ** close the current output file, which will lead to very strange behaviour. */ long CloseInput P(( void )); /**************************************************************************** ** *F OpenOutput( <filename> ) . . . . . . . . . open a file as current output ** ** 'OpenOutput' opens the file with the name <filename> as current output. ** All subsequent output will go to that file, until either it is closed ** again with 'CloseOutput' or another file is opened with 'OpenOutput'. ** The file is truncated to size 0 if it existed, otherwise it is created. ** 'OpenOutput' does not close the current file, i.e., if <filename> is ** closed again, output will go again to the current output file. ** ** 'OpenOutput' returns 1 if it could successfully open <filename> for ** writing and 0 to indicate failure. 'OpenOutput' will fail if you do not ** have permissions to create the file or write to it. 'OpenOutput' may ** also fail if you have too many files open at once. It is system ** dependent how many are too many, but 16 files should work everywhere. 
** ** You can open '*stdout*' to write to the standard output file, which is ** usually the terminal, or '*errout*' to write to the standard error file, ** which is the terminal even if '*stdout*' is redirected to a file. ** 'OpenOutput' passes those file names to 'SyFopen' like any other name, ** they are just a convention between the main and the system package. ** ** It is not neccessary to open the initial output file, 'InitScanner' opens ** '*stdout*' for that purpose. This file on the other hand can not be ** closed by 'CloseOutput'. */ long OpenOutput P(( char * filename )); /**************************************************************************** ** *F CloseOutput() . . . . . . . . . . . . . . . . . close current output file ** ** 'CloseOutput' will first flush all pending output and then close the ** current output file. Subsequent output will again go to the previous ** output file. 'CloseOutput' returns 1 to indicate success. ** ** 'CloseOutput' will not close the initial output file '*stdout*', and ** returns 0 if such attempt is made. This is used in 'Error' which calls ** 'CloseOutput' until it returns 0, thereby closing all open output files. ** ** Calling 'CloseOutput' if the corresponding 'OpenOutput' call failed will ** close the current output file, which will lead to very strange behaviour. ** On the other hand if you forget to call 'CloseOutput' at the end of a ** 'PrintTo' call or an error will not yield much better results. */ long CloseOutput P(( void )); /**************************************************************************** ** *F OpenAppend( <filename> ) . . open a file as current output for appending ** ** 'OpenAppend' opens the file with the name <filename> as current output. ** All subsequent output will go to that file, until either it is closed ** again with 'CloseAppend' or another file is opened with 'OpenOutput'. ** Unlike 'OpenOutput' 'OpenAppend' does not truncate the file to size 0 if ** it exists. 
Appart from that 'OpenAppend' is equal to 'OpenOutput' so its ** description applies to 'OpenAppend' too. */ long OpenAppend P(( char * filename )); /**************************************************************************** ** *F CloseAppend() . . . . . . . . . . . . . . . . . close current output file ** ** 'CloseAppend' will first flush all pending output and then close the ** current output file. Subsequent output will again go to the previous ** output file. 'CloseAppend' returns 1 to indicate success. 'CloseAppend' ** is exactely equal to 'CloseOutput' so its description applies. */ long CloseAppend P(( void )); /**************************************************************************** ** *F OpenLog( <filename> ) . . . . . . . . . . . . . log interaction to a file ** ** 'OpenLog' instructs the scanner to echo all input from the files ** '*stdin*' and '*errin*' and all output to the files '*stdout*' and ** '*errout*' to the file with name <filename>. The file is truncated to ** size 0 if it existed, otherwise it is created. ** ** 'OpenLog' returns 1 if it could successfully open <filename> for writing ** and 0 to indicate failure. 'OpenLog' will fail if you do not have ** permissions to create the file or write to it. 'OpenOutput' may also ** fail if you have too many files open at once. It is system dependent how ** many are too many, but 16 files should work everywhere. Finally ** 'OpenLog' will fail if there is already a current logfile. */ long OpenLog P(( char * filename )); /**************************************************************************** ** *F CloseLog() . . . . . . . . . . . . . . . . . . close the current logfile ** ** 'CloseLog' closes the current logfile again, so that input from '*stdin*' ** and '*errin*' and output to '*stdout*' and '*errout*' will no longer be ** echoed to a file. 'CloseLog' will return 1 to indicate success. ** ** 'CloseLog' will fail if there is no logfile active and will return 0 in ** this case. 
*/ long CloseLog P(( void )); /**************************************************************************** ** *F OpenInputLog( <filename> ) . . . . . . . . . . . . . log input to a file ** ** 'OpenInputLog' instructs the scanner to echo all input from the files ** '*stdin*' and '*errin*' to the file with name <filename>. The file is ** truncated to size 0 if it existed, otherwise it is created. ** ** 'OpenInputLog' returns 1 if it could successfully open <filename> for ** writing and 0 to indicate failure. 'OpenInputLog' will fail if you do ** not have permissions to create the file or write to it. 'OpenInputLog' ** may also fail if you have too many files open at once. It is system ** dependent how many are too many, but 16 files should work everywhere. ** Finally 'OpenInputLog' will fail if there is already a current logfile. */ long OpenInputLog P(( char* )); /**************************************************************************** ** *F CloseInputLog() . . . . . . . . . . . . . . . . close the current logfile ** ** 'CloseInputLog' closes the current logfile again, so that input from ** '*stdin*' and '*errin*' will no longer be echoed to a file. ** 'CloseInputLog' will return 1 to indicate success. ** ** 'CloseInputLog' will fail if there is no logfile active and will return 0 ** in this case. */ long CloseInputLog P(( void )); /**************************************************************************** ** *F OpenTest( <filename> ) . . . . . . . . open an input file for test mode ** ** 'OpenTest' opens the file with the name <filename> as current input for ** test mode. All subsequent input will be taken from that file, until it ** is closed again with 'CloseTest' or another file is opened with ** 'OpenInput'. 'OpenTest' will not close the current file, i.e., if ** <filename> is closed again, input will be taken again from the current ** input file. ** ** Test mode works as follows. 
**  If the scanner is about to print a line to
**  the current output file (or to be more precise to the output file that
**  was current when 'OpenTest' was called) this line is compared with the
**  next line from the test input file, i.e., the one opened by 'OpenTest'.
**  If this line starts with '#>' and the rest of it matches the output line
**  the output line is not printed and the input comment line is discarded.
**  Otherwise the scanner prints the output line and does not discard the
**  input line.
**
**  On the other hand if an input line is encountered on the test input that
**  starts with '#>' the scanner assumes that this is an expected output line
**  that did not appear and echoes this line to the current output file.
**
**  The upshot is that you can write test files that consist of alternating
**  input and, as '#>' test comment lines, the expected output.  If GAP
**  behaves normally and produces the expected output then nothing is
**  printed.  But if something goes wrong you see what actually was printed
**  and what was expected instead.
**
**  As a convention GAP test files should end with a print statement like:
**
**    Print("prime 3.002 06-Jul-90 ",417000000/Runtime()," GAPstones\n");
**
**  without a matching '#>' comment line.  This tells the user that the test
**  file completed and also how much time it took.  The constant should be
**  such that a VAX 11/780 gets roughly 1000 GAPstones.
**
**  'OpenTest' returns 1 if it could successfully open <filename> for reading
**  and 0 to indicate failure.  'OpenTest' will fail if the file does not
**  exist or if you have no permissions to read it.  'OpenTest' may also fail
**  if you have too many files open at once.  It is system dependent how many
**  are too many, but 16 files should work everywhere.
**
**  Directly after the 'OpenTest' call the variable 'Symbol' has the value
**  'S_ILLEGAL' to indicate that no symbol has yet been read from this file.
**  The first symbol is read by 'Read' in the first call to 'Match'.
*/
long            OpenTest P(( char * filename ));


/****************************************************************************
**
*F  CloseTest() . . . . . . . . . . . . . . . . . . close the test input file
**
**  'CloseTest' closes the current test input file and ends test mode.
**  Subsequent input will again be taken from the previous input file.
**  Output will no longer be compared with comment lines from the test input
**  file.  'CloseTest' will return 1 to indicate success.
**
**  'CloseTest' will not close a non test input file and returns 0 if such an
**  attempt is made.
*/
long            CloseTest P(( void ));


/****************************************************************************
**
*F  InitScanner() . . . . . . . . . . . . . .  initialize the scanner package
**
**  'InitScanner' initializes the scanner package.  This just sets the
**  current input file to '*stdin*' and current output file to '*stdout*'.
*/
void            InitScanner P(( void ));
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.