This is scanner.c in view mode; [Download] [Up]
/**************************************************************************** ** *A scanner.c GAP source Martin Schoenert ** *A @(#)$Id: scanner.c,v 3.22.1.1 1995/05/18 10:59:06 mschoene Rel $ ** *Y Copyright 1990-1992, Lehrstuhl D fuer Mathematik, RWTH Aachen, Germany ** ** This file contains the functions of the scanner, which is responsible for ** all input and output processing. ** ** The scanner exports two very important abstractions. The first is the ** concept that an input file is a stream of symbols, such nasty things as ** <space>, <tab>, <newline> characters or comments (they are worst :-), ** characters making up identifiers or digits that make up integers are ** hidden from the rest of GAP. ** ** The second is the concept of a current input and output file. In the ** main module they are opened and closed with the 'OpenInput' and ** 'CloseInput' respectively 'OpenOutput' and 'CloseOutput' calls. All the ** other modules just read from the current input and write to the current ** output file. ** ** The scanner relies on the functions provided by the operating system ** dependent module 'system.c' for the low level input/output. ** *H $Log: scanner.c,v $ *H Revision 3.22.1.1 1995/05/18 10:59:06 mschoene *H moved the handling of help requests to the scanner *H *H Revision 3.22 1994/04/20 10:10:36 mschoene *H changed 'GetSymbol' to allow formfeed as whitespace *H *H Revision 3.21 1993/10/15 09:20:37 martin *H renamed 'SyLinelength' to 'SyNrRows' *H *H Revision 3.20 1993/05/05 11:10:12 fceller *H added 'LogInputTo' *H *H Revision 3.19 1993/04/30 11:31:34 fceller *H removed 'GetQident', *H 'GetIdent' now calls 'GetSymbol' if the identifier starts *H with '\<newline>' *H *H Revision 3.18 1993/02/23 12:56:24 fceller *H fixed scanner from hanging while reading a single '#' without '\n' *H *H Revision 3.17 1992/12/16 19:46:21 martin *H added character constants *H *H Revision 3.16 1992/12/08 11:40:54 martin *H added '<list>{<positions>}' *H *H Revision 3.15 1992/01/28 16:13:16 martin *H fixed the printing *H *H Revision 3.14 1992/01/02 14:44:34 martin *H added magic variable '~' *H *H Revision 3.13 1991/06/03 07:09:49 martin *H changed the format of syntax error messages *H *H Revision 3.12 1991/05/22 09:45:33 martin *H fixed the counting of line numbers for very long lines *H *H Revision 3.11 1991/05/22 09:22:06 martin *H fixed the reading of very long comments *H *H Revision 3.10 1991/04/30 16:12:43 martin *H initial revision under RCS *H *H Revision 3.9 1991/01/23 12:00:00 martin *H improved 'SyntaxError' to print line numbers *H *H Revision 3.8 1991/01/23 12:00:00 martin *H improved 'Pr' to accept field width for strings *H *H Revision 3.7 1990/12/13 12:00:00 martin *H fixed 'OpenInput' to not write to 'InputFiles[-1]' *H *H Revision 3.6 1990/11/09 12:00:00 martin *H changed printing of lists, to avoid \ *H *H Revision 3.5 1990/10/04 12:00:00 martin *H extended scanner to allow '1var' and 'rec.1' *H *H Revision 3.4 1990/09/30 12:00:00 martin *H changed it back again, needs further thoughts *H *H Revision 3.3 1990/09/30 12:00:00 martin *H changed parsing of '<name>.<digits>' *H *H Revision 3.2 1990/09/10 12:00:00 martin *H fixed two pretty printer bugs *H *H Revision 3.1 1990/08/31 12:00:00 martin *H changed '(char)0xff' to ''\377'' as <eof> *H *H Revision 3.0 1990/08/28 12:00:00 martin *H fixed parsing of strings and quoted identifiers *H */ #include "system.h" /* system dependent functions */ #include "scanner.h" /* definition part of this package */ /**************************************************************************** ** *V Symbol . . . . . . . . . . . . . . . . . current symbol read from input ** ** The variable 'Symbol' contains the current symbol read from the input. ** It is represented as an unsigned long integer. ** ** The possible values for 'Symbol' are defined in the definition file of ** this package as follows: ** #define S_ILLEGAL (0L) #define S_IDENT ((1L<< 3)) #define S_INT ((1L<< 4)) #define S_CHAR ((1L<< 5)+0) #define S_STRING ((1L<< 5)+1) #define S_DOT ((1L<< 6)) #define S_LBRACK ((1L<< 7)+0) #define S_RBRACK ((1L<< 8)+0) #define S_LBRACE ((1L<< 7)+1) #define S_RBRACE ((1L<< 8)+1) #define S_LPAREN ((1L<< 9)) #define S_RPAREN ((1L<<10)) #define S_COMMA ((1L<<11)+0) #define S_DOTDOT ((1L<<11)+1) #define S_IF ((1L<<12)+0) #define S_THEN ((1L<<13)) #define S_ELIF ((1L<<14)+0) #define S_ELSE ((1L<<14)+1) #define S_FI ((1L<<15)) #define S_FOR ((1L<<12)+1) #define S_DO ((1L<<16)) #define S_OD ((1L<<17)) #define S_REPEAT ((1L<<12)+2) #define S_UNTIL ((1L<<18)) #define S_WHILE ((1L<<12)+3) #define S_ASSIGN ((1L<<19)) #define S_SEMICOLON ((1L<<20)) #define S_FUNCTION ((1L<<21)) #define S_LOCAL ((1L<<22)) #define S_END ((1L<<23)) #define S_RETURN ((1L<<12)+4) #define S_MAPTO ((1L<<24)) #define S_NOT ((1L<<25)+0) #define S_AND ((1L<<25)+1) #define S_OR ((1L<<25)+2) #define S_EQ ((1L<<26)+0) #define S_LT ((1L<<26)+1) #define S_GT ((1L<<26)+2) #define S_NE ((1L<<26)+3) #define S_LE ((1L<<26)+4) #define S_GE ((1L<<26)+5) #define S_IN ((1L<<26)+6) #define S_PLUS ((1L<<27)+0) #define S_MINUS ((1L<<27)+1) #define S_MULT ((1L<<28)+0) #define S_DIV ((1L<<28)+1) #define S_MOD ((1L<<28)+2) #define S_POW ((1L<<28)+3) #define S_QUIT ((1L<<29)) #define S_EOF ((1L<<30)) */ unsigned long Symbol; /**************************************************************************** ** *T TypSymbolSet . . . . . . . . . . . . . . . . . . type of sets of symbols ** ** 'TypSymbolSet' is the type of sets of symbols. Sets of symbols are used ** in the error recovery of the parser to specify that 'Match' should skip ** all symbols until finding one in a specified set. ** ** If there were less than 32 different symbols things would be very easy. ** We could simply assign the symbolic constants that are the possible ** values for 'Symbol' values 1, 2, 4, 8, 16, ... and so on. Then making a ** set would simply mean or-ing the values, as in 'S_INT|S_STRING', and ** checking whether a symbol is in a set would be '(<symbol> & <set>) != 0'. ** ** There are however more than 32 different symbols, so we must be more ** clever. We group some symbols that are syntactically equivalent like ** '*', '/' in a class. We use the least significant 3 bits to differentiate ** between members in one class. And now every symbol class, many of which ** contain just one symbol, has exactely one of the remaining most ** significant 29 bits set. Thus sets of symbols are represented as ** unsigned long integers, which is typedef-ed to 'TypSymbolSet'. ** ** The classes are as follows, all other symbols are in a class themself: ** if, for, repeat, while, return ** elif, else ** not, and, or ** =, <>, <, >=, <=, >, in ** +, - ** *, /, mod, ^ ** ** 'TypSymbolSet' is defined in the definition file of this package as ** follows: ** typedef unsigned long TypSymbolSet; */ /**************************************************************************** ** *F IS_IN( <symbol>, <set> ) . . . . . . . . is a symbol in a set of symbols ** ** 'IS_IN' returns 1 if the symbol <symbol> is in the symbol set <set> and 0 ** otherwise. Due to the grouping into classes some symbol sets may contain ** more than mentioned, for example 'IS_IN(S_POW,S_MULT|S_DIV|S_MOD)' is 1. ** ** 'IS_IN' is defined in the definition file of this package as follows: ** #define IS_IN(SYMBOL,SET) ((SYMBOL) & ((SET) & ~7)) */ /**************************************************************************** ** *V EXPRBEGIN . . . . . . . . . . . . set of symbols that start an expression *V STATBEGIN . . . . . . . . . . . . . set of symbols that start a statement ** ** 'EXPRBEGIN' is the set of symbols that might start an expression. ** 'STATBEGIN' is the set of symbols that might start a stament, this is a ** superset of 'EXPRBEGIN', since expressions are themselfs statments. ** ** 'EXPRBEGIN' and 'STATBEGIN' are defined in the definition file of this ** package as follows: ** #define EXPRBEGIN (S_IDENT|S_INT|S_STRING|S_LPAREN|S_FUNCTION) #define STATBEGIN (EXPRBEGIN|S_IF|S_FOR|S_WHILE|S_REPEAT|S_RETURN) */ /**************************************************************************** ** *V Value . . . . . . . . . . . . value of the identifier, integer or string ** ** If 'Symbol' is 'S_IDENT', 'S_INT' or 'S_TRING' the variable 'Value' holds ** the name of the identifier, the digits of the integer or the value of the ** string constant. ** ** Note that the size of 'Value' limits the maximal number of significant ** characters of an identifier, the maximal size of an integer and the ** maximal length of a string. 'GetIdent', 'GetInt' and 'GetStr' truncate ** identifier, integers or strings after that many characters. */ char Value [1024]; /**************************************************************************** ** *V NrError . . . . . . . . . . . . . . . . number of errors in current expr *V NrErrLine . . . . . . . . . . . . . . . number of errors on current line ** ** 'NrError' is an integer whose value is the number of errors already found ** in the current expression. It is set to 0 at the beginning of 'Read' and ** incremented with each 'SyntaxError' call, including those from 'Match'. ** ** If 'NrError' is greater than zero the parser functions will not create ** new bags. This prevents the parser from creating new bags after an error ** occured. ** ** 'NrErrLine' is an integer whose value is the number of errors found on ** the current line. It is set to 0 in 'GetLine' and incremented with each ** 'SyntaxError' call, including those from 'Match'. ** ** If 'NrErrLine' is greater than zero 'SyntaxError' will not print an ** error message. This prevents the printing of multiple error messages for ** one line, since they probabely just reflect the fact that the parser ** has not resynchronized yet. */ long NrError; long NrErrLine; /**************************************************************************** ** *V Prompt . . . . . . . . . . . . . . . . . . . . . . prompt to be printed ** ** 'Prompt' holds the string that is to be printed if a new line is read ** from the interactive files '*stdin*' or '*errin*'. ** ** It is set to 'gap> ' or 'brk> ' in the read-eval-print loops and changed ** to the partial prompt '> ' in 'Read' after the first symbol is read. */ char * Prompt; /**************************************************************************** ** *T TypInputFile . . . . . . . . . . structure of an open input file, local *V InputFiles[] . . . . . . . . . . . . . stack of open input files, local *V Input . . . . . . . . . . . . . . . pointer to current input file, local *V In . . . . . . . . . . . . . . . . . pointer to current character, local ** ** 'TypInputFile' describes the information stored for open input files: ** 'file' holds the file identifier which is received from 'SyFopen' and ** which is passed to 'SyFgets' and 'SyFclose' to identify this file. ** 'name' is the name of the file, this is only used in error messages. ** 'line' is a buffer that holds the current input line. This is always ** terminated by the character '\0'. Because 'line' holds only part of the ** line for very long lines the last character need not be a <newline>. ** 'ptr' points to the current character within that line. This is not used ** for the current input file, where 'In' points to the current character. ** 'number' is the number of the current line, is used in error messages. ** ** 'InputFiles' is the stack of the open input files. It is represented as ** an array of structures of type 'TypInputFile'. ** ** 'Input' is a pointer to the current input file. It points to the top of ** the stack 'InputFiles'. ** ** 'In' is a pointer to the current input character, i.e., '*In' is the ** current input character. It points into the buffer 'Input->line'. */ typedef struct { long file; char name [64]; char line [256]; char * ptr; long number; } TypInputFile; TypInputFile InputFiles [16]; TypInputFile * Input; char * In; /**************************************************************************** ** *T TypOutputFiles . . . . . . . . . structure of an open output file, local *V OutputFiles . . . . . . . . . . . . . . stack of open output files, local *V Output . . . . . . . . . . . . . . pointer to current output file, local ** ** 'TypOutputFile' describes the information stored for open output files: ** 'file' holds the file identifier which is received from 'SyFopen' and ** which is passed to 'SyFputs' and 'SyFclose' to identify this file. ** 'line' is a buffer that holds the current output line. ** 'pos' is the position of the current character on that line. ** ** 'OutputFiles' is the stack of open output files. It is represented as ** an array of structures of type 'TypOutputFile'. ** ** 'Output' is a pointer to the current output file. It points to the top ** of the stack 'OutputFiles'. */ typedef struct { long file; char line [256]; long pos; long indent; long spos; long sindent; } TypOutputFile; TypOutputFile OutputFiles [16]; TypOutputFile * Output; /**************************************************************************** ** *V Logfile . . . . . . . . . . . . . . . . file identifier of logfile, local ** ** 'Logfile' is the file identifier of the current logfile. If this is not ** -1 the scanner echoes all input from the files '*stdin*' and '*errin*' ** and all output to the files '*stdout*' and '*errout*' to this file. */ long Logfile = -1; /**************************************************************************** ** *V InputLogfile . . . . . . . . . . . . . file identifier of logfile, local ** ** 'InputLogfile' is the file identifier of the current logfile. If it is ** not -1 the scanner echoes input from the files '*stdin*' and '*errin*' ** to this file. */ long InputLogfile = -1; /**************************************************************************** ** *V TestInput . . . . . . . . . . . . . file identifier of test input, local *V TestOutput . . . . . . . . . . . . file identifier of test output, local *V TestLine . . . . . . . . . . . . . . . . one line from test input, local ** ** 'TestInput' is the file identifier of the file for test input. If this ** is not -1 and 'GetLine' reads a line from 'TestInput' that begins with ** '#>' 'GetLine' assumes that this was expected as output that did not ** appear and echoes this input line to 'TestOutput'. ** ** 'TestOutput' is the current output file for test output. If 'TestInput' ** is not -1 then 'PutLine' compares every line that is about to be printed ** to 'TestOutput' with the next line from 'TestInput'. If this line starts ** with '#>' and the rest of it matches the output line the output line is ** not printed and the input comment line is discarded. Otherwise 'PutLine' ** prints the output line and does not discard the input line. ** ** 'TestLine' holds the one line that is read from 'TestInput' to compare it ** with a line that is about to be printed to 'TestOutput'. */ long TestInput = -1; long TestOutput = -1; char TestLine [256]; /**************************************************************************** ** *F GetLine() . . . . . . . . . . . . . . . . . . . . . . . get a line, local ** ** 'GetLine' fetches another line from the input file 'Input->file' into the ** buffer 'Input->line', sets the pointer 'In' to the beginning of this ** buffer and returns the first character from the line. ** ** If the input file is '*stdin*' or '*errin*' 'GetLine' first prints ** 'Prompt', unless it is '*stdin*' and GAP was called with option '-q'. ** ** If there is a logfile in use and the input file is '*stdin*' or '*errin*' ** 'GetLine' echoes the new line to the logfile. */ char GetLine () { /* if file is '*stdin*' or '*errin*' print the prompt and flush it */ if ( Input->file == 0 ) { if ( ! SyQuiet ) Pr( "%s%c", (long)Prompt, (long)'\03' ); else Pr( "%c", (long)'\03', 0L ); } else if ( Input->file == 2 ) { Pr( "%s%c", (long)Prompt, (long)'\03' ); } /* bump the line number */ if ( Input->line < In && (*(In-1) == '\n' || *(In-1) == '\r') ) Input->number++; /* initialize 'In', no errors on this line so far */ In = Input->line; In[0] = '\0'; NrErrLine = 0; /* if the input file is the test input there may be one line waiting */ if ( Input->file == TestInput && TestLine[0] != '\0' ) { SyStrncat( In, TestLine, sizeof(Input->line) ); TestLine[0] = '\0'; } /* otherwise try to read a line */ else if ( ! SyFgets( In, sizeof(Input->line), Input->file ) ) { In[0] = '\377'; In[1] = '\0'; return *In; } /* deal with help requests (preliminary hack) */ if ( In[0] == '?' ) { In[SyStrlen(In)-1] = '\0'; SyHelp( In+1, Input->file ); In[0] = '\n'; In[1] = '\0'; } /* if neccessary echo the line to the logfile */ if ( Logfile != -1 && (Input->file == 0 || Input->file == 2) ) SyFputs( In, Logfile ); if ( InputLogfile != -1 && (Input->file == 0 || Input->file == 2) ) SyFputs( In, InputLogfile ); /* if this input file is the test input look for unmatched '#>' lines */ if ( Input->file == TestInput && In[0] == '#' && In[1] == '>' ) { SyFputs( In, TestOutput ); return GetLine(); } /* return the current character */ return *In; } /**************************************************************************** ** *F GET_CHAR() . . . . . . . . . . . . . . . . get the next character, local ** ** 'GET_CHAR' returns the next character from the current input file. This ** character is afterwords also available as '*In'. ** ** For efficiency reasons 'GET_CHAR' is a macro that just increments the ** pointer 'In' and checks that there is another character. If not, for ** example at the end a line, 'GET_CHAR' calls 'GetLine' to fetch a new line ** from the input file. */ #define GET_CHAR() (*++In != '\0' ? *In : GetLine()) /**************************************************************************** ** *F GetIdent() . . . . . . . . . . . . . get an identifier or keyword, local ** ** 'GetIdent' reads an identifier from the current input file into the ** variable 'Value' and sets 'Symbol' to 'S_IDENT'. The first character of ** the identifier is the current character pointed to by 'In'. If the ** characters make up a keyword 'GetIdent' will set 'Symbol' to the ** corresponding value. The parser will ignore 'Value' in this case. ** ** An identifier consists of a letter followed by more letters, digits and ** underscores '_'. An identifier is terminated by the first character not ** in this class. The escape sequence '\<newline>' is ignored, making it ** possible to split long identifiers over multiple lines. The backslash ** '\' can be used to include special characters like '(' in identifiers. ** For example 'G\(2\,5\)' is an identifier not a call to a function 'G'. ** ** The size of 'Value' limits the number of significant characters in an ** identifier. If an identifier has more characters 'GetIdent' will ** silently truncate it. ** ** After reading the identifier 'GetIdent' looks at the first and the last ** character of 'Value' to see if it could possibly be a keyword. For ** example 'test' could not be a keyword because there is no keyword ** starting and ending with a 't'. After that test either 'GetIdent' knows ** that 'Value' is not a keyword, or there is a unique possible keyword that ** could match, because no two keywords have identical first and last ** characters. For example if 'Value' starts with 'f' and ends with 'n' the ** only possible keyword is 'function'. Thus in this case 'GetIdent' can ** decide with one string comparison if 'Value' holds a keyword or not. */ void GetSymbol P(( void )); void GetIdent () { long i; long isQuoted; /* initially it could be a keyword */ isQuoted = 0; /* read all characters into 'Value' */ for ( i=0; IsAlpha(*In) || IsDigit(*In) || *In=='_' || *In=='\\'; i++ ) { /* handle escape sequences */ if ( *In == '\\' ) { GET_CHAR(); if ( *In == '\n' && i == 0 ) { GetSymbol(); return; } else if ( *In == '\n' && i < sizeof(Value)-1 ) i--; else if ( *In == 'n' && i < sizeof(Value)-1 ) Value[i] = '\n'; else if ( *In == 't' && i < sizeof(Value)-1 ) Value[i] = '\t'; else if ( *In == 'r' && i < sizeof(Value)-1 ) Value[i] = '\r'; else if ( *In == 'b' && i < sizeof(Value)-1 ) Value[i] = '\b'; else if ( i < sizeof(Value)-1 ) Value[i] = *In; isQuoted = 1; } /* put normal chars into 'Value' but only if there is room */ else { if ( i < sizeof(Value)-1 ) Value[i] = *In; } /* read the next character */ GET_CHAR(); } /* terminate the identifier and lets assume that it is not a keyword */ if ( i < sizeof(Value)-1 ) Value[i] = '\0'; Symbol = S_IDENT; /* now check if 'Value' holds a keyword */ switch ( 256*Value[0]+Value[i-1] ) { case 256*'a'+'d': if(!SyStrcmp(Value,"and")) Symbol=S_AND; break; case 256*'d'+'o': if(!SyStrcmp(Value,"do")) Symbol=S_DO; break; case 256*'e'+'f': if(!SyStrcmp(Value,"elif")) Symbol=S_ELIF; break; case 256*'e'+'e': if(!SyStrcmp(Value,"else")) Symbol=S_ELSE; break; case 256*'e'+'d': if(!SyStrcmp(Value,"end")) Symbol=S_END; break; case 256*'f'+'i': if(!SyStrcmp(Value,"fi")) Symbol=S_FI; break; case 256*'f'+'r': if(!SyStrcmp(Value,"for")) Symbol=S_FOR; break; case 256*'f'+'n': if(!SyStrcmp(Value,"function"))Symbol=S_FUNCTION;break; case 256*'i'+'f': if(!SyStrcmp(Value,"if")) Symbol=S_IF; break; case 256*'i'+'n': if(!SyStrcmp(Value,"in")) Symbol=S_IN; break; case 256*'l'+'l': if(!SyStrcmp(Value,"local")) Symbol=S_LOCAL; break; case 256*'m'+'d': if(!SyStrcmp(Value,"mod")) Symbol=S_MOD; break; case 256*'n'+'t': if(!SyStrcmp(Value,"not")) Symbol=S_NOT; break; case 256*'o'+'d': if(!SyStrcmp(Value,"od")) Symbol=S_OD; break; case 256*'o'+'r': if(!SyStrcmp(Value,"or")) Symbol=S_OR; break; case 256*'r'+'t': if(!SyStrcmp(Value,"repeat")) Symbol=S_REPEAT; break; case 256*'r'+'n': if(!SyStrcmp(Value,"return")) Symbol=S_RETURN; break; case 256*'t'+'n': if(!SyStrcmp(Value,"then")) Symbol=S_THEN; break; case 256*'u'+'l': if(!SyStrcmp(Value,"until")) Symbol=S_UNTIL; break; case 256*'w'+'e': if(!SyStrcmp(Value,"while")) Symbol=S_WHILE; break; case 256*'q'+'t': if(!SyStrcmp(Value,"quit")) Symbol=S_QUIT; break; } /* if it is quoted it is an identifier */ if ( isQuoted ) Symbol = S_IDENT; } /**************************************************************************** ** *F GetInt() . . . . . . . . . . . . . . . . . . . . . get an integer, local ** ** 'GetInt' reads an integer number from the current input file into the ** variable 'Value' and sets 'Symbol' to 'S_INT'. The first character of ** the integer is the current character pointed to by 'In'. ** ** An integer is a sequence of digits '0..9'. The escape sequence ** '\<newline>' is ignored, making it possible to split long integers over ** multiple lines. ** ** If the sequence contains characters which are not digits 'GetInt' will ** interpret the sequence as an identifier and set 'Symbol' to 'S_IDENT'. ** ** The size of 'Value' limits the maximal number of digits of an integer. ** If an integer has more digits 'GetInt' issues a warning and truncates it. */ void GetInt () { long i; long isInt; isInt = 1; /* read the digits into 'Value' */ for ( i=0; IsDigit(*In) || IsAlpha(*In) || *In=='_' || *In=='\\'; i++ ) { /* handle escape sequences */ if ( *In == '\\' ) { GET_CHAR(); if ( *In == '\n' && i < sizeof(Value)-1 ) i--; else if ( *In == 'n' && i < sizeof(Value)-1 ) Value[i] = '\n'; else if ( *In == 't' && i < sizeof(Value)-1 ) Value[i] = '\t'; else if ( *In == 'r' && i < sizeof(Value)-1 ) Value[i] = '\r'; else if ( *In == 'b' && i < sizeof(Value)-1 ) Value[i] = '\b'; else if ( *In == 'c' && i < sizeof(Value)-1 ) Value[i] = '\03'; else if ( i < sizeof(Value)-1 ) Value[i] = *In; } /* put normal chars into 'Value' but only if there is room */ else { if ( i < sizeof(Value)-1 ) Value[i] = *In; } /* if the characters contain non digits it is a variable */ if ( ! IsDigit(*In) && *In != '\n' ) isInt = 0; /* get the next character */ GET_CHAR(); } /* check for numbers with too many digits */ if ( sizeof(Value)-1 <= i ) SyntaxError("integer must have less than 1024 digits"); /* terminate the integer */ if ( i < sizeof(Value)-1 ) Value[i] = '\0'; if ( isInt ) Symbol = S_INT; else Symbol = S_IDENT; } /**************************************************************************** ** *F GetStr() . . . . . . . . . . . . . . . . . . . . . . get a string, local ** ** 'GetStr' reads a string from the current input file into the variable ** 'Value' and sets 'Symbol' to 'S_STRING'. The opening double quote '"' ** of the string is the current character pointed to by 'In'. ** ** A string is a sequence of characters delimited by double quotes '"'. It ** must not include '"' or <newline> characters, but the escape sequences ** '\"' or '\n' can be used instead. The escape sequence '\<newline>' is ** ignored, making it possible to split long strings over multiple lines. ** ** An error is raised if the string includes a <newline> character or if the ** file ends before the closing '"'. ** ** The size of 'Value' limits the maximal number of characters in a string. ** If a string has more characters 'GetStr' issues a error and truncates it. */ void GetStr () { long i = 0; /* skip '"' */ GET_CHAR(); /* read all characters into 'Value' */ for ( i = 0; *In != '"' && *In != '\n' && *In != '\377'; i++ ) { /* handle escape sequences */ if ( *In == '\\' ) { GET_CHAR(); if ( *In == '\n' && i < sizeof(Value)-1 ) i--; else if ( *In == 'n' && i < sizeof(Value)-1 ) Value[i] = '\n'; else if ( *In == 't' && i < sizeof(Value)-1 ) Value[i] = '\t'; else if ( *In == 'r' && i < sizeof(Value)-1 ) Value[i] = '\r'; else if ( *In == 'b' && i < sizeof(Value)-1 ) Value[i] = '\b'; else if ( *In == 'c' && i < sizeof(Value)-1 ) Value[i] = '\03'; else if ( i < sizeof(Value)-1 ) Value[i] = *In; } /* put normal chars into 'Value' but only if there is room */ else { if ( i < sizeof(Value)-1 ) Value[i] = *In; } /* read the next character */ GET_CHAR(); } /* check for error conditions */ if ( *In == '\n' ) SyntaxError("string must not include <newline>"); if ( *In == '\377' ) SyntaxError("string must end with \" before end of file"); if ( sizeof(Value)-1 <= i ) SyntaxError("string must have less than 1024 characters"); /* terminate the string, set 'Symbol' and skip trailing '"' */ if ( i < sizeof(Value)-1 ) Value[i] = '\0'; Symbol = S_STRING; if ( *In == '"' ) GET_CHAR(); } /**************************************************************************** ** *F GetChar() . . . . . . . . . . . . . . . . . get a single character, local ** ** 'GetChar' reads the next character from the current input file into the ** variable 'Value' and sets 'Symbol' to 'S_CHAR'. The opening single quote ** '\'' of the character is the current character pointed to by 'In'. ** ** A character is a single character delimited by single quotes '\''. It ** must not be '\'' or <newline>, but the escape sequences '\\\'' or '\n' ** can be used instead. */ void GetChar () { /* skip '\'' */ GET_CHAR(); /* handle escape equences */ if ( *In == '\\' ) { GET_CHAR(); if ( *In == 'n' ) Value[0] = '\n'; else if ( *In == 't' ) Value[0] = '\t'; else if ( *In == 'r' ) Value[0] = '\r'; else if ( *In == 'b' ) Value[0] = '\b'; else if ( *In == 'c' ) Value[0] = '\03'; else Value[0] = *In; } /* put normal chars into 'Value' */ else { Value[0] = *In; } /* read the next character */ GET_CHAR(); /* check for terminating single quote */ if ( *In != '\'' ) SyntaxError("missing single quote in character constant"); /* skip the closing quote */ Symbol = S_CHAR; if ( *In == '\'' ) GET_CHAR(); } /**************************************************************************** ** *F GetSymbol() . . . . . . . . . . . . . . . . . get the next symbol, local ** ** 'GetSymbol' reads the next symbol from the input, storing it in the ** variable 'Symbol'. If 'Symbol' is 'T_IDENT', 'T_INT' or 'T_STRING' the ** value of the symbol is stored in the variable 'Value'. 'GetSymbol' first ** skips all <space>, <tab> and <newline> characters and comments. ** ** After reading a symbol the current character is the first character ** beyond that symbol. */ void GetSymbol () { /* if no character is available then get one */ if ( *In == '\0' ) GET_CHAR(); /* skip over <spaces>, <tabs>, <newlines> and comments */ while (*In==' '||*In=='\t'||*In=='\n'||*In=='\r'||*In=='\f'||*In=='#') { if ( *In == '#' ) { while ( *In != '\n' && *In != '\r' && *In != '\377' ) GET_CHAR(); } GET_CHAR(); } /* switch according to the character */ switch ( *In ) { case '.': Symbol = S_DOT; GET_CHAR(); if ( *In == '.' ) { Symbol = S_DOTDOT; GET_CHAR(); break; } break; case '[': Symbol = S_LBRACK; GET_CHAR(); break; case ']': Symbol = S_RBRACK; GET_CHAR(); break; case '{': Symbol = S_LBRACE; GET_CHAR(); break; case '}': Symbol = S_RBRACE; GET_CHAR(); break; case '(': Symbol = S_LPAREN; GET_CHAR(); break; case ')': Symbol = S_RPAREN; GET_CHAR(); break; case ',': Symbol = S_COMMA; GET_CHAR(); break; case ':': Symbol = S_ILLEGAL; GET_CHAR(); if ( *In == '=' ) { Symbol = S_ASSIGN; GET_CHAR(); break; } break; case ';': Symbol = S_SEMICOLON; GET_CHAR(); break; case '=': Symbol = S_EQ; GET_CHAR(); break; case '<': Symbol = S_LT; GET_CHAR(); if ( *In == '=' ) { Symbol = S_LE; GET_CHAR(); break; } if ( *In == '>' ) { Symbol = S_NE; GET_CHAR(); break; } break; case '>': Symbol = S_GT; GET_CHAR(); if ( *In == '=' ) { Symbol = S_GE; GET_CHAR(); break; } break; case '+': Symbol = S_PLUS; GET_CHAR(); break; case '-': Symbol = S_MINUS; GET_CHAR(); if ( *In == '>' ) { Symbol=S_MAPTO; GET_CHAR(); break; } break; case '*': Symbol = S_MULT; GET_CHAR(); break; case '/': Symbol = S_DIV; GET_CHAR(); break; case '^': Symbol = S_POW; GET_CHAR(); break; case '"': GetStr(); break; case '\'': GetChar(); break; case '\\': GetIdent(); break; case '_': GetIdent(); break; case '~': Value[0] = '~'; Value[1] = '\0'; Symbol = S_IDENT; GET_CHAR(); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': GetInt(); break; case '\377': Symbol = S_EOF; *In = '\0'; break; default : if ( IsAlpha(*In) ) { GetIdent(); break; } Symbol = S_ILLEGAL; GET_CHAR(); break; } } /**************************************************************************** ** *F SyntaxError( <msg> ) . . . . . . . . . . . . . . . raise a syntax error ** ** 'SyntaxError' prints the current line, followed by the error message: ** ** ^ syntax error, <msg> in <current file name> ** ** with the '^' pointing to the current symbol on the current line. If the ** <current file name> is '*stdin*' it is not printed. ** ** 'SyntaxError' is called from the parser to print error messages for those ** errors that are not cought by 'Match', for example if the left hand side ** of an assignment is not a variable, a list element or a record component, ** or if two formal arguments of a function have the same identifier. It is ** also called for warnings, for example if a statement has no effect. ** ** 'SyntaxError' first increments 'NrError' by 1. If 'NrError' is greater ** than zero the parser functions will not create new bags. This prevents ** the parser from creating new bags after an error occured. ** ** 'SyntaxError' also increments 'NrErrLine' by 1. If 'NrErrLine' is ** greater than zero 'SyntaxError' will not print an error message. This ** prevents the printing of multiple error messages for one line, since they ** probabely just reflect the fact that the parser has not resynchronized ** yet. 'NrErrLine' is reset to 0 if a new line is read in 'GetLine'. */ void SyntaxError ( msg ) char * msg; { long i; /* one more error */ NrError++; NrErrLine++; /* do not print a message if we found one already on the current line */ if ( NrErrLine != 1 ) return; /* print the message and the filename, unless it is '*stdin*' */ Pr( "Syntax error: %s", (long)msg, 0L ); if ( SyStrcmp( "*stdin*", Input->name ) != 0 ) Pr( " in %s line %d", (long)Input->name, (long)Input->number ); Pr( "\n", 0L, 0L ); /* print the current line */ Pr( "%s", (long)Input->line, 0L ); /* print a '^' pointing to the current position */ for ( i = 0; i < In - Input->line - 1; i++ ) { if ( Input->line[i] == '\t' ) Pr("\t",0L,0L); else Pr(" ",0L,0L); } Pr( "^\n", 0L, 0L ); } /**************************************************************************** ** *F Match( <symbol>, <msg>, <skipto> ) . match current symbol and fetch next ** ** 'Match' is the main interface between the scanner and the parser. It ** performs the 4 most common actions in the scanner with just one call. ** First it checks that the current symbol stored in the variable 'Symbol' ** is the expected symbol as passed in the argument <symbol>. If it is, ** 'Match' reads the next symbol from input and returns. Otherwise 'Match' ** first prints the current input line followed by the syntax error message: ** '^ syntax error, <msg> expected' with '^' pointing to the current symbol. ** It then skips symbols up to one in the resynchronisation set <skipto>. ** Actually 'Match' calls 'SyntaxError' so its comments apply here too. ** ** One kind of typical 'Match' call has the form ** ** 'Match( Symbol, "", 0L );'. ** ** This is used if the parser knows that the current symbol is correct, for ** example in 'RdReturn' the first symbol must be 'S_RETURN', otherwise ** 'RdReturn' would not have been called. Called this way 'Match' will of ** course never raise an syntax error, therefore <msg> and <skipto> are of ** no concern, they are passed nevertheless to please lint. The effect of ** this call is merely to read the next symbol from input. ** ** Another typical 'Match' call is in 'RdIf' after we read the if symbol and ** the condition following, and now expect to see the 'then' symbol: ** ** Match( S_THEN, "then", STATBEGIN|S_ELIF|S_ELSE|S_FI|follow ); ** ** If the current symbol is 'S_THEN' it is matched and the next symbol is ** read. Otherwise 'Match' prints the current line followed by the error ** message: '^ syntax error, then expected'. Then 'Match' skips all symbols ** until finding either a symbol that can begin a statment, an 'elif' or ** 'else' or 'fi' symbol, or a symbol that is contained in the set <follow> ** which is passed to 'RdIf' and contains all symbols allowing one of the ** calling functions to resynchronize, for example 'S_OD' if 'RdIf' has been ** called from 'RdFor'. <follow> always contain 'S_EOF', which 'Read' uses ** to resynchronise. ** ** If 'Match' needs to read a new line from '*stdin*' or '*errin*' to get ** the next symbol it prints the string pointed to by 'Prompt'. */ void Match ( symbol, msg, skipto ) unsigned long symbol; char * msg; TypSymbolSet skipto; { char errmsg [256]; /* if 'Symbol' is the expected symbol match it away */ if ( symbol == Symbol ) { GetSymbol(); } /* else generate an error message and skip to a symbol in <skipto> */ else { errmsg[0] ='\0'; SyStrncat( errmsg, msg, sizeof(errmsg)-1 ); SyStrncat( errmsg, " expected", (long)(sizeof(errmsg)-1-SyStrlen(errmsg)) ); SyntaxError( errmsg ); while ( ! IS_IN( Symbol, skipto ) ) GetSymbol(); } } /**************************************************************************** ** *F PutLine() . . . . . . . . . . . . . . . . . . . . . . print a line, local ** ** 'PutLine' prints the current output line 'Output->line' to the current ** output file 'Output->file'. It is called from 'PutChr'. ** ** 'PutLine' also compares the output line with the next line from the test ** input file 'TestInput' if 'TestInput' is not -1. If this input line ** starts with '#>' and the rest of the line matches the output line then ** the output line is not printed and the input line is discarded. ** ** 'PutLine' also echoes the output line to the logfile 'Logfile' if ** 'Logfile' is not -1 and the output file is '*stdout*' or '*errout*'. ** ** Finally 'PutLine' checks whether the user has hit '<ctr>-C' to interrupt ** the printing. */ void PutLine () { /* if in test mode and the next input line matches print nothing */ if ( TestInput != -1 && TestOutput == Output->file && (TestLine[0]!='\0' || SyFgets(TestLine,sizeof(TestLine),TestInput)) && TestLine[0]=='#' && TestLine[1]=='>' && ! SyStrcmp( TestLine+2, Output->line ) ) { TestLine[0] = '\0'; } /* otherwise output this line */ else { SyFputs( Output->line, Output->file ); } /* if neccessary echo it to the logfile */ if ( Logfile != -1 && (Output->file == 1 || Output->file == 3)) SyFputs( Output->line, Logfile ); } /**************************************************************************** ** *F PutChr( <ch> ) . . . . . . . . . . . . . . . print character <ch>, local ** ** 'PutChr' prints the single character <ch> to the current output file. ** ** 'PutChr' buffers the output characters until either <ch> is <newline>, ** <ch> is '\03' (<flush>) or the buffer fills up. ** ** In the later case 'PutChr' has to decide where to split the output line. ** It takes the point at which $linelength - pos + 8 * indent$ is minimal. */ void PutChr ( ch ) char ch; { long i; char str [ 256 ]; /* '\01', increment indentation level */ if ( ch == '\01' ) { /* if this is a better place to split the line remember it */ if ( Output->indent < Output->pos && SyNrCols-Output->pos + 16*Output->indent <= SyNrCols-Output->spos + 16*Output->sindent ) { Output->spos = Output->pos; Output->sindent = Output->indent; } Output->indent++; } /* '\02', decrement indentation level */ else if ( ch == '\02' ) { /* if this is a better place to split the line remember it */ if ( Output->indent < Output->pos && SyNrCols-Output->pos + 16*Output->indent <= SyNrCols-Output->spos + 16*Output->sindent ) { Output->spos = Output->pos; Output->sindent = Output->indent; } Output->indent--; } /* '\03', print line */ else if ( ch == '\03' ) { /* print the line */ Output->line[ Output->pos ] = '\0'; PutLine(); /* start the next line */ Output->pos = 0; /* first character is a very bad place to split */ Output->spos = 0; Output->sindent = 666; } /* <newline> or <return>, print line, indent next */ else if ( ch == '\n' || ch == '\r' ) { /* put the character on the line and terminate it */ Output->line[ Output->pos++ ] = ch; Output->line[ Output->pos ] = '\0'; /* print the line */ PutLine(); /* indent for next line */ Output->pos = 0; for ( i = 0; i < Output->indent; i++ ) Output->line[ Output->pos++ ] = ' '; /* set up new split positions */ Output->spos = 0; Output->sindent = 666; } /* normal character, room on the current line */ else if ( Output->pos < SyNrCols-2 ) { /* put the character on this line */ Output->line[ Output->pos++ ] = ch; } /* if we are going to split at the end of the line, discard blanks */ else if ( Output->spos == Output->pos && ch == ' ' ) { ; } /* full line, acceptable split position */ else if ( Output->spos != 0 ) { /* add character to the line, terminate it */ Output->line[ Output->pos++ ] = ch; Output->line[ Output->pos++ ] = '\0'; /* copy the rest after the best split position to a safe place */ for ( i = Output->spos; i < Output->pos; i++ ) str[ i-Output->spos ] = Output->line[ i ]; /* print line up to the best split position */ Output->line[ Output->spos++ ] = '\n'; Output->line[ Output->spos ] = '\0'; PutLine(); /* indent for the rest */ Output->pos = 0; for ( i = 0; i < Output->sindent; i++ ) Output->line[ Output->pos++ ] = ' '; /* copy the rest onto the next line */ for ( i = 0; str[ i ] != '\0'; i++ ) Output->line[ Output->pos++ ] = str[ i ]; /* set new split position */ Output->spos = 0; Output->sindent = 666; } /* full line, no splitt position */ else { /* append a '\', and print the line */ Output->line[ Output->pos++ ] = '\\'; Output->line[ Output->pos++ ] = '\n'; Output->line[ Output->pos ] = '\0'; PutLine(); /* add the character to the next line */ Output->pos = 0; Output->line[ Output->pos++ ] = ch; /* the first character is a very bad place to split */ Output->spos = 0; Output->sindent = 666; } } /**************************************************************************** ** *F Pr( <format>, <arg1>, <arg2> ) . . . . . . . . . print formatted output ** ** 'Pr' is the output function. The first argument is a 'printf' like format ** string containing up to 2 '%' format fields, specifing how the ** corresponding arguments are to be printed. The two arguments are passed ** as 'long' integers. This is possible since every C object ('int', ** 'char', pointers) except 'float' or 'double', which are not used in GAP, ** can be converted to a 'long' without loss of information. ** ** The function 'Pr' currently support the following '%' format fields: ** '%c' the corresponding argument represents a character, usually it is ** its ASCII or EBCDIC code, and this character is printed. ** '%s' the corresponding argument is the address of a null terminated ** character string which is printed. ** '%d' the corresponding argument is a signed integer, which is printed. ** Between the '%' and the 'd' an integer might be used to specify ** the width of a field in which the integer is right justified. If ** the first character is '0' 'Pr' pads with '0' instead of <space>. ** '%>' increment the indentation level. ** '%<' decrement the indentation level. ** '%%' can be used to print a single '%' character. No argument is used. ** ** You must always cast the arguments to '(long)' to avoid problems with ** those compilers with a default integer size of 16 instead of 32 bit. You ** must pass 0L if you don't make use of an argument to please lint. */ void Pr ( format, arg1, arg2 ) char * format; long arg1, arg2; { char * p, * q; long prec, n; char fill; /* loop over the characters of the <format> string */ for ( p = format; *p != '\0'; p++ ) { /* if the character is '%' do something special */ if ( *p == '%' ) { /* first look for a precision field */ p++; if ( *p == '0' ) fill = '0'; else fill = ' '; for ( prec = 0; IsDigit(*p); p++ ) prec = 10 * prec + *p - '0'; /* '%d' print an integer */ if ( *p == 'd' ) { if ( arg1 < 0 ) { prec--; for ( n=1; n <= -(arg1/10); n*=10 ) prec--; while ( --prec > 0 ) PutChr(fill); PutChr('-'); for ( ; n > 0; n /= 10 ) PutChr( (char)(-((arg1/n)%10) + '0') ); arg1 = arg2; } else { for ( n=1; n<=arg1/10; n*=10 ) prec--; while ( --prec > 0 ) PutChr(fill); for ( ; n > 0; n /= 10 ) PutChr( (char)(((arg1/n)%10) + '0') ); arg1 = arg2; } } /* '%s' print a string */ else if ( *p == 's' ) { for ( q = (char*)arg1; *q != '\0'; q++ ) prec--; while ( prec-- > 0 ) PutChr(' '); for ( q = (char*)arg1; *q != '\0'; q++ ) PutChr( *q ); arg1 = arg2; } /* '%c' print a character */ else if ( *p == 'c' ) { PutChr( (char)arg1 ); arg1 = arg2; } /* '%>' increment the indentation level */ else if ( *p == '>' ) { PutChr( '\01' ); while ( --prec > 0 ) PutChr( '\01' ); } /* '%<' decrement the indentation level */ else if ( *p == '<' ) { PutChr( '\02' ); while ( --prec > 0 ) PutChr( '\02' ); } /* '%%' print a '%' character */ else if ( *p == '%' ) { PutChr( '%' ); } /* else raise an error */ else { for ( p = "%format error"; *p != '\0'; p++ ) PutChr( *p ); } } /* not a '%' character, simply print it */ else { PutChr( *p ); } } } /**************************************************************************** ** *F OpenInput( <filename> ) . . . . . . . . . . open a file as current input ** ** 'OpenInput' opens the file with the name <filename> as current input. ** All subsequent input will be taken from that file, until it is closed ** again with 'CloseInput' or another file is opened with 'OpenInput'. ** 'OpenInput' will not close the current file, i.e., if <filename> is ** closed again, input will again be taken from the current input file. ** ** 'OpenInput' returns 1 if it could successfully open <filename> for ** reading and 0 to indicate failure. 'OpenInput' will fail if the file ** does not exist or if you do not have permissions to read it. 'OpenInput' ** may also fail if you have too many files open at once. It is system ** dependent how many are too many, but 16 files should work everywhere. ** ** Directely after the 'OpenInput' call the variable 'Symbol' has the value ** 'S_ILLEGAL' to indicate that no symbol has yet been read from this file. ** The first symbol is read by 'Read' in the first call to 'Match' call. ** ** You can open '*stdin*' to read from the standard input file, which is ** usually the terminal, or '*errin*' to read from the standard error file, ** which is the terminal even if '*stdin*' is redirected from a file. ** 'OpenInput' passes those file names to 'SyFopen' like any other name, ** they are just a convention between the main and the system package. ** 'SyFopen' and thus 'OpenInput' will fail to open '*errin*' if the file ** 'stderr' (Unix file descriptor 2) is not a terminal, because of a ** redirection say, to avoid that break loops take their input from a file. ** ** It is not neccessary to open the initial input file, 'InitScanner' opens ** '*stdin*' for that purpose. This file on the other hand can not be ** closed by 'CloseInput'. */ long OpenInput ( filename ) char * filename; { long file; /* fail if we can not handle another open input file */ if ( Input+1 == InputFiles+(sizeof(InputFiles)/sizeof(InputFiles[0])) ) return 0; /* in test mode keep reading from test input file for break loop input */ if ( TestInput != -1 && ! SyStrcmp( filename, "*errin*" ) ) return 1; /* try to open the input file */ file = SyFopen( filename, "r" ); if ( file == -1 ) return 0; /* remember the current position in the current file */ if ( Input != InputFiles-1 ) Input->ptr = In; /* enter the file identifier and the file name */ Input++; Input->file = file; Input->name[0] = '\0'; SyStrncat( Input->name, filename, sizeof(Input->name) ); /* start with an empty line and no symbol */ In = Input->line; In[0] = In[1] = '\0'; Symbol = S_ILLEGAL; Input->number = 1; /* indicate success */ return 1; } /**************************************************************************** ** *F CloseInput() . . . . . . . . . . . . . . . . . close current input file ** ** 'CloseInput' will close the current input file. Subsequent input will ** again be taken from the previous input file. 'CloseInput' will return 1 ** to indicate success. ** ** 'CloseInput' will not close the initial input file '*stdin*', and returns ** 0 if such an attempt is made. This is used in 'Error' which calls ** 'CloseInput' until it returns 0, therebye closing all open input files. ** ** Calling 'CloseInput' if the corresponding 'OpenInput' call failed will ** close the current output file, which will lead to very strange behaviour. */ long CloseInput () { /* refuse to close the initial input file */ if ( Input == InputFiles ) return 0; /* refuse to close the test input file */ if ( Input->file == TestInput ) return 0; /* close the input file */ SyFclose( Input->file ); /* revert to last file */ Input--; In = Input->ptr; /* indicate that the next symbol has not yet been read */ Symbol = S_ILLEGAL; /* indicate success */ return 1; } /**************************************************************************** ** *F OpenOutput( <filename> ) . . . . . . . . . open a file as current output ** ** 'OpenOutput' opens the file with the name <filename> as current output. ** All subsequent output will go to that file, until either it is closed ** again with 'CloseOutput' or another file is opened with 'OpenOutput'. ** The file is truncated to size 0 if it existed, otherwise it is created. ** 'OpenOutput' does not close the current file, i.e., if <filename> is ** closed again, output will go again to the current output file. ** ** 'OpenOutput' returns 1 if it could successfully open <filename> for ** writing and 0 to indicate failure. 'OpenOutput' will fail if you do not ** have permissions to create the file or write to it. 'OpenOutput' may ** also fail if you have too many files open at once. It is system ** dependent how many are too many, but 16 files should work everywhere. ** ** You can open '*stdout*' to write to the standard output file, which is ** usually the terminal, or '*errout*' to write to the standard error file, ** which is the terminal even if '*stdout*' is redirected to a file. ** 'OpenOutput' passes those file names to 'SyFopen' like any other name, ** they are just a convention between the main and the system package. ** ** It is not neccessary to open the initial output file, 'InitScanner' opens ** '*stdout*' for that purpose. This file on the other hand can not be ** closed by 'CloseOutput'. */ long OpenOutput ( filename ) char * filename; { long file; /* fail if we can not handle another open output file */ if ( Output+1==OutputFiles+(sizeof(OutputFiles)/sizeof(OutputFiles[0])) ) return 0; /* in test mode keep printing to test output file for breakloop output */ if ( TestInput != -1 && ! SyStrcmp( filename, "*errout*" ) ) return 1; /* try to open the file */ file = SyFopen( filename, "w" ); if ( file == -1 ) return 0; /* put the file on the stack, start at position 0 on an empty line */ Output++; Output->file = file; Output->line[0] = '\0'; Output->pos = 0; Output->indent = 0; /* variables related to line splitting, very bad place to split */ Output->spos = 0; Output->sindent = 666; /* indicate success */ return 1; } /**************************************************************************** ** *F CloseOutput() . . . . . . . . . . . . . . . . . close current output file ** ** 'CloseOutput' will first flush all pending output and then close the ** current output file. Subsequent output will again go to the previous ** output file. 'CloseOutput' returns 1 to indicate success. ** ** 'CloseOutput' will not close the initial output file '*stdout*', and ** returns 0 if such attempt is made. This is used in 'Error' which calls ** 'CloseOutput' until it returns 0, thereby closing all open output files. ** ** Calling 'CloseOutput' if the corresponding 'OpenOutput' call failed will ** close the current output file, which will lead to very strange behaviour. ** On the other hand if you forget to call 'CloseOutput' at the end of a ** 'PrintTo' call or an error will not yield much better results. */ long CloseOutput () { /* refuse to close the initial output file '*stdout*' */ if ( Output == OutputFiles ) return 0; /* refuse to close the test output file */ if ( Output->file == TestOutput ) return 0; /* flush output and close the file */ Pr( "%c", (long)'\03', 0L ); SyFclose( Output->file ); /* revert to previous output file and indicate success */ Output--; return 1; } /**************************************************************************** ** *F OpenAppend( <filename> ) . . open a file as current output for appending ** ** 'OpenAppend' opens the file with the name <filename> as current output. ** All subsequent output will go to that file, until either it is closed ** again with 'CloseAppend' or another file is opened with 'OpenOutput'. ** Unlike 'OpenOutput' 'OpenAppend' does not truncate the file to size 0 if ** it exists. Appart from that 'OpenAppend' is equal to 'OpenOutput' so its ** description applies to 'OpenAppend' too. */ long OpenAppend ( filename ) char * filename; { long file; /* fail if we can not handle another open output file */ if ( Output+1==OutputFiles+(sizeof(OutputFiles)/sizeof(OutputFiles[0])) ) return 0; /* in test mode keep printing to test output file for breakloop output */ if ( TestInput != -1 && ! SyStrcmp( filename, "*errout*" ) ) return 1; /* try to open the file */ file = SyFopen( filename, "a" ); if ( file == -1 ) return 0; /* put the file on the stack, start at position 0 on an empty line */ Output++; Output->file = file; Output->line[0] = '\0'; Output->pos = 0; Output->indent = 0; /* variables related to line splitting, very bad place to split */ Output->spos = 0; Output->sindent = 666; /* indicate success */ return 1; } /**************************************************************************** ** *F CloseAppend() . . . . . . . . . . . . . . . . . close current output file ** ** 'CloseAppend' will first flush all pending output and then close the ** current output file. Subsequent output will again go to the previous ** output file. 'CloseAppend' returns 1 to indicate success. 'CloseAppend' ** is exactely equal to 'CloseOutput' so its description applies. */ long CloseAppend () { /* refuse to close the initial output file '*stdout*' */ if ( Output == OutputFiles ) return 0; /* refuse to close the test output file */ if ( Output->file == TestOutput ) return 0; /* flush output and close the file */ Pr( "%c", (long)'\03', 0L ); SyFclose( Output->file ); /* revert to previous output file and indicate success */ Output--; return 1; } /**************************************************************************** ** *F OpenLog( <filename> ) . . . . . . . . . . . . . log interaction to a file ** ** 'OpenLog' instructs the scanner to echo all input from the files ** '*stdin*' and '*errin*' and all output to the files '*stdout*' and ** '*errout*' to the file with name <filename>. The file is truncated to ** size 0 if it existed, otherwise it is created. ** ** 'OpenLog' returns 1 if it could successfully open <filename> for writing ** and 0 to indicate failure. 'OpenLog' will fail if you do not have ** permissions to create the file or write to it. 'OpenOutput' may also ** fail if you have too many files open at once. It is system dependent how ** many are too many, but 16 files should work everywhere. Finally ** 'OpenLog' will fail if there is already a current logfile. */ long OpenLog ( filename ) char * filename; { /* refuse to open a logfile if we already log to one */ if ( Logfile != -1 ) return 0; /* try to open the file */ Logfile = SyFopen( filename, "w" ); if ( Logfile == -1 ) return 0; /* otherwise indicate success */ return 1; } /**************************************************************************** ** *F CloseLog() . . . . . . . . . . . . . . . . . . close the current logfile ** ** 'CloseLog' closes the current logfile again, so that input from '*stdin*' ** and '*errin*' and output to '*stdout*' and '*errout*' will no longer be ** echoed to a file. 'CloseLog' will return 1 to indicate success. ** ** 'CloseLog' will fail if there is no logfile active and will return 0 in ** this case. */ long CloseLog () { /* refuse to close a non existent logfile */ if ( Logfile == -1 ) return 0; /* close the logfile */ SyFclose( Logfile ); Logfile = -1; /* indicate success */ return 1; } /**************************************************************************** ** *F OpenInputLog( <filename> ) . . . . . . . . . . . . . log input to a file ** ** 'OpenInputLog' instructs the scanner to echo all input from the files ** '*stdin*' and '*errin*' to the file with name <filename>. The file is ** truncated to size 0 if it existed, otherwise it is created. ** ** 'OpenInputLog' returns 1 if it could successfully open <filename> for ** writing and 0 to indicate failure. 'OpenInputLog' will fail if you do ** not have permissions to create the file or write to it. 'OpenInputLog' ** may also fail if you have too many files open at once. It is system ** dependent how many are too many, but 16 files should work everywhere. ** Finally 'OpenInputLog' will fail if there is already a current logfile. */ long OpenInputLog ( filename ) char * filename; { /* refuse to open a logfile if we already log to one */ if ( InputLogfile != -1 ) return 0; /* try to open the file */ InputLogfile = SyFopen( filename, "w" ); if ( InputLogfile == -1 ) return 0; /* otherwise indicate success */ return 1; } /**************************************************************************** ** *F CloseInputLog() . . . . . . . . . . . . . . . . close the current logfile ** ** 'CloseInputLog' closes the current logfile again, so that input from ** '*stdin*' and '*errin*' will no longer be echoed to a file. ** 'CloseInputLog' will return 1 to indicate success. ** ** 'CloseInputLog' will fail if there is no logfile active and will return 0 ** in this case. */ long CloseInputLog () { /* refuse to close a non existent logfile */ if ( InputLogfile == -1 ) return 0; /* close the logfile */ SyFclose( InputLogfile ); InputLogfile = -1; /* indicate success */ return 1; } /**************************************************************************** ** *F OpenTest( <filename> ) . . . . . . . . open an input file for test mode ** ** 'OpenTest' opens the file with the name <filename> as current input for ** test mode. All subsequent input will be taken from that file, until it ** is closed again with 'CloseTest' or another file is opened with ** 'OpenInput'. 'OpenTest' will not close the current file, i.e., if ** <filename> is closed again, input will be taken again from the current ** input file. ** ** Test mode works as follows. If the scanner is about to print a line to ** the current output file (or to be more precise to the output file that ** was current when 'OpenTest' was called) this line is compared with the ** next line from the test input file, i.e., the one opened by 'OpenTest'. ** If this line starts with '#>' and the rest of it matches the output line ** the output line is not printed and the input comment line is discarded. ** Otherwise the scanner prints the output line and does not discard the ** input line. ** ** On the other hand if an input line is encountered on the test input that ** starts with '#>' the scanner assumes that this is an expected output line ** that did not appear and echoes this line to the current output file. ** ** The upshot is that you can write test files that consist of alternating ** input and, as '#>' test comment lines the expected output. If GAP ** behaves normal and produces the expected output then nothing is printed. ** But if something goes wrong you see what actually was printed and what ** was expected instead. ** ** As a convention GAP test files should end with a print statement like: ** ** Print("prime 3.002 06-Jul-90 ",417000000/Runtime()," GAPstones\n"); ** ** without a matching '#>' comment line. This tells the user that the test ** file completed and also how much time it took. The constant should be ** such that a VAX 11/780 gets roughly 1000 GAPstones. ** ** 'OpenTest' returns 1 if it could successfully open <filename> for reading ** and 0 to indicate failure. 'OpenTest' will fail if the file does not ** exist or if you have no permissions to read it. 'OpenTest' may also fail ** if you have too many files open at once. It is system dependent how many ** are too may, but 16 files shoule work everywhere. ** ** Directely after the 'OpenTest' call the variable 'Symbol' has the value ** 'S_ILLEGAL' to indicate that no symbol has yet been read from this file. ** The first symbol is read by 'Read' in the first call to 'Match' call. */ long OpenTest ( filename ) char * filename; { /* do not allow to nest test files */ if ( TestInput != -1 ) return 0; /* try to open the file as input file */ if ( ! OpenInput( filename ) ) return 0; /* remember this is a test input */ TestInput = Input->file; TestOutput = Output->file; TestLine[0] = '\0'; /* indicate success */ return 1; } /**************************************************************************** ** *F CloseTest() . . . . . . . . . . . . . . . . . . close the test input file ** ** 'CloseTest' closes the current test input file and ends test mode. ** Subsequent input will again be taken from the previous input file. ** Output will no longer be compared with comment lines from the test input ** file. 'CloseTest' will return 1 to indicate success. ** ** 'CloseTest' will not close a non test input file and returns 0 if such an ** attempt is made. */ long CloseTest () { /* refuse to a non test file */ if ( TestInput != Input->file ) return 0; /* close the input file */ SyFclose( Input->file ); /* revert to last file */ Input--; In = Input->ptr; /* indicate that the next symbol has not yet been read */ Symbol = S_ILLEGAL; /* we are no longer in test mode */ TestInput = -1; TestOutput = -1; TestLine[0] = '\0'; /* indicate success */ return 1; } /**************************************************************************** ** *F InitScanner() . . . . . . . . . . . . . . initialize the scanner package ** ** 'InitScanner' initializes the scanner package. This justs sets the ** current input file to '*stdin*' and current output file to '*stdout*'. */ void InitScanner () { long ignore; Input = InputFiles-1; ignore = OpenInput( "*stdin*" ); Output = OutputFiles-1; ignore = OpenOutput( "*stdout*" ); Logfile = -1; InputLogfile = -1; TestInput = -1; TestOutput = -1; }
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.