token.c

This is token.c in view mode; [Download] [Up]
/*  File        : Token.c
    Author      : Richard A. O'Keefe
    Modified by : Deeporn H. Beardsley & Saumya Debray
    Updated     : Summer 1988
    Purpose     : Tokenizer for SB-Prolog.
 
*/
 
#ifdef  vms
#include stdio
#else
#include <stdio.h>
#endif
 
/* stuff defined to interface with SB-Prolog */

#include "builtin.h"
#include <errno.h>
 
/*  We used to use an 8-bit character set under VMS, but 7-bit ASCII
 *  elsewhere.  Now that DIS 8859/1 exists (a draft international
 *  standard for an 8-bit extension of ASCII) we use that, and we are
 *  in luck: it is almost identical to the VMS character set.
 */
#define AlphabetSize 256
#define SBPMAXINT       268435455
 
extern  char *strcpy(/* CHAR_PTR, CHAR_PTR */);
#define StrCpy(dst, src) (void)strcpy(dst, src)
#define Printf           (void)printf
#define Sprintf          (void)sprintf
#define Fprintf          (void)fprintf
 
#define InRange(X,L,U) ((unsigned)((X)-(L)) <= (unsigned)((U)-(L)))
#define IsLayout(X) InRange(InType(X), SPACE, EOLN)
 
/*  VERY IMPORTANT NOTE: I assume that the stdio library returns the value
 *  EOF when character input hits the end of the file, and that this value
 *  is actually the integer -1.  You will note the DigVal(), InType(), and
 *  OuType() macros below, and there is a ChType() macro used in crack().
 *  They all depend on this assumption.
 */
 
#define DIGIT    0              /* 0 .. 9 */
#define BREAK    1              /* _ */
#define UPPER    2              /* A .. Z */
#define LOWER    3              /* a .. z */
#define SIGN     4              /* -/+*<=>#@$\^&~`:.? */
#define NOBLE    5              /* !; (don't form compounds) */
#define PUNCT    6              /* (),[]|{}% */
#define ATMQT    7              /* ' (atom quote) */
#define LISQT    8              /* " (list quote) */
#define STRQT    9              /* $ (string quote) */
#define CHRQT   10              /* ` (character quote, maybe) */
#define TILDE   11              /* ~ (like character quote but buggy) */
#define SPACE   12              /* layout and control chars */
#define EOLN    13              /* line terminators ^J ^L */
#define REALO   14              /* floating point number */
#define EOFCH   15              /* end of file */
#define ALPHA   DIGIT           /* any of digit, break, upper, lower */
#define BEGIN   BREAK           /* atom left-paren pair */
#define ENDCL   EOLN            /* end of clause token */
#define RREAL	16		/* radix number(real) - overflowed */
#define RDIGIT	17		/* radix number(int) */
 
#define InType(c)  (intab.chtype+1)[c]
#define DigVal(c)  (digval+1)[c]
 
BYTE outqt[EOFCH+1];
 
struct CHARS
{
   int  eolcom;         /* End-of-line comment, default % */
   int  endeol;         /* early terminator of eolcoms, default none */
   int  begcom;         /* In-line comment start, default / */
   int  astcom;         /* In-line comment second, default * */
   int  endcom;         /* In-line comment finish, default / */
   int  radix;          /* Radix character, default ' */
   int  dpoint;         /* Decimal point, default . */
   int  escape;         /* String escape character, default \ */
   int  termin;         /* Terminates a clause */
   CHAR chtype[AlphabetSize+1];
};
 
struct CHARS intab =       /* Special character table */
{
   '%',                    /* eolcom: end of line comments */
   -1,                     /* endeol: early end for eolcoms */
   '/',                    /* begcom: in-line comments */
   '*',                    /* astcom: in-line comments */
   '/',                    /* endcom: in-line comments */
   '\'',                   /* radix : radix separator */
   '.',                    /* dpoint: decimal point */
   -1,                     /* escape: string escape character */
   '.',                    /* termin: ends clause, sign or solo */
   {
       EOFCH,                  /* really the -1th element of the table: */
   /*  ^@      ^A      ^B      ^C      ^D      ^E      ^F      ^G      */
       SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,
   /*  ^H      ^I      ^J      ^K      ^L      ^M      ^N      ^O      */
       SPACE,  SPACE,  EOLN,   SPACE,  EOLN,   SPACE,  SPACE,  SPACE,
   /*  ^P      ^Q      ^R      ^S      ^T      ^U      ^V      ^W      */
       SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,
   /*  ^X      ^Y      ^Z      ^[      ^\      ^]      ^^      ^_      */
       SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,
   /*  sp      !       "       #       $       %       &       '       */
       SPACE,  NOBLE,  LISQT,  SIGN,   LOWER,  PUNCT,  SIGN,   ATMQT,
   /*  (       )       *       +       ,       -       .       /       */
       PUNCT,  PUNCT,  SIGN,   SIGN,   PUNCT,  SIGN,   SIGN,   SIGN,
   /*  0       1       2       3       4       5       6       7       */
       DIGIT,  DIGIT,  DIGIT,  DIGIT,  DIGIT,  DIGIT,  DIGIT,  DIGIT,
   /*  8       9       :       ;       <       =       >       ?       */
       DIGIT,  DIGIT,  SIGN,   PUNCT,  SIGN,   SIGN,   SIGN,   SIGN,
   /*  @       A       B       C       D       E       F       G       */
       SIGN,   UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,
   /*  H       I       J       K       L       M       N       O       */
       UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,
   /*  P       Q       R       S       T       U       V       W       */
       UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,
   /*  X       Y       Z       [       \       ]       ^       _       */
       UPPER,  UPPER,  UPPER,  PUNCT,  SIGN,   PUNCT,  SIGN,   BREAK,
   /*  `       a       b       c       d       e       f       g       */
       SIGN,   LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,
   /*  h       i       j       k       l       m       n       o       */
       LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,
   /*  p       q       r       s       t       u       v       w       */
       LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,
   /*  x       y       z       {       |       }       ~       ^?      */
       LOWER,  LOWER,  LOWER,  PUNCT,  PUNCT,  PUNCT,  SIGN,   SPACE,
   /*  128     129     130     131     132     133     134     135     */
       SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,
   /*  136     137     138     139     140     141     142     143     */
       SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,
   /*  144     145     146     147     148     149     150     151     */
       SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,
   /*  152     153     154     155     156     157     158     159     */
       SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,  SPACE,
   /*  NBSP    !-inv   cents   pounds  ching   yen     brobar  section */
       SPACE,  SIGN,   SIGN,   SIGN,   SIGN,   SIGN,   SIGN,   SIGN,
   /*  "accent copyr   -a ord  <<      nothook SHY     (reg)   ovbar   */
       SIGN,   SIGN,   LOWER,  SIGN,   SIGN,   SIGN,   SIGN,   SIGN,
   /*  degrees +/-     super 2 super 3 -       micron  pilcrow -       */
       SIGN,   SIGN,   LOWER,  LOWER,  SIGN,   SIGN,   SIGN,   SIGN,
   /*  ,       super 1 -o ord  >>      1/4     1/2     3/4     ?-inv   */
       SIGN,   LOWER,  LOWER,  SIGN,   SIGN,   SIGN,   SIGN,   SIGN,
   /*  `A      'A      ^A      ~A      "A      oA      AE      ,C      */
       UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,
   /*  `E      'E      ^E      "E      `I      'I      ^I      "I      */
       UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,
   /*  ETH     ~N      `O      'O      ^O      ~O      "O      x times */
#ifdef  vms
       UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,
#else
       UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  SIGN,
#endif
   /*  /O      `U      'U      ^U      "U      'Y      THORN   ,B      */
       UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  UPPER,  LOWER,
   /*  `a      'a      ^a      ~a      "a      oa      ae      ,c      */
       LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,
   /*  `e      'e      ^e      "e      `i      'i      ^i      "i      */
       LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,
   /*  eth     ~n      `o      'o      ^o      ~o      "o      -:-     */
#ifdef  vms
       LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,
#else
       LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  SIGN,
#endif
   /*  /o      `u      'u      ^u      "u      'y      thorn  "y       */
#ifdef  vms
       LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  SPACE
#else
       LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER,  LOWER
#endif
   }
};
 
CHAR digval[AlphabetSize+1] =
{
        99,                     /* really the -1th element of the table */
    /*  ^@      ^A      ^B      ^C      ^D      ^E      ^F      ^G      */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  ^H      ^I      ^J      ^K      ^L      ^M      ^N      ^O      */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  ^P      ^Q      ^R      ^S      ^T      ^U      ^V      ^W      */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  ^X      ^Y      ^Z      ^[      ^\      ^]      ^^      ^_      */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  sp      !       "       #       $       %       &       '       */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  (       )       *       +       ,       -       .       /       */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  0       1       2       3       4       5       6       7       */
        0,      1,      2,      3,      4,      5,      6,      7,
    /*  8       9       :       ;       <       =       >       ?       */
        8,      9,      99,     99,     99,     99,     99,     99,
    /*  @       A       B       C       D       E       F       G       */
        99,     10,     11,     12,     13,     14,     15,     99,
    /*  H       I       J       K       L       M       N       O       */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  P       Q       R       S       T       U       V       W       */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  X       Y       Z       [       \       ]       ^       _       */
        99,     99,     99,     99,     99,     99,     99,     0,  /*NB*/
    /*  `       a       b       c       d       e       f       g       */
        99,     10,     11,     12,     13,     14,     15,     99,
    /*  h       i       j       k       l       m       n       o       */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  p       q       r       s       t       u       v       w       */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  x       y       z       {       |       }       ~       ^?      */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  128     129     130     131     132     133     134     135     */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  136     137     138     139     140     141     142     143     */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  144     145     146     147     148     149     150     151     */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  152     153     154     155     156     157     158     159     */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  160     161     162     163     164     165     166     167     */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  168     169     170(-a) 171     172     173     174     175     */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  176     177     178(2)  179(3)  180     181     182     183     */
        99,     99,     2,      3,      99,     99,     99,     99,
    /*  184     185(1)  186(-o) 187     188     189     190     191     */
        99,     1,      99,     99,     99,     99,     99,     99,
    /*  192     193     194     195     196     197     198     199     */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  200     201     202     203     204     205     206     207     */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  208     209     210     211     212     213     214     215     */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  216     217     218     219     220     221     222     223     */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  224     225     226     227     228     229     230     231     */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  232     233     234     235     236     237     238     239     */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  240     241     242     243     244     245     246     247     */
        99,     99,     99,     99,     99,     99,     99,     99,
    /*  248     249     250     251     252     253     254     255     */
        99,     999,     99,     99,     99,     99,     99,     99
    };
 
 
/* values returned to calling program */

#define SPECIAL 0       /* puncuation , ( ) [ ] ... */
#define VARO    1       /* type is a variable */
#define FUNC	2	/* type is atom( */
#define NUMBERO 3       /* type is a number */
#define ATOMO   4       /* type is an atom */
#define ENDCLS	5       /* END of clause but not file */
#define USCORE  6       /* underscore '_' */
#define SEMI	7	/* ; */
#define BADEND  8       /* END of file, not end of clause */
#define STRING  9       /* type is a char string */

int cNUMERO = 0, cATOMO   = 0, cFUNC = 0, cVARO   = 0, cUSCORE = 0,
    cSTRING = 0, cSPECIAL = 0, cSEMI = 0, cENDCLS = 0, cENDPRG = 0;

extern LONG_PTR insert();
static BYTE perm = PERM;

extern FILE *curr_in, *curr_out;    /* current input, output streams */
 
 
void SyntaxError(message)
CHAR_PTR message;
{
   Fprintf(stderr, "Syntax error: %s\n", message);
   exit(1);
}
 
/*  GetToken() reads a single token from the input stream and returns
 *  its type, which is one of
 *      DIGIT   -- a number
 *      BEGIN   -- an atom( pair
 *      LOWER   -- an atom
 *      UPPER   -- a variable
 *      PUNCT   -- a single punctuation mark
 *      LISQT   -- a quoted list of character codes
 *      STRQT   -- a quoted string
 *      ENDCL   -- end of clause (normally '.\n').
 *      EOFCH   -- signifies end-of-file.
 *      RREAL   -- a real, from some radix notation, in double_v.
 *      RDIGIT  -- an integer, from some radix notation, in rad_int.
 *  In all cases except the last, the text of the token is in AtomStr.
 *  There are two questions: between which pairs of adjacent tokens is
 *  a space (a) necessary, (b) desirable?  There is an additional
 *  dummy token type used by the output routines, namely
 *      NOBLE   -- extra space is definitely not needed.
 *  I leave it as an exercise for the reader to answer question (a).
 *  Since this program is to produce output I find palatable (even if
 *  it isn't exactly what I'd write myself), extra spaces are ok.  In
 *  fact, the main use of this program is as an editor command, so it
 *  is normal to do a bit of manual post-processing.  Question (b) is
 *  the one to worry about then.  My answer is that a space is never
 *  written
 *      - after  PUNCT ( [ { |
 *      - before PUNCT ) ] } | , <ENDCL>
 *  is written after comma only sometimes, and is otherwise always
 *  written.  The variable lastput thus takes these values:
 *      ALPHA   -- put a space except before PUNCT
 *      SIGN    -- as alpha, but different so ENDCL knows to put a space.
 *      NOBLE   -- don't put a space
 *      ENDCL   -- just ended a clause
 *      EOFCH   -- at beginning of file
 */
 
int     lastc = ' ';    /* previous character */
#define MaxStrLen      1000 
BYTE    AtomStr[MaxStrLen+20];
LONG    list_p;
int     rtnint;
double  double_v;
LONG    rad_int;
 
CHAR    tok2long[] = "token too long";
CHAR    eofinrem[] = "end of file in comment";
CHAR    badexpt[]  = "bad exponent";
CHAR    badradix[] = "radix > 36";
 
 
/*  read_character(FILE* card, BYTE q)
 *  reads one character from a quoted atom, list, string, or character.
 *  Doubled quotes are read as single characters, otherwise a
 *  quote is returned as -1 and lastc is set to the next character.
 *  If the input syntax has character escapes, they are processed.
 *  Note that many more character escape sequences are accepted than
 *  are generated.  There is a divergence from C: \xhh sequences are
 *  two hexadecimal digits long, not three.
 *  Note that the \c and \<space> sequences combine to make a pretty
 *  way of continuing strings.  Do it like this:
 *      "This is a string, which \c
 *     \ has to be continued over \c
 *     \ several lines.\n".
 */
 
int read_character(card, q)
register FILE *card;
register int q;
{
   register int c;
 
   c = getc(card);
BACK:
   if (c < 0) {
DOERR:
      if (q < 0)
         SyntaxError("end of file in character constant");
      else {
         CHAR message[80];
         Sprintf(message, "end of file in %cquoted%c constant", q, q);
         SyntaxError(message);
      }
   }
   if (c == q) {
      c = getc(card);
      if (c == q)
	 return c;
      lastc = c;
      return -1;
   } else if (c != intab.escape)
      return c;
 
   /*  If we get here, we have read the "\" of an escape sequence  */

   c = getc(card);
   switch (c) {
      case EOF:
	 clearerr(curr_in);
	 goto DOERR;
      case 'n':  case 'N':         /* newline */
         return 10;
      case 't':  case 'T':         /* tab */
         return  9;
      case 'r':  case 'R':         /* reeturn */
         return 13;
      case 'v':  case 'V':         /* vertical tab */
         return 11;
      case 'b':  case 'B':         /* backspace */
         return  8;
      case 'f':  case 'F':         /* formfeed */
         return 12;
      case 'e':  case 'E':         /* escape */
         return 27;
      case 'd':  case 'D':         /* delete */
         return 127;
      case 's':  case 'S':         /* space */
         return 32;
      case 'a':  case 'A':         /* alarm */
         return  7;
      case '^':                    /* control */
         c = getc(card);
         if (c < 0)
	    goto DOERR;
         return (c == '?' ? 127 : c&31);
      case 'c':  case 'C':         /* continuation */
         while (IsLayout(c = getc(card))) 
	    ;
         goto BACK;
      case 'x':  case 'X':         /* hexadecimal */
         {  int i, n;
            for (n = 0, i = 2; --i >= 0; n = (n<<4) + DigVal(c))
               if (DigVal(c = getc(card)) >= 16) {
                  if (c < 0)
	             goto DOERR;
                  (void)ungetc(c, card);
                  break;
               }
            return (n & 255);
         }
      case 'o':  case 'O':         /* octal */
         c = getc(card);
         if (DigVal(c) >= 8) {
            if (c < 0)
	       goto DOERR;
            (void)ungetc(c, card);
            return 0;
         }
      case '0':  case '1':  case '2':  case '3':
      case '4':  case '5':  case '6':  case '7':
         {  int i, n;
            for (n = c-'0', i = 2; --i >= 0; n = (n<<3) + DigVal(c))
               if (DigVal(c = getc(card)) >= 8) {
                  if (c < 0)
	             goto DOERR;
                  (void)ungetc(c, card);
                  break;
               }
            return (n & 255);
         }
      default:
         if (!IsLayout(c))
	    return c;
         c = getc(card);
         goto BACK;
   }
} 
 
 
/*  com0plain(card, endeol)
 *  These comments have the form
 *      <eolcom> <char>* <newline>                      {PUNCT}
 *  or  <eolcom><eolcom> <char>* <newline>              {SIGN }
 *  depending on the classification of <eolcom>.  Note that we could
 *  handle ADA comments with no trouble at all.  There was a Pop-2
 *  dialect which had end-of-line comments using "!" where the comment
 *  could also be terminated by "!".  You could obtain the effect of
 *  including a "!" in the comment by doubling it, but what you had
 *  then was of course two comments.  The endeol parameter of this
 *  function allows the handling of comments like that which can be
 *  terminated either by a new-line character or an <endeol>, whichever
 *  comes first.  For ordinary purposes, endeol = -1 will do fine.
 *  When this is called, the initial <eolcom>s have been consumed.
 *  We return the first character after the comment.
 *  If the end of the source file is encountered, we do not treat it
 *  as an error, but quietly close the comment and return EOF as the
 *  "FOLLOWing" character.
 */

int com0plain(card, endeol)
register FILE *card;        /* source file */
register int endeol;        /* The closing character "!" */
{
   register int c;
 
   while ((c = getc(card)) >= 0 && c != '\n' && c != endeol)
      ;
   if (c >= 0)
      c = getc(card);
   return c;
}
 
 
/*  The states in the next two functions are
 *      0       - after an uninteresting character
 *      1       - after an "astcom"
 *      2       - after a  "begcom"
 *  Assuming begcom = "(", astom = "#", endcom = ")",
 *  com2plain will accept "(#)" as a complete comment.  This can
 *  be changed by initialising the state to 0 rather than 1.
 *  The same is true of com2nest, which accepts "(#(#)#) as a
 *  complete comment.  Changing it would be rather harder.
 *  Fixing the bug where the closing <astcom> is copied if it is
 *  not an asterisk may entail rejecting "(#)".
 */
 
/*  com2plain(card, astcom, endcom)
 *  handles PL/I-style comments, that is, comments which begin with
 *  a pair of characters <begcom><astcom> and end with a pair of
 *  chracters <astcom><endcom>, where nesting is not allowed.  For
 *  example, if we take begcom='(', astcom='*', endcom=')' as in
 *  Pascal, the comment "(* not a (* plain *)^ comment *) ends at
 *  the "^".
 *  For this kind of comment, it is perfectly sensible for any of
 *  the characters to be equal.  For example, if all three of the
 *  bracket characters are "#", then "## stuff ##" is a comment.
 *  When this is called, the initial <begcom><astcom> has been consumed.
 */

void com2plain(card, astcom, endcom)
register FILE *card;        /* source file */
int astcom;                 /* The asterisk character "*" */
int endcom;                 /* The closing character "/" */
{
   register int c;
   register int state;
 
   for (state = 0; (c = getc(card)) >= 0; ) {
      if (c == endcom && state)
	 break;
      state = c == astcom;
   }
   if (c < 0)
      SyntaxError(eofinrem);
}
 
 
int GetToken()
{
   register FILE *card = curr_in;
   register BYTE_PTR s = AtomStr;
   register int  c, d;
   register int  n = MaxStrLen;
   LONG     oldv = 0, newv = 0; 
   LONG_PTR newpair, list_head, stack_top;
 
   c = lastc;
START:
   switch (InType(c)) {
      case DIGIT:
         /* The FOLLOWing kinds of numbers exist:
          * (1) unsigned decimal integers: d+
          * (2) unsigned based integers: d+Ro+[R]
          * (3) unsigned floats: d* [. d*] [e +/-] d+
          * (4) characters: 0Rc[R]
          * We allow underscores in numbers too, ignoring them.
          */
         do {
            if (c != '_')
	       *s++ = c;
            c = getc(card);
         } while (InType(c) <= BREAK);
         if (c == intab.radix) { 
            *s = 0;
            for (d = 0, s = AtomStr; c = *s++; ) {
               d = d*10-'0'+c;
               if (d > 36)
		  SyntaxError(badradix);
            }
            if (d == 0) {       /*  0'c['] is a character code  */
               d = read_character(card, -1);
               Sprintf(AtomStr, "%d", d);
               d = getc(card);
               lastc = d == intab.radix ? getc(card) : d;
               return DIGIT;
            }
            while (c = getc(card), DigVal(c) < 99)
               if (c != '_') {
		  oldv = newv;
		  newv = newv*d + DigVal(c);
		  if (newv < oldv || newv > SBPMAXINT) {
		     Fprintf(stderr, "*** overflow in radix notation ***\n");
		     double_v = oldv*1.0*d + DigVal(c);
		     while (c = getc(card), DigVal(c) < 99)
                        if (c != '_') 
			   double_v = double_v*d + DigVal(c);
                     if (c == intab.radix)
		        c = getc(card);
                     lastc = c;
		     return RREAL;
		  }
	       }
/*
            Sprintf(AtomStr, "%ld", newv);
*/
	    rad_int = newv;
            if (c == intab.radix)
	       c = getc(card);
            lastc = c;
            return RDIGIT;
         } else if (c == intab.dpoint) {
            d = getc(card);
            if (InType(d) == DIGIT) {
DECIMAL:       *s++ = '.';
               do {
                  if (d != '_')
		     *s++ = d;
                  d = getc(card);
               } while (InType(d) <= BREAK);
               if ((d | 32) == 'e') {
                  *s++ = 'E';
                  d = getc(card);
                  if (d == '-') {
		     *s++ = d;
		     d = getc(card);
		  } else if (d == '+')
		     d = getc(card);
                  if (InType(d) > BREAK)
		     SyntaxError(badexpt);
                  do {
                     if (d != '_')
		        *s++ = d;
                     d = getc(card);
                  } while (InType(d) <= BREAK);
               }
               c = d;
               *s = 0;
	       lastc = c;
               return REALO;
            } else       /* c has not changed */
               ungetc(d, card);
         }
         *s = 0;
	 lastc = c;
         return DIGIT;
 
      case BREAK:
      case UPPER:
         do {
            if (--n < 0)
	       SyntaxError(tok2long);
            *s++ = c;
	    c = getc(card);
         } while (InType(c) <= LOWER);
         *s = 0;
	 lastc = c;
         rtnint = (int)(s - AtomStr);
         return UPPER;
 
      case LOWER:
          do {
             if (--n < 0) SyntaxError(tok2long);
             *s++ = c;
	     c = getc(card);
          } while (InType(c) <= LOWER);
          *s = 0;
SYMBOL:   if (c == '(') {
             lastc = getc(card);
             rtnint = (int)(s - AtomStr);
             return BEGIN;
          } else {
             lastc = c;
             rtnint = (int)(s - AtomStr);
             return LOWER;
          }
 
      case SIGN:
          *s = c;
	  d = getc(card);
          if (c == intab.begcom && d == intab.astcom) {
ASTCOM:      com2plain(card, d, intab.endcom);
             c = getc(card);
             goto START;
          } else if (c == intab.dpoint && InType(d) == DIGIT) {
             *s++ = '0';
             goto DECIMAL;
          }
          while (InType(d) == SIGN) {
             if (--n == 0)
		SyntaxError(tok2long);
             *++s = d;
	     d = getc(card);
          }
          *++s = 0;
          if (InType(d) >= SPACE && c == intab.termin && AtomStr[1] == 0) {
             lastc = d;
             return ENDCL;       /* i.e. '.' FOLLOWed by layout */
          }
          c = d;
          goto SYMBOL;
 
      case NOBLE:
          if (c == intab.termin) {
             *s = 0;
	     lastc = ' ';
             return ENDCL;
          } else if (c == intab.eolcom) {
             c = com0plain(card, intab.endeol);
             goto START;
          }
          *s++ = c;
	  *s = 0;
          lastc = c = getc(card);
          goto SYMBOL;
 
      case PUNCT:
         if (c == intab.termin) {
            *s = 0;
	    lastc = ' ';
            return ENDCL;
         } else if (c == intab.eolcom) {
            c = com0plain(card, intab.endeol);
            goto START;
         }
         d = getc(card);
         if (c == intab.begcom && d == intab.astcom)
	    goto ASTCOM;
 
         /*  If we arrive here, c is an ordinary punctuation mark  */

         if (c == '(')    /* need to distingusih between atom( and atom ( */
            *s++ = ' ';
         lastc = d;
	 *s++ = c;
	 *s = 0;
         rtnint = (int)(s - AtomStr);
         return PUNCT;
 
      case CHRQT:
         /* `c[`] is read as an integer.
           * Eventually we should treat characters as a distinct
           * token type, so they can be generated on output.
           * If the character quote, atom quote, list quote,
           * or string quote is the radix character, we should
           * generate 0'x notation, otherwise `x`.
           */
         d = read_character(card, -1);
         Sprintf(AtomStr, "%d", d);
         d = getc(card);
         lastc = d == c ? getc(card) : d;
         return DIGIT;
 
      case ATMQT:
      case STRQT:
         while ((d = read_character(card, c)) >= 0) {
            if (--n < 0) SyntaxError(tok2long);
            *s++ = d;
         }
         *s = 0;
         rtnint = (int) (s - AtomStr);
         c = lastc;
         goto SYMBOL;

      case LISQT: 
	 /* check for potential heap overflow */
	 /*   (this will guarantee space for lists of up to 50 elements) */
	 stack_top = (breg < ereg) ? breg : ereg - ENV_SIZE(cpreg);
	 if (stack_top < hreg + 100) {
	    garbage_collection("GetToken");
	    if (stack_top < hreg + 100)    /* still too full */
	       quit("Heap overflow\n");
	 }

	 list_head = newpair = hreg;
         while ((d = read_character(card, c)) >= 0) {
	    hreg += 2;
	    *newpair++ = MAKEINT(d);
	    *newpair++ = (LONG)hreg | LIST_TAG; 
	 }
	 if (list_head == hreg)   /* null string */
	    list_p = nil_sym;
	 else {
	    *(--newpair) = nil_sym;
	    list_p = (LONG)list_head | LIST_TAG;
	 }
	 return LISQT;

      case EOLN:
      case SPACE:
         c = getc(card);
         goto START;
 
      case EOFCH:
	 clearerr(curr_in);
         return EOFCH;
   }
   Fprintf(stderr, "Internal error: InType(%d)==%d\n", c, InType(c));
   abort();                /* There is no way we can get here */
   /*NOTREACHED*/
}

 
void b_NEXT_TOKEN()
{
   register LONG     op;
   register LONG_PTR top;
   register FILE     *card = curr_in;
   int      i, atoi(), oldnum, newnum;
   int      len;
   double   atof();
   LONG     makefloat(), ptr;

   i = GetToken();
   switch (i) {
      case LOWER:
	 op = reg[1];  DEREF(op);  FOLLOW(op) = MAKEINT(ATOMO);
         ptr = (LONG)insert(AtomStr, rtnint, 0, &perm) | CS_TAG;
	 op = reg[2];  DEREF(op);  FOLLOW(op) = ptr;
cATOMO++;
         break;
      case BEGIN:
	 op = reg[1];  DEREF(op);  FOLLOW(op) = MAKEINT(FUNC);
         ptr = (LONG)insert(AtomStr, rtnint, 0, &perm) | CS_TAG;
	 op = reg[2];  DEREF(op);  FOLLOW(op) = ptr;
cFUNC++;
         break;
      case UPPER:
         if (AtomStr[0] == '_' && AtomStr[1] == 0) {
	    op = reg[1];  DEREF(op);  FOLLOW(op) = MAKEINT(USCORE);
cUSCORE++;
         } else {
	    op = reg[1];  DEREF(op);  FOLLOW(op) = MAKEINT(VARO);
cVARO++;
	 }
	 if (rtnint > 256) {
	    AtomStr[256] = 0;
	    rtnint = 256;
	    Fprintf(stderr, "*** Name of constant too long: %s\n", AtomStr);
         }
	 ptr = (LONG)insert(AtomStr, rtnint, 0, &perm) | CS_TAG;
	 op = reg[2];  DEREF(op);  FOLLOW(op) = ptr;
         break;
      case REALO:
	 op = reg[2];  DEREF(op);
	 FOLLOW(op) = makefloat(atof(AtomStr));
	 op = reg[1];  DEREF(op);  FOLLOW(op) = MAKEINT(NUMBERO);
cNUMERO++;
         break;
      case RREAL:
	 op = reg[2];  DEREF(op);  FOLLOW(op) = makefloat(double_v);
	 op = reg[1];  DEREF(op);  FOLLOW(op) = MAKEINT(NUMBERO);
cNUMERO++;
	 break;
      case RDIGIT:
cNUMERO++;
	 op = reg[1];  DEREF(op);  FOLLOW(op) = MAKEINT(NUMBERO);
	 op = reg[2];  DEREF(op);  FOLLOW(op) = MAKEINT(rad_int);
	 break;
      case DIGIT:
cNUMERO++;
	 op = reg[1];  DEREF(op);  FOLLOW(op) = MAKEINT(NUMBERO);
	 op = reg[2];  DEREF(op); 
	 for (len = oldnum = newnum = 0; AtomStr[len] != 0; len++) {
      	    oldnum = newnum;
    	    newnum = newnum * 10 + DigVal(AtomStr[len]);
    	    if (newnum < oldnum || newnum > SBPMAXINT) {
	       Fprintf(stderr, "*** overflow >> %s\n", AtomStr);
	       len = strlen(AtomStr);
	       AtomStr[len++] = '.';    
	       AtomStr[len++] = '0';
	       AtomStr[len] = 0;
	       FOLLOW(op) = makefloat(atof(AtomStr));
	       return;
	    }
	 }
     	 FOLLOW(op) = MAKEINT(newnum);
         break;
      case LISQT:
	 op = reg[1];  DEREF(op);  FOLLOW(op) = MAKEINT(STRING);
	 op = reg[2];  DEREF(op);  FOLLOW(op) = list_p;
cSTRING++;
         break;
      case PUNCT:
         /* there are nine punctuation marks, */
         /* ( , )  [ | ]  { ; }  */
         /* % is listed as one, but isn't really. */
         if (AtomStr[0] == ';') {
	    op = reg[1];  DEREF(op);  FOLLOW(op) = MAKEINT(SEMI);
cSEMI++;
         } else {
	    op = reg[1];  DEREF(op);  FOLLOW(op) = MAKEINT(SPECIAL);
cSPECIAL++;
            ptr = (LONG)insert(AtomStr, rtnint, 0, &perm) | CS_TAG;
	    op = reg[2];  DEREF(op);  FOLLOW(op) = ptr;
         }
         break;
      case ENDCL:
	 op = reg[1];  DEREF(op);  FOLLOW(op) = MAKEINT(ENDCLS);
cENDCLS++;
         break;
      case EOFCH:
	 op = reg[1];  DEREF(op);  FOLLOW(op) = MAKEINT(BADEND);
         break;
      default:
         Fprintf(stderr, "Internal error %d %s\n", i, AtomStr);
   }
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.