ftp.nice.ch/pub/next/science/astronomy/ephem_NISH_bs.tar.gz#/ephem/Source/compiler.c

This is compiler.c in view mode; [Download] [Up]

/* module to compile and execute a c-style arithmetic expression.
 * public entry points are compile_expr() and execute_expr().
 *
 * one reason this is so nice and tight is that all opcodes are the same size
 * (an int) and the tokens the parser returns are directly usable as opcodes,
 * for the most part. constants and variables are compiled as an opcode
 * with an offset into the auxiliary opcode tape, opx.
 */

#include <math.h>
#include <ctype.h>
#ifdef VMS
#include <stdlib.h>
#endif
#ifdef NeXT
# include <stdlib.h>
#endif
#include "screen.h"

/* parser tokens and opcodes, as necessary */
#define	HALT	0	/* good value for HALT since program is inited to 0 */
/* binary operators (precedences in table, below) */
#define	ADD	1
#define	SUB	2
#define	MULT	3
#define	DIV	4
#define	AND	5
#define	OR	6
#define	GT	7
#define	GE	8
#define	EQ	9
#define	NE	10
#define	LT	11
#define	LE	12
/* unary op, precedence in NEG_PREC #define, below */
#define	NEG	13
/* symantically operands, ie, constants, variables and all functions */
#define	CONST	14	
#define	VAR	15
#define	ABS	16	/* add functions if desired just like this is done */
#define	SQRT	17	/* add functions if desired just like this is done */
/* purely tokens - never get compiled as such */
#define	LPAREN	255
#define	RPAREN	254
#define	ERR	(-1)

/* precedence of each of the binary operators.
 * in case of a tie, compiler associates left-to-right.
 * N.B. each entry's index must correspond to its #define!
 */
static int precedence[] = {0,5,5,6,6,2,1,4,4,3,3,4,4};
#define	NEG_PREC	7	/* negation is highest */

/* execute-time operand stack */
#define	MAX_STACK	16
static double stack[MAX_STACK], *sp;

/* space for compiled opcodes - the "program".
 * opcodes go in lower 8 bits.
 * when an opcode has an operand (as CONST and VAR) it is really in opx[] and
 *   the index is in the remaining upper bits.
 */
#define	MAX_PROG 32
static int program[MAX_PROG], *pc;
#define	OP_SHIFT	8
#define	OP_MASK		0xff

/* auxiliary operand info.
 * the operands (all but lower 8 bits) of CONST and VAR are really indeces
 * into this array. thus, no point in making this any longer than you have
 * bits more than 8 in your machine's int to index into it, ie, make
 *    MAX_OPX <= 1 << ((sizeof(int)-1)*8)
 * also, the fld's must refer to ones being flog'd, so not point in more
 * of these then that might be used for plotting and srching combined.
 */
#define	MAX_OPX	16
typedef union {
    double opu_f;		/* value when opcode is CONST */
    int opu_fld;		/* rcfpack() of field when opcode is VAR */
} OpX;
static OpX opx[MAX_OPX];
static int opxidx;

/* these are global just for easy/rapid access */
static int parens_nest;	/* to check that parens end up nested */
static char *err_msg;	/* caller provides storage; we point at it with this */
static char *cexpr, *lcexpr; /* pointers that move along caller's expression */
static int good_prog;	/* != 0 when program appears to be good */

/* compile the given c-style expression.
 * return 0 and set good_prog if ok,
 * else return -1 and a reason message in errbuf.
 */
compile_expr (ex, errbuf)
char *ex;
char *errbuf;
{
	int instr;

	/* init the globals.
	 * also delete any flogs used in the previous program.
	 */
	cexpr = ex;
	err_msg = errbuf;
	pc = program;
	opxidx = 0;
	parens_nest = 0;
	do {
	    instr = *pc++;
	    if ((instr & OP_MASK) == VAR)
		flog_delete (opx[instr >> OP_SHIFT].opu_fld);
	} while (instr != HALT);

	pc = program;
	if (compile(0) == ERR) {
	    (void) sprintf (err_msg + strlen(err_msg), " at \"%.10s\"", lcexpr);
	    good_prog = 0;
	    return (-1);
	}
	*pc++ = HALT;
	good_prog = 1;
	return (0);
}

/* execute the expression previously compiled with compile_expr().
 * return 0 with *vp set to the answer if ok, else return -1 with a reason
 * why not message in errbuf.
 */
execute_expr (vp, errbuf)
double *vp;
char *errbuf;
{
	int s;

	err_msg = errbuf;
	sp = stack + MAX_STACK;	/* grows towards lower addresses */
	pc = program;
	s = execute(vp);
	if (s < 0)
	    good_prog = 0;
	return (s);
}

/* this is a way for the outside world to ask whether there is currently a
 * reasonable program compiled and able to execute.
 */
prog_isgood()
{
	return (good_prog);
}

/* get and return the opcode corresponding to the next token.
 * leave with lcexpr pointing at the new token, cexpr just after it.
 * also watch for mismatches parens and proper operator/operand alternation.
 */
static
next_token ()
{
	static char toomt[] = "More than %d terms";
	static char badop[] = "Illegal operator";
	int tok = ERR;	/* just something illegal */
	char c;

	while ((c = *cexpr) == ' ')
	    cexpr++;
	lcexpr = cexpr++;

	/* mainly check for a binary operator */
	switch (c) {
	case '\0': --cexpr; tok = HALT; break; /* keep returning HALT */
	case '+': tok = ADD; break; /* compiler knows when it's really unary */
	case '-': tok = SUB; break; /* compiler knows when it's really negate */
	case '*': tok = MULT; break;
	case '/': tok = DIV; break;
	case '(': parens_nest++; tok = LPAREN; break;
	case ')':
	    if (--parens_nest < 0) {
	        (void) sprintf (err_msg, "Too many right parens");
		return (ERR);
	    } else
		tok = RPAREN;
	    break;
	case '|':
	    if (*cexpr == '|') { cexpr++; tok = OR; }
	    else { (void) sprintf (err_msg, badop); return (ERR); }
	    break;
	case '&':
	    if (*cexpr == '&') { cexpr++; tok = AND; }
	    else { (void) sprintf (err_msg, badop); return (ERR); }
	    break;
	case '=':
	    if (*cexpr == '=') { cexpr++; tok = EQ; }
	    else { (void) sprintf (err_msg, badop); return (ERR); }
	    break;
	case '!':
	    if (*cexpr == '=') { cexpr++; tok = NE; }
	    else { (void) sprintf (err_msg, badop); return (ERR); }
	    break;
	case '<':
	    if (*cexpr == '=') { cexpr++; tok = LE; }
	    else tok = LT;
	    break;
	case '>':
	    if (*cexpr == '=') { cexpr++; tok = GE; }
	    else tok = GT;
	    break;
	}

	if (tok != ERR)
	    return (tok);

	/* not op so check for a constant, variable or function */
	if (isdigit(c) || c == '.') {
	    if (opxidx > MAX_OPX) {
		(void) sprintf (err_msg, toomt, MAX_OPX);
		return (ERR);
	    }
	    opx[opxidx].opu_f = atof (lcexpr);
	    tok = CONST | (opxidx++ << OP_SHIFT);
	    skip_double();
	} else if (isalpha(c)) {
	    /* check list of functions */
	    if (strncmp (lcexpr, "abs", 3) == 0) {
		cexpr += 2;
		tok = ABS;
	    } else if (strncmp (lcexpr, "sqrt", 4) == 0) {
		cexpr += 3;
		tok = SQRT;
	    } else {
		/* not a function, so assume it's a variable */
		int fld;
		if (opxidx > MAX_OPX) {
		    (void) sprintf (err_msg, toomt, MAX_OPX);
		    return (ERR);
		}
		fld = parse_fieldname ();
		if (fld < 0) {
		    (void) sprintf (err_msg, "Unknown field");
		    return (ERR);
		} else {
		    if (flog_add (fld) < 0) { /* register with field logger */
			(void) sprintf (err_msg, "Sorry; too many fields");
			return (ERR);
		    }
		    opx[opxidx].opu_fld = fld;
		    tok = VAR | (opxidx++ << OP_SHIFT);
		}
	    }
	}

	return (tok);
}

/* move cexpr on past a double.
 * allow sci notation.
 * no need to worry about a leading '-' or '+' but allow them after an 'e'.
 * TODO: this handles all the desired cases, but also admits a bit too much
 *   such as things like 1eee2...3. geeze; to skip a double right you almost
 *   have to go ahead and crack it!
 */
static
skip_double()
{
	int sawe = 0;	/* so we can allow '-' or '+' right after an 'e' */

	while (1) {
	    char c = *cexpr;
	    if (isdigit(c) || c=='.' || (sawe && (c=='-' || c=='+'))) {
		sawe = 0;
		cexpr++;
	    } else if (c == 'e') {
		sawe = 1;
		cexpr++;
	    } else
		break;
	}
}

/* call this whenever you want to dig out the next (sub)expression.
 * keep compiling instructions as long as the operators are higher precedence
 * than prec, then return that "look-ahead" token that wasn't (higher prec).
 * if error, fill in a message in err_msg[] and return ERR.
 */
static
compile (prec)
int prec;
{
	int expect_binop = 0;	/* set after we have seen any operand.
				 * used by SUB so it can tell if it really 
				 * should be taken to be a NEG instead.
				 */
	int tok = next_token ();

        while (1) {
	    int p;
	    if (tok == ERR)
		return (ERR);
	    if (pc - program >= MAX_PROG) {
		(void) sprintf (err_msg, "Program is too long");
		return (ERR);
	    }

	    /* check for special things like functions, constants and parens */
            switch (tok & OP_MASK) {
            case HALT: return (tok);
	    case ADD:
		if (expect_binop)
		    break;	/* procede with binary addition */
		/* just skip a unary positive(?) */
		tok = next_token();
		continue;
	    case SUB:
		if (expect_binop)
		    break;	/* procede with binary subtract */
		tok = compile (NEG_PREC);
		*pc++ = NEG;
		expect_binop = 1;
		continue;
            case ABS: /* other funcs would be handled the same too ... */
	    case SQRT:
		/* eat up the function parenthesized argument */
		if (next_token() != LPAREN || compile (0) != RPAREN) {
		    (void) sprintf (err_msg, "Function arglist error");
		    return (ERR);
		}
		/* then handled same as ... */
            case CONST: /* handled same as... */
	    case VAR:
		*pc++ = tok;
		tok = next_token();
		expect_binop = 1;
		continue;
            case LPAREN:
		if (compile (0) != RPAREN) {
		    (void) sprintf (err_msg, "Unmatched left paren");
		    return (ERR);
		}
		tok = next_token();
		expect_binop = 1;
		continue;
            case RPAREN:
		return (RPAREN);
            }

	    /* everything else is a binary operator */
	    p = precedence[tok];
            if (p > prec) {
                int newtok = compile (p);
		if (newtok == ERR)
		    return (ERR);
                *pc++ = tok;
		expect_binop = 1;
                tok = newtok;
            } else
                return (tok);
        }
}

/* "run" the program[] compiled with compile().
 * if ok, return 0 and the final result,
 * else return -1 with a reason why not message in err_msg.
 */
static
execute(result)
double *result;
{
	int instr; 

	do {
	    instr = *pc++;
	    switch (instr & OP_MASK) {
	    /* put these in numberic order so hopefully even the dumbest
	     * compiler will choose to use a jump table, not a cascade of ifs.
	     */
	    case HALT: break;	/* outer loop will stop us */
	    case ADD:  sp[1] = sp[1] +  sp[0]; sp++; break;
	    case SUB:  sp[1] = sp[1] -  sp[0]; sp++; break;
	    case MULT: sp[1] = sp[1] *  sp[0]; sp++; break;
	    case DIV:  sp[1] = sp[1] /  sp[0]; sp++; break;
	    case AND:  sp[1] = sp[1] && sp[0] ? 1 : 0; sp++; break;
	    case OR:   sp[1] = sp[1] || sp[0] ? 1 : 0; sp++; break;
	    case GT:   sp[1] = sp[1] >  sp[0] ? 1 : 0; sp++; break;
	    case GE:   sp[1] = sp[1] >= sp[0] ? 1 : 0; sp++; break;
	    case EQ:   sp[1] = sp[1] == sp[0] ? 1 : 0; sp++; break;
	    case NE:   sp[1] = sp[1] != sp[0] ? 1 : 0; sp++; break;
	    case LT:   sp[1] = sp[1] <  sp[0] ? 1 : 0; sp++; break;
	    case LE:   sp[1] = sp[1] <= sp[0] ? 1 : 0; sp++; break;
	    case NEG:  *sp = -*sp; break;
	    case CONST: *--sp = opx[instr >> OP_SHIFT].opu_f; break;
	    case VAR:
		if (flog_get(opx[instr>>OP_SHIFT].opu_fld, --sp, (char *)0)<0) {
		    (void) sprintf (err_msg, "Bug! VAR field not logged");
		    return (-1);
		}
		break;
	    case ABS:  *sp = fabs (*sp); break;
	    case SQRT: *sp = sqrt (*sp); break;
	    default:
		(void) sprintf (err_msg, "Bug! bad opcode: 0x%x", instr);
		return (-1);
	    }
	    if (sp < stack) {
		(void) sprintf (err_msg, "Runtime stack overflow");
		return (-1);
	    } else if (sp - stack > MAX_STACK) {
		(void) sprintf (err_msg, "Bug! runtime stack underflow");
		return (-1);
	    }
	} while (instr != HALT);

	/* result should now be on top of stack */
	if (sp != &stack[MAX_STACK - 1]) {
	    (void) sprintf (err_msg, "Bug! stack has %d items",
							MAX_STACK - (sp-stack));
	    return (-1);
	}
	*result = *sp;
	return (0);
}

/* starting with lcexpr pointing at a string expected to be a field name,
 * return an rcfpack(r,c,0) of the field else -1 if bad.
 * when return, leave lcexpr alone but move cexpr to just after the name.
 */
static
parse_fieldname ()
{
	int r = -1, c = -1; 	/* anything illegal */
	char *fn = lcexpr;	/* likely faster than using the global */
	char f0, f1;
	char *dp;

	/* search for first thing not an alpha char.
	 * leave it in f0 and leave dp pointing to it.
	 */
	dp = fn;
	while (isalpha(f0 = *dp))
	    dp++;

	/* crack the new field name.
	 * when done trying, leave dp pointing at first char just after it.
	 * set r and c if we recognized it.
	 */
	if (f0 == '.') {
	    int jcontext = 0;	/* need more of then as time goes on */

	    /* object.column "dot" notation pair.
	     * crack the first portion (pointed to by fn): set r.
	     * then the second portion (pointed to by dp+1): set c.
	     */
	    f0 = fn[0];
	    f1 = fn[1];
	    switch (f0) {
	    case 'c':		    r = R_CALLISTO;
		break;
	    case 'e':		    r = R_EUROPA;
		break;
	    case 'g':		    r = R_GANYMEDE;
		break;
	    case 'i':		    r = R_IO;
		break;
	    case 'j':
				    r = R_JUPITER;
		jcontext = 1;
		break;
	    case 'm':
		if (f1 == 'a')      r = R_MARS;
		else if (f1 == 'e') r = R_MERCURY;
		else if (f1 == 'o') r = R_MOON;
		break;
	    case 'n':		    r = R_NEPTUNE;
		break;
	    case 'p':		    r = R_PLUTO;
		break;
	    case 's':
		if (f1 == 'a')      r = R_SATURN;
		else if (f1 == 'u') r = R_SUN;
		break;
	    case 'u':		    r = R_URANUS;
		break;
	    case 'x':		    r = R_OBJX;
		break;
	    case 'y':		    r = R_OBJY;
		break;
	    case 'v':		    r = R_VENUS;
		break;
	    }

	    /* now crack the column (stuff after the dp) */
	    dp++;	/* point at good stuff just after the decimal pt */
	    f0 = dp[0];
	    f1 = dp[1];
	    switch (f0) {
	    case 'a':
		if (f1 == 'l')        c = C_ALT;
		else if (f1 == 'z')   c = C_AZ;
		break;
	    case 'd':		      c = C_DEC;
		break;
	    case 'e':
		if (f1 == 'd')        c = C_EDIST;
		else if (f1 == 'l')   c = C_ELONG;
		break;
	    case 'h':
		if (f1 == 'l') {
		    if (dp[2] == 'a')              c = C_HLAT;
		    else if (dp[2] == 'o')         c = C_HLONG;
		} else if (f1 == 'r' || f1 == 'u') c = C_TUP;
		break;
	    case 'j':		      c = C_JUPITER;
		break;
	    case 'm':
		if (f1 == 'a')        c = C_MARS;
		else if (f1 == 'e')   c = C_MERCURY;
		else if (f1 == 'o')   c = C_MOON;
		break;
	    case 'n':		      c = C_NEPTUNE;
		break;
	    case 'p':
		if (f1 == 'h')        c = C_PHASE;
		else if (f1 == 'l')   c = C_PLUTO;
		break;
	    case 'r':
		if (f1 == 'a') {
		    if (dp[2] == 'z') c = C_RISEAZ;
		    else 	      c = C_RA;
		} else if (f1 == 't') c = C_RISETM;
		break;
	    case 's':
		if (f1 == 'a') {
		    if (dp[2] == 'z') c = C_SETAZ;
		    else	      c = C_SATURN;
		} else if (f1 == 'd') c = C_SDIST;
		else if (f1 == 'i')   c = C_SIZE;
		else if (f1 == 't')   c = C_SETTM;
		else if (f1 == 'u')   c = C_SUN;
		break;
	    case 't':
		if (f1 == 'a')        c = C_TRANSALT;
		else if (f1 == 't')   c = C_TRANSTM;
		break;
	    case 'u':		      c = C_URANUS;
		break;
	    case 'x':		      c = jcontext ? C_OBJX : C_JMX;
		break;
	    case 'y':		      c = jcontext ? C_OBJY : C_JMY;
		break;
	    case 'z':		      c = C_JMZ;
		break;
	    case 'v':
		if (f1 == 'e')        c = C_VENUS;
		else if (f1 == 'm')   c = C_MAG;
		break;
	    }

	    /* now skip dp on past the column stuff */
	    while (isalpha(*dp))
		dp++;
	} else {
	    /* no decimal point; some other field */
	    f0 = fn[0];
	    f1 = fn[1];
	    switch (f0) {
	    case 'd':
		if (f1 == 'a')      r = R_DAWN, c = C_DAWNV;
		else if (f1 == 'u') r = R_DUSK, c = C_DUSKV;
		break;
	    case 'j':
		if (f1 == 'I') {
		    if (fn[2] == 'I') r = R_JCML, c = C_JCMLSII;
		    else 	      r = R_JCML, c = C_JCMLSI;
		}
		break;
	    case 'n':
		r = R_LON, c = C_LONV;
		break;
	    }
	}

	cexpr = dp;
	if (r <= 0 || c <= 0) return (-1);
	return (rcfpack (r, c, 0));
}

These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.