/*
 * comp_scan.c
 *
 * This is comp_scan.c in view mode; [Download] [Up]
 */
/***************************************************************************
*                            COPYRIGHT NOTICE                              *
****************************************************************************
*                ncurses is copyright (C) 1992-1995                        *
*                          Zeyd M. Ben-Halim                               *
*                          zmbenhal@netcom.com                             *
*                          Eric S. Raymond                                 *
*                          esr@snark.thyrsus.com                           *
*                                                                          *
*        Permission is hereby granted to reproduce and distribute ncurses  *
*        by any means and for any fee, whether alone or as part of a       *
*        larger distribution, in source or in binary form, PROVIDED        *
*        this notice is included with any such distribution, and is not    *
*        removed from any of its header files. Mention of ncurses in any   *
*        applications linked with it is highly appreciated.                *
*                                                                          *
*        ncurses comes AS IS with no warranty, implied or expressed.       *
*                                                                          *
***************************************************************************/

/*
 *	comp_scan.c --- Lexical scanner for terminfo compiler.
 *
 *	_nc_reset_input()
 *	_nc_get_token()
 *	_nc_push_token()
 *	_nc_panic_mode()
 *	int _nc_syntax;
 *	int _nc_curr_line;
 *	int _nc_curr_col;
 *	long _nc_curr_file_pos;
 *	long _nc_comment_start;
 *	long _nc_comment_end;
 *	long _nc_start_line;
 */

#include "curses.priv.h"

#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include "tic.h"

/*
 * Maximum length of string capability we'll accept before raising an error.
 * Yes, there is a real capability in /etc/termcap this long, an "is".
 */
#define MAXCAPLEN	600

/*
 * True if ch is a blank or a tab.  The argument is parenthesized so that
 * expression arguments (e.g. a conditional expression) are compared as a
 * whole; note that it is still evaluated twice, so it must not have side
 * effects.
 */
#define iswhite(ch)	((ch) == ' '  ||  (ch) == '\t')

/*
 * Scanner state exported to the rest of the compiler.  The position
 * variables are zeroed by _nc_reset_input() and advanced by next_char();
 * the comment/start-line variables are recorded by _nc_get_token() each
 * time it begins scanning a names field.
 */
int	_nc_syntax;		/* termcap or terminfo? (SYN_TERMCAP / SYN_TERMINFO, chosen while scanning the names field) */
int	_nc_curr_line;		/* current line # in input; bumped for each line read from the file */
int	_nc_curr_col;		/* current column # in input; zeroed per line, bumped per character returned */
long	_nc_curr_file_pos;	/* file offset of current line (ftell() taken just before the line was read) */
long	_nc_comment_start;	/* start of comment range before name (stream position sampled before the names token) */
long	_nc_comment_end;	/* end of comment range before name (file offset of the line holding the names) */
long	_nc_start_line;		/* start line of current entry (value of _nc_curr_line at its names field) */

/*****************************************************************************
 *
 * Token-grabbing machinery
 *
 *****************************************************************************/

static bool first_column;	/* See 'next_char()' below: TRUE when the character just returned was the first in the line buffer */
static char separator;		/* capability separator: ':' for termcap, ',' for terminfo (set by _nc_get_token()) */
static int pushtype;		/* type of pushback token, or NO_PUSHBACK when none is pending */
static char pushname[MAX_NAME_SIZE+1];	/* name saved by _nc_push_token() via _nc_get_type(), handed back to _nc_set_type() on replay */

/* Forward declarations of the character-stream helpers defined later in this file. */
static int  next_char(void);		/* fetch next input character, or EOF */
static long stream_pos(void);		/* current position in the input stream */
static bool end_of_stream(void);	/* are we at end of input? */
static char trans_string(char *);	/* read and translate a string capability value */
static void push_back(char c);		/* un-read one character */

/*
 * Handle a possible termcap-style line continuation.  If ch is a backslash,
 * keep reading past any newlines, blanks and tabs and return the first
 * character that is none of those; any other ch is returned untouched.
 */
static inline int eat_escaped_newline(int ch)
{
	if (ch != '\\')
		return ch;

	do {
		ch = next_char();
	} while (ch == '\n'  ||  iswhite(ch));

	return ch;
}

/*
 *	int
 *	_nc_get_token()
 *
 *	Scans the input for the next token, storing the specifics in the
 *	global structure '_nc_curr_token' and returning one of the following:
 *
 *		NAMES		A line beginning in column 1.  'tk_name'
 *				will be set to point to the names field: in
 *				termcap syntax everything up to but not
 *				including the first colon, in terminfo syntax
 *				the line with its trailing comma and white
 *				space removed.
 *		BOOLEAN		An entry consisting of a name followed by
 *				a separator.  'tk_name' will be set to point to
 *				the name of the capability.
 *		NUMBER		An entry of the form
 *					name#digits,
 *				'tk_name' will be set to point to the capability
 *				name and 'tk_valnumber' to the number given.
 *		STRING		An entry of the form
 *					name=characters,
 *				'tk_name' is set to the capability name and
 *				'tk_valstring' to the string of characters, with
 *				input translations done.
 *		CANCEL		An entry of the form
 *					name@,
 *				'tk_name' is set to the capability name.
 *		UNDEF		The capability name was followed by a
 *				character that starts none of the above.
 *		EOF		The end of the file has been reached.
 *
 *	A `separator' is either a comma or a colon, depending on whether
 *	we are in terminfo or termcap mode.  The mode is decided anew each
 *	time a names field is scanned.
 *
 *	If a token was pushed back with _nc_push_token(), its type is
 *	returned instead and no input is consumed.  A capability preceded
 *	by '.' is treated as commented out: it is scanned (and traced) and
 *	then the following token is returned in its place.
 *
 *	'tk_name' and 'tk_valstring' point into a static buffer that is
 *	overwritten by the next call.
 */

int _nc_get_token(void)
{
static const char terminfo_punct[] = "@%&*!#";
long		number;
int		type;
int		ch;
bool		found;
static char	buffer[MAX_ENTRY_SIZE];
char		*ptr;
int		dot_flag = FALSE;
long		token_start;

	if (pushtype != NO_PUSHBACK)
	{
	    int retval = pushtype;

	    _nc_set_type(pushname);
	    DEBUG(3, ("pushed-back token: `%s', class %d",
		      _nc_curr_token.tk_name, pushtype));

	    pushtype = NO_PUSHBACK;
	    pushname[0] = '\0';

	    /* currtok wasn't altered by _nc_push_token() */
	    return(retval);
	}

	if (end_of_stream())
	    return(EOF);

start_token:
	token_start = stream_pos();
	while ((ch = next_char()) == '\n'  ||  iswhite(ch))
	    continue;

	ch = eat_escaped_newline(ch);
	
	if (ch == EOF)
	    type = EOF;
	else {
	    /* if this is a termcap entry, skip a leading separator */
	    if (separator == ':' && ch == ':')
		ch = next_char();

	    if (ch == '.') {
			dot_flag = TRUE;
			DEBUG(8, ("dot-flag set"));

			while ((ch = next_char())=='.' || iswhite(ch))
			    continue;
	    }

	    if (ch == EOF) {
		type = EOF;
		goto end_of_token;
	    }

	    /*
	     * have to make some punctuation chars legal for terminfo
	     * (ch is not EOF here; the cast keeps isalnum() defined for
	     * bytes with the high bit set)
	     */
	    if (!isalnum((unsigned char)ch) && !strchr(terminfo_punct, (char)ch)) {
		 _nc_warning("Illegal character (expected alphanumeric or %s) - %s",
		 	terminfo_punct, _tracechar(ch));
		 _nc_panic_mode(separator);
		 goto start_token;
	    }

	    ptr = buffer;
	    *(ptr++) = ch;

	    if (first_column) {
	    		char	*desc;

			_nc_comment_start = token_start;
			_nc_comment_end = _nc_curr_file_pos;
			_nc_start_line = _nc_curr_line;

			_nc_syntax = ERR;
			while ((ch = next_char()) != '\n')
			{
			    if (ch == EOF)
				_nc_err_abort("premature EOF");
			    else if (ch == ':')
			    {
				_nc_syntax = SYN_TERMCAP;
				separator = ':';
				break;
			    }
			    else if (ch == ',')
			    {
				_nc_syntax = SYN_TERMINFO;
				separator = ',';
				/*
				 * Fall-through here is not an accident.
				 * The idea is that if we see a comma, we
				 * figure this is terminfo unless we 
				 * subsequently run into a colon -- but
				 * we don't stop looking for that colon until
				 * hitting a newline.  This allows commas to
				 * be embedded in description fields of
				 * either syntax.
				 */
				/* FALLTHRU */
			    }
			    else
				ch = eat_escaped_newline(ch);

			    /*
			     * Escaped newlines let the names field span
			     * any number of input lines, so it must be
			     * bounded here; leave room for the terminator.
			     */
			    if (ptr >= buffer + sizeof(buffer) - 1)
				_nc_err_abort("terminal names field too long");

			    *ptr++ = ch;
			}
			ptr[0] = '\0';
			if (_nc_syntax == ERR)
			{
			    /*
			     * Grrr...what we ought to do here is barf, 
			     * complaining that the entry is malformed.
			     * But because a couple of name fields in the 
			     * 8.2 termcap file end with |\, we just have
			     * to assume it's termcap syntax.
			     */
			    _nc_syntax = SYN_TERMCAP;
			    separator = ':';
			}
			else if (_nc_syntax == SYN_TERMINFO)
			{
			    /* throw away trailing /, *$/ */
			    for (--ptr; iswhite(*ptr) || *ptr == ','; ptr--)
				continue;
			    ptr[1] = '\0';
			}

			/*
			 * This is the soonest we have the terminal name 
			 * fetched.  Set up for following warning messages.
			 */
			ptr = strchr(buffer, '|');
			if (ptr == (char *)NULL)
			    ptr = buffer + strlen(buffer);
			ch = *ptr;
			*ptr = '\0';
			_nc_set_type(buffer);
			*ptr = ch;

			/*
			 * Compute the boundary between the aliases and the
			 * description field for syntax-checking purposes.
			 * desc points at the last '|', so the long name is
			 * empty when nothing follows that bar.
			 */
			desc = strrchr(buffer, '|');
			if (desc)
			{
			    if (desc[1] == '\0')
				_nc_warning("empty longname field");
#if UNUSED	/* Solaris doesn't do this */
			    else if (strchr(desc, ' ') == (char *)NULL)
			    {
				_nc_warning("older tic versions may treat the description field as an alias");
				desc = (char *)NULL;
			    }
#endif
			}
			if (!desc)
			    desc = buffer + strlen(buffer);

			/*
			 * Whitespace in a name field other than the long name
			 * can confuse rdist and some termcap tools.  Slashes
			 * are a no-no.  Other special characters can be
			 * dangerous due to shell expansion.
			 */
			for (ptr = buffer; ptr < desc; ptr++)
			{
			    if (isspace((unsigned char)*ptr))
			    {
				_nc_warning("whitespace in name or alias field");
				break;
			    }
			    else if (*ptr == '/')
			    {
				_nc_warning("slashes aren't allowed in names or aliases");
				break;
			    }
			    else if (strchr("$[]!*?", *ptr))
			    {
				_nc_warning("dubious character `%c' in name or alias field", *ptr);
				break;
			    }
			}

			ptr = buffer;

			_nc_curr_token.tk_name = buffer;
			type = NAMES;
	    } else {
			ch = next_char();
			/* we must allow ';' to catch k; */
			while ((ch != EOF && isalnum((unsigned char)ch)) || ch == ';') {
			    	*(ptr++) = ch;
			    	ch = next_char();
			}

			/* the value (if any) is stored right after the name */
			*ptr++ = '\0';
			switch (ch) {
			case ',':
			case ':':
				if (ch != separator)
					_nc_err_abort("Separator inconsistent with syntax");
				_nc_curr_token.tk_name = buffer;
				type = BOOLEAN;
				break;
			case '@':
				if ((ch = next_char()) != separator)
					_nc_warning("Missing separator after `%s', have %s",
						buffer, _tracechar(ch));
				_nc_curr_token.tk_name = buffer;
				type = CANCEL;
				break;

		    	case '#':
				number = 0;
				found  = FALSE;
				while ((ch = next_char()) != EOF
				       && isdigit((unsigned char)ch)) {
					number = number * 10 + ch - '0';
					found  = TRUE;
				}
				if (found == FALSE)
					_nc_warning("no value given for `%s'", buffer);
				if (ch != separator)
					_nc_warning("Missing separator");
				_nc_curr_token.tk_name = buffer;
				_nc_curr_token.tk_valnumber = number;
				type = NUMBER;
				break;
		    
			case '=':
				ch = trans_string(ptr);
				if (ch != separator)
					_nc_warning("Missing separator");
				_nc_curr_token.tk_name = buffer;
				_nc_curr_token.tk_valstring = ptr;
				type = STRING;
				break;

			case EOF:
				type = EOF;
				break;
			default:
				/* just to get rid of the compiler warning */
				type = UNDEF;
				_nc_warning("Illegal character - %s",
					_tracechar(ch));
			}
		} /* end else (first_column == FALSE) */
	} /* end else (ch != EOF) */

end_of_token:
	if (dot_flag == TRUE)
	    DEBUG(8, ("Commented out "));

	if (_nc_tracing & 0x80)
	{
	    fprintf(stderr, "Token: ");
	    switch (type)
	    {
		case BOOLEAN:
		    fprintf(stderr, "Boolean; name='%s'\n",
			    _nc_curr_token.tk_name);
		    break;
		
		case NUMBER:
		    fprintf(stderr, "Number;  name='%s', value=%d\n",
			    _nc_curr_token.tk_name,
			    _nc_curr_token.tk_valnumber);
		    break;
		
		case STRING:
		    fprintf(stderr, "String;  name='%s', value='%s'\n",
			    _nc_curr_token.tk_name,
			    _nc_visbuf(_nc_curr_token.tk_valstring));
		    break;
		
		case CANCEL:
		    fprintf(stderr, "Cancel; name='%s'\n",
			    _nc_curr_token.tk_name);
		    break;
		
		case NAMES:

		    fprintf(stderr, "Names; value='%s'\n",
			    _nc_curr_token.tk_name);
		    break;

		case EOF:
		    fprintf(stderr, "End of file\n");
		    break;

		default:
		    _nc_warning("Bad token type");
	    }
	}

	if (dot_flag == TRUE)		/* if commented out, use the next one */
	    type = _nc_get_token();

	DEBUG(3, ("token: `%s', class %d", _nc_curr_token.tk_name, type));

	return(type);
}

/*
 *	char
 *	trans_string(ptr)
 *
 *	Reads characters using next_char() until encountering a separator,
 *	end-of-file, or (in termcap syntax only) an unescaped newline, and
 *	stores the translated, NUL-terminated result at ptr.  The returned
 *	value is the character which caused reading to stop.  The following
 *	translations are done on the input:
 *
 *		^X  goes to  ctrl-X (i.e. X & 037), and ^? to DEL (0177);
 *			a ^ directly after % is taken literally
 *		{\E,\n,\l,\r,\b,\t,\f,\s}  go to
 *			{ESCAPE,newline,newline,carriage-return,backspace,
 *			 tab,formfeed,space}
 *		{\^,\\,\:}  go to  {caret,backslash,colon}
 *		\,  is stored as the two characters backslash and comma
 *		\ followed by a newline is dropped
 *		\ddd (for ddd = up to three octal digits)  goes to the character ddd
 *
 *		\e == \E
 *		\0 == \200
 *
 *	NOTE: the caller must supply enough room at ptr; nothing here limits
 *	how much is stored.  Overlong strings only draw a warning.
 */

static char
trans_string(char *ptr)
{
int	count = 0;
int	number;
int	i, c;
chtype	ch, last_ch = '\0';

	while ((ch = c = next_char()) != separator && c != EOF) {
	    if ((_nc_syntax == SYN_TERMCAP) && c == '\n')
	    	break;
	    if (ch == '^' && last_ch != '%') {
		ch = c = next_char();
		if (c == EOF)
		    _nc_err_abort("Premature EOF");

		if (! (is7bits(ch) && isprint(ch))) {
		    _nc_warning("Illegal ^ character - %s",
		    	_tracechar((unsigned char)ch));
		}
		if (ch == '?')
		    *(ptr++) = '\177';
		else
		    *(ptr++) = (char)(ch & 037);
	    }
	    else if (ch == '\\') {
		ch = c = next_char();
		if (c == EOF)
		    _nc_err_abort("Premature EOF");
		
		if (ch >= '0'  &&  ch <= '7') {
		    number = ch - '0';
		    for (i=0; i < 2; i++) {
			ch = c = next_char();
			if (c == EOF)
			    _nc_err_abort("Premature EOF");
			
			if (ch < '0'  ||  ch > '7') {
			    /*
			     * Test and print the int copy: ch is a chtype,
			     * which is neither a valid isdigit() argument
			     * nor what %c expects.
			     */
			    if (isdigit((unsigned char)c)) {
				_nc_warning("Non-octal digit `%c' in \\ sequence", c);
				/* allow the digit; it'll do less harm */
			    } else {
				push_back(c);
				break;
			    }
			}

			number = number * 8 + ch - '0';
		    }

		    /* a NUL would end the string, so \0 stands for \200 */
		    if (number == 0)
			number = 0200;
		    *(ptr++) = (char) number;
		} else {
		    switch (c) {
			case 'E':
			case 'e':	*(ptr++) = '\033';	break;
			
			case 'l':
			case 'n':	*(ptr++) = '\n';	break;
			
			case 'r':	*(ptr++) = '\r';	break;
			
			case 'b':	*(ptr++) = '\010';	break;

			case 's':	*(ptr++) = ' ';		break;
			
			case 'f':	*(ptr++) = '\014';	break;
			
			case 't':	*(ptr++) = '\t';	break;
			
			case '\\':	*(ptr++) = '\\';	break;
			
			case '^': 	*(ptr++) = '^';		break;

			case ',':	*(ptr++) = '\\';
					*(ptr++) = ',';		break;

			case ':':	*(ptr++) = ':';		break;

			case '\n':
			    /* escaped newline: stored nowhere, not counted */
			    continue;

			default:
			    _nc_warning("Illegal character %s in \\ sequence",
				    _tracechar((unsigned char)ch));
			    *(ptr++) = (char)ch;
		    } /* endswitch (ch) */
		} /* endelse (ch < '0' ||  ch > '7') */
	    } /* end else if (ch == '\\') */
	    else {
		*(ptr++) = (char)ch;
	    }
	    
	    count ++;

	    last_ch = ch;

	    /* complain once, when the limit is first exceeded */
	    if (count == MAXCAPLEN + 1)
		_nc_warning("Very long string found.  Missing separator?");
	} /* end while */

	*ptr = '\0';

	return((char)ch);
}

/*
 *	_nc_push_token()
 *
 *	Remember a token of the given class so that the next call to
 *	_nc_get_token() hands it back instead of reading more input.
 */

void _nc_push_token(int class)
{
    /*
     * Only a single level of pushback exists: the class and the current
     * type name are saved, while the token data itself is assumed to stay
     * put in the static current-token storage, which only _nc_get_token()
     * modifies.  A second push before the next _nc_get_token() call would
     * overwrite the first.
     */
    _nc_get_type(pushname);
    pushtype = class;

    DEBUG(3, ("pushing token: `%s', class %d",
	      _nc_curr_token.tk_name, class));
}

/*
 * Panic mode error recovery - discard input up to and including the next
 * occurrence of "ch", or up to end of input if it never appears.
 */
void _nc_panic_mode(char ch)
{
	int c;

	do {
		c = next_char();
	} while (c != ch  &&  c != EOF);
}

/*****************************************************************************
 *
 * Character-stream handling
 *
 *****************************************************************************/

/* Size of the line buffer next_char() fills with fgets() when reading a file. */
#define LEXBUFSIZ	1024

static char *bufptr;		/* next character to be returned by next_char() */
static char *bufstart;		/* start of buffer so we can compute offsets */
static FILE *yyin;		/* scanner's input stream; NULL when scanning an in-memory buffer */

/*
 *	_nc_reset_input()
 *
 *	Re-initializes the input-reading routines; call it at start-up and
 *	again whenever a seek has been done on the input.  The scanner reads
 *	from the stream 'fp' if that is non-null and from the string 'buf'
 *	otherwise, so exactly one of the two arguments must be non-null.
 */

void _nc_reset_input(FILE *fp, char *buf)
{
	/* position bookkeeping starts over */
	_nc_curr_col = 0;
	_nc_curr_line = 0;
	_nc_curr_file_pos = 0L;

	/* select the input source */
	bufptr = buf;
	bufstart = buf;
	yyin = fp;

	/* forget any token that was pushed back */
	pushname[0] = '\0';
	pushtype = NO_PUSHBACK;
}

/*
 * 	int next_char()
 *
 *	Returns the next character in the input stream as a non-negative
 *	value (an unsigned char converted to int, as getc() does), or EOF
 *	at end of input.
 *
 *	When reading from a file, lines starting with '#' are skipped as
 *	comments and white space at the start of a line is stripped.  When
 *	scanning an in-memory buffer the characters are returned as they are.
 *
 *	The global state variable 'first_column' is set TRUE if the character
 *	returned is from the first column of the input line.
 *
 *	The global variable _nc_curr_line is incremented for each new line.
 *	The global variable _nc_curr_file_pos is set to the file offset of the
 *	beginning of each line.  _nc_curr_col counts the characters returned
 *	from the current line.
 *
 *	NOTE: a line longer than LEXBUFSIZ-1 characters is read in several
 *	pieces, each of which is treated here as a line of its own.
 */

static int
next_char(void)
{
    if (!yyin)
    {
	/* string input; an absent buffer reads as empty */
	if (!bufptr || *bufptr == '\0')
	    return(EOF);
    }
    else if (!bufptr || !*bufptr)
    {
	/*
	 * In theory this could be recoded to do its I/O one
	 * character at a time, saving the buffer space.  In
	 * practice, this turns out to be quite hard to get
	 * completely right.  Try it and see.  If you succeed,
	 * don't forget to hack push_back() correspondingly.
	 */
	static char line[LEXBUFSIZ];

	do {
	    /* fetch the next line that is not a comment */
	    do {
		_nc_curr_file_pos = ftell(yyin);

		if ((bufstart = fgets(line, LEXBUFSIZ, yyin)) != NULL) {
		    _nc_curr_line++;
		    _nc_curr_col = 0;
		}
		bufptr = bufstart;
	    } while
		(bufstart != NULL && line[0] == '#');

	    if (bufstart == NULL)
		return (EOF);

	    while (iswhite(*bufptr))
		bufptr++;

	    /*
	     * A final line of nothing but blanks (no newline) leaves us
	     * at the terminator; read on rather than return a NUL.
	     */
	} while (*bufptr == '\0');
    }

    first_column = (bufptr == bufstart);

    _nc_curr_col++;

    /* never hand back a negative char: byte 0xFF must not look like EOF */
    return((unsigned char)*bufptr++);
}

static void push_back(char c)
/*
 * Un-read one character: step the read pointer back by one and store c
 * there, so that the next next_char() call returns it.  Nothing can be
 * pushed back while the read pointer sits at the start of the buffer.
 */
{
    if (bufstart == bufptr) {
	_nc_syserr_abort("Can't backspace off beginning of line");
    }

    bufptr -= 1;
    *bufptr = c;
}

static long stream_pos(void)
/*
 * Report where we are in the input: the file offset given by ftell() when
 * reading a stream, otherwise the distance of the read pointer from the
 * start of the string buffer (0 if there is no buffer).
 */
{
    if (yyin)
	return ftell(yyin);

    if (bufptr)
	return (long)(bufptr - bufstart);

    return 0;
}

static bool end_of_stream(void)
/*
 * Tell whether the input is exhausted: for a stream, whatever feof()
 * reports; for a string buffer, whether the read pointer is at the
 * terminating NUL.
 */
{
    if (yyin)
	return feof(yyin);

    return (bufptr && *bufptr == '\0');
}

/* comp_scan.c ends here */
/*
 * These are the contents of the former NiCE NeXT User Group
 * NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.
 */