ftp.nice.ch/pub/next/developer/resources/classes/XText.0.9.beta3.s.tar.gz#/XText0.9/XText.subproj/XTAction_parser.m

This is XTAction_parser.m in view mode; [Download] [Up]

#import "ErrorStream.h"
#import "XTAction.h"
#import <sys/types.h>
#import <stdio.h>
#import <stdlib.h>
#import <string.h>
#import <appkit/NXCType.h>;

/*  This file contains all the routines to parse the argument to the
	addBindings:estream: method; it's the most complicated part of the
	whole package.  The strategy is simple recursive descent, and we
	make no attempt to recover from errors.

	The grammar supported is somewhat more general than necessary; for
	example you can nest sequences of instructions, which currently serves
	no purpose (unless you wanted to get around the maximum sequence
	length...).  The idea is just to make it easy to add more complex
	control structure later, if that turns out to be useful.
*/

#define MAX_SEQUENCE_LENGTH 16		//	max number of actions in a sequence
#define MAX_SELECTOR_LENGTH 32		//	max length of a selector name
#define MAX_STRING_LENGTH 256		//	max length of a string argument
#define MAX_ARGS 2					//	max number of args to a message
	// (Note that if you increase MAX_ARGS you'll also have to add a new
	//	subclass of XTAction and augment parse_msg to use it.)

#define MAX_KEYS 8					//	max number of keys affected by a
									//	single binding

#define PRE_ERROR_CONTEXT 32		//	number of characters displayed before
#define POST_ERROR_CONTEXT 16		//	and after a syntax error

typedef charCode keySet[MAX_KEYS];	//	set of keys an action will be bound to


#define ALPHA(c) \
	(((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')) || (c == '_'))

#define WHITE(c) \
	((c == ' ') || (c == '\n') || (c == '\t') || (c == '\r'))

#define DIGIT(c) \
	((c >= '0') && (c <= '9'))


/*	skip_whitespace advances over any white space and returns the first
	non-whitespace character.

	Like the rest of the parsing routines, it's passed a pointer to a
	char pointer, which is advanced as the string is consumed.
*/

char skip_whitespace(const char **p)
{
	// walk a local cursor forward, then publish the new position once
	const char *cursor = *p;

	for (; WHITE(*cursor); ++cursor)
		;
	*p = cursor;
	return *cursor;		// first character that is not a blank, tab, CR or LF
}

/*	HexToDecimal converts a single hexadecimal digit to its numeric value.
	Only '0'-'9' and upper-case 'A'-'F' are accepted.  The args are
		c		the character to convert
		error	set to 0 if c was a valid digit, nonzero otherwise

	The result is the digit's value (0-15), or 0 if c was not a hex digit.
*/
int HexToDecimal(unsigned char c, unsigned char *error)
{
	*error = 0;
	if ((c >= '0') && (c <= '9'))
		return (int)(c - '0');
	if ((c >= 'A') && (c <= 'F'))
		return (int)(10 + c - 'A');

	// Not a hex digit: the flag must be raised here, otherwise the caller
	// cannot tell a bad character from a genuine '0'.
	*error = 1;
	return 0;
}

int RealChar(unsigned char c, int key)
{
	/* Maps a typed character to the code it produces once the modifier
	 * bits in key are taken into account.  When bit 4 is set and bit 8 is
	 * clear, characters in '@'..'_' are lowered by 0x40 and characters in
	 * '`'..'~' by 0x60; every other combination returns c unchanged. */

	enum { CONTROL_BIT = 4, ALT_BIT = 8 };
	int code = (int)c;
	int control_without_alt = ((key & CONTROL_BIT) != 0)
								&& ((key & ALT_BIT) == 0);

	if (control_without_alt) {
		if ((code >= '@') && (code <= '_'))
			code -= 0x40;
		else if ((code >= '`') && (code <= '~'))
			code -= 0x60;
	}
	return code;
}
/*	report_syntax_error is used to report all syntax errors detected during
	parsing.  The args are
		error		optional information about the type of error
		p			a pointer to the place at which the error was detected
		start		a pointer to the beginning of the string being parsed
		errs		the ErrorStream

	The message will contain some or all of the string surrounding the
	error to help identify the problem.
*/

void report_syntax_error(const char *error, const char *p, const char *start,
						 ErrorStream *errs)
{
	// 100 bytes are reserved for the fixed text plus the error description;
	// the rest holds the quoted context around the error point.
	char msg[100+PRE_ERROR_CONTEXT+POST_ERROR_CONTEXT];
	const char *prefix;
	int  prefix_length;

	// display at most PRE_ERROR_CONTEXT characters before the error point...
	// (compare lengths rather than pointers, so that we never form a pointer
	// that lies before the beginning of the string)

	if ((p - start) > PRE_ERROR_CONTEXT) {
		prefix = p - PRE_ERROR_CONTEXT;
		prefix_length = PRE_ERROR_CONTEXT;
	} else {
		prefix = start;
		prefix_length = (int)(p - start);
	}

	// ... and at most POST_ERROR_CONTEXT characters after, except that if
	// there weren't many characters before we can put even more after.
	//
	// The fixed text takes 37 characters and the two context pieces together
	// never exceed PRE_ERROR_CONTEXT+POST_ERROR_CONTEXT, so limiting the
	// description to 60 characters guarantees that msg cannot overflow.

	sprintf(msg, "Syntax error%.60s in binding:\n    %.*s (here) %.*s",
				error, prefix_length, prefix,
				(PRE_ERROR_CONTEXT+POST_ERROR_CONTEXT-prefix_length), p);
	[errs report:msg];
}

// Forward declaration: parse_seq (below) calls parse_action for every
// element of a sequence, and parse_action in turn calls parse_seq when it
// sees a '{', so parse_action has to be declared before parse_seq is defined.
XTAction *parse_action(const char **p, NXZone *z,
					   const char *start, ErrorStream *errs);

/*	parse_seq parses a '{}'-delimited, ';'-separated sequence of actions
	and constructs an XTSeqAction out of them.  The args are
		p		pointer to the current position pointer
		z		zone in which to allocate the XTActions
		start	the beginning of the string (for reporting errors)
		errs	the ErrorStream

	If there are no errors, the new XTAction is returned; otherwise the
	result is nil.
*/

XTAction *parse_seq(const char **p, NXZone *z,
					const char *start, ErrorStream *errs)
{
	// Actions are gathered in a fixed-size stack array first; once the
	// closing brace tells us how many there are, the pointers are copied
	// into a block allocated from the caller's zone.

	XTAction *pending[MAX_SEQUENCE_LENGTH];
	XTAction **stored = 0;
	XTAction *parsed;
	int count = 0;
	char next;

	++*p;							// step past the '{'

	for (;;) {
		next = skip_whitespace(p);

		if (next == ';') {			// separators carry no meaning of their own
			++*p;
			continue;
		}
		if (next == '}')
			break;

		if (count >= MAX_SEQUENCE_LENGTH) {
			report_syntax_error(" (sequence too long)", *p, start, errs);
			return nil;
		}
		parsed = parse_action(p, z, start, errs);
		if (!parsed)
			return nil;				// the error has already been reported
		pending[count++] = parsed;
	}

	++*p;							// consume the '}'

	// a sequence of exactly one action is just that action
	if (count == 1)
		return pending[0];

	if (count > 0) {
		size_t bytes = count * sizeof(XTAction *);
		stored = NXZoneMalloc(z, bytes);
		memcpy(stored, pending, bytes);
	}
	// an empty sequence yields an XTSeqAction of length 0 with no array
	return [[XTSeqAction allocFromZone:z]
				initLength:count actions:stored];
}

/*	parse_arg parses a message argument, which must be either an integer
	or a '"'-delimited string.  The args are the same as parse_seq, with
	one addition:
		result		a pointer to where the result should be stored

	Only a few escape sequences are recognized: \n, \t, \\, and \".  It
	would be easy to add more.

	If there are no errors, the result (coerced to an int) will be stored
	in *result and parse_arg will return true; otherwise it returns false.
*/

BOOL parse_arg(int *result, const char **p, NXZone *z,
			   const char *start, ErrorStream *errs)
{
	// Parses one message argument at *p: either an integer (any base that
	// strtol accepts with base 0) or a '"'-delimited string.  On success
	// *result holds the integer, or the address of a NUL-terminated copy of
	// the string allocated from zone z, and *p is left just past the
	// argument.  On failure the error is reported and NO is returned.

	char arg[MAX_STRING_LENGTH];
	int arg_length = 0;
	char c;
	char *copied_arg;
	char *end;

	c = skip_whitespace(p);
	if (DIGIT(c) || (c == '-') || (c == '+')) {
		// strtol wants a non-const end pointer, so use a local one instead
		// of handing it our const char ** directly.
		long value = strtol(*p, &end, 0);	// ought to check for overflow...

		// A sign that is not followed by any digit consumes nothing; treat
		// that as an error rather than silently producing the value 0.
		if (end == *p) {
			report_syntax_error(" (bad number)", *p, start, errs);
			return NO;
		}
		*p = end;
		*result = (int)value;
	} else if (c == '"') {
		while (1) {
			c = *++*p;
			switch (c) {
			case 0:
				report_syntax_error(" (unterminated string)", *p, start, errs);
				return NO;
			case '"':
				++*p;				// step past the closing quote
				goto at_end;
			case '\\':
				// translate the character after the backslash; a backslash
				// at the very end of the input lands in the default case
				c = *++*p;
				switch (c) {
				case 'n':	c = '\n'; break;
				case 't':	c = '\t'; break;
				case '\\':
				case '"':			  break;
				default:
					report_syntax_error(" (unknown escape sequence)",
										*p, start, errs);
					return NO;
				}
			}
			// check before storing, so arg never overflows
			if (arg_length >= MAX_STRING_LENGTH) {
				report_syntax_error(" (string too long)", *p, start, errs);
				return NO;
			}
			arg[arg_length++] = c;
		}
	at_end:
		// the characters were collected without a terminator; add one to
		// the zone-allocated copy
		copied_arg = NXZoneMalloc(z, arg_length+1);
		memcpy(copied_arg, arg, arg_length);
		copied_arg[arg_length] = '\0';
		*result = (int)copied_arg;
	} else {
		report_syntax_error("", *p, start, errs);
		return NO;
	}
	return YES;
}

/*	parse_msg parses a single message action, such as
			replaceSel:"foobar" length:3
	The args and result are the same as for parse_seq.
*/

XTAction *parse_msg(const char **p, NXZone *z,
					const char *start, ErrorStream *errs)
{
	// The selector name is built up one character at a time; every ':'
	// is followed by an argument, which is parsed on the spot and left out
	// of the name.

	char name[MAX_SELECTOR_LENGTH];
	int values[MAX_ARGS];
	int name_length = 0;
	int value_count = 0;
	char ch = **p;
	SEL sel;

	do {
		name[name_length++] = ch;
		if (name_length >= MAX_SELECTOR_LENGTH) {
			// no room left for the terminating NUL
			report_syntax_error(" (selector too long)", *p, start, errs);
			return nil;
		}
		++*p;

		if (ch == ':') {
			if (value_count >= MAX_ARGS) {
				report_syntax_error(" (too many args)", *p, start, errs);
				return nil;
			}
			if (!parse_arg(&values[value_count++], p, z, start, errs))
				return nil;			// parse_arg has reported the problem
			skip_whitespace(p);
		}
		ch = **p;
	} while (ALPHA(ch) || DIGIT(ch) || (ch == ':'));

	name[name_length] = '\0';
	sel = sel_getUid(name);
	if (sel == 0) {
		report_syntax_error(" (unknown selector)", *p, start, errs);
		return nil;
	}

	// one XTAction subclass per argument count
	switch (value_count) {
	case 0:
		return [[XTMsg0Action allocFromZone:z] initSel:sel];
	case 1:
		return [[XTMsg1Action allocFromZone:z] initSel:sel arg:values[0]];
	default:
		return [[XTMsg2Action allocFromZone:z]
					initSel:sel arg:values[0] arg:values[1]];
	}
}

/*	parse_action parses an action, which currently must be either a message
	to be sent to the XText object or a sequence of actions.  The args are
	the same as parse_seq.
*/

XTAction *parse_action(const char **p, NXZone *z,
					   const char *start, ErrorStream *errs)
{
	// The first significant character decides what kind of action follows:
	// '{' opens a sequence, a letter or '_' begins a message.
	unsigned char first = skip_whitespace(p);
	const char *detail;

	if (first == '{')
		return parse_seq(p, z, start, errs);
	if (ALPHA(first))
		return parse_msg(p, z, start, errs);

	// anything else is an error; say so specifically if the input ran out
	detail = (first == 0) ? " (unexpected end)" : "";
	report_syntax_error(detail, *p, start, errs);
	return nil;
}

/*	parse_keys parses a specification of the keys an action is to be bound
	to.  A specification is a ','-separated sequence, terminated by a '=',
	where each element is zero or more modifiers ('l', 's', 'c', 'a', 'm',
	'n', or 'h') followed by either a two-digit hex key code (digits 0-9 and
	upper-case A-F) or ' followed by the character generated by the key.
	Each element contributes one entry to the set.  If there are no errors,
	the key codes are stored in keys and true is returned; otherwise false is
	returned.  If there are fewer than MAX_KEYS keys, the first unused entry
	in keys is set to 0 (which happens to be an invalid key code).
*/

BOOL parse_keys(keySet keys, const char **p,
				const char *start, ErrorStream *errs)
{
	// Each element of the specification is accumulated in key: the modifier
	// letters set the low bits, and the key code (from two hex digits or
	// from a quoted character) is shifted left by NUM_MASKS and added.
	// On success *p is left just past the '='; on failure the error is
	// reported with *p pointing at the offending character.

	int num_keys = 0;
	int key = 0;
	unsigned char c;
	BOOL found_one;
	const char *error;
	unsigned char bad_hex;

	while (1) {
		c = skip_whitespace(p);
		found_one = NO;
		switch (c) {
		case 'l': key |= 1; break;
		case 's': key |= 2; break;
		case 'c': key |= 4; break;
		case 'a': key |= 8; break;
		case 'm': key |= 16; break;
		case 'n': key |= 32; break;
		case 'h': key |= 64; break;
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
			// the first digit is known to be valid from the case labels
			key += HexToDecimal(c, &bad_hex) << (4 + NUM_MASKS);
			c = *++*p;
			key += HexToDecimal(c, &bad_hex) << NUM_MASKS;
			// Test the character itself as well as the flag, so that a bad
			// or missing second digit is always rejected here and we never
			// step over the end of the string.
			if (bad_hex || !(DIGIT(c) || ((c >= 'A') && (c <= 'F')))) {
				error = " (bad hex code) ";
				goto syntax_error;
			}
			if (num_keys >= MAX_KEYS) {
				error = " (too many keys)";
				goto syntax_error;
			}
			keys[num_keys++] = key;
			found_one = YES;
			break;
		case '\'':
			c = *++*p;
			if (c == 0) {
				// a quote at the very end has no character to bind
				error = " (unexpected end)";
				goto syntax_error;
			}
			// same limit as for hex codes; keys has only MAX_KEYS entries
			if (num_keys >= MAX_KEYS) {
				error = " (too many keys)";
				goto syntax_error;
			}
			key += RealChar(c, key) << NUM_MASKS; // deals with cntl chars
			keys[num_keys++] = key;
			found_one = YES;
			break;
		default:
			error = "";
			goto syntax_error;
		}
		++*p;
		if (found_one) {
			c = skip_whitespace(p);
			if (c == ',') {
				++*p;
				key = 0;			// go back for more, starting from scratch
			} else if (c == '=') {
				++*p;
				if (num_keys < MAX_KEYS)
					keys[num_keys] = 0;
				return YES;
			} else {
				// leave *p on the unexpected character (possibly the
				// terminating NUL) rather than stepping over it
				error = (c == 0) ? " (unexpected end)" : "";
				goto syntax_error;
			}
		}
	}

syntax_error:
	report_syntax_error(error, *p, start, errs);
	return NO;
}

@implementation XTDispatchAction(parsing)

/*	The entry point that all of the parsing routines above exist to serve.
	Each binding in the string is a key specification followed by an
	action; bindings may be separated by ';'.  The XTActions that are
	created come from the receiver's own zone.  Parsing stops at the first
	error (which is reported to errs, or to the default ErrorStream if errs
	is nil); bindings installed before that point stay in effect.  The
	receiver is returned in every case.
*/

- addBindings:(const char *)bindings estream:errs
{
	const char *cursor = bindings;
	NXZone *zone = [self zone];
	keySet keys;
	XTAction *action;
	int k;

	if (!errs)
		errs = [ErrorStream default];

	for (;;) {
		char next = skip_whitespace(&cursor);

		if (next == 0)
			break;					// end of the binding string
		if (next == ';') {
			++cursor;
			continue;
		}

		if (!parse_keys(keys, &cursor, bindings, errs))
			break;
		action = parse_action(&cursor, zone, bindings, errs);
		if (!action)
			break;

		// keys is terminated by a 0 entry unless all MAX_KEYS slots are used
		for (k = 0; (k < MAX_KEYS) && keys[k]; ++k)
			[self bindKey:keys[k] toAction:action estream:errs];
	}
	return self;
}

@end

These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.