// This is syntax.c in view mode; [Download] [Up]
//------------------------------------------------------------------------
// ^FILE: syntax.c - implement the ArgSyntax class
//
// ^DESCRIPTION:
// This file uses a SyntaxFSM to implement a class to parse an argument
// syntax string from input and to hold the "compiled" result.
//
// ^HISTORY:
// 03/25/92 Brad Appleton <brad@ssd.csd.harris.com> Created
//-^^---------------------------------------------------------------------
#include <stdlib.h>
#include <iostream.h>
#include <string.h>
#include <ctype.h>
#include <cmdline.h>
#include "syntax.h"
#include "quoted.h"
//------------------------------------------------------------------ copy_token
//-------------------
// ^FUNCTION: copy_token - copy into a token
//
// ^SYNOPSIS:
// copy_token(dest, src)
//
// ^PARAMETERS:
// const char * & dest;
// -- where to house the duplicated token
//
// const SyntaxFSM::token_t & src;
// -- the token to copy.
//
// ^DESCRIPTION:
// Duplicate the token denoted by "src" into "dest".
//
// ^REQUIREMENTS:
// None.
//
// ^SIDE-EFFECTS:
// Allocates storage for "dest" (token length + 1 bytes, even if the token is empty).
//
// ^RETURN-VALUE:
// None.
//
// ^ALGORITHM:
// Trivial.
//-^^----------------
void
copy_token(const char * & dest, const SyntaxFSM::token_t & src)
{
   // Tokens are (start, len) slices and are not NUL-terminated, so
   // build a terminated private copy: len characters plus the '\0'.
   char * const  duplicate = new char[src.len + 1] ;

   ::strncpy(duplicate, src.start, src.len);
   *(duplicate + src.len) = '\0';

   // Hand the freshly allocated string to the caller; whatever "dest"
   // pointed at before is simply overwritten, not released.
   dest = duplicate;
}
//---------------------------------------------------------------- ArgSyntax
//-------------------
// ^FUNCTION: parse_syntax - parse syntax string
//
// ^SYNOPSIS:
// parse_syntax(str)
//
// ^PARAMETERS:
// const char * str;
// -- the string (containing the argument syntax) to parse.
//
// ^DESCRIPTION:
// Parse the syntax-string and compile it into an internal format
// (namely an ArgSyntax object).
//
// ^REQUIREMENTS:
// "str" should correspond to the following:
//
// [<KEYWORD-SPEC>] [<VALUE-SPEC>]
//
// Where <KEYWORD-SPEC> is of the form:
// c|keyword
//
// Where 'c' is the option-character and "keyword" is the keyword.
//
// (There must be no spaces surrounding the '|'; if there are, then a space
// before the '|' means an "empty" option and a space after the '|' means
// an empty keyword).
//
// <VALUE-SPEC> should look like:
// value [...]
//
// Where "value" is the value name and "..." indicates the value is really
// a list of values. The entire VALUE-SPEC should be surrounded by '[' and
// ']' if the value is optional.
//
// If the argument itself is optional then the entire syntax string
// should be inside of square brackets.
//
// Lastly - a positional AND keyword argument may be denoted by
// "[c|keyword] value"
//
// ^SIDE-EFFECTS:
// - modifies all parts of the ArgSyntax object.
// - prints syntax error messages on cerr.
//
// ^RETURN-VALUE:
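// 0 if the syntax string was parsed successfully; -1 if a syntax error
// (or an unexpected SyntaxFSM state) was encountered.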
//
// ^ALGORITHM:
// Too complicated to be described here - follow along.
//-^^----------------
int
ArgSyntax::parse_syntax(const char * syntax)
{
const char * ptr = syntax;
SyntaxFSM fsm;
SyntaxFSM::token_t token;
while (fsm(ptr, token)) {
switch(fsm.state()) {
case SyntaxFSM::OPTION :
// We have an option character - save it and move on
if (token.len) arg_char = *(token.start) ;
if (! fsm.level()) arg_syntax |= CmdArg::isREQ;
break;
case SyntaxFSM::KEYWORD :
// We have a keyword - save it and move on
::copy_token(arg_keyword, token);
if (! fsm.level()) arg_syntax |= CmdArg::isREQ;
break;
case SyntaxFSM::VALUE :
// We have a value - save it and call parse_value to
// figure out what the flags are.
//
if (token.len) ::copy_token(arg_value, token);
parse_value(fsm);
break;
case SyntaxFSM::LIST :
// We have an ellipsis -- update the syntax flags
arg_syntax |= CmdArg::isLIST;
break;
case SyntaxFSM::ERROR :
// Error!
cerr << "syntax error in \"" << syntax << "\"." << endl ;
return -1;
default :
cerr << "internal error in class SyntaxFSM.\n\tunexpected state "
<< "(" << fsm.state() << ") encountered." << endl ;
return -1;
} //switch
} //while
return 0;
}
//-------------------
// ^FUNCTION: parse_value - parse an argument value
//
// ^SYNOPSIS:
// parse_value(fsm)
//
// ^PARAMETERS:
// const SyntaxFSM & fsm;
// -- the finite-state machine that is reading input.
//
// ^DESCRIPTION:
// The "value" has already been read and saved, we need to figure out
// what syntax_flags to associate with the argument.
//
// ^REQUIREMENTS:
// "fsm" MUST be in the SyntaxFSM::VALUE state!
//
// ^SIDE-EFFECTS:
// Modifies the arg_syntax flags of an ArgSyntax object.
//
// ^RETURN-VALUE:
// None.
//
// ^ALGORITHM:
// Too complicated to be described here - follow along.
//
//-^^----------------
void
ArgSyntax::parse_value(const SyntaxFSM & fsm)
{
   // The flags to set are decided by the triple
   // (num_tokens, num_braces, level) observed in the VALUE state:
   //
   //    (1, 0, 0)   "value"
   //    (1, 0, 1)   "[value]"
   //    (3, 0, 0)   "c|string value"
   //    (3, 0, 1)   "c|string [value]"   or   "[c|string value]"
   //    (3, 0, 2)   "[c|string [value]]"
   //    (3, 1, 0)   "[c|string] value"
   //    (3, 1, 1)   "[c|string] [value]" or   "[[c|string] value]"
   //
   // Both spellings of (3, 1, 1) mean the same thing.  The two spellings
   // of (3, 0, 1) do not, and are told apart by whether isREQ was already
   // set while the option/keyword was being parsed.

   // A lone value, or a value that follows a bracketed option/keyword, is
   // positional: rows (1, 0, *) and (3, 1, *).
   if ((fsm.num_tokens() == 1) || fsm.num_braces()) {
      arg_syntax |= CmdArg::isPOS;
      if (fsm.level()) {
         // inside brackets: (1, 0, 1) or (3, 1, 1)
         arg_syntax |= (CmdArg::isOPT | CmdArg::isVALOPT);
      } else {
         // outside brackets: (1, 0, 0) or (3, 1, 0)
         arg_syntax |= (CmdArg::isREQ | CmdArg::isVALREQ);
      }
      return;
   }

   // From here on we have an option/keyword with no closed brackets.

   // case (3, 0, 0)
   if (! fsm.level()) {
      arg_syntax |= (CmdArg::isREQ | CmdArg::isVALREQ);
      return;
   }

   // case (3, 0, 2)
   if (fsm.level() != 1) {
      arg_syntax |= CmdArg::isVALOPT;
      return;
   }

   // case (3, 0, 1): if the argument is already known to be required then
   // the bracket can only be around the value; otherwise the bracket is
   // around the whole argument and the value itself is required.
   if (arg_syntax & CmdArg::isREQ) {
      arg_syntax |= CmdArg::isVALOPT;
   } else {
      arg_syntax |= CmdArg::isVALREQ;
   }
}
//-------------------
// ^FUNCTION: parse_flag - parse a flag
//
// ^SYNOPSIS:
// parse_flag(is)
//
// ^PARAMETERS:
// istream & is;
// -- the input stream to read the flag from.
//
// ^DESCRIPTION:
// By specifying a string that is accepted by "parse_syntax" one
// can specify almost any combination of CmdArg::SyntaxFlags.
// The only ones that cannot be specified in this manner are the
// CmdArg::isVALSTICKY and CmdArg::isVALSEP flags. In order to
// specify these flags, we allow the syntax string to be followed
// by a colon (':') and one of "SEPARATE" or "STICKY".
//
// ^REQUIREMENTS:
// None.
//
// ^SIDE-EFFECTS:
// - modifies the syntax-flags of an ArgSyntax object.
// - prints syntax error messages on stderr.
// - modifies the state of "is" if an error occurs.
// - consumes characters from is.
//
// ^RETURN-VALUE:
// A reference to the input stream used.
//
// ^ALGORITHM:
// Trivial.
//-^^----------------
istream &
ArgSyntax::parse_flag(istream & is)
{
   // Look at the next character to find out whether a flag was given.
   char  lead;
   is >> lead;
   if (! is)  return  is;

   switch (lead) {
      case '\'' :
      case '"' :
         // A quote means the flag was omitted - give the character back
         // to the stream and leave the syntax flags untouched.
         is.putback(lead);
         return  is ;

      case ':' :
         // A flag follows
         break;

      default :
         cerr << "Unexpected token after syntax string.\n"
              << "\texpecting a colon, or a double or single quote." << endl ;
         is.clear(ios::failbit);
         return  is;
   }

   // Read the flag word itself, bounding the extraction by the buffer size
   char  word[16];
   is.width(sizeof(word) - 1);
   is >> word;
   if (! is) {
      if (! is.eof()) {
         cerr << "Unable to extract argument flag." << endl ;
      } else {
         cerr << "Error - premature end-of-input.\n"
              << "\texpecting one of \"sticky\" or \"separate\"." << endl ;
      }
      return  is;
   }

   // Step over an optional "CmdArg::isVAL" style prefix.  Every piece is
   // optional and is tested in this fixed order.
   static const struct { const char * text; unsigned len; }  scope_parts[] = {
      { "Cmd", 3 }, { "Arg", 3 }, { "::", 2 }, { "is", 2 }
   };
   const char * rest = word;
   for (unsigned i = 0; i < (sizeof(scope_parts) / sizeof(scope_parts[0])); ++i) {
      if (CmdLine::strmatch(scope_parts[i].text, rest, scope_parts[i].len)
             == CmdLine::str_EXACT) {
         rest += scope_parts[i].len;
      }
   }
   while ((*rest == '_') || (*rest == '-'))  ++rest;
   if (CmdLine::strmatch("VAL", rest, 3) == CmdLine::str_EXACT)  rest += 3;
   while ((*rest == '_') || (*rest == '-'))  ++rest;

   // A lone 's' or 'S' could stand for either flag - reject it
   const char  first = *rest;
   if (((first == 's') || (first == 'S')) && (rest[1] == '\0')) {
      cerr << "Ambiguous flag \"" << rest << "\"." << endl ;
      is.clear(ios::failbit);
      return  is;
   }

   // Anything that matches one of the two flag names is accepted
   if (CmdLine::strmatch("Sticky", rest) != CmdLine::str_NONE) {
      arg_syntax |= CmdArg::isVALSTICKY ;
      return  is ;
   }
   if (CmdLine::strmatch("Separate", rest) != CmdLine::str_NONE) {
      arg_syntax |= CmdArg::isVALSEP ;
      return  is ;
   }

   cerr << "Invalid flag \"" << rest << "\".\n"
        << "\tmust be one of \"sticky\" or \"separate\"." << endl ;
   is.clear(ios::failbit);
   return  is;
}
//------------------------------------------------------------------ operator>>
// Extract an ArgSyntax from "is": a quoted syntax string, optionally
// followed by a ':' and a flag (see ArgSyntax::parse_flag).
//
// Returns "is".  The stream is left in a failed state if the quoted string
// could not be read, if the syntax string is invalid, or if parse_flag
// rejects the flag.
istream &
operator>>(istream & is, ArgSyntax & arg)
{
   // NOTE(review): 256 is presumably the maximum syntax-string length
   // accepted by QuotedString - confirm against quoted.h.
   QuotedString  qstr(256);

   is >> qstr ;
   if (! is)  return  is;

   if (arg.parse_syntax(qstr)) {
      // parse_syntax has already printed a diagnostic.  Mark the stream as
      // failed (the same way parse_flag reports its errors) so that a
      // caller testing the stream can tell the extraction did not succeed.
      is.clear(ios::failbit);
      return  is;
   }

   return  arg.parse_flag(is);
}
// These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.