ftp.nice.ch/pub/next/unix/database/sybtool.1.3.s.tar.gz#/sybtool-1.3/cmdline-1.04/src/cmd/syntax.c

This is syntax.c in view mode; [Download] [Up]

//------------------------------------------------------------------------
// ^FILE: syntax.c - implement the ArgSyntax class
//
// ^DESCRIPTION:
//    This file uses a SyntaxFSM to implement a class to parse an argument
//    syntax string from input and to hold the "compiled" result.
//
// ^HISTORY:
//    03/25/92	Brad Appleton	<brad@ssd.csd.harris.com>	Created
//-^^---------------------------------------------------------------------

#include <stdlib.h>
#include <iostream.h>
#include <string.h>
#include <ctype.h>

#include <cmdline.h>

#include "syntax.h"
#include "quoted.h"

//------------------------------------------------------------------ copy_token

//-------------------
// ^FUNCTION: copy_token - copy into a token
//
// ^SYNOPSIS:
//    copy_token(dest, src)
//
// ^PARAMETERS:
//    const char * & dest;
//    -- where to house the duplicated token
//
//    const SyntaxFSM::token_t & src;
//    -- the token to copy.
//
// ^DESCRIPTION:
//    Duplicate the token denoted by "src" into "dest".
//
// ^REQUIREMENTS:
//    None.
//
// ^SIDE-EFFECTS:
//    Allocates storage (with new[]) for "dest", even if the token length is zero.
//
// ^RETURN-VALUE:
//    None.
//
// ^ALGORITHM:
//    Trivial.
//-^^----------------
void
copy_token(const char * & dest, const SyntaxFSM::token_t & src)
{
      // Room for every character of the token plus the terminating NUL.
      // The allocation is unconditional, so an empty token yields "".
   char * const  duplicate = new char[src.len + 1] ;

      // strncpy() copies at most src.len characters and does not
      // guarantee a terminator, so add one explicitly.
   ::strncpy(duplicate, src.start, src.len);
   *(duplicate + src.len) = '\0';

   dest = duplicate;
}

//---------------------------------------------------------------- ArgSyntax

//-------------------
// ^FUNCTION: parse_syntax - parse syntax string
//
// ^SYNOPSIS:
//    parse_syntax(str)
//
// ^PARAMETERS:
//    const char * str;
//    -- the string (containing the argument syntax) to parse.
//
// ^DESCRIPTION:
//    Parse the syntax-string and compile it into an internal format
//    (namely an ArgSyntax object).
//
// ^REQUIREMENTS:
//    "str" should correspond to the following:
//
//       [<KEYWORD-SPEC>] [<VALUE-SPEC>]
//
//    Where <KEYWORD-SPEC> is of the form:
//       c|keyword
//
//    Where 'c' is the option-character and "keyword" is the keyword.
//
//    (There must be no spaces surrounding the '|'; if there are, then a space
//    before the '|' means an "empty" option and a space after the '|' means
//    an empty keyword).
//
//    <VALUE-SPEC> should look like:
//        value [...]
//
//    Where "value" is the value name and "..." indicates the value is really
//    a list of values. The entire VALUE-SPEC should be surrounded by '[' and
//    ']' if the value is optional.
//
//    If the argument itself is optional then the entire syntax string
//    should be inside of square brackets.
//
//    Lastly - a positional AND keyword argument may be denoted by
//        "[c|keyword] value"
//
// ^SIDE-EFFECTS:
//    - modifies all parts of the ArgSyntax object.
//    - prints syntax error messages on cerr.
//
// ^RETURN-VALUE:
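//    0 if the syntax string was parsed successfully, -1 if a syntax error
//    (or an unexpected SyntaxFSM state) was encountered.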
//
// ^ALGORITHM:
//    Too complicated to be described here - follow along.
//-^^----------------
int
ArgSyntax::parse_syntax(const char * syntax)
{
   const char * ptr = syntax;
   SyntaxFSM  fsm;
   SyntaxFSM::token_t  token;

   while (fsm(ptr, token)) {
      switch(fsm.state()) {
      case  SyntaxFSM::OPTION :
         // We have an option character - save it and move on
         if (token.len)  arg_char = *(token.start) ;
         if (! fsm.level())  arg_syntax |= CmdArg::isREQ;
         break;

      case  SyntaxFSM::KEYWORD :
         // We have a keyword - save it and move on
         ::copy_token(arg_keyword, token);
         if (! fsm.level())  arg_syntax |= CmdArg::isREQ;
         break;

      case  SyntaxFSM::VALUE :
         // We have a value - save it and call parse_value to
         // figure out what the flags are.
         //
         if (token.len)  ::copy_token(arg_value, token);
         parse_value(fsm);
         break;

      case  SyntaxFSM::LIST :
         // We have an ellipsis -- update the syntax flags
         arg_syntax |= CmdArg::isLIST;
         break;

      case  SyntaxFSM::ERROR :
         // Error!
         cerr << "syntax error in \"" << syntax << "\"." << endl ;
         return  -1;

      default :
         cerr << "internal error in class SyntaxFSM.\n\tunexpected state "
              << "(" << fsm.state() << ") encountered." << endl ;
         return  -1;
      } //switch
   } //while

   return  0;
}


//-------------------
// ^FUNCTION: parse_value - parse an argument value
//
// ^SYNOPSIS:
//    parse_value(fsm)
//
// ^PARAMETERS:
//    const SyntaxFSM & fsm;
//    -- the finite-state machine that is reading input.
//
// ^DESCRIPTION:
//    The "value" has already been read and saved, we need to figure out
//    what syntax_flags to associate with the argument.
//
// ^REQUIREMENTS:
//    "fsm" MUST be in the SyntaxFSM::VALUE state!
//
// ^SIDE-EFFECTS:
//    Modifies the arg_syntax flags of an ArgSyntax object.
//
// ^RETURN-VALUE:
//    None.
//
// ^ALGORITHM:
//    Too complicated to be described here - follow along.
//
//-^^----------------
void
ArgSyntax::parse_value(const SyntaxFSM & fsm)
{
   // Which flags to set depends on three things the state machine can tell
   // us once it has reached the VALUE state: how many tokens it has seen,
   // how many bracketed groups have already been closed, and the current
   // bracket nesting level. The valid combinations are:
   //
   //   (num_tokens, num_braces, level)            syntax-string
   //   -------------------------------     ---------------------------
   //             (1, 0, 0)                      "value"
   //             (1, 0, 1)                      "[value]"
   //             (3, 0, 0)                      "c|string value"
   //             (3, 0, 1)                      "c|string [value]"
   //             (3, 0, 1)                      "[c|string value]"
   //             (3, 0, 2)                      "[c|string [value]]"
   //             (3, 1, 0)                      "[c|string] value"
   //             (3, 1, 1)                      "[c|string] [value]"
   //             (3, 1, 1)                      "[[c|string] value]"
   //
   // Two combinations are shared by more than one syntax-string. For
   // (3, 1, 1) that does not matter, since both strings mean the same
   // thing. For (3, 0, 1) it does: either the whole argument is optional
   // or only its value is. We tell them apart by looking at isREQ, which
   // parse_syntax() has already set if the option/keyword was required.
   //
   const int  positional = ((fsm.num_tokens() == 1) || fsm.num_braces());

   if (positional) {
         // cases (1, 0, 0), (1, 0, 1), (3, 1, 0) and (3, 1, 1):
         // argument and value are required or optional together.
      arg_syntax |= CmdArg::isPOS;
      if (fsm.level() == 0) {
         arg_syntax |= (CmdArg::isREQ | CmdArg::isVALREQ);
      } else {
         arg_syntax |= (CmdArg::isOPT | CmdArg::isVALOPT);
      }
      return;
   }

      // Keyword-only argument: decide by nesting level.
   switch (fsm.level()) {
   case  0 :
         // case (3, 0, 0)
      arg_syntax |= (CmdArg::isREQ | CmdArg::isVALREQ);
      break;

   case  1 :
         // case (3, 0, 1): a required argument means that the brackets
         // enclosed the value only; otherwise they enclosed everything.
      arg_syntax |= ((arg_syntax & CmdArg::isREQ) ? CmdArg::isVALOPT
                                                  : CmdArg::isVALREQ);
      break;

   default :
         // case (3, 0, 2)
      arg_syntax |= CmdArg::isVALOPT;
      break;
   } //switch level
}


//-------------------
// ^FUNCTION: parse_flag - parse a flag
//
// ^SYNOPSIS:
//    parse_flag(is)
//
// ^PARAMETERS:
//    istream & is;
//    -- the input stream to read the flag from.
//
// ^DESCRIPTION:
//    By specifying a string that is accepted by "parse_syntax" one
//    can specify almost any combination of CmdArg::SyntaxFlags. 
//    The only ones that cannot be specified in this manner are the
//    CmdArg::isVALSTICKY and CmdArg::isVALSEP flags. In order to
//    specify these flags, we allow the syntax string to be followed
//    by a colon (':') and one of "SEPARATE" or "STICKY".
//
// ^REQUIREMENTS:
//    None.
//
// ^SIDE-EFFECTS:
//    - modifies the syntax-flags of an ArgSyntax object.
//    - prints syntax error messages on stderr.
//    - modifies the state of "is" if an error occurs.
//    - consumes characters from is.
//
// ^RETURN-VALUE:
//    A reference to the input stream used.
//
// ^ALGORITHM:
//    Trivial.
//-^^----------------
istream &
ArgSyntax::parse_flag(istream & is)
{
      // Look at the first non-whitespace character after the syntax string
   char  ch;
   is >> ch;
   if (! is)  return  is;

      // If `ch' is a quote then the flags were omitted
      // (give the quote back so that the next extraction sees it).
   if ((ch == '\'') || (ch == '"')) {
      is.putback(ch);
      return  is ;
   }

      // The flags are here, make sure they start with ':'
   if (ch != ':') {
      cerr << "Unexpected token after syntax string.\n"
           << "\texpecting a colon, or a double or single quote." << endl ;
      is.clear(ios::failbit);
      return  is;
   }

      // Now parse the flag.
      //
      // The buffer must be able to hold the longest spelling that the
      // prefix-stripping code below accepts, for instance
      // "CmdArg::isVAL_SEPARATE" (22 characters). Extraction into a
      // character array reads at most width()-1 characters and then
      // stores the terminating NUL itself, so width() is given the full
      // size of the buffer. (A 16 byte buffer with a width of 15 only
      // admitted 14 characters, which cut "CmdArg::isVALSTICKY" down to
      // "CmdArg::isVALS" and got it rejected as the ambiguous flag "S".)
   char  arg_flag[32];
   is.width(sizeof(arg_flag));
   is >> arg_flag;
   if (! is) {
      if (is.eof()) {
         cerr << "Error - premature end-of-input.\n"
              << "\texpecting one of \"sticky\" or \"separate\"." << endl ; 
      } else {
         cerr << "Unable to extract argument flag." << endl ;
      }
      return  is;
   }
 
   char * flag = arg_flag;

      // Skip any leading "CmdArg::isVAL" portion of the flag      
      // (each piece is optional, as are '_' and '-' separators).
   if (CmdLine::strmatch("Cmd", flag, 3) == CmdLine::str_EXACT)  flag += 3;
   if (CmdLine::strmatch("Arg", flag, 3) == CmdLine::str_EXACT)  flag += 3;
   if (CmdLine::strmatch("::", flag, 2) == CmdLine::str_EXACT)   flag += 2;
   if (CmdLine::strmatch("is", flag, 2) == CmdLine::str_EXACT)   flag += 2;
   while ((*flag == '_') || (*flag == '-'))  ++flag;
   if (CmdLine::strmatch("VAL", flag, 3) == CmdLine::str_EXACT)  flag += 3;
   while ((*flag == '_') || (*flag == '-'))  ++flag;

      // check for an ambiguous flag
      // (a lone 's' could stand for either "sticky" or "separate")
   if (((*flag == 's') || (*flag == 'S')) && (! *(flag + 1))) {
      cerr << "Ambiguous flag \"" << flag << "\"." << endl ;
      is.clear(ios::failbit);
      return  is;
   }

      // Anything other than a non-match against one of the two flag
      // names selects the corresponding syntax flag.
   if (CmdLine::strmatch("Sticky", flag) != CmdLine::str_NONE) {
      arg_syntax |= CmdArg::isVALSTICKY ;
   } else if (CmdLine::strmatch("Separate", flag) != CmdLine::str_NONE) {
      arg_syntax |= CmdArg::isVALSEP ;
   } else {
      cerr << "Invalid flag \"" << flag << "\".\n"
           << "\tmust be one of \"sticky\" or \"separate\"." << endl ;
      is.clear(ios::failbit);
      return  is;
   }

   return  is ;
}

//------------------------------------------------------------------ operator>>

istream &
operator>>(istream & is, ArgSyntax & arg)
{
      // Extract an ArgSyntax from "is": first the syntax string itself
      // (read through a QuotedString), then the optional ":flag" suffix.
      // On any failure the stream is returned in a failed state.
   QuotedString  qstr(256);

   is >> qstr ;
   if (! is)  return  is;

      // parse_syntax() has already printed a diagnostic, but the stream
      // is the only thing that the caller gets back: mark the extraction
      // as failed (keeping any other state bits), just as parse_flag()
      // does when it rejects a flag.
   if (arg.parse_syntax(qstr)) {
      is.clear(ios::failbit | is.rdstate());
      return  is;
   }

   return  arg.parse_flag(is);
}

These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.