fsm.c

This is fsm.c in view mode; [Download] [Up]
//------------------------------------------------------------------------
// ^FILE: fsm.c - implement a finite state machine
//
// ^DESCRIPTION:
//     This file implements a finite state machine tailored to the task of
//     parsing syntax strings for command-line arguments.
//
// ^HISTORY:
//    03/27/92	Brad Appleton	<brad@ssd.csd.harris.com>	Created
//-^^---------------------------------------------------------------------

#include <stdlib.h>
#include <iostream.h>
#include <ctype.h>
#include <string.h>

#include "fsm.h"

   // define the characters that have a "special" meaning
enum {
  c_LBRACE = '[',   // opens a square-braced group
  c_RBRACE = ']',   // closes a square-braced group
  c_ALT    = '|',   // separates an option character from its keyword name
  c_LIST   = '.'    // first character of the "..." list marker
      // (no comma after the last enumerator: a trailing comma is not
      //  accepted by compilers that predate C++11)
};


//-------------------
// ^FUNCTION: SyntaxFSM::skip - skip to the next token
//
// ^SYNOPSIS:
//    SyntaxFSM::skip(input)
//
// ^PARAMETERS:
//    const char * & input;
//    -- the current "read" position in the syntax string.
//
// ^DESCRIPTION:
//    Skip past all whitespace and past square braces (recording the
//    current brace-nesting level and the number of balanced braces
//    parsed).
//
// ^REQUIREMENTS:
//    None.
//
// ^SIDE-EFFECTS:
//    Updates "input" to point to the next token (or eos)
//
// ^RETURN-VALUE:
//    None.
//
// ^ALGORITHM:
//    Trivial.
//-^^----------------
void
SyntaxFSM::skip(const char * & input) {
      // nothing to skip when there is no input at all
   if ((! input) || (! *input))  return;

      // The <ctype.h> classifiers are only defined for values that fit in
      // an unsigned char (or EOF).  A plain char may be negative, so each
      // character is converted before it is handed to isspace().
   while (isspace((unsigned char) *input))  ++input;

      // Consume any run of square braces, together with the whitespace
      // that follows each one.
   while ((*input == c_LBRACE) || (*input == c_RBRACE)) {
      if (*input == c_LBRACE) {
            // one level deeper
         ++lev;
      } else {
         if (lev > 0) {
               // this ']' closes an open '[': one more balanced pair
            ++nbpairs;
         } else {
               // a ']' with no matching '[' before it
            fsm_state = ERROR;
            cerr << "too many '" << char(c_RBRACE) << "' characters." << endl;
         }
            // The level is decremented on the error path as well, which
            // leaves it non-zero; operator() reports an error when it sees
            // a non-zero level at the end of the input.
            // NOTE(review): lev is declared in fsm.h (not visible here) -
            // confirm that it is a signed type.
         --lev;
      }
      ++input;
      while (isspace((unsigned char) *input))  ++input;
   }//while
}


//-------------------
// ^FUNCTION: SyntaxFSM::parse_token - parse a token
//
// ^SYNOPSIS:
//    SyntaxFSM::parse_token(input)
//
// ^PARAMETERS:
//    const char * & input;
//    -- the current "read" position in the syntax string.
//
// ^DESCRIPTION:
//    Get the next token from the input string.
//
// ^REQUIREMENTS:
//    input should be non-NULL.
//
// ^SIDE-EFFECTS:
//    Updates "input" to point to the next token (or eos)
//
// ^RETURN-VALUE:
//    None.
//
// ^ALGORITHM:
//    Trivial.
//-^^----------------
void
SyntaxFSM::parse_token(const char * & input)
{
      // Advance "input" to the first character that cannot belong to the
      // current token.  A token ends at:
      //   - the end of the string,
      //   - any whitespace character,
      //   - a square brace,
      //   - a '.', unless we are in the OPTION state (operator() calls us
      //     in that state to scan the keyword that follows '|', so a
      //     keyword may contain '.').
   for ( ; *input ; ++input) {
         // isspace() is only defined for values that fit in an unsigned
         // char (or EOF); a plain char may be negative, so convert first.
      if (isspace((unsigned char) *input))  break;
      if ((*input == c_LBRACE) || (*input == c_RBRACE))  break;
      if ((*input == c_LIST) && (fsm_state != OPTION))  break;
   }
}


//-------------------
// ^FUNCTION: SyntaxFSM::operator() - get a token
//
// ^SYNOPSIS:
//    SyntaxFSM::operator()(input, token)
//
// ^PARAMETERS:
//    const char * & input;
//    -- the current "read" position in the syntax string.
//
//    token_t & token;
//    -- where to place the token that we will find.
//
// ^DESCRIPTION:
//    Get the next token from the input string.
//
// ^REQUIREMENTS:
//    None.
//
// ^SIDE-EFFECTS:
//    - updates "input" to point to the next token (or eos)
//    - updates "token" to be the token that we found
//
// ^RETURN-VALUE:
//    Non-zero if we are in a non-FINAL state; 0 once the FINAL state
//    has been reached.
//
// ^ALGORITHM:
//    It gets complicated so follow along.
//-^^----------------
int
SyntaxFSM::operator()(const char * & input, token_t & token)
{
      // start out with an empty token; it is only filled in by the states
      // below that actually find one
   token.set(NULL, 0);

      // if input is NULL or empty - then we are finished
   if ((! input) || (! *input)) {
      if (lev) {
            // the brace level is not back at zero
         cerr << "not enough '" << char(c_RBRACE) << "' characters." << endl ;
         fsm_state = ERROR;
      } else {
         fsm_state = FINAL;
      }
      return  (fsm_state != FINAL);
   }

   skip(input);  // skip whitespace (and square braces)

   const char * start = input;

      // the token we are to parse depends on what state we are in
      // (note that skip() may just have put us into the ERROR state)
   switch(fsm_state) {
   case  START :
      // We are parsing either an option-character name or a value.
      // If it is an option-character name, the character that stops
      // the input scan will be c_ALT.
      //
      if (! *input) {
            // Only whitespace and/or braces were left, so skip() stopped
            // on the terminating NUL.  Stepping over it (as the code
            // below would) reads past the end of the string, so treat
            // this like the empty-input case instead; the FINAL handling
            // at the bottom still checks that the braces balance.
         fsm_state = FINAL;
         break;
      }
      if (*input != c_ALT)  ++input;
      if (*input == c_ALT) {
         fsm_state = OPTION;
            // nothing in front of the '|' means no option character
         if (start != input)  token.set(start, 1);
      } else {
         parse_token(input);
         fsm_state = VALUE;
         token.set(start, (input - start));
      }
      ++ntoks;
      break;

   case  OPTION :
      // We parsed an option-character already so we had better see a keyword
      // name this time around.
      //
      start = ++input;  // skip past the '|' character
         // isspace() is only defined for values that fit in an unsigned
         // char (or EOF); a plain char may be negative, so convert first.
      if (! isspace((unsigned char) *input)) {
         parse_token(input);
         token.set(start, (input - start));
      }
      fsm_state = KEYWORD;
      ++ntoks;
      break;

   case  KEYWORD :
      // We parsed a keyword already - if anything is here then it better be a
      // value name.
      //
      if (*input) {
         parse_token(input);
         fsm_state = VALUE;
         token.set(start, (input - start));
         ++ntoks;
      } else {
         fsm_state = FINAL;
      }
      break;

   case  VALUE :
      // We already parsed a value name - all that could possibly be left
      // (that would be valid) is an ellipsis ("...") indicating a list.
      //
      if (! *input) {
         fsm_state = FINAL;
      } else if (::strncmp(input, "...", 3) == 0) {
         fsm_state = LIST;
         token.set(input, 3);
         input += 3;
         ++ntoks;
      } else {
         fsm_state = ERROR;
         cerr << "unexpected token \"" << input << "\"." << endl ;
      }
      break;

   case  LIST :
      // We already parsed an ellipsis, there better not be anything left
      if (! *input) {
         fsm_state = FINAL;
      } else {
         fsm_state = ERROR;
         cerr << "unexpected token \"" << input << "\"." << endl ;
      }
      break;

   case  ERROR :
   case  FINAL :
   default :
      break;
   }

      // Reaching FINAL is only legitimate if nothing but balanced braces
      // and whitespace remains.
   if (fsm_state == FINAL) {
      skip(input);
      if ((! *input) && lev) {
         cerr << "not enough '" << char(c_RBRACE) << "' characters." << endl ;
         fsm_state = ERROR;
      } else if (*input) {
         cerr << "unexpected token \"" << input << "\"." << endl ;
         fsm_state = ERROR;
      }
   }

      // non-zero while there is (or may be) more to do, or on error;
      // zero only once the FINAL state has been reached
   return  (fsm_state != FINAL);
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.