This is parse.c in view mode; [Download] [Up]
/***************************************************************************** * $Id: parse.c,v 1.11 1997/04/07 01:57:57 darren Exp $ * * Copyright (c) 1996-1997, Darren Hiebert * * Contains functions for parsing and scanning of a source file. *****************************************************************************/ /*============================================================================ = Include files ============================================================================*/ #ifdef HAVE_CONFIG_H # include <config.h> #endif #include "ctags.h" /*============================================================================ = Macros ============================================================================*/ #define swapNameBuffers(st) ((st)->buf1 = !(st)->buf1) /*============================================================================ = Data declarations ============================================================================*/ /* Used to specify type of keyword. */ typedef enum { KEYWORD_UNKNOWN, KEYWORD_CHAR, KEYWORD_CLASS, KEYWORD_CONST, KEYWORD_DOUBLE, KEYWORD_ENUM, KEYWORD_EXTERN, KEYWORD_FLOAT, KEYWORD_INLINE, KEYWORD_INT, KEYWORD_LONG, KEYWORD_OVERLOAD, KEYWORD_PRIVATE, KEYWORD_PUBLIC, KEYWORD_SHORT, KEYWORD_SIGNED, KEYWORD_STATIC, KEYWORD_STRUCT, KEYWORD_THROW, KEYWORD_TYPEDEF, KEYWORD_UNION, KEYWORD_UNSIGNED, KEYWORD_VIRTUAL, KEYWORD_VOID, KEYWORD_VOLATILE } keyword_t; /* Used for reporting the type of object parsed by nextToken(). */ typedef enum { TOK_ARGS, /* a parenthetical pair and its contents */ TOK_BODY, /* a brace enclosed block */ TOK_COMMA, /* the comma character */ TOK_IGNORE, /* a sequence not to be seen by createTags() */ TOK_ENUM_BODY_END, /* the beginning of a list of enumeration values */ TOK_EOF, /* end of file */ TOK_NAME, /* an unknown name */ TOK_SEMICOLON, /* the semicolon character */ TOK_SPEC /* a storage class, qualifier, type, etc. */ } token_t; /* Describes the statement currently undergoing analysis. 
*/
typedef struct {
    /* scope implied by storage-class style keywords seen so far */
    tagScope scope;

    /* kind of declaration; describes specifier associated with TOK_SPEC */
    enum {
        DECL_MISC,      /* unspecified (non-specific) */
        DECL_CLASS,     /* C++ class */
        DECL_ENUM,      /* enumeration */
        DECL_STRUCT,    /* structure */
        DECL_UNION,     /* union */
        DECL_NOMANGLE   /* C++ name demangling block */
    } declaration;

    /* token history: the newest token, then the two before it */
    token_t token;
    token_t prev[2];

    /* state flags */
    boolean isPointer;      /* whether 'name' is a pointer */
    boolean gotName;        /* whether a name has yet been parsed */
    boolean inEnumBody;     /* currently within enumeration value list */
    boolean buf1;           /* is tag[1] the primary buffer? */

    /* information regarding last 2 tag candidates; indexed by buf1 */
    tagInfo tag[2];
} statementInfo;

/*  Records an identifier found immediately inside a pair of parentheses,
 *  as filled in by saveParenInfo().
 */
typedef struct {
    char    name[MaxNameLength];    /* text of the identifier */
    boolean gotName;                /* whether an identifier was stored */
    long    location;               /* File.seek when it was read */
    long    lineNumber;             /* File.lineNumber when it was read */
} parenInfo;

/*============================================================================
=   Function prototypes
============================================================================*/

/*  Parsing functions.
*/ static int skipToNonWhite __ARGS((void)); static int skipToCharacter __ARGS((const int findchar)); static void skipToFormattedBraceMatch __ARGS((void)); static boolean skipToMatch __ARGS((const char *const pair)); static void readIdendifier __ARGS((const int firstChar, char *const name)); static int skipParameterDeclarations __ARGS((int c, statementInfo *const st)); static boolean analyzePostParens __ARGS((statementInfo *const st, const parenInfo *const paren)); static void saveParenInfo __ARGS((parenInfo *const paren, const int c)); static void doubleParens __ARGS((statementInfo *const st, const int c)); static boolean analyzeParens __ARGS((statementInfo *const st)); static boolean isIgnoreToken __ARGS((const char *const name)); static keyword_t analyzeKeyword __ARGS((const char *const name)); static void analyzeIdentifier __ARGS((statementInfo *const st)); static boolean beginBlock __ARGS((statementInfo *const st, const int nesting)); static boolean endBlock __ARGS((statementInfo *const st, const int nesting)); static void processColon __ARGS((statementInfo *const st)); static int skipInitializer __ARGS((const boolean inEnumBody)); static boolean processInitializer __ARGS((statementInfo *const st)); static boolean processArray __ARGS((statementInfo *const st)); static void processIdentifier __ARGS((statementInfo *const st, const int c)); static boolean nextToken __ARGS((statementInfo *const st, const int nesting)); /* Scanning functions. 
*/ static void initStatement __ARGS((statementInfo *const st)); static void qualifyBlockTag __ARGS((const statementInfo *const st, const tagInfo *const tag, const tagScope declScope)); static void qualifyEnumTag __ARGS((const statementInfo *const st, const tagInfo *const tag, const tagScope declScope)); static void qualifyFunctionTag __ARGS((statementInfo *const st, const tagInfo *const tag)); static void qualifyVariableTag __ARGS((const statementInfo *const st, const tagInfo *const tag, const int nesting)); static void qualifyFunctionDeclTag __ARGS((const statementInfo *const st, const tagInfo *const tag)); /*============================================================================ = Function definitions ============================================================================*/ /* Skip to the next non-white character. */ static int skipToNonWhite() { int c; do { c = cppGetc(); } while (c != EOF && isspace(c)); return c; } /* Skip to the next occurance of the specified character. */ static int skipToCharacter( findchar ) const int findchar; { int c; do c = cppGetc(); while (c != EOF && c != findchar); return c; } /* Skips to the next brace in column 1. This is intended for cases where * preprocessor constructs result in unbalanced braces. */ static void skipToFormattedBraceMatch() { int c, next; c = cppGetc(); next = cppGetc(); while (c != EOF && (c != '\n' || next != '}')) { c = next; next = cppGetc(); } } /* Skip to the matching character indicated by the pair string. If skipping * to a matching brace and any brace is found within a different level of a * #if conditional statement while brace formatting is in effect, we skip to * the brace matched by its formatting. 
*/ static boolean skipToMatch( pair ) const char *const pair; { const int begin = pair[0], end = pair[1]; const int initialLevel = Cpp.directive.level; const boolean braceFormatting =(Option.braceFormat && strcmp("{}",pair)==0); boolean ok = TRUE; int matchLevel = 1; int c = '\0'; while (matchLevel > 0 && (c = cppGetc()) != EOF) { if (c == begin) { ++matchLevel; if (braceFormatting && Cpp.directive.level != initialLevel) { skipToFormattedBraceMatch(); break; } } else if (c == end) { --matchLevel; if (braceFormatting && Cpp.directive.level != initialLevel) { skipToFormattedBraceMatch(); break; } } } if (c == EOF) ok = FALSE; return ok; } /* Read a C identifier beginning with "firstChar" and places it into "name". */ static void readIdendifier( firstChar, name ) const int firstChar; char *const name; { int c, i; name[0] = firstChar; for (i = 1, c = cppGetc() ; i < MaxNameLength - 1 && isident(c) ; i++, c = cppGetc()) { name[i] = c; } name[i] = '\0'; /* null terminate name */ cppUngetc(c); /* unget non-identifier character */ } /* Skips over interveaning characters declaring function parameters * (non-ANSI style function declarations). */ static int skipParameterDeclarations( c, st ) int c; statementInfo *const st; { boolean expectBrace = FALSE; boolean end = FALSE; while (c != EOF && ! 
end) { if (isident1(c)) { char name[MaxNameLength]; keyword_t keyword; readIdendifier(c, name); keyword = analyzeKeyword(name); switch (keyword) { default: break; case KEYWORD_CLASS: case KEYWORD_ENUM: case KEYWORD_STRUCT: case KEYWORD_UNION: expectBrace = TRUE; break; case KEYWORD_EXTERN: case KEYWORD_STATIC: case KEYWORD_TYPEDEF: if (keyword == KEYWORD_EXTERN) st->scope = SCOPE_EXTERN; else if (keyword == KEYWORD_STATIC) st->scope = SCOPE_STATIC; else if (keyword == KEYWORD_TYPEDEF) st->scope = SCOPE_TYPEDEF; st->declaration = DECL_MISC; st->token = TOK_SPEC; st->gotName = FALSE; c = skipToNonWhite(); end = TRUE; continue; /* skip read of next character */ } } else switch (c) { default: break; /* ignore */ case ';': expectBrace = FALSE; break; case '(': if (! skipToMatch("()")) c = EOF; break; case '[': if (! skipToMatch("[]")) c = EOF; break; case '}': end = TRUE; continue; /* skip read of next character */ case '{': if (! expectBrace) { end = TRUE; continue; /* skip read of next character */ } else if (! skipToMatch("{}")) c = EOF; break; } if (c != EOF) c = cppGetc(); } return c; } static boolean analyzePostParens( st, paren ) statementInfo *const st; const parenInfo *const paren; { boolean ok = TRUE; int c; /* At this point we should be at the character following the * closing parenthesis. */ c = skipToNonWhite(); if (st->gotName) { if (strchr("{;,", c) != NULL) { st->token = TOK_ARGS; /* parameter list to a func. */ st->declaration = DECL_MISC; /* clear any other decl. */ } else if (isident1(c)) { st->token = TOK_ARGS; /* parameter list to a func. */ st->declaration = DECL_MISC; /* clear any other decl. */ c = skipParameterDeclarations(c, st); /* K&R (non-ANSI) style */ } else st->token = TOK_IGNORE; } /* The name inside the parentheses must have been a function or * variable name. 
*/ else if (paren->gotName) { tagInfo *const tag = &st->tag[st->buf1]; st->gotName = TRUE; st->token = TOK_NAME; tag->location = paren->location; tag->lineNumber = paren->lineNumber; strcpy(tag->name, paren->name); } else st->token = TOK_IGNORE; if (c == EOF) ok = FALSE; else cppUngetc(c); return ok; } static void saveParenInfo( paren, c ) parenInfo *const paren; const int c; { if (c == EOF) { #ifdef DEBUG clearString(paren->name, MaxNameLength); #endif paren->gotName = FALSE; paren->location = 0; paren->lineNumber = 0; } else { readIdendifier(c, paren->name); paren->gotName = TRUE; paren->location = File.seek; paren->lineNumber = File.lineNumber; } } static void doubleParens( st, c ) statementInfo *const st; const int c; { /* A double parenthesis almost certainly means one of those conditional * prototype macro thingies (e.g. __ARGS((void)) ). If found, we will use * the previous name, if it is not empty. */ if (st->gotName && *st->tag[!st->buf1].name != '\0') swapNameBuffers(st); cppUngetc(c); /* put back for skipToMatch(), later */ } /* Analyzes the context and contents of parentheses. */ static boolean analyzeParens( st ) statementInfo *const st; { boolean ok = TRUE; int c; c = skipToNonWhite(); if (c == '*') /* this is a function pointer */ { st->gotName = FALSE; /* invalidate previous name */ st->isPointer = TRUE; st->token = TOK_IGNORE; } else { boolean terminate = FALSE; parenInfo paren; saveParenInfo(&paren, EOF); if (isident1(c)) saveParenInfo(&paren, c); /* save identifier in parentheses */ else if (c == ')') /* empty parentheses... */ cppUngetc(c); /* put back for skipToMatch(), below */ else if (c == '(') doubleParens(st, c); else { /* This is an invalid character to be inside a paren in this * context. This must be a macro call. After we read to the * end of the parenthesis seqence, force a termination of the * current statement, */ st->token = TOK_SEMICOLON; st->gotName = FALSE; terminate = TRUE; } ok = skipToMatch("()"); if (ok && ! 
terminate) ok = analyzePostParens(st, &paren); } return ok; } /* Determines whether or not "name" should be ignored, per the ignore list. */ static boolean isIgnoreToken( name ) const char *const name; { boolean ignore = FALSE; unsigned int i; for (i = 0 ; i < Option.ignore.count ; ++i) { if (strcmp(Option.ignore.list[i], name) == 0) { ignore = TRUE; break; } } return ignore; } /* Analyzes the identifier contained in a statement described by the * statement structure and adjusts the structure according the significance * of the identifier. */ static keyword_t analyzeKeyword( name ) const char *const name; { keyword_t keyword = KEYWORD_UNKNOWN; #define match(word) ((strcmp(name,(word)) == 0)) switch ((unsigned char)name[0]) /* is it a reserved word? */ { case 'c': if (match("class" )) keyword = KEYWORD_CLASS; else if (match("const" )) keyword = KEYWORD_CONST; else if (match("char" )) keyword = KEYWORD_CHAR; break; case 'd': if (match("double" )) keyword = KEYWORD_DOUBLE; break; case 'e': if (match("enum" )) keyword = KEYWORD_ENUM; else if (match("extern" )) keyword = KEYWORD_EXTERN; break; case 'f': if (match("float" )) keyword = KEYWORD_FLOAT; break; case 'i': if (match("int" )) keyword = KEYWORD_INT; else if (match("inline" )) keyword = KEYWORD_INLINE; break; case 'l': if (match("long" )) keyword = KEYWORD_LONG; break; case 'o': if (match("overload")) keyword = KEYWORD_OVERLOAD; break; case 'p': if (match("private")) keyword = KEYWORD_PRIVATE; else if (match("public" )) keyword = KEYWORD_PUBLIC; break; case 's': if (match("static" )) keyword = KEYWORD_STATIC; else if (match("struct" )) keyword = KEYWORD_STRUCT; else if (match("short" )) keyword = KEYWORD_SHORT; else if (match("signed" )) keyword = KEYWORD_SIGNED; break; case 't': if (match("typedef")) keyword = KEYWORD_TYPEDEF; else if (match("throw" )) keyword = KEYWORD_THROW; break; case 'u': if (match("union" )) keyword = KEYWORD_UNION; else if (match("unsigned")) keyword = KEYWORD_UNSIGNED; break; case 'v': if 
(match("virtual")) keyword = KEYWORD_VIRTUAL; else if (match("void" )) keyword = KEYWORD_VOID; else if (match("volatile")) keyword = KEYWORD_VOLATILE; break; } #undef match return keyword; } /* Analyzes the identifier contained in a statement described by the * statement structure and adjusts the structure according the significance * of the identifier. */ static void analyzeIdentifier( st ) statementInfo *const st; { tagInfo *const tag = &st->tag[st->buf1]; const char *const name = tag->name; st->token = TOK_SPEC; /* default unless otherwise */ if (isIgnoreToken(name)) st->token = TOK_IGNORE; else switch (analyzeKeyword(name)) /* is it a reserved word? */ { case KEYWORD_CLASS: st->declaration = DECL_CLASS; break; case KEYWORD_CONST: st->token = TOK_IGNORE; break; case KEYWORD_CHAR: st->declaration = DECL_MISC; break; case KEYWORD_DOUBLE: st->declaration = DECL_MISC; break; case KEYWORD_ENUM: st->declaration = DECL_ENUM; break; case KEYWORD_EXTERN: st->scope = SCOPE_EXTERN; break; case KEYWORD_FLOAT: st->declaration = DECL_MISC; break; case KEYWORD_INT: st->declaration = DECL_MISC; break; case KEYWORD_INLINE: st->token = TOK_IGNORE; break; case KEYWORD_LONG: st->declaration = DECL_MISC; break; case KEYWORD_OVERLOAD: st->token = TOK_IGNORE; break; case KEYWORD_PRIVATE: st->scope = SCOPE_STATIC; break; case KEYWORD_PUBLIC: st->token = TOK_IGNORE; break; case KEYWORD_STATIC: st->scope = SCOPE_STATIC; break; case KEYWORD_STRUCT: st->declaration = DECL_STRUCT; break; case KEYWORD_SHORT: st->declaration = DECL_MISC; break; case KEYWORD_SIGNED: st->declaration = DECL_MISC; break; case KEYWORD_TYPEDEF: st->scope = SCOPE_TYPEDEF; break; case KEYWORD_THROW: st->token = TOK_IGNORE; break; case KEYWORD_UNION: st->declaration = DECL_UNION; break; case KEYWORD_UNSIGNED: st->declaration = DECL_MISC; break; case KEYWORD_VIRTUAL: st->token = TOK_IGNORE; break; case KEYWORD_VOLATILE: st->token = TOK_IGNORE; break; case KEYWORD_VOID: st->declaration = DECL_MISC; break; case 
KEYWORD_UNKNOWN: if ((unsigned char)name[0] == STRING_SYMBOL && name[1] == '\0') { if (! st->gotName && st->scope == SCOPE_EXTERN) st->declaration = DECL_NOMANGLE; else st->token = TOK_IGNORE; } else { st->token = TOK_NAME; st->gotName = TRUE; tag->location = File.seek; tag->lineNumber = File.lineNumber; } break; } if (st->token == TOK_IGNORE) tag->name[0] = '\0'; } static void processIdentifier( st, c ) statementInfo *const st; const int c; { if (st->gotName) swapNameBuffers(st); readIdendifier(c, st->tag[st->buf1].name); analyzeIdentifier(st); if (st->gotName && st->token == TOK_IGNORE) swapNameBuffers(st); } static void processColon( st ) statementInfo *const st; { if (st->declaration != DECL_CLASS) st->token = TOK_IGNORE; else { const int c = skipToCharacter('{'); /* skip over intervening junk */ cppUngetc(c); } } /* Skips over any initializing value which may follow a '=' character in a * variable definition. */ static int skipInitializer( inEnumBody ) const boolean inEnumBody; { boolean done = FALSE; int c; while (! done && (c = cppGetc()) != EOF) { switch (c) { default: break; case ',': case ';': done = TRUE; break; case '[': if (! skipToMatch("[]")) c = EOF; break; case '(': if (! skipToMatch("()")) c = EOF; break; case '{': if (! skipToMatch("{}")) c = EOF; break; case '}': if (inEnumBody) done = TRUE; else if (! 
Option.braceFormat) c = EOF; break; } } return c; } static boolean processInitializer( st ) statementInfo *const st; { boolean ok = TRUE; const int c = skipInitializer(st->inEnumBody); if (c == EOF) ok = FALSE; else if (c == ';') st->token = TOK_SEMICOLON; else if (c == ',') st->token = TOK_COMMA; else if (c == '}' && st->inEnumBody) st->token = TOK_ENUM_BODY_END; return ok; } static boolean processArray( st ) statementInfo *const st; { st->token = TOK_IGNORE; return skipToMatch("[]"); } static boolean beginBlock( st, nesting ) statementInfo *const st; const int nesting; { boolean ok = TRUE; if (st->declaration == DECL_ENUM) { st->inEnumBody = TRUE; st->token = TOK_BODY; } else { if (st->declaration == DECL_STRUCT || st->declaration == DECL_UNION || st->declaration == DECL_CLASS || st->declaration == DECL_NOMANGLE ) { ok = createTags(nesting + 1); } else { ok = skipToMatch("{}"); } st->token = TOK_BODY; } return ok; } static boolean endBlock( st, nesting ) statementInfo *const st; const int nesting; { boolean ok = TRUE; if (st->inEnumBody) { st->inEnumBody = FALSE; st->token = TOK_ENUM_BODY_END; } else if (nesting > 0) st->token = TOK_EOF; /* fake out */ else { st->token = TOK_IGNORE; ok = FALSE; } return ok; } /* Reads characters from the pre-processor and assembles tokens, setting * the current statement state. */ static boolean nextToken( st, nesting ) statementInfo *const st; const int nesting; { int c; boolean ok = TRUE; do { c = cppGetc(); switch (c) { case EOF: st->token = TOK_EOF; break; case '(': ok = analyzeParens(st); break; case '*': st->gotName = FALSE; break; case ',': st->token = TOK_COMMA; break; case ':': processColon(st); break; case ';': st->token = TOK_SEMICOLON; break; case '=': ok = processInitializer(st); break; case '[': ok = processArray(st); break; case '{': ok = beginBlock(st, nesting); break; case '}': ok = endBlock(st, nesting); break; default: if (! 
isident1(c)) st->token = TOK_IGNORE; else /* start of a name or keyword */ processIdentifier(st, c); } } while (ok && st->token == TOK_IGNORE); return ok; } static void initStatement( st ) statementInfo *const st; { int i; st->scope = SCOPE_GLOBAL; st->declaration = DECL_MISC; st->token = TOK_SEMICOLON; st->prev[0] = TOK_SEMICOLON; st->prev[1] = TOK_SEMICOLON; st->gotName = FALSE; st->isPointer = FALSE; st->inEnumBody = FALSE; st->buf1 = FALSE; for (i = 0 ; i < 2 ; ++i) { tagInfo *const tag = &st->tag[i]; tag->location = 0; tag->lineNumber = 0; tag->name[0] = '\0'; #ifdef DEBUG clearString(tag->name, MaxNameLength); #endif } } static void qualifyBlockTag( st, tag, declScope ) const statementInfo *const st; const tagInfo *const tag; const tagScope declScope; { if (st->declaration == DECL_CLASS || st->declaration == DECL_ENUM || st->declaration == DECL_STRUCT || st->declaration == DECL_UNION) { makeTag(tag, declScope, TAG_BLOCKTAG); } } static void qualifyEnumTag( st, tag, declScope ) const statementInfo *const st; const tagInfo *const tag; const tagScope declScope; { if (st->prev[0] == TOK_NAME) makeTag(tag, declScope, TAG_ENUM); } static void qualifyFunctionTag( st, tag ) statementInfo *const st; const tagInfo *const tag; { if (st->scope == SCOPE_EXTERN) /* allowed for func. def. */ st->scope = SCOPE_GLOBAL; makeTag(tag, st->scope, TAG_FUNCTION); } static void qualifyVariableTag( st, tag, nesting ) const statementInfo *const st; const tagInfo *const tag; const int nesting; { /* We have to watch that we do not interpret a declaration of the * form "struct tag;" as a variable definition. In such a case, the * declaration will be either class, enum, struct or union, and prev[1] * will be empty (i.e. SEMICOLON). 
*/ if (nesting == 0 && (st->declaration == DECL_MISC || st->prev[1] != TOK_SPEC)) { if (st->scope != SCOPE_EXTERN) makeTag(tag, st->scope, TAG_VARIABLE); } } static void qualifyFunctionDeclTag( st, tag ) const statementInfo *const st; const tagInfo *const tag; { if (! File.header) makeTag(tag, SCOPE_STATIC, TAG_FUNCDECL); else if (st->scope == SCOPE_GLOBAL || st->scope==SCOPE_EXTERN) makeTag(tag, SCOPE_GLOBAL, TAG_FUNCDECL); } /* Parses the current file and decides whether to write out and tags that * are discovered. */ extern boolean createTags( nesting ) const int nesting; { const tagScope declScope = File.header ? SCOPE_GLOBAL : SCOPE_STATIC; statementInfo st; boolean ok = TRUE; #ifdef DEBUG if (nesting > 0 && debug(DEBUG_VISUAL | DEBUG_STATUS)) printf("<#++nesting:%d#>", nesting); #endif initStatement(&st); while ((ok = nextToken(&st, nesting))) { tagInfo *const tag = &st.tag[st.buf1]; if (st.token == TOK_EOF) break; else if (! st.gotName) ; else if (st.token == TOK_BODY && st.prev[0] == TOK_NAME) qualifyBlockTag(&st, tag, declScope); else if (st.token == TOK_ENUM_BODY_END || (st.inEnumBody && st.token == TOK_COMMA)) qualifyEnumTag(&st, tag, declScope); else if (st.token == TOK_BODY && st.prev[0] == TOK_ARGS) qualifyFunctionTag(&st, tag); else if (st.token == TOK_SEMICOLON || st.token == TOK_COMMA) { if (st.scope == SCOPE_TYPEDEF) makeTag(tag, declScope, TAG_TYPEDEF); else if (st.prev[0] == TOK_NAME || st.isPointer) qualifyVariableTag(&st, tag, nesting); else if (st.prev[0] == TOK_ARGS) qualifyFunctionDeclTag(&st, tag); } /* Reset after a semicolon or ARGS BODY pair. 
*/ if (st.token == TOK_SEMICOLON || (st.token == TOK_BODY && (st.prev[0] == TOK_ARGS || st.declaration == DECL_NOMANGLE))) { initStatement(&st); Cpp.directive.resolve = FALSE; /* end of statement */ } else Cpp.directive.resolve = TRUE; /* in middle of statement */ st.prev[1] = st.prev[0]; st.prev[0] = st.token; } #ifdef DEBUG if (nesting > 0 && debug(DEBUG_VISUAL | DEBUG_STATUS)) printf("<#--nesting:%d#>", nesting - 1); #endif return ok; } /* vi:set tabstop=8 shiftwidth=4: */
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.