/* ftp.nice.ch/pub/next/unix/archiver/unzip.5.01.N.s.tar.gz#/unzip/match.c
 *
 * This is match.c in view mode; [Download] [Up]
 */

/*---------------------------------------------------------------------------

  match.c

  The match() routine recursively compares a string to a "pattern" (regular
  expression), returning TRUE if a match is found or FALSE if not.  This
  version is specifically for use with unzip.c:  as did the previous match()
  from SEA, it leaves the case (upper, lower, or mixed) of the string alone,
  but converts any uppercase characters in the pattern to lowercase if indi-
  cated by the global var pInfo->lcflag (which is to say, string is assumed
  to have been converted to lowercase already, if such was necessary).

  ---------------------------------------------------------------------------*/


#ifdef ZIPINFO
#  undef ZIPINFO   /* make certain there is only one version of match.o */
#endif /* ZIPINFO */
#include "unzip.h"

/* file-local matching helpers: p is the pattern, t is the text */
static int  matche              __((register char *p, register char *t));
static int  matche_after_star   __((register char *p, register char *t));

/* #include "filmatch.h": */
#ifndef BOOLEAN
#  define BOOLEAN short int      /* v1.2 made it short */
#endif

/* match defines: result codes returned by matche()/matche_after_star() */
#define MATCH_PATTERN  6    /* bad pattern */
#define MATCH_LITERAL  5    /* match failure on literal match */
#define MATCH_RANGE    4    /* match failure on [..] construct */
#define MATCH_ABORT    3    /* premature end of text string */
#define MATCH_END      2    /* premature end of pattern string */
#define MATCH_VALID    1    /* valid match */

/* pattern defines: codes stored through the error_type argument of
   is_valid_pattern(), which is only compiled when TEST_MATCH is defined */
#define PATTERN_VALID  0    /* valid pattern */
#define PATTERN_ESC   -1    /* literal escape at end of pattern */
#define PATTERN_RANGE -2    /* malformed range in [..] construct */
#define PATTERN_CLOSE -3    /* no end bracket in [..] construct */
#define PATTERN_EMPTY -4    /* [..] construct is empty */

/*----------------------------------------------------------------------------
*
*  Match the pattern PATTERN against the string TEXT;
*
*       match() returns TRUE if pattern matches, FALSE otherwise.
*       matche() returns MATCH_VALID if pattern matches, or an errorcode
*           as follows otherwise:
*
*            MATCH_PATTERN  - bad pattern
*            MATCH_LITERAL  - match failure on literal match
*            MATCH_RANGE    - match failure on [..] construct
*            MATCH_ABORT    - premature end of text string
*            MATCH_END      - premature end of pattern string
*            MATCH_VALID    - valid match
*
*
*  A match means the entire string TEXT is used up in matching.
*
*  In the pattern string:
*       `*' matches any sequence of characters (zero or more)
*       `?' matches any character
*       [SET] matches any character in the specified set,
*       [!SET] or [^SET] matches any character not in the specified set.
*
*  A set is composed of characters or ranges; a range looks like
*  character hyphen character (as in 0-9 or A-Z).  [0-9a-zA-Z_] is the
*  minimal set of characters allowed in the [..] pattern construct.
*  Other characters are allowed (ie. 8 bit characters) if your system
*  will support them.
*
*  To suppress the special syntactic significance of any of `[]*?!^-\',
*  in a [..] construct and match the character exactly, precede it
*  with a `\'.
*
----------------------------------------------------------------------------*/

/*----------------------------------------------------------------------------
*
*  Match the pattern PATTERN (p) against the string TEXT (t);
*
*  returns MATCH_VALID if pattern matches, or an errorcode as follows
*  otherwise:
*
*            MATCH_PATTERN  - bad pattern
*            MATCH_LITERAL  - match failure on literal match
*            MATCH_RANGE    - match failure on [..] construct
*            MATCH_ABORT    - premature end of text string
*            MATCH_END      - premature end of pattern string
*            MATCH_VALID    - valid match
*
*
*  A match means the entire string TEXT is used up in matching.
*
*  In the pattern string:
*       `*' matches any sequence of characters (zero or more)
*       `?' matches any character
*       [SET] matches any character in the specified set,
*       [!SET] or [^SET] matches any character not in the specified set.
*       \ is allowed within a set to escape a character like ']' or '-'
*
*  A set is composed of characters or ranges; a range looks like
*  character hyphen character (as in 0-9 or A-Z).  [0-9a-zA-Z_] is the
*  minimal set of characters allowed in the [..] pattern construct.
*  Other characters are allowed (ie. 8 bit characters) if your system
*  will support them.
*
*  To suppress the special syntactic significance of any of `[]*?!^-\',
*  within a [..] construct and match the character exactly, precede it
*  with a `\'.
*
*  Unless OLDSTUFF is defined, an uppercase literal pattern character is
*  folded to lowercase before it is compared when pInfo->lcflag is set; the
*  text itself is never case-converted here.
*
*  When the text runs out, any run of trailing `*' left in the pattern is
*  accepted (each one matches the empty string), so "a**" matches "a" the
*  same way "a*" does.
*
----------------------------------------------------------------------------*/

static int matche(p, t)
register char *p;
register char *t;
{
    register char range_start, range_end;  /* start and end in range */

    BOOLEAN invert;             /* is this [..] or [!..] */
    BOOLEAN member_match;       /* have I matched the [..] construct? */
    BOOLEAN loop;               /* should I terminate? */

    for (;  *p;  p++, t++) {

        /* if this is the end of the text then this is the end of the match:
         * only `*' (which may match nothing) is allowed to remain in the
         * pattern, and several in a row are equivalent to one */
        if (!*t) {
            while (*p == '*')
                p++;
            return (*p == '\0')?  MATCH_VALID : MATCH_ABORT;
        }

        /* determine and react to pattern type */
        switch (*p) {

            /* single any character match */
            case '?':
                break;

            /* multiple any character match */
            case '*':
                return matche_after_star (p, t);

            /* [..] construct, single member/exclusion character match */
            case '[': {

                /* move to beginning of range */
                p++;

                /* check if this is a member match or exclusion match */
                invert = FALSE;
                if ((*p == '!') || (*p == '^')) {
                    invert = TRUE;
                    p++;
                }

                /* if closing bracket here or at range start then we have a
                   malformed pattern */
                if (*p == ']')
                    return MATCH_PATTERN;

                member_match = FALSE;
                loop = TRUE;

                while (loop) {

                    /* if end of construct then loop is done */
                    if (*p == ']') {
                        loop = FALSE;
                        continue;
                    }

                    /* matching a '!', '^', '-', '\' or a ']' */
                    if (*p == '\\')
                        range_start = range_end = *++p;
                    else
                        range_start = range_end = *p;

                    /* if end of pattern then bad pattern (Missing ']') */
                    if (!*p)
                        return MATCH_PATTERN;

                    /* check for range bar */
                    if (*++p == '-') {

                        /* get the range end */
                        range_end = *++p;

                        /* if end of pattern or construct then bad pattern */
                        if ((range_end == '\0') || (range_end == ']'))
                            return MATCH_PATTERN;

                        /* special character range end */
                        if (range_end == '\\') {
                            range_end = *++p;

                            /* if end of text then we have a bad pattern */
                            if (!range_end)
                                return MATCH_PATTERN;
                        }

                        /* move just beyond this range */
                        p++;
                    }

                    /* if the text character is in range then match found.
                     * make sure the range letters have the proper
                     * relationship to one another before comparison
                     */
                    if (range_start < range_end) {
                        if ((*t >= range_start) && (*t <= range_end)) {
                            member_match = TRUE;
                            loop = FALSE;
                        }
                    } else {
                        if ((*t >= range_end) && (*t <= range_start)) {
                            member_match = TRUE;
                            loop = FALSE;
                        }
                    }
                }

                /* if there was a match in an exclusion set then no match */
                /* if there was no match in a member set then no match */
                if ((invert && member_match) ||
                   !(invert || member_match))
                    return MATCH_RANGE;

                /* if this is not an exclusion then skip the rest of the [...]
                    construct that already matched, leaving p on the ']' so
                    the for-loop increment steps past it. */
                if (member_match) {
                    while (*p != ']') {

                        /* bad pattern (Missing ']') */
                        if (!*p)
                            return MATCH_PATTERN;

                        /* skip exact match */
                        if (*p == '\\') {
                            p++;

                            /* if end of text then we have a bad pattern */
                            if (!*p)
                                return MATCH_PATTERN;
                        }

                        /* move to next pattern char */
                        p++;
                    }
                }

                break;
            }  /* switch '[' */

            /* must match this character exactly */
            default:
#ifdef OLDSTUFF
                if (*p != *t)
#else /* !OLDSTUFF */
                /* do it like arcmatch() (old unzip) did it (v1.2);
                 * the <ctype.h> routines need a value in unsigned char
                 * range, so never hand them a possibly negative char */
                if (*t != (char) ((pInfo->lcflag &&
                    isupper((unsigned char)(*p)))?
                    tolower((unsigned char)(*p)) : *p))
#endif /* ?OLDSTUFF */
                    return MATCH_LITERAL;

        }  /* switch */
    }  /* for */

    /* if end of text not reached then the pattern fails */
    if (*t)
        return MATCH_END;
    else
        return MATCH_VALID;
}


/*----------------------------------------------------------------------------
*
* recursively call matche() with final segment of PATTERN and of TEXT.
*
----------------------------------------------------------------------------*/

static int matche_after_star (p,t)
register char *p;
register char *t;
{
    register int match = 0;
    register int nextp;

    /* pass over existing ? and * in pattern */
    while ((*p == '?') || (*p == '*')) {

        /* take one char for each ? and +; if end of text then no match */
        if ((*p == '?') && (!*t++))
                return MATCH_ABORT;

        /* move to next char in pattern */
        p++;
    }

    /* if end of pattern we have matched regardless of text left */
    if (!*p)
        return MATCH_VALID;

    /* get the next character to match which must be a literal or '[' */
    nextp = *p;

    /* Continue until we run out of text or definite result seen */
    do {
        /* a precondition for matching is that the next character
         * in the pattern match the next character in the text or that
         * the next pattern char is the beginning of a range.  Increment
         * text pointer as we go here.
         */
        if ((nextp == *t) || (nextp == '['))
            match = matche(p, t);

        /* if the end of text is reached then no match */
        if (!*t++)
            match = MATCH_ABORT;

    } while ((match != MATCH_VALID) &&
             (match != MATCH_ABORT) &&
             (match != MATCH_PATTERN));

    /* return result */
    return match;
}


/*----------------------------------------------------------------------------
*
* match() wraps matche() and reduces its result code to TRUE or FALSE:
* TRUE only when matche() reports MATCH_VALID, FALSE for every other code.
*
* Note the argument order: the text comes first and the pattern second,
* the reverse of matche().
*
----------------------------------------------------------------------------*/

int match(string,pattern)
char *string;
char *pattern;
{
    if (matche(pattern, string) == MATCH_VALID)
        return TRUE;

    return FALSE;
}


#ifdef TEST_MATCH

/*----------------------------------------------------------------------------
*
* Return TRUE if PATTERN contains any special wildcard character
* (`?', `*' or `[')
*
----------------------------------------------------------------------------*/

BOOLEAN is_pattern (char *pattern);

/*----------------------------------------------------------------------------
*
* Return TRUE if PATTERN is a well formed regular expression according
* to the above syntax
*
* error_type is a return code based on the type of pattern error.  Zero
* (PATTERN_VALID) is returned in error_type if the pattern is a valid one.
* error_type return values are as follows:
*
*   PATTERN_VALID - pattern is well formed
*   PATTERN_ESC   - literal escape at end of pattern (ie [a\ )
*   PATTERN_RANGE - [..] construct has no end of range in a '-' pair (ie [a-])
*   PATTERN_CLOSE - [..] construct has no end bracket (ie [abc-g )
*   PATTERN_EMPTY - [..] construct is empty (ie [])
*
----------------------------------------------------------------------------*/

BOOLEAN is_valid_pattern (char *pattern, int *error_type);
/* NOTE(review): no definition of fast_match_after_star() is visible in this
   file -- confirm whether this prototype is still needed */
int fast_match_after_star (register char *pattern, register char *text);

/*----------------------------------------------------------------------------
*
* Scan PATTERN and report whether it contains a wildcard metacharacter:
* returns TRUE as soon as a `?', `*' or `[' is seen, FALSE if the end of
* the string is reached without finding one.
*
----------------------------------------------------------------------------*/

BOOLEAN is_pattern (char *p)
{
    char *scan;

    for (scan = p;  *scan != '\0';  scan++) {
        if ((*scan == '?') || (*scan == '*') || (*scan == '['))
            return TRUE;
    }

    return FALSE;
}


/*----------------------------------------------------------------------------
*
* Return TRUE if PATTERN is a well formed regular expression according
* to the above syntax, FALSE otherwise.
*
* error_type is a return code based on the type of pattern error.  Zero
* (PATTERN_VALID) is returned in error_type if the pattern is a valid one.
* error_type return values are as follows:
*
*   PATTERN_VALID - pattern is well formed
*   PATTERN_ESC   - literal escape at end of pattern (ie [a\ )
*   PATTERN_RANGE - [..] construct has no end of range in a '-' pair (ie [a-])
*   PATTERN_CLOSE - [..] construct has no end bracket (ie [abc-g )
*   PATTERN_EMPTY - [..] construct is empty (ie [], [!] or [^])
*
* A leading `!' or `^' inside a [..] construct is the negation prefix, not
* a set member, so [!] and [^] are empty sets; matche() rejects them as
* MATCH_PATTERN and this routine must agree with it.
*
----------------------------------------------------------------------------*/

BOOLEAN is_valid_pattern (char *p, int *error_type)
{
    /* init error_type */
    *error_type = PATTERN_VALID;

    /* loop through pattern to EOS */
    while (*p) {

        /* determine pattern type */
        switch (*p) {

            /* the [..] construct must be well formed */
            case '[':
                p++;

                /* step over the negation prefix, exactly as matche() does,
                   before looking at the first set member */
                if ((*p == '!') || (*p == '^'))
                    p++;

                /* if the next character is ']' then bad pattern */
                if (*p == ']') {
                    *error_type = PATTERN_EMPTY;
                    return FALSE;
                }

                /* if end of pattern here then bad pattern */
                if (!*p) {
                    *error_type = PATTERN_CLOSE;
                    return FALSE;
                }

                /* loop to end of [..] construct */
                while (*p != ']') {

                    /* check for literal escape */
                    if (*p == '\\') {
                        p++;

                        /* if end of pattern here then bad pattern */
                        if (!*p++) {
                            *error_type = PATTERN_ESC;
                            return FALSE;
                        }
                    } else
                        p++;

                    /* if end of pattern here then bad pattern */
                    if (!*p) {
                        *error_type = PATTERN_CLOSE;
                        return FALSE;
                    }

                    /* if this a range */
                    if (*p == '-') {

                        /* we must have an end of range */
                        if (!*++p || (*p == ']')) {
                            *error_type = PATTERN_RANGE;
                            return FALSE;
                        } else {

                            /* check for literal escape */
                            if (*p == '\\')
                                p++;

                            /* if end of pattern here then bad pattern */
                            if (!*p++) {
                                *error_type = PATTERN_ESC;
                                return FALSE;
                            }
                        }
                    }
                }

                /* p is left on the closing ']'; the next pass of the outer
                   loop steps over it as an ordinary character */
                break;

            /* all other characters are valid pattern elements */
            case '*':
            case '?':
            default:
                p++;                /* "normal" character */
                break;
        }    /* switch */
    }

    return TRUE;
}


    /*
    * This test main expects as first arg the pattern and as second arg
    * the match string.  Output is yay or nay on match.  If nay on
    * match then the error code is parsed and written.
    */

#include <stdio.h>

int main(int argc, char *argv[])
{
    int error;
    int is_valid_error;

    if (argc != 3)
        printf("Usage:  MATCH Pattern Text\n");
    else {
        printf("Pattern: %s\n", argv[1]);
        printf("Text   : %s\n", argv[2]);

        if (!is_pattern(argv[1]))
            printf("    First Argument Is Not A Pattern\n");
        else {
            match(argv[1],argv[2]) ? printf("TRUE") : printf("FALSE");
            error = matche(argv[1],argv[2]);
            is_valid_pattern(argv[1],&is_valid_error);

            switch (error) {
                case MATCH_VALID:
                    printf("    Match Successful");
                    if (is_valid_error != PATTERN_VALID)
                        printf(" -- is_valid_pattern() is complaining\n");
                    else
                        printf("\n");
                    break;
                case MATCH_RANGE:
                    printf("    Match Failed on [..]\n");
                    break;
                case MATCH_ABORT:
                    printf("    Match Failed on Early Text Termination\n");
                    break;
                case MATCH_END:
                    printf("    Match Failed on Early Pattern Termination\n");
                    break;
                case MATCH_PATTERN:
                    switch (is_valid_error) {
                        case PATTERN_VALID:
                            printf("    Internal Disagreement On Pattern\n");
                            break;
                        case PATTERN_RANGE:
                            printf("    No End of Range in [..] Construct\n");
                            break;
                        case PATTERN_CLOSE:
                            printf("    [..] Construct is Open\n");
                            break;
                        case PATTERN_EMPTY:
                            printf("    [..] Construct is Empty\n");
                            break;
                        default:
                            printf("    Internal Error in is_valid_pattern()\n");
                    }
                    break;
                default:
                    printf("    Internal Error in matche()\n");
                    break;
            } /* switch */
        }

    }
    return(0);
}

#endif  /* TEST_MATCH */

/* These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch. */