This is rxposix.c in view mode; [Download] [Up]
/* Copyright (C) 1995, 1996 Tom Lord
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Library General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this software; see the file COPYING. If not, write to
* the Free Software Foundation, 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include "rxall.h"
#include "rxposix.h"
#include "rxgnucomp.h"
#include "rxbasic.h"
#include "rxsimp.h"
/* regcomp takes a regular expression as a string and compiles it.
*
* PATTERN is the address of the pattern string.
*
* CFLAGS is a series of bits which affect compilation.
*
* If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
* use POSIX basic syntax.
*
* If REG_NEWLINE is set, then . and [^...] don't match newline.
* Also, regexec will try a match beginning after every newline.
*
* If REG_ICASE is set, then we consider upper- and lowercase
* versions of letters to be equivalent when matching.
*
* If REG_NOSUB is set, then when PREG is passed to regexec, that
* routine will report only success or failure, and nothing about the
* registers.
*
* It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
* the return codes and their meanings.)
*/
/* Compile the first LEN bytes of PATTERN into PREG according to CFLAGS.
 *
 * PREG is zeroed first, so any previous contents are discarded without
 * being freed.  Returns 0 on success and a nonzero REG_* code on
 * failure.  On failure the case-folding table allocated for REG_ICASE
 * is released here, because callers are not expected to call regfree
 * on a regex_t that failed to compile.
 */
#ifdef __STDC__
int
regncomp (regex_t * preg, const char * pattern, int len, int cflags)
#else
int
regncomp (preg, pattern, len, cflags)
     regex_t * preg;
     const char * pattern;
     int len;
     int cflags;
#endif
{
  int ret;
  unsigned int syntax;

  /* Start from an all-zero structure; fields not set below stay 0. */
  rx_bzero ((char *)preg, sizeof (*preg));

  syntax = ((cflags & REG_EXTENDED)
	    ? RE_SYNTAX_POSIX_EXTENDED
	    : RE_SYNTAX_POSIX_BASIC);

  if (!(cflags & REG_ICASE))
    preg->translate = 0;
  else
    {
      unsigned i;

      /* NOTE(review): the table holds 256 bytes while the loop below is
       * bounded by CHAR_SET_SIZE -- assumes CHAR_SET_SIZE <= 256; confirm.
       */
      preg->translate = (unsigned char *) malloc (256);
      if (!preg->translate)
	return (int) REG_ESPACE;

      /* Map uppercase characters to corresponding lowercase ones. */
      for (i = 0; i < CHAR_SET_SIZE; i++)
	preg->translate[i] = isupper (i) ? tolower (i) : i;
    }

  /* If REG_NEWLINE is set, newlines are treated differently. */
  if (!(cflags & REG_NEWLINE))
    preg->newline_anchor = 0;
  else
    {
      /* REG_NEWLINE implies neither . nor [^...] match newline. */
      syntax &= ~RE_DOT_NEWLINE;
      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
      /* It also changes the matching behavior. */
      preg->newline_anchor = 1;
    }

  preg->no_sub = !!(cflags & REG_NOSUB);

  ret = rx_parse (&preg->pattern,
		  pattern, len,
		  syntax,
		  256,
		  preg->translate);

  /* POSIX doesn't distinguish between an unmatched open-group and an
   * unmatched close-group: both are REG_EPAREN.
   */
  if (ret == REG_ERPAREN)
    ret = REG_EPAREN;

  if (ret)
    {
      /* The parse failed.  Give back the translate table now and clear
       * the pointer, so nothing leaks if the caller never calls regfree
       * and nothing is freed twice if it does.
       */
      if (preg->translate)
	{
	  free (preg->translate);
	  preg->translate = 0;
	}
      return (int) ret;
    }

  /* Successful parse: collect the subexpression table and the data
   * the matcher uses to skip hopeless start positions.
   */
  preg->re_nsub = 1;
  preg->subexps = 0;
  rx_posix_analyze_rexp (&preg->subexps,
			 &preg->re_nsub,
			 preg->pattern,
			 0);
  preg->is_nullable = rx_fill_in_fastmap (256,
					  preg->fastmap,
					  preg->pattern);
  preg->is_anchored = rx_is_anchored_p (preg->pattern);

  return 0;
}
/* Compile the NUL-terminated string PATTERN into PREG.
 * This is regncomp applied to the whole string; the result of
 * regncomp is returned unchanged.
 */
#ifdef __STDC__
int
regcomp (regex_t * preg, const char * pattern, int cflags)
#else
int
regcomp (preg, pattern, cflags)
     regex_t * preg;
     const char * pattern;
     int cflags;
#endif
{
  int pattern_len;

  /* POSIX says a null character in the pattern terminates it, so
   * strlen gives the number of bytes to compile.
   */
  pattern_len = strlen (pattern);

  return regncomp (preg, pattern, pattern_len, cflags);
}
/* Returns a message corresponding to an error code, ERRCODE, returned
from either regcomp or regexec. */
/* Copy the message for ERRCODE into ERRBUF, truncating it to fit in
 * ERRBUF_SIZE bytes (always 0-terminated when ERRBUF_SIZE is nonzero).
 *
 * ERRCODE is used directly as an index into rx_error_msg; no range
 * check is made here.  A null table entry is reported as "Success".
 * PREG is not used.
 *
 * Returns the size needed to hold the whole message, including the
 * terminating 0, whether or not it fit.
 */
#ifdef __STDC__
size_t
regerror (int errcode, const regex_t *preg,
	  char *errbuf, size_t errbuf_size)
#else
size_t
regerror (errcode, preg, errbuf, errbuf_size)
     int errcode;
     const regex_t *preg;
     char *errbuf;
     size_t errbuf_size;
#endif
{
  const char *msg;
  size_t msg_size;

  msg = rx_error_msg[errcode];
  if (msg == 0)
    msg = "Success";

  msg_size = strlen (msg) + 1;	/* Includes the 0. */

  /* Nothing can be stored in a zero-sized buffer; just report the size. */
  if (errbuf_size == 0)
    return msg_size;

  if (msg_size <= errbuf_size)
    strcpy (errbuf, msg);
  else
    {
      /* Too long: keep the first ERRBUF_SIZE - 1 bytes and terminate. */
      strncpy (errbuf, msg, errbuf_size - 1);
      errbuf[errbuf_size - 1] = 0;
    }

  return msg_size;
}
/* Try to match PREG against STRING beginning exactly at offset START.
 *
 * Candidate end offsets are tried from the largest down to the
 * smallest, so the first success is the longest match starting at
 * START.  When preg->pattern->len is non-negative it is used as the
 * only possible match length; with no pattern only the empty match at
 * START is tried; otherwise every end from END down to START is tried.
 *
 * On success returns 0 and, if PMATCH is non-null, stores the match
 * bounds and final tag in PMATCH[0].  Returns REG_NOMATCH if nothing
 * matched and REG_ESPACE if a solution set could not be created or the
 * last attempt did not answer rx_no.
 */
#ifdef __STDC__
int
rx_regmatch (regmatch_t pmatch[], const regex_t *preg, struct rx_context_rules * rules, int start, int end, const char *string)
#else
int
rx_regmatch (pmatch, preg, rules, start, end, string)
     regmatch_t pmatch[];
     const regex_t *preg;
     struct rx_context_rules * rules;
     int start;
     int end;
     const char *string;
#endif
{
  struct rx_solutions * solutions;
  enum rx_answers answer;
  struct rx_context_rules local_rules;
  int orig_end;
  int end_lower_bound;
  int end_upper_bound;

  /* An empty or inverted range cannot hold a match, and must not be
   * allowed to reach the switch below with ANSWER unset.
   */
  if (start > end)
    return REG_NOMATCH;

  local_rules = *rules;
  orig_end = end;
  answer = rx_no;

  if (!preg->pattern)
    {
      end_lower_bound = start;
      end_upper_bound = start;
    }
  else if (preg->pattern->len >= 0)
    {
      /* A fixed-length pattern needs LEN characters; if fewer remain,
       * trying anyway would examine bytes beyond END.
       */
      if ((end - start) < preg->pattern->len)
	return REG_NOMATCH;
      end_lower_bound = start + preg->pattern->len;
      end_upper_bound = start + preg->pattern->len;
    }
  else
    {
      end_lower_bound = start;
      end_upper_bound = end;
    }

  end = end_upper_bound;
  while (end >= end_lower_bound)
    {
      /* Decide whether `$' may match at this candidate end.  At the
       * real end of the string that is the caller's not_eol setting;
       * elsewhere `$' can match only in newline-anchor mode and only
       * just before a newline.  string[end] is read only when
       * END != ORIG_END.
       */
      local_rules.not_eol = (rules->not_eol
			     ? (   (end == orig_end)
				|| !local_rules.newline_anchor
				|| (string[end] != '\n'))
			     : (   (end != orig_end)
				&& (!local_rules.newline_anchor
				    || (string[end] != '\n'))));

      solutions = rx_basic_make_solutions (pmatch, preg->pattern, preg->subexps,
					   start, end, &local_rules, string);
      if (!solutions)
	return REG_ESPACE;

      answer = rx_next_solution (solutions);

      if (answer == rx_yes)
	{
	  if (pmatch)
	    {
	      pmatch[0].rm_so = start;
	      pmatch[0].rm_eo = end;
	      pmatch[0].final_tag = solutions->final_tag;
	    }
	  rx_basic_free_solutions (solutions);
	  return 0;
	}

      rx_basic_free_solutions (solutions);
      --end;
    }

  /* Only the answer from the final attempt decides the error code. */
  switch (answer)
    {
    default:
    case rx_bogus:
      return REG_ESPACE;

    case rx_no:
      return REG_NOMATCH;
    }
}
/* Search STRING between offsets START and END for the leftmost place
 * where PREG matches, calling rx_regmatch at each candidate offset.
 *
 * A candidate offset is attempted only if the pattern is nullable or
 * the character there is in the fastmap.  When the range is longer
 * than RX_MANY_CASES an automaton is also built from a simplified form
 * of the pattern and run first, so that rx_regmatch is skipped where
 * the automaton stops early without a final tag.
 *
 * For an anchored pattern the search gives up after the first failure
 * unless newline anchoring is on, in which case it resumes after the
 * next newline.
 *
 * Returns 0 on a match, REG_NOMATCH if there is none, or another code
 * (REG_ESPACE here) on failure.
 */
#ifdef __STDC__
int
rx_regexec (regmatch_t pmatch[], const regex_t *preg, struct rx_context_rules * rules, int start, int end, const char *string)
#else
int
rx_regexec (pmatch, preg, rules, start, end, string)
     regmatch_t pmatch[];
     const regex_t *preg;
     struct rx_context_rules * rules;
     int start;
     int end;
     const char *string;
#endif
{
  int x;
  int stat;
  int anchored;
  int use_filter;
  struct rexp_node * simplified;
  struct rx_unfa * unfa;
  struct rx_classical_system machine;

  anchored = preg->is_anchored;
  unfa = 0;
  use_filter = ((end - start) > RX_MANY_CASES);

  if (use_filter)
    {
      if (0 > rx_simple_rexp (&simplified, 256, preg->pattern, preg->subexps))
	return REG_ESPACE;
      unfa = rx_unfa (rx_basic_unfaniverse (), simplified, 256);
      if (!unfa)
	{
	  rx_free_rexp (simplified);
	  return REG_ESPACE;
	}
      rx_init_system (&machine, unfa->nfa);
      rx_free_rexp (simplified);
    }

  for (x = start; x <= end; ++x)
    {
      if (preg->is_nullable
	  || ((x < end)
	      && (preg->fastmap[((unsigned char *)string)[x]])))
	{
	  if (use_filter)
	    {
	      int remaining;
	      int amt;

	      if (rx_start_superstate (&machine) != rx_yes)
		{
		  rx_free_unfa (unfa);
		  return REG_ESPACE;
		}
	      /* X and END are both offsets into STRING, so the text
	       * left to scan from string + x is end - x bytes long
	       * (START must not be subtracted a second time).
	       */
	      remaining = end - x;
	      amt = rx_advance_to_final (&machine, string + x, remaining);
	      if (!machine.final_tag && (amt < remaining))
		goto nomatch;
	    }
	  stat = rx_regmatch (pmatch, preg, rules, x, end, string);
	  if (!stat || (stat != REG_NOMATCH))
	    {
	      /* Either a match or a hard error: stop searching. */
	      rx_free_unfa (unfa);
	      return stat;
	    }
	}
    nomatch:
      if (anchored)
	{
	  if (!preg->newline_anchor)
	    {
	      rx_free_unfa (unfa);
	      return REG_NOMATCH;
	    }
	  /* Skip to the next newline; the loop increment then moves
	   * to the character after it.
	   */
	  while ((x < end) && (string[x] != '\n'))
	    ++x;
	}
    }

  rx_free_unfa (unfa);
  return REG_NOMATCH;
}
/* regexec searches for a given pattern, specified by PREG, in the
* string STRING.
*
* If NMATCH is zero or REG_NOSUB was set in the cflags argument to
* `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
* least NMATCH elements, and we set them to the offsets of the
* corresponding matched substrings.
*
* EFLAGS specifies `execution flags' which affect matching: if
* REG_NOTBOL is set, then ^ does not match at the beginning of the
* string; if REG_NOTEOL is set, then $ does not match at the end.
*
* We return 0 if we find a match, REG_NOMATCH if not, and REG_ESPACE
* if we run out of memory.
*/
/* Match PREG against the first LEN bytes of STRING.
 *
 * *PMATCH is the caller's register array of NMATCH elements.  It is
 * used directly when it is non-null and has at least preg->re_nsub
 * elements; otherwise a scratch array of preg->re_nsub elements is
 * allocated, and on success the first NMATCH results are copied back
 * (unless REG_NOSUB was given or NMATCH is 0).
 *
 * If EFLAGS contains REG_ALLOC_REGS and the match succeeds, *PMATCH is
 * set to the array actually used, which may be the freshly allocated
 * one; in every other case a scratch array is freed before returning.
 *
 * A null PMATCH, or a null *PMATCH, is tolerated: no results are
 * stored through it.
 *
 * Returns 0 on a match, REG_NOMATCH on no match, REG_ESPACE if memory
 * runs out.
 */
#ifdef __STDC__
int
regnexec (const regex_t *preg, const char *string, int len, size_t nmatch, regmatch_t **pmatch, int eflags)
#else
int
regnexec (preg, string, len, nmatch, pmatch, eflags)
     const regex_t *preg;
     const char *string;
     int len;
     size_t nmatch;
     regmatch_t **pmatch;
     int eflags;
#endif
{
  int want_reg_info;
  struct rx_context_rules rules;
  regmatch_t * caller_regs;
  regmatch_t * regs;
  size_t nregs;
  size_t x;
  int stat;

  want_reg_info = (!preg->no_sub && (nmatch > 0));

  rules.newline_anchor = preg->newline_anchor;
  rules.not_bol = !!(eflags & REG_NOTBOL);
  rules.not_eol = !!(eflags & REG_NOTEOL);
  /* NOTE(review): REG_ICASE is tested in EFLAGS here, although the
   * compile step reads it from CFLAGS -- confirm this is intended.
   */
  rules.case_indep = !!(eflags & REG_ICASE);

  /* The caller's array, or 0 if there is none to read or write. */
  caller_regs = (pmatch ? *pmatch : 0);

  if (caller_regs && (nmatch >= preg->re_nsub))
    {
      regs = caller_regs;
      nregs = nmatch;
    }
  else
    {
      regs = (regmatch_t *)malloc (preg->re_nsub * sizeof (*regs));
      if (!regs)
	return REG_ESPACE;
      nregs = preg->re_nsub;
    }

  /* Mark every register as "did not participate". */
  for (x = 0; x < nregs; ++x)
    regs[x].rm_so = regs[x].rm_eo = -1;

  stat = rx_regexec (regs, preg, &rules, 0, len, string);

  /* Results landed in a scratch array: hand back as many as fit. */
  if (!stat && want_reg_info && caller_regs && (regs != caller_regs))
    {
      for (x = 0; x < nmatch; ++x)
	caller_regs[x] = regs[x];
    }

  if (!stat && pmatch && (eflags & REG_ALLOC_REGS))
    *pmatch = regs;
  else if (regs != caller_regs)
    free (regs);

  return stat;
}
/* Match PREG against the NUL-terminated STRING.
 * This is regnexec applied to the whole string, with REG_ALLOC_REGS
 * removed from EFLAGS and the address of the local PMATCH parameter
 * passed as the register-array slot.
 */
#ifdef __STDC__
int
regexec (const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
#else
int
regexec (preg, string, nmatch, pmatch, eflags)
     const regex_t *preg;
     const char *string;
     size_t nmatch;
     regmatch_t pmatch[];
     int eflags;
#endif
{
  int string_len;
  int exec_flags;

  string_len = strlen (string);
  exec_flags = (eflags & ~REG_ALLOC_REGS);

  return regnexec (preg, string, string_len, nmatch, &pmatch, exec_flags);
}
/* Free dynamically allocated space used by PREG. */
/* Release the parsed pattern, the subexpression table and the
 * translate table held by PREG, and reset each of those fields to 0.
 * PREG itself is not freed.
 */
#ifdef __STDC__
void
regfree (regex_t *preg)
#else
void
regfree (preg)
     regex_t *preg;
#endif
{
  /* Each member is released only if it is set, then cleared. */
  if (preg->pattern != 0)
    rx_free_rexp (preg->pattern);
  preg->pattern = 0;

  if (preg->subexps != 0)
    free (preg->subexps);
  preg->subexps = 0;

  if (preg->translate != 0)
    free (preg->translate);
  preg->translate = 0;
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.