rxposix.c

This is rxposix.c in view mode; [Download] [Up]
/*	Copyright (C) 1995, 1996 Tom Lord
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Library General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Library General Public License for more details.
 * 
 * You should have received a copy of the GNU Library General Public License
 * along with this software; see the file COPYING.  If not, write to
 * the Free Software Foundation, 59 Temple Place - Suite 330, 
 * Boston, MA 02111-1307, USA. 
 */



#include "rxall.h"
#include "rxposix.h"
#include "rxgnucomp.h"
#include "rxbasic.h"
#include "rxsimp.h"

/* regcomp takes a regular expression as a string and compiles it.
 *
 * PATTERN is the address of the pattern string.
 *
 * CFLAGS is a series of bits which affect compilation.
 *
 *   If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
 *   use POSIX basic syntax.
 *
 *   If REG_NEWLINE is set, then . and [^...] don't match newline.
 *   Also, regexec will try a match beginning after every newline.
 *
 *   If REG_ICASE is set, then we considers upper- and lowercase
 *   versions of letters to be equivalent when matching.
 *
 *   If REG_NOSUB is set, then when PREG is passed to regexec, that
 *   routine will report only success or failure, and nothing about the
 *   registers.
 *
 * It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
 * the return codes and their meanings.)  
 */


#ifdef __STDC__
int
regncomp (regex_t * preg, const char * pattern, int len, int cflags)
#else
int
regncomp (preg, pattern, len, cflags)
     regex_t * preg;
     const char * pattern;
     int len;
     int cflags;
#endif
{
  int ret;
  unsigned int syntax;

  rx_bzero ((char *)preg, sizeof (*preg));
  syntax = ((cflags & REG_EXTENDED)
	    ? RE_SYNTAX_POSIX_EXTENDED
	    : RE_SYNTAX_POSIX_BASIC);

  if (!(cflags & REG_ICASE))
    preg->translate = 0;
  else
    {
      unsigned i;

      preg->translate = (unsigned char *) malloc (256);
      if (!preg->translate)
        return (int) REG_ESPACE;

      /* Map uppercase characters to corresponding lowercase ones.  */
      for (i = 0; i < CHAR_SET_SIZE; i++)
        preg->translate[i] = isupper (i) ? tolower (i) : i;
    }


  /* If REG_NEWLINE is set, newlines are treated differently.  */
  if (!(cflags & REG_NEWLINE))
    preg->newline_anchor = 0;
  else
    {
      /* REG_NEWLINE implies neither . nor [^...] match newline.  */
      syntax &= ~RE_DOT_NEWLINE;
      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
      /* It also changes the matching behavior.  */
      preg->newline_anchor = 1;
    }

  preg->no_sub = !!(cflags & REG_NOSUB);

  ret = rx_parse (&preg->pattern,
		  pattern, len,
		  syntax,
		  256,
		  preg->translate);

  /* POSIX doesn't distinguish between an unmatched open-group and an
   * unmatched close-group: both are REG_EPAREN.
   */
  if (ret == REG_ERPAREN)
    ret = REG_EPAREN;

  if (!ret)
    {
      preg->re_nsub = 1;
      preg->subexps = 0;
      rx_posix_analyze_rexp (&preg->subexps,
			     &preg->re_nsub,
			     preg->pattern,
			     0);
      preg->is_nullable = rx_fill_in_fastmap (256,
					      preg->fastmap,
					      preg->pattern);

      preg->is_anchored = rx_is_anchored_p (preg->pattern);
    }

  return (int) ret;
}


#ifdef __STDC__
int
regcomp (regex_t * preg, const char * pattern, int cflags)
#else
int
regcomp (preg, pattern, cflags)
     regex_t * preg;
     const char * pattern;
     int cflags;
#endif
{
  /* POSIX says a null character in the pattern terminates it, so we
   * can use strlen here in compiling the pattern.  
   */

  return regncomp (preg, pattern, strlen (pattern), cflags);
}




/* Returns a message corresponding to an error code, ERRCODE, returned
   from either regcomp or regexec.   */

#ifdef __STDC__
size_t
regerror (int errcode, const regex_t *preg,
	  char *errbuf, size_t errbuf_size)
#else
size_t
regerror (errcode, preg, errbuf, errbuf_size)
    int errcode;
    const regex_t *preg;
    char *errbuf;
    size_t errbuf_size;
#endif
{
  const char *msg;
  size_t msg_size;

  msg = rx_error_msg[errcode] == 0 ? "Success" : rx_error_msg[errcode];
  msg_size = strlen (msg) + 1; /* Includes the 0.  */
  if (errbuf_size != 0)
    {
      if (msg_size > errbuf_size)
        {
          strncpy (errbuf, msg, errbuf_size - 1);
          errbuf[errbuf_size - 1] = 0;
        }
      else
        strcpy (errbuf, msg);
    }
  return msg_size;
}



#ifdef __STDC__
int
rx_regmatch (regmatch_t pmatch[], const regex_t *preg, struct rx_context_rules * rules, int start, int end, const char *string)
#else
int
rx_regmatch (pmatch, preg, rules, start, end, string)
     regmatch_t pmatch[];
     const regex_t *preg;
     struct rx_context_rules * rules;
     int start;
     int end;
     const char *string;
#endif
{
  struct rx_solutions * solutions;
  enum rx_answers answer;
  struct rx_context_rules local_rules;
  int orig_end;
  int end_lower_bound;
  int end_upper_bound;
  
  local_rules = *rules;
  orig_end = end;

  if (!preg->pattern)
    {
      end_lower_bound = start;
      end_upper_bound = start;
    }
  else if (preg->pattern->len >= 0)
    {
      end_lower_bound = start + preg->pattern->len;
      end_upper_bound = start + preg->pattern->len;
    }
  else
    {
      end_lower_bound = start;
      end_upper_bound = end;
    }
  end = end_upper_bound;
  while (end >= end_lower_bound)
    {
      local_rules.not_eol = (rules->not_eol
			     ? (   (end == orig_end)
				|| !local_rules.newline_anchor
				|| (string[end] != '\n'))
			     : (   (end != orig_end)
				&& (!local_rules.newline_anchor
				    || (string[end] != '\n'))));
      solutions = rx_basic_make_solutions (pmatch, preg->pattern, preg->subexps,
					   start, end, &local_rules, string);
      if (!solutions)
	return REG_ESPACE;
      
      answer = rx_next_solution (solutions);

      if (answer == rx_yes)
	{
	  if (pmatch)
	    {
	      pmatch[0].rm_so = start;
	      pmatch[0].rm_eo = end;
	      pmatch[0].final_tag = solutions->final_tag;
	    }
	  rx_basic_free_solutions (solutions);
	  return 0;
	}
      else
	rx_basic_free_solutions (solutions);

      --end;
    }

  switch (answer)
    {
    default:
    case rx_bogus:
      return REG_ESPACE;

    case rx_no:
      return REG_NOMATCH;
    }
}


#ifdef __STDC__
int
rx_regexec (regmatch_t pmatch[], const regex_t *preg, struct rx_context_rules * rules, int start, int end, const char *string)
#else
int
rx_regexec (pmatch, preg, rules, start, end, string)
     regmatch_t pmatch[];
     const regex_t *preg;
     struct rx_context_rules * rules;
     int start;
     int end;
     const char *string;
#endif
{
  int x;
  int stat;
  int anchored;
  struct rexp_node * simplified;
  struct rx_unfa * unfa;
  struct rx_classical_system machine;

  anchored = preg->is_anchored;

  unfa = 0;
  if ((end - start) > RX_MANY_CASES)
    {
      if (0 > rx_simple_rexp (&simplified, 256, preg->pattern, preg->subexps))
	return REG_ESPACE;
      unfa = rx_unfa (rx_basic_unfaniverse (), simplified, 256);
      if (!unfa)
	{
	  rx_free_rexp (simplified);
	  return REG_ESPACE;
	}
      rx_init_system (&machine, unfa->nfa);
      rx_free_rexp (simplified);
    }

  for (x = start; x <= end; ++x)
    {
      if (preg->is_nullable
	  || ((x < end)
	      && (preg->fastmap[((unsigned char *)string)[x]])))
	{
	  if ((end - start) > RX_MANY_CASES)
	    {
	      int amt;
	      if (rx_start_superstate (&machine) != rx_yes)
		{
		  rx_free_unfa (unfa);
		  return REG_ESPACE;
		}
	      amt = rx_advance_to_final (&machine, string + x, end - start - x);
	      if (!machine.final_tag && (amt < (end - start - x)))
		goto nomatch;
	    }
	  stat = rx_regmatch (pmatch, preg, rules, x, end, string);
	  if (!stat || (stat != REG_NOMATCH))
	    {
	      rx_free_unfa (unfa);
	      return stat;
	    }
	}
    nomatch:
      if (anchored)
	if (!preg->newline_anchor)
	  {
	    rx_free_unfa (unfa);
	    return REG_NOMATCH;
	  }
	else
	  while (x < end)
	    if (string[x] == '\n')
	      break;
	    else
	      ++x;
    }
  rx_free_unfa (unfa);
  return REG_NOMATCH;
}



/* regexec searches for a given pattern, specified by PREG, in the
 * string STRING.
 *
 * If NMATCH is zero or REG_NOSUB was set in the cflags argument to
 * `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
 * least NMATCH elements, and we set them to the offsets of the
 * corresponding matched substrings.
 *
 * EFLAGS specifies `execution flags' which affect matching: if
 * REG_NOTBOL is set, then ^ does not match at the beginning of the
 * string; if REG_NOTEOL is set, then $ does not match at the end.
 *
 * We return 0 if we find a match and REG_NOMATCH if not.  
 */

#ifdef __STDC__
int
regnexec (const regex_t *preg, const char *string, int len, size_t nmatch, regmatch_t **pmatch, int eflags)
#else
int
regnexec (preg, string, len, nmatch, pmatch, eflags)
     const regex_t *preg;
     const char *string;
     int len;
     size_t nmatch;
     regmatch_t **pmatch;
     int eflags;
#endif
{
  int want_reg_info;
  struct rx_context_rules rules;
  regmatch_t * regs;
  size_t nregs;
  int stat;

  want_reg_info = (!preg->no_sub && (nmatch > 0));

  rules.newline_anchor = preg->newline_anchor;
  rules.not_bol = !!(eflags & REG_NOTBOL);
  rules.not_eol = !!(eflags & REG_NOTEOL);
  rules.case_indep = !!(eflags & REG_ICASE);

  if (nmatch >= preg->re_nsub)
    {
      regs = *pmatch;
      nregs = nmatch;
    }
  else
    {
      regs = (regmatch_t *)malloc (preg->re_nsub * sizeof (*regs));
      if (!regs)
	return REG_ESPACE;
      nregs = preg->re_nsub;
    }

  {
    int x;
    for (x = 0; x < nregs; ++x)
      regs[x].rm_so = regs[x].rm_eo = -1;
  }


  stat = rx_regexec (regs, preg, &rules, 0, len, string);

  if (!stat && want_reg_info && pmatch && (regs != *pmatch))
    {
      size_t x;
      for (x = 0; x < nmatch; ++x)
	(*pmatch)[x] = regs[x];
    }

  if (!stat && (eflags & REG_ALLOC_REGS))
    *pmatch = regs;
  else if (regs && (!pmatch || (regs != *pmatch)))
    free (regs);
  
  return stat;
}

#ifdef __STDC__
int
regexec (const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
#else
int
regexec (preg, string, nmatch, pmatch, eflags)
     const regex_t *preg;
     const char *string;
     size_t nmatch;
     regmatch_t pmatch[];
     int eflags;
#endif
{
  return regnexec (preg,
		   string,
		   strlen (string),
		   nmatch,
		   &pmatch,
		   (eflags & ~REG_ALLOC_REGS));
}


/* Free dynamically allocated space used by PREG.  */

#ifdef __STDC__
void
regfree (regex_t *preg)
#else
void
regfree (preg)
    regex_t *preg;
#endif
{
  if (preg->pattern)
    {
      rx_free_rexp (preg->pattern);
      preg->pattern = 0;
    }
  if (preg->subexps)
    {
      free (preg->subexps);
      preg->subexps = 0;
    }
  if (preg->translate != 0)
    {
      free (preg->translate);
      preg->translate = 0;
    }
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.