ftp.nice.ch/pub/next/developer/languages/smalltalk/smalltalk.1.2.alpha5.s.tar.gz#/smalltalk-1.2.alpha5/objc/objclib/MiscTBMK.c

This is MiscTBMK.c in view mode; [Download] [Up]

/*  A variation on Hume & Sunday's Tuned Boyer-Moore algorithm
 *  for searching for a literal string pattern in a text.
 *
 *  References:
 *    Hume, A., D.M. Sunday. "Fast String Searching."
 *       _Software--Practice and Experience_. Vol. 21.
 *       No. 11. p. 1221-48. November 1991.
 *
 *  Copyright (c) 1994 Christopher J. Kane.
 *
 *  Version 1.2 (March 22, 1994)
 */

//#include <misckit/MiscTBMK.h>
#include "MiscTBMK.h"
#include <ctype.h>
extern void *calloc(unsigned int, unsigned int);
extern void free(void *);

#define TBMKMAGIC 0x54424D4B

struct Misc_tbmk_pattern {
  unsigned int magic;
  signed int *pattern;
  signed int skip[256];
  signed int patlen;
  signed int direction;
  signed int jump;
};

Misc_TBMKpattern
Misc_TBMKpattern_alloc(const unsigned char *pattern,
                       unsigned int pattern_length,
                       unsigned char reverse,
                       unsigned char nocases)
{
  Misc_TBMKpattern new;
  unsigned char *tmppat;
  int i;

  if (pattern_length==0) {
    return 0;
  }
  new = (Misc_TBMKpattern)calloc(1, sizeof(struct Misc_tbmk_pattern));
  if (new==0) {
    return 0;
  }
  new->pattern = (int *)calloc(pattern_length, sizeof(int));
  if (new->pattern==0) {
    free(new);
    return 0;
  }
  tmppat = (unsigned char *)calloc(pattern_length, sizeof(char));
  if (tmppat==0) {
    free(new->pattern);
    free(new);
    return 0;
  }
  new->magic = TBMKMAGIC;
  for (i = pattern_length; i--;) {
    tmppat[i] = nocases?tolower(pattern[i]):pattern[i];
  }
  new->patlen = reverse?-pattern_length:pattern_length;
  new->direction = reverse?-1:1;
  for (i = 256; i--;) {
    new->skip[i] = new->patlen;
  }
  if (reverse) {
    for (i = pattern_length-1; i>=0; i--) {
      if (nocases) {
        new->skip[tolower(tmppat[i])] = -i;
        new->skip[toupper(tmppat[i])] = -i;
      } else {
        new->skip[tmppat[i]] = -i;
      }
    }
    for (i = 1; i<pattern_length && tmppat[i]!=tmppat[0]; i++);
    new->jump = -i;
    for (i = 0; i<pattern_length; i++) {
      new->pattern[pattern_length-1-i] = new->skip[pattern[i]];
    }
  } else {
    for (i = 0; i<pattern_length; i++) {
      if (nocases) {
        new->skip[tolower(tmppat[i])] = pattern_length-1-i;
        new->skip[toupper(tmppat[i])] = pattern_length-1-i;
      } else {
        new->skip[tmppat[i]] = pattern_length-1-i;
      }
    }
    for (i = pattern_length-2; i>=0 && tmppat[i]!=tmppat[pattern_length-1]; i--);
    new->jump = pattern_length-1-i;
    for (i = 0; i<pattern_length; i++) {
      new->pattern[i] = new->skip[pattern[i]];
    }
  }
  free(tmppat);
  return new;
}

void
Misc_TBMKpattern_free(Misc_TBMKpattern *pattern)
{
  if (pattern!=0 && *pattern!=0 && (*pattern)->magic==TBMKMAGIC) {
    if ((*pattern)->pattern!=0) {
      free((*pattern)->pattern);
    }
    free(*pattern);
    *pattern = 0;
  }
}

#define UNROLL 4

int
Misc_TBMKsearch_memory(const Misc_TBMKpattern pattern,
                       const unsigned char *text,
                       int text_extent,
                       unsigned char all_matches,
                       int *match_position)
{
  const unsigned char *cp, *tmpcp, *lb, *flb, *ub, *fub;
  unsigned int nmatch, i;

  if (!pattern || pattern->magic!=TBMKMAGIC || !text ||
      (pattern->direction<0 && 0<text_extent) ||
      (0<pattern->direction && text_extent<0)) {
    return -1;                                              /* it's bad, bad */
  }
  cp = text+pattern->patlen-pattern->direction;
  if (0<pattern->direction) { /* calculate upper and lower bounds for search */
    ub = text+text_extent-1;
    fub = ub-UNROLL*pattern->patlen;
    lb = flb = cp;
  } else {
    lb = text+text_extent;		/* !!!: this used to be +1 */
    flb = lb+UNROLL*pattern->patlen;
    ub = fub = cp;
  }
  nmatch = 0;
 skip_loop:
  if (ub<cp || cp<lb) {                     /* search is out of bounds; done */
    return nmatch;
  }
  while (pattern->skip[*cp]) {     /* not match of last character in pattern */
    if (flb<cp && cp<fub) {
      cp += pattern->skip[*cp];      /* skip or align to next possible match */
      cp += pattern->skip[*cp];      /* skip or align to next possible match */
      cp += pattern->skip[*cp];      /* skip or align to next possible match */
      cp += pattern->skip[*cp];      /* skip or align to next possible match */
    }
    cp += pattern->skip[*cp];        /* skip or align to next possible match */
    if (ub<cp || cp<lb) {                   /* search is out of bounds; done */
      return nmatch;
    }
  }                                 /* fall out of skip loop, possible match */
  tmpcp = cp-pattern->patlen+pattern->direction;
  for (i = 0; tmpcp!=cp; i++) {                 /* compare pattern with text */
    if (pattern->skip[*tmpcp]!=pattern->pattern[i]) {
      cp += pattern->jump;
      goto skip_loop;                 /* mismatch, go back to fast skip loop */
    }
    tmpcp += pattern->direction;                   /* move to next character */
  }
  *match_position = cp-text;                                  /* match found */
  if (0<pattern->direction) {
    *match_position -= (pattern->patlen-1);
  }
  nmatch++;
  if (all_matches) {
    cp += pattern->patlen;             /* move current position beyond match */
    goto skip_loop;                             /* go back to fast skip loop */
  }
  return nmatch;
}

#if defined(NeXT)

#define STREAM_MAGIC 0xbead0369
#define Tell(S) ((S)->buf_ptr-(S)->buf_base+(S)->offset)
#define Getc(S) ((S)->buf_left>0?(*(S)->buf_ptr):_NXStreamFillBuffer(S))

/* Seek() can handle NX_FROMSTART and NX_FROMCURRENT only. */
#define Seek(S, O, M) ({int _off=(O)+((M)==1?Tell(S):0); \
    if ((S)->eof<Tell(S)) {(S)->eof = Tell(S);} \
    if (_off<0 || (S)->eof<_off) {NX_RAISE(NX_illegalSeek, (S), 0);} \
    (*(S)->functions->seek)((S), _off);})

int
Misc_TBMKsearch_stream(const Misc_TBMKpattern pattern,
                       NXStream *stream,
                       int text_extent,
                       unsigned char all_matches,
                       int *match_position)
{
  unsigned int nmatch, i, writable;
  int skip, current, start;

  if (!pattern || pattern->magic!=TBMKMAGIC ||
      !stream || stream->magic_number!=STREAM_MAGIC ||
      (pattern->direction<0 && 0<text_extent) ||
      (0<pattern->direction && text_extent<0) ||
      !(stream->flags&NX_READFLAG && stream->flags&NX_CANSEEK)) {
    return -1;
  }
  if ((0<pattern->direction && text_extent<pattern->patlen) ||
      (pattern->direction<0 && text_extent>pattern->patlen)) {
    return 0;
  }
  writable = stream->flags&NX_WRITEFLAG;
  stream->flags &= ~NX_WRITEFLAG;
 NX_DURING
  start = Tell(stream);
  Seek(stream, pattern->patlen-pattern->direction, NX_FROMCURRENT);
  text_extent -= pattern->patlen;
  nmatch = 0;
 skip_loop:
  do {                                                     /* fast skip loop */
    skip = pattern->skip[Getc(stream)];
    Seek(stream, skip, NX_FROMCURRENT);
    text_extent -= skip;
    skip = pattern->skip[Getc(stream)];
    Seek(stream, skip, NX_FROMCURRENT);
    text_extent -= skip;
    if ((0<pattern->direction && text_extent<0) ||
        (pattern->direction<0 && text_extent>0)) {
      if (writable) {
        stream->flags |= NX_WRITEFLAG;
      }
      return nmatch;
    }
  } while (skip);
  current = Tell(stream);
  Seek(stream, -pattern->patlen+pattern->direction, NX_FROMCURRENT);
  for (i = 0; Tell(stream)!=current; i++) {     /* compare pattern with text */
    if (pattern->skip[Getc(stream)]!=pattern->pattern[i]) {
      Seek(stream, current+pattern->jump, NX_FROMSTART);
      text_extent -= pattern->jump;
      goto skip_loop;                 /* mismatch, go back to fast skip loop */
    }
    Seek(stream, pattern->direction, NX_FROMCURRENT);
  }
  *match_position = current-start;                            /* match found */
  if (0<pattern->direction) {
    *match_position -= (pattern->patlen-1);
  }
  nmatch++;
  if (!all_matches) {
    if (writable) {
      stream->flags |= NX_WRITEFLAG;
    }
    return nmatch;
  }
  Seek(stream, pattern->patlen, NX_FROMCURRENT);
  text_extent -= pattern->patlen;
  goto skip_loop;                               /* go back to fast skip loop */
 NX_HANDLER
  if (writable) {
    stream->flags |= NX_WRITEFLAG;
  }
  switch (NXLocalHandler.code) {
    case NX_illegalSeek:
      return nmatch;
    default:
      NX_RERAISE();
  }
 NX_ENDHANDLER
  return nmatch; // added by Don to remove warning on HPPA beta compiler.
  // we should never get here, so I don't think this is a problem...
}

#endif /* defined(NeXT) */

These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.