This is MiscTBMK.c in view mode; [Download] [Up]
/* A variation on Hume & Sunday's Tuned Boyer-Moore algorithm * for searching for a literal string pattern in a text. * * References: * Hume, A., D.M. Sunday. "Fast String Searching." * _Software--Practice and Experience_. Vol. 21. * No. 11. p. 1221-48. November 1991. * * Copyright (c) 1994 Christopher J. Kane. * * Version 1.2 (March 22, 1994) */ #include <misckit/MiscTBMK.h> #include <ctype.h> extern void *calloc(unsigned int, unsigned int); extern void free(void *); #define TBMKMAGIC 0x54424D4B struct Misc_tbmk_pattern { unsigned int magic; signed int *pattern; signed int skip[256]; signed int patlen; signed int direction; signed int jump; }; Misc_TBMKpattern Misc_TBMKpattern_alloc(const unsigned char *pattern, unsigned int pattern_length, unsigned char reverse, unsigned char nocases) { Misc_TBMKpattern new; unsigned char *tmppat; int i; if (pattern_length==0) { return 0; } new = (Misc_TBMKpattern)calloc(1, sizeof(struct Misc_tbmk_pattern)); if (new==0) { return 0; } new->pattern = (int *)calloc(pattern_length, sizeof(int)); if (new->pattern==0) { free(new); return 0; } tmppat = (unsigned char *)calloc(pattern_length, sizeof(char)); if (tmppat==0) { free(new->pattern); free(new); return 0; } new->magic = TBMKMAGIC; for (i = pattern_length; i--;) { tmppat[i] = nocases?tolower(pattern[i]):pattern[i]; } new->patlen = reverse?-pattern_length:pattern_length; new->direction = reverse?-1:1; for (i = 256; i--;) { new->skip[i] = new->patlen; } if (reverse) { for (i = pattern_length-1; i>=0; i--) { if (nocases) { new->skip[tolower(tmppat[i])] = -i; new->skip[toupper(tmppat[i])] = -i; } else { new->skip[tmppat[i]] = -i; } } for (i = 1; i<pattern_length && tmppat[i]!=tmppat[0]; i++); new->jump = -i; for (i = 0; i<pattern_length; i++) { new->pattern[pattern_length-1-i] = new->skip[pattern[i]]; } } else { for (i = 0; i<pattern_length; i++) { if (nocases) { new->skip[tolower(tmppat[i])] = pattern_length-1-i; new->skip[toupper(tmppat[i])] = pattern_length-1-i; } else { new->skip[tmppat[i]] = pattern_length-1-i; } } for (i = pattern_length-2; i>=0 && tmppat[i]!=tmppat[pattern_length-1]; i--); new->jump = pattern_length-1-i; for (i = 0; i<pattern_length; i++) { new->pattern[i] = new->skip[pattern[i]]; } } free(tmppat); return new; } void Misc_TBMKpattern_free(Misc_TBMKpattern *pattern) { if (pattern!=0 && *pattern!=0 && (*pattern)->magic==TBMKMAGIC) { if ((*pattern)->pattern!=0) { free((*pattern)->pattern); } free(*pattern); *pattern = 0; } } #define UNROLL 4 int Misc_TBMKsearch_memory(const Misc_TBMKpattern pattern, const unsigned char *text, int text_extent, unsigned char all_matches, int *match_position) { const unsigned char *cp, *tmpcp, *lb, *flb, *ub, *fub; unsigned int nmatch, i; if (!pattern || pattern->magic!=TBMKMAGIC || !text || (pattern->direction<0 && 0<text_extent) || (0<pattern->direction && text_extent<0)) { return -1; /* it's bad, bad */ } cp = text+pattern->patlen-pattern->direction; if (0<pattern->direction) { /* calculate upper and lower bounds for search */ ub = text+text_extent-1; fub = ub-UNROLL*pattern->patlen; lb = flb = cp; } else { lb = text+text_extent; /* !!!: this used to be +1 */ flb = lb+UNROLL*pattern->patlen; ub = fub = cp; } nmatch = 0; skip_loop: if (ub<cp || cp<lb) { /* search is out of bounds; done */ return nmatch; } while (pattern->skip[*cp]) { /* not match of last character in pattern */ if (flb<cp && cp<fub) { cp += pattern->skip[*cp]; /* skip or align to next possible match */ cp += pattern->skip[*cp]; /* skip or align to next possible match */ cp += pattern->skip[*cp]; /* skip or align to next possible match */ cp += pattern->skip[*cp]; /* skip or align to next possible match */ } cp += pattern->skip[*cp]; /* skip or align to next possible match */ if (ub<cp || cp<lb) { /* search is out of bounds; done */ return nmatch; } } /* fall out of skip loop, possible match */ tmpcp = cp-pattern->patlen+pattern->direction; for (i = 0; tmpcp!=cp; i++) { /* compare pattern with text */ if (pattern->skip[*tmpcp]!=pattern->pattern[i]) { cp += pattern->jump; goto skip_loop; /* mismatch, go back to fast skip loop */ } tmpcp += pattern->direction; /* move to next character */ } *match_position = cp-text; /* match found */ if (0<pattern->direction) { *match_position -= (pattern->patlen-1); } nmatch++; if (all_matches) { cp += pattern->patlen; /* move current position beyond match */ goto skip_loop; /* go back to fast skip loop */ } return nmatch; } #if defined(NeXT) #define STREAM_MAGIC 0xbead0369 #define Tell(S) ((S)->buf_ptr-(S)->buf_base+(S)->offset) #define Getc(S) ((S)->buf_left>0?(*(S)->buf_ptr):_NXStreamFillBuffer(S)) /* Seek() can handle NX_FROMSTART and NX_FROMCURRENT only. */ #define Seek(S, O, M) ({int _off=(O)+((M)==1?Tell(S):0); \ if ((S)->eof<Tell(S)) {(S)->eof = Tell(S);} \ if (_off<0 || (S)->eof<_off) {NX_RAISE(NX_illegalSeek, (S), 0);} \ (*(S)->functions->seek)((S), _off);}) int Misc_TBMKsearch_stream(const Misc_TBMKpattern pattern, NXStream *stream, int text_extent, unsigned char all_matches, int *match_position) { unsigned int nmatch, i, writable; int skip, current, start; if (!pattern || pattern->magic!=TBMKMAGIC || !stream || stream->magic_number!=STREAM_MAGIC || (pattern->direction<0 && 0<text_extent) || (0<pattern->direction && text_extent<0) || !(stream->flags&NX_READFLAG && stream->flags&NX_CANSEEK)) { return -1; } if ((0<pattern->direction && text_extent<pattern->patlen) || (pattern->direction<0 && text_extent>pattern->patlen)) { return 0; } writable = stream->flags&NX_WRITEFLAG; stream->flags &= ~NX_WRITEFLAG; NX_DURING start = Tell(stream); Seek(stream, pattern->patlen-pattern->direction, NX_FROMCURRENT); text_extent -= pattern->patlen; nmatch = 0; skip_loop: do { /* fast skip loop */ skip = pattern->skip[Getc(stream)]; Seek(stream, skip, NX_FROMCURRENT); text_extent -= skip; skip = pattern->skip[Getc(stream)]; Seek(stream, skip, NX_FROMCURRENT); text_extent -= skip; if ((0<pattern->direction && text_extent<0) || (pattern->direction<0 && text_extent>0)) { if (writable) { stream->flags |= NX_WRITEFLAG; } return nmatch; } } while (skip); current = Tell(stream); Seek(stream, -pattern->patlen+pattern->direction, NX_FROMCURRENT); for (i = 0; Tell(stream)!=current; i++) { /* compare pattern with text */ if (pattern->skip[Getc(stream)]!=pattern->pattern[i]) { Seek(stream, current+pattern->jump, NX_FROMSTART); text_extent -= pattern->jump; goto skip_loop; /* mismatch, go back to fast skip loop */ } Seek(stream, pattern->direction, NX_FROMCURRENT); } *match_position = current-start; /* match found */ if (0<pattern->direction) { *match_position -= (pattern->patlen-1); } nmatch++; if (!all_matches) { if (writable) { stream->flags |= NX_WRITEFLAG; } return nmatch; } Seek(stream, pattern->patlen, NX_FROMCURRENT); text_extent -= pattern->patlen; goto skip_loop; /* go back to fast skip loop */ NX_HANDLER if (writable) { stream->flags |= NX_WRITEFLAG; } switch (NXLocalHandler.code) { case NX_illegalSeek: return nmatch; default: NX_RERAISE(); } NX_ENDHANDLER return nmatch; // added by Don to remove warning on HPPA beta compiler. // we should never get here, so I don't think this is a problem... } #endif /* defined(NeXT) */
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.