This is MiscTBMK.c in view mode; [Download] [Up]
/* A variation on Hume & Sunday's Tuned Boyer-Moore algorithm
* for searching for a literal string pattern in a text.
*
* References:
* Hume, A., D.M. Sunday. "Fast String Searching."
* _Software--Practice and Experience_. Vol. 21.
* No. 11. p. 1221-48. November 1991.
*
* Copyright (c) 1994 Christopher J. Kane.
*
* Version 1.2 (March 22, 1994)
*/
#include <misckit/MiscTBMK.h>
#include <ctype.h>
extern void *calloc(unsigned int, unsigned int);
extern void free(void *);
#define TBMKMAGIC 0x54424D4B
struct Misc_tbmk_pattern {
unsigned int magic;
signed int *pattern;
signed int skip[256];
signed int patlen;
signed int direction;
signed int jump;
};
Misc_TBMKpattern
Misc_TBMKpattern_alloc(const unsigned char *pattern,
unsigned int pattern_length,
unsigned char reverse,
unsigned char nocases)
{
Misc_TBMKpattern new;
unsigned char *tmppat;
int i;
if (pattern_length==0) {
return 0;
}
new = (Misc_TBMKpattern)calloc(1, sizeof(struct Misc_tbmk_pattern));
if (new==0) {
return 0;
}
new->pattern = (int *)calloc(pattern_length, sizeof(int));
if (new->pattern==0) {
free(new);
return 0;
}
tmppat = (unsigned char *)calloc(pattern_length, sizeof(char));
if (tmppat==0) {
free(new->pattern);
free(new);
return 0;
}
new->magic = TBMKMAGIC;
for (i = pattern_length; i--;) {
tmppat[i] = nocases?tolower(pattern[i]):pattern[i];
}
new->patlen = reverse?-pattern_length:pattern_length;
new->direction = reverse?-1:1;
for (i = 256; i--;) {
new->skip[i] = new->patlen;
}
if (reverse) {
for (i = pattern_length-1; i>=0; i--) {
if (nocases) {
new->skip[tolower(tmppat[i])] = -i;
new->skip[toupper(tmppat[i])] = -i;
} else {
new->skip[tmppat[i]] = -i;
}
}
for (i = 1; i<pattern_length && tmppat[i]!=tmppat[0]; i++);
new->jump = -i;
for (i = 0; i<pattern_length; i++) {
new->pattern[pattern_length-1-i] = new->skip[pattern[i]];
}
} else {
for (i = 0; i<pattern_length; i++) {
if (nocases) {
new->skip[tolower(tmppat[i])] = pattern_length-1-i;
new->skip[toupper(tmppat[i])] = pattern_length-1-i;
} else {
new->skip[tmppat[i]] = pattern_length-1-i;
}
}
for (i = pattern_length-2; i>=0 && tmppat[i]!=tmppat[pattern_length-1]; i--);
new->jump = pattern_length-1-i;
for (i = 0; i<pattern_length; i++) {
new->pattern[i] = new->skip[pattern[i]];
}
}
free(tmppat);
return new;
}
void
Misc_TBMKpattern_free(Misc_TBMKpattern *pattern)
{
if (pattern!=0 && *pattern!=0 && (*pattern)->magic==TBMKMAGIC) {
if ((*pattern)->pattern!=0) {
free((*pattern)->pattern);
}
free(*pattern);
*pattern = 0;
}
}
#define UNROLL 4
int
Misc_TBMKsearch_memory(const Misc_TBMKpattern pattern,
const unsigned char *text,
int text_extent,
unsigned char all_matches,
int *match_position)
{
const unsigned char *cp, *tmpcp, *lb, *flb, *ub, *fub;
unsigned int nmatch, i;
if (!pattern || pattern->magic!=TBMKMAGIC || !text ||
(pattern->direction<0 && 0<text_extent) ||
(0<pattern->direction && text_extent<0)) {
return -1; /* it's bad, bad */
}
cp = text+pattern->patlen-pattern->direction;
if (0<pattern->direction) { /* calculate upper and lower bounds for search */
ub = text+text_extent-1;
fub = ub-UNROLL*pattern->patlen;
lb = flb = cp;
} else {
lb = text+text_extent; /* !!!: this used to be +1 */
flb = lb+UNROLL*pattern->patlen;
ub = fub = cp;
}
nmatch = 0;
skip_loop:
if (ub<cp || cp<lb) { /* search is out of bounds; done */
return nmatch;
}
while (pattern->skip[*cp]) { /* not match of last character in pattern */
if (flb<cp && cp<fub) {
cp += pattern->skip[*cp]; /* skip or align to next possible match */
cp += pattern->skip[*cp]; /* skip or align to next possible match */
cp += pattern->skip[*cp]; /* skip or align to next possible match */
cp += pattern->skip[*cp]; /* skip or align to next possible match */
}
cp += pattern->skip[*cp]; /* skip or align to next possible match */
if (ub<cp || cp<lb) { /* search is out of bounds; done */
return nmatch;
}
} /* fall out of skip loop, possible match */
tmpcp = cp-pattern->patlen+pattern->direction;
for (i = 0; tmpcp!=cp; i++) { /* compare pattern with text */
if (pattern->skip[*tmpcp]!=pattern->pattern[i]) {
cp += pattern->jump;
goto skip_loop; /* mismatch, go back to fast skip loop */
}
tmpcp += pattern->direction; /* move to next character */
}
*match_position = cp-text; /* match found */
if (0<pattern->direction) {
*match_position -= (pattern->patlen-1);
}
nmatch++;
if (all_matches) {
cp += pattern->patlen; /* move current position beyond match */
goto skip_loop; /* go back to fast skip loop */
}
return nmatch;
}
#if defined(NeXT)
#define STREAM_MAGIC 0xbead0369
#define Tell(S) ((S)->buf_ptr-(S)->buf_base+(S)->offset)
#define Getc(S) ((S)->buf_left>0?(*(S)->buf_ptr):_NXStreamFillBuffer(S))
/* Seek() can handle NX_FROMSTART and NX_FROMCURRENT only. */
#define Seek(S, O, M) ({int _off=(O)+((M)==1?Tell(S):0); \
if ((S)->eof<Tell(S)) {(S)->eof = Tell(S);} \
if (_off<0 || (S)->eof<_off) {NX_RAISE(NX_illegalSeek, (S), 0);} \
(*(S)->functions->seek)((S), _off);})
int
Misc_TBMKsearch_stream(const Misc_TBMKpattern pattern,
NXStream *stream,
int text_extent,
unsigned char all_matches,
int *match_position)
{
unsigned int nmatch, i, writable;
int skip, current, start;
if (!pattern || pattern->magic!=TBMKMAGIC ||
!stream || stream->magic_number!=STREAM_MAGIC ||
(pattern->direction<0 && 0<text_extent) ||
(0<pattern->direction && text_extent<0) ||
!(stream->flags&NX_READFLAG && stream->flags&NX_CANSEEK)) {
return -1;
}
if ((0<pattern->direction && text_extent<pattern->patlen) ||
(pattern->direction<0 && text_extent>pattern->patlen)) {
return 0;
}
writable = stream->flags&NX_WRITEFLAG;
stream->flags &= ~NX_WRITEFLAG;
NX_DURING
start = Tell(stream);
Seek(stream, pattern->patlen-pattern->direction, NX_FROMCURRENT);
text_extent -= pattern->patlen;
nmatch = 0;
skip_loop:
do { /* fast skip loop */
skip = pattern->skip[Getc(stream)];
Seek(stream, skip, NX_FROMCURRENT);
text_extent -= skip;
skip = pattern->skip[Getc(stream)];
Seek(stream, skip, NX_FROMCURRENT);
text_extent -= skip;
if ((0<pattern->direction && text_extent<0) ||
(pattern->direction<0 && text_extent>0)) {
if (writable) {
stream->flags |= NX_WRITEFLAG;
}
return nmatch;
}
} while (skip);
current = Tell(stream);
Seek(stream, -pattern->patlen+pattern->direction, NX_FROMCURRENT);
for (i = 0; Tell(stream)!=current; i++) { /* compare pattern with text */
if (pattern->skip[Getc(stream)]!=pattern->pattern[i]) {
Seek(stream, current+pattern->jump, NX_FROMSTART);
text_extent -= pattern->jump;
goto skip_loop; /* mismatch, go back to fast skip loop */
}
Seek(stream, pattern->direction, NX_FROMCURRENT);
}
*match_position = current-start; /* match found */
if (0<pattern->direction) {
*match_position -= (pattern->patlen-1);
}
nmatch++;
if (!all_matches) {
if (writable) {
stream->flags |= NX_WRITEFLAG;
}
return nmatch;
}
Seek(stream, pattern->patlen, NX_FROMCURRENT);
text_extent -= pattern->patlen;
goto skip_loop; /* go back to fast skip loop */
NX_HANDLER
if (writable) {
stream->flags |= NX_WRITEFLAG;
}
switch (NXLocalHandler.code) {
case NX_illegalSeek:
return nmatch;
default:
NX_RERAISE();
}
NX_ENDHANDLER
return nmatch; // added by Don to remove warning on HPPA beta compiler.
// we should never get here, so I don't think this is a problem...
}
#endif /* defined(NeXT) */
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.