This is lookup.c in view mode; [Download] [Up]
#ifndef lint static char Rcs_Id[] = "$Id: lookup.c,v 1.41 1994/01/25 07:11:51 geoff Exp $"; #endif /* * lookup.c - see if a word appears in the dictionary * * Pace Willisson, 1983 * * Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All modifications to the source code must be clearly marked as * such. Binary redistributions based on modified source code * must be clearly marked as modified versions in the documentation * and/or other materials provided with the distribution. * 4. All advertising materials mentioning features or use of this software * must display the following acknowledgment: * This product includes software developed by Geoff Kuenning and * other unpaid contributors. * 5. The name of Geoff Kuenning may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * $Log: lookup.c,v $ * Revision 1.41 1994/01/25 07:11:51 geoff * Get rid of all old RCS log lines in preparation for the 3.1 release. * */ #include "config.h" #include "ispell.h" #include "proto.h" #include "msgs.h" int linit P ((void)); #ifdef INDEXDUMP static void dumpindex P ((struct flagptr * indexp, int depth)); #endif /* INDEXDUMP */ struct dent * lookup P ((ichar_t * word, int dotree)); static inited = 0; int linit () { int hashfd; register int i; register struct dent * dp; struct flagent * entry; struct flagptr * ind; int nextchar; int viazero; register ichar_t * cp; if (inited) return 0; if ((hashfd = open (hashname, 0)) < 0) { (void) fprintf (stderr, CANT_OPEN, hashname); return (-1); } hashsize = read (hashfd, (char *) &hashheader, sizeof hashheader); if (hashsize < sizeof hashheader) { if (hashsize < 0) (void) fprintf (stderr, LOOKUP_C_CANT_READ, hashname); else if (hashsize == 0) (void) fprintf (stderr, LOOKUP_C_NULL_HASH, hashname); else (void) fprintf (stderr, LOOKUP_C_SHORT_HASH (hashname, hashsize, (int) sizeof hashheader)); return (-1); } else if (hashheader.magic != MAGIC) { (void) fprintf (stderr, LOOKUP_C_BAD_MAGIC (hashname, (unsigned int) MAGIC, (unsigned int) hashheader.magic)); return (-1); } else if (hashheader.magic2 != MAGIC) { (void) fprintf (stderr, LOOKUP_C_BAD_MAGIC2 (hashname, (unsigned int) MAGIC, (unsigned int) hashheader.magic2)); return (-1); } else if (hashheader.compileoptions != COMPILEOPTIONS || hashheader.maxstringchars != MAXSTRINGCHARS || hashheader.maxstringcharlen != MAXSTRINGCHARLEN) { (void) fprintf (stderr, LOOKUP_C_BAD_OPTIONS ((unsigned int) hashheader.compileoptions, hashheader.maxstringchars, hashheader.maxstringcharlen, (unsigned int) COMPILEOPTIONS, MAXSTRINGCHARS, MAXSTRINGCHARLEN)); return (-1); } if (nodictflag) { /* * Dictionary is not needed - create an empty dummy table. We * actually have to have one entry since the hash * algorithm involves a divide by the table size * (actually modulo, but zero is still unacceptable). * So we create an empty entry. */ hashsize = 1; /* This prevents divides by zero */ hashtbl = (struct dent *) calloc (1, sizeof (struct dent)); if (hashtbl == NULL) { (void) fprintf (stderr, LOOKUP_C_NO_HASH_SPACE); return (-1); } hashtbl[0].word = NULL; hashtbl[0].next = NULL; hashtbl[0].flagfield &= ~(USED | KEEP); /* The flag bits don't matter, but calloc cleared them. */ hashstrings = (char *) malloc ((unsigned) hashheader.lstringsize); } else { hashtbl = (struct dent *) malloc ((unsigned) hashheader.tblsize * sizeof (struct dent)); hashsize = hashheader.tblsize; hashstrings = (char *) malloc ((unsigned) hashheader.stringsize); } numsflags = hashheader.stblsize; numpflags = hashheader.ptblsize; sflaglist = (struct flagent *) malloc ((numsflags + numpflags) * sizeof (struct flagent)); if (hashtbl == NULL || hashstrings == NULL || sflaglist == NULL) { (void) fprintf (stderr, LOOKUP_C_NO_HASH_SPACE); return (-1); } pflaglist = sflaglist + numsflags; if (nodictflag) { /* * Read just the strings for the language table, and * skip over the rest of the strings and all of the * hash table. */ if (read (hashfd, hashstrings, (unsigned) hashheader.lstringsize) != hashheader.lstringsize) { (void) fprintf (stderr, LOOKUP_C_BAD_FORMAT); return (-1); } (void) lseek (hashfd, (long) hashheader.stringsize - (long) hashheader.lstringsize + (long) hashheader.tblsize * (long) sizeof (struct dent), 1); } else { if (read (hashfd, hashstrings, (unsigned) hashheader.stringsize) != hashheader.stringsize || read (hashfd, (char *) hashtbl, (unsigned) hashheader.tblsize * sizeof (struct dent)) != hashheader.tblsize * sizeof (struct dent)) { (void) fprintf (stderr, LOOKUP_C_BAD_FORMAT); return (-1); } } if (read (hashfd, (char *) sflaglist, (unsigned) (numsflags + numpflags) * sizeof (struct flagent)) != (numsflags + numpflags) * sizeof (struct flagent)) { (void) fprintf (stderr, LOOKUP_C_BAD_FORMAT); return (-1); } (void) close (hashfd); if (!nodictflag) { for (i = hashsize, dp = hashtbl; --i >= 0; dp++) { if (dp->word == (char *) -1) dp->word = NULL; else dp->word = &hashstrings [ (int)(dp->word) ]; if (dp->next == (struct dent *) -1) dp->next = NULL; else dp->next = &hashtbl [ (int)(dp->next) ]; } } for (i = numsflags + numpflags, entry = sflaglist; --i >= 0; entry++) { if (entry->stripl) entry->strip = (ichar_t *) &hashstrings[(int) entry->strip]; else entry->strip = NULL; if (entry->affl) entry->affix = (ichar_t *) &hashstrings[(int) entry->affix]; else entry->affix = NULL; } /* ** Warning - 'entry' and 'i' are reset in the body of the loop ** below. Don't try to optimize it by (e.g.) moving the decrement ** of i into the loop condition. */ for (i = numsflags, entry = sflaglist; i > 0; i--, entry++) { if (entry->affl == 0) { cp = NULL; ind = &sflagindex[0]; viazero = 1; } else { cp = entry->affix + entry->affl - 1; ind = &sflagindex[*cp]; viazero = 0; while (ind->numents == 0 && ind->pu.fp != NULL) { if (cp == entry->affix) { ind = &ind->pu.fp[0]; viazero = 1; } else { ind = &ind->pu.fp[*--cp]; viazero = 0; } } } if (ind->numents == 0) ind->pu.ent = entry; ind->numents++; /* ** If this index entry has more than MAXSEARCH flags in ** it, we will split it into subentries to reduce the ** searching. However, the split doesn't make sense in ** two cases: (a) if we are already at the end of the ** current affix, or (b) if all the entries in the list ** have identical affixes. Since the list is sorted, (b) ** is true if the first and last affixes in the list ** are identical. */ if (!viazero && ind->numents >= MAXSEARCH && icharcmp (entry->affix, ind->pu.ent->affix) != 0) { /* Sneaky trick: back up and reprocess */ entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */ i = numsflags - (entry - sflaglist); ind->pu.fp = (struct flagptr *) calloc ((unsigned) (SET_SIZE + hashheader.nstrchars), sizeof (struct flagptr)); if (ind->pu.fp == NULL) { (void) fprintf (stderr, LOOKUP_C_NO_LANG_SPACE); return (-1); } ind->numents = 0; } } /* ** Warning - 'entry' and 'i' are reset in the body of the loop ** below. Don't try to optimize it by (e.g.) moving the decrement ** of i into the loop condition. */ for (i = numpflags, entry = pflaglist; i > 0; i--, entry++) { if (entry->affl == 0) { cp = NULL; ind = &pflagindex[0]; viazero = 1; } else { cp = entry->affix; ind = &pflagindex[*cp++]; viazero = 0; while (ind->numents == 0 && ind->pu.fp != NULL) { if (*cp == 0) { ind = &ind->pu.fp[0]; viazero = 1; } else { ind = &ind->pu.fp[*cp++]; viazero = 0; } } } if (ind->numents == 0) ind->pu.ent = entry; ind->numents++; /* ** If this index entry has more than MAXSEARCH flags in ** it, we will split it into subentries to reduce the ** searching. However, the split doesn't make sense in ** two cases: (a) if we are already at the end of the ** current affix, or (b) if all the entries in the list ** have identical affixes. Since the list is sorted, (b) ** is true if the first and last affixes in the list ** are identical. */ if (!viazero && ind->numents >= MAXSEARCH && icharcmp (entry->affix, ind->pu.ent->affix) != 0) { /* Sneaky trick: back up and reprocess */ entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */ i = numpflags - (entry - pflaglist); ind->pu.fp = (struct flagptr *) calloc (SET_SIZE + hashheader.nstrchars, sizeof (struct flagptr)); if (ind->pu.fp == NULL) { (void) fprintf (stderr, LOOKUP_C_NO_LANG_SPACE); return (-1); } ind->numents = 0; } } #ifdef INDEXDUMP (void) fprintf (stderr, "Prefix index table:\n"); dumpindex (pflagindex, 0); (void) fprintf (stderr, "Suffix index table:\n"); dumpindex (sflagindex, 0); #endif if (hashheader.nstrchartype == 0) chartypes = NULL; else { chartypes = (struct strchartype *) malloc (hashheader.nstrchartype * sizeof (struct strchartype)); if (chartypes == NULL) { (void) fprintf (stderr, LOOKUP_C_NO_LANG_SPACE); return (-1); } for (i = 0, nextchar = hashheader.strtypestart; i < hashheader.nstrchartype; i++) { chartypes[i].name = &hashstrings[nextchar]; nextchar += strlen (chartypes[i].name) + 1; chartypes[i].deformatter = &hashstrings[nextchar]; nextchar += strlen (chartypes[i].deformatter) + 1; chartypes[i].suffixes = &hashstrings[nextchar]; while (hashstrings[nextchar] != '\0') nextchar += strlen (&hashstrings[nextchar]) + 1; nextchar++; } } inited = 1; return (0); } #ifdef INDEXDUMP static void dumpindex (indexp, depth) register struct flagptr * indexp; register int depth; { register int i; int j; int k; char stripbuf[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; for (i = 0; i < SET_SIZE + hashheader.nstrchars; i++, indexp++) { if (indexp->numents == 0 && indexp->pu.fp != NULL) { for (j = depth; --j >= 0; ) (void) putc (' ', stderr); if (i >= ' ' && i <= '~') (void) putc (i, stderr); else (void) fprintf (stderr, "0x%x", i); (void) putc ('\n', stderr); dumpindex (indexp->pu.fp, depth + 1); } else if (indexp->numents) { for (j = depth; --j >= 0; ) (void) putc (' ', stderr); if (i >= ' ' && i <= '~') (void) putc (i, stderr); else (void) fprintf (stderr, "0x%x", i); (void) fprintf (stderr, " -> %d entries\n", indexp->numents); for (k = 0; k < indexp->numents; k++) { for (j = depth; --j >= 0; ) (void) putc (' ', stderr); if (indexp->pu.ent[k].stripl) { (void) ichartostr (stripbuf, indexp->pu.ent[k].strip, sizeof stripbuf, 1); (void) fprintf (stderr, " entry %d (-%s,%s)\n", &indexp->pu.ent[k] - sflaglist, stripbuf, indexp->pu.ent[k].affl ? ichartosstr (indexp->pu.ent[k].affix, 1) : "-"); } else (void) fprintf (stderr, " entry %d (%s)\n", &indexp->pu.ent[k] - sflaglist, ichartosstr (indexp->pu.ent[k].affix, 1)); } } } } #endif /* n is length of s */ struct dent * lookup (s, dotree) register ichar_t * s; int dotree; { register struct dent * dp; register char * s1; char schar[INPUTWORDLEN + MAXAFFIXLEN]; dp = &hashtbl[hash (s, hashsize)]; if (ichartostr (schar, s, sizeof schar, 1)) (void) fprintf (stderr, WORD_TOO_LONG (schar)); for ( ; dp != NULL; dp = dp->next) { /* quick strcmp, but only for equality */ s1 = dp->word; if (s1 && s1[0] == schar[0] && strcmp (s1 + 1, schar + 1) == 0) return dp; #ifndef NO_CAPITALIZATION_SUPPORT while (dp->flagfield & MOREVARIANTS) /* Skip variations */ dp = dp->next; #endif } if (dotree) { dp = treelookup (s); return dp; } else return NULL; }
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.