This is plzfile.c in view mode; [Download] [Up]
/* $Id: plzfile.c,v 3.1 1993/06/18 16:56:31 klute Exp klute $ */ /* * Copyright 1993 Rainer Klute <klute@irb.informatik.uni-dortmund.de> * * Permission to use, copy, modify, distribute, and sell this software and * its documentation for any purpose is hereby granted without fee, provided * that the above copyright notice appear in all copies and that both that * copyright notice and this permission notice appear in supporting * documentation. The author makes no representations about the suitability * of this software for any purpose. It is provided "as is" without express * or implied warranty. * */ #include <ctype.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "xplz.h" #include "message.h" #include "plzfile.h" #include "utils.h" void OpenPlzFile (PlzFile *plzFile, char *plzFileName) { char *envPlzDir; char name[256], zname[256]; char msg[300]; int i; if (plzFile->f != (FILE *) 0) return; envPlzDir = getenv ("PLZDIR"); if (envPlzDir == (char *) 0) strcpy (name, PLZDIR); else strcpy (name, envPlzDir); strcat (name, "/"); strcat (name, plzFileName); strcpy (zname, name); strcat (zname, ".plz"); plzFile->f = fopen (name, "r"); plzFile->zip = 0; if (plzFile->f == (FILE *) 0) { plzFile->f = fopen (zname, "rb"); plzFile->zip = 1; } if (plzFile->f == (FILE *) 0) { sprintf (msg, "Datei %s oder\n", name); sprintf (msg, "%s %s konnte nicht geöffnet werden.\n", msg, zname); Message (msg); return; } if (plzFile->zip) { getlong (plzFile->rsize, plzFile->f); getlong (plzFile->bsize, plzFile->f); getlong (plzFile->rnum, plzFile->f); getlong (plzFile->nindex, plzFile->f); getlong (i, plzFile->f); fseek (plzFile->f, i, 0); plzFile->bnum = (plzFile->rnum + plzFile->bsize - 1)/plzFile->bsize; plzFile->keys = (char **) malloc (plzFile->nindex*sizeof (char *)); plzFile->keypos = (int *) malloc (plzFile->nindex*sizeof (int)); plzFile->keylen = (int *) malloc (plzFile->nindex*sizeof (int)); plzFile->index = (long *) malloc ((plzFile->bnum+1) * sizeof (long)); for (i = 0; i < plzFile->bnum + 1; i++) getlong (plzFile->index[i], plzFile->f); for (i = 0; i < plzFile->nindex; i++) { getlong (plzFile->keypos[i], plzFile->f); getlong (plzFile->keylen[i], plzFile->f); plzFile->keys[i] = (char *) malloc ((plzFile->bnum + 1) * plzFile->keylen[i] * sizeof (char)); fread (plzFile->keys[i], 1, (plzFile->bnum + 1) * plzFile->keylen[i], plzFile->f); } plzFile->recordLength = plzFile->rsize; plzFile->size = plzFile->rsize * plzFile->rnum; plzFile->entries = plzFile->rnum; fseek (plzFile->f, plzFile->index[0], 0); for (i = 0; i < ZIPBUFS; i++) plzFile->bufstat[i] = -1; plzFile->nextbuf = -1; } else { /* Die Satzlänge müssen wir zur Laufzeit ermitteln, das je nach * Bezugsquelle der Postdatei die Zeilen mit \r\n oder \n abgeschlossen * werden. */ for (i = 0; fgetc (plzFile->f) != '\n'; i++) ; plzFile->recordLength = i + 1; fseek (plzFile->f, 0L, 2); plzFile->size = ftell (plzFile->f); plzFile->entries = plzFile->size / plzFile->recordLength; fseek (plzFile->f, 0L, 0); } } #if defined (HANDLE_BROKEN_DATAFILES) #define isbroken(x) (x == BROKEN_Ae || x == BROKEN_Oe || x == BROKEN_Ue || \ x == BROKEN_ae || x == BROKEN_oe || x == BROKEN_ue || \ x == BROKEN_ss) #else #define isbroken(x) False #endif extern int memextract(char *,int,char *,int); void fgetr (void *r, int rec, PlzFile *plzFile) { int b, i, outsize, ilen; static char *ibuf, *s_obuf = NULL; extern char *outbuf; extern char *outptr; if (!plzFile->zip) { fseek (plzFile->f, rec * plzFile->recordLength, 0); fgets (r, plzFile->recordLength, plzFile->f); } else { if (s_obuf == NULL) s_obuf = malloc (2048 + 1); b = rec / plzFile->bsize; for (i = 0; i < ZIPBUFS; i++) if (b == plzFile->bufstat[i]) break; if (i == ZIPBUFS) { if (++plzFile->nextbuf == ZIPBUFS) plzFile->nextbuf = 0; i = plzFile->nextbuf; if (plzFile->bufstat[i] < 0) plzFile->obuf[i] = (char*) malloc (plzFile->rsize * plzFile->bsize); plzFile->bufstat[i] = b; ibuf = malloc (plzFile->bsize * plzFile->rsize); outsize = (plzFile->rnum-rec) * plzFile->rsize; if (outsize > plzFile->rsize * plzFile->bsize) outsize = plzFile->rsize * plzFile->bsize; fseek (plzFile->f, plzFile->index[b], 0); ilen = fread (ibuf, 1, plzFile->index[b + 1] - plzFile->index[b], plzFile->f); if (ilen != plzFile->index[b + 1] - plzFile->index[b]) { perror ("short input file"); exit (1); } outbuf = s_obuf; outptr = plzFile->obuf[i]; memextract (outptr, outsize, ibuf, ilen); free (ibuf); } memcpy (r, (plzFile->obuf[i]) + (rec % plzFile->bsize) * plzFile->rsize, plzFile->rsize); } } /* * "BinarySearch" sucht einen Datensatz mit dem Schlüssel "searchKey" und * liefert seine Satznummer als Ergebnis. * Nach jedem Zugriff ruft "BinarySearch" die Funktion "NoteRecord" auf und * übergibt den gerade gelesenen Satz. Falls NoteRecord "False" zurückgibt, * bricht "BinarySearch" die Verarbeitung ab. */ long BinarySearch (FILE *f, int recordLength, long min, long max, int keyPos, int keyLength, char *searchKey, long *unequalMin, long *unequalMax, int searchWhat, int refKeyPos, Boolean NoteRecord (char *), PlzFile *plzFile) { char *r; long try; long hit = -1; int compare; int compareLength; r = (char *) malloc (recordLength + 1); if (r == (char *) 0) { OutOfMemory ("BinarySearch"); return -1L; } *unequalMin = min; *unequalMax = max; compare = try = -1; do { if (try == min && max - min == 1) min = max; try = (min + max) / 2; fgetr (r, try, plzFile); if (NoteRecord (r) == False) { free (r); return -1L; } /* So wie beim vom Benutzer angegebenen Ortsnamen (siehe umsda.c) muß * auch beim aus der Datei "umsda" gelesenen Ortsnamen die signifikante * Länge ("compareLength") ermittelt werden. Erschwerend kommt hinzu, * daß im Schlüssel ("key") Umlaute aufgelöst sind, was wir * berücksichtigen müssen ("specials"). Dazu nutzen wir die * Schreibweise mit Umlauten ("refKey"). */ if (refKeyPos < 0) compareLength = keyLength; else { int specials = 0; int max; unsigned char *c; unsigned char *refKey = (unsigned char *) (r + refKeyPos); unsigned char *key = (unsigned char *) (r + keyPos); /* Schlüssellänge ermitteln: */ for (c = key; *c != ' ' && c - key < keyLength; c++) ; max = c - key; /* Umlaute zählen: */ for (c = refKey; (!(ispunct (*c)) || isbroken (*c)) && c - refKey < max; c++) { if (*c == IBM_Ae || *c == IBM_Oe || *c == IBM_Ue || *c == IBM_ae || *c == IBM_oe || *c == IBM_ue || *c == IBM_ss #if defined (HANDLE_BROKEN_DATAFILES) || *c == BROKEN_Ae || *c == BROKEN_Oe || *c == BROKEN_Ue || *c == BROKEN_ae || *c == BROKEN_oe || *c == BROKEN_ue || *c == BROKEN_ss #endif ) specials++; } compareLength = c - refKey + specials; if (compareLength > keyLength) compareLength = keyLength; } compare = strncmp (searchKey, r + keyPos, compareLength); if (compare < 0) { *unequalMax = max; max = try; } else if (compare > 0) { *unequalMin = min; min = try; } else if (compare == 0) { hit = try; switch (searchWhat) { case SearchFirst: { max = try; break; } case SearchLast: { min = try; break; } case SearchAny: { max = min = try; break; } } } } while (min != max); free (r); return hit; } /* * "BinarySearchAll" sucht alle Datensätze mit dem Schlüssel "searchKey". Dazu * führt es zunächst mit Hilfe von "BinarySearch" eine binäre Suche auf der * Datei durch. Bei einem Treffer sucht es binär rückwärts nach dem ersten und * anschließend vorwärts nach dem letzten Vorkommen. */ Range *BinarySearchAll (FILE *f, int recordLength, long min, long max, int keyPos, int keyLength, char *searchKey, int refKeyPos, Boolean NoteRecord (char *), PlzFile *plzFile) { Range *result; long any; long unequalMin, unequalMax; long dummy; result = (Range *) malloc (sizeof (Range)); if (result == (Range *) 0) { OutOfMemory ("BinarySearch"); return (Range *) 0; } result->first = -1; result->last = -1; any = BinarySearch (f, recordLength, min, max, keyPos, keyLength, searchKey, &unequalMin, &unequalMax, SearchAny, refKeyPos, NoteRecord, plzFile); if (any == -1) return result; /* ersten Satz suchen: */ result->first = BinarySearch (f, recordLength, unequalMin, any, keyPos, keyLength, searchKey, &dummy, &dummy, SearchFirst, refKeyPos, NoteRecord, plzFile); /* letzten Satz suchen: */ result->last = BinarySearch (f, recordLength, any, unequalMax, keyPos, keyLength, searchKey, &dummy, &dummy, SearchLast, refKeyPos, NoteRecord, plzFile); if (result->first == -1L || result->last == -1L) { Message ("Fehler in Datenbank der Deutschen Bundespost Postdienst:\n"); Message ("Sortierreihenfolge falsch.\n"); result->first = -1; result->last = -1; } return result; }
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.