/* This is plzfile.c in view mode; [Download] [Up] */
/* $Id: plzfile.c,v 3.1 1993/06/18 16:56:31 klute Exp klute $ */
/*
* Copyright 1993 Rainer Klute <klute@irb.informatik.uni-dortmund.de>
*
* Permission to use, copy, modify, distribute, and sell this software and
* its documentation for any purpose is hereby granted without fee, provided
* that the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation. The author makes no representations about the suitability
* of this software for any purpose. It is provided "as is" without express
* or implied warranty.
*
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "xplz.h"
#include "message.h"
#include "plzfile.h"
#include "utils.h"
/*
 * "OpenPlzFile" opens the data file "plzFileName" and fills in "plzFile".
 *
 * The directory comes from the environment variable PLZDIR; if that is not
 * set, the compile-time default PLZDIR is used. The plain file
 * "<dir>/<plzFileName>" is tried first. If it cannot be opened, the file
 * "<dir>/<plzFileName>.plz" is opened in binary mode and plzFile->zip is set.
 *
 * If plzFile->f is already set the call does nothing. If neither file can
 * be opened (or the path name does not fit into the local buffers) a message
 * is shown via "Message" and plzFile->f stays null.
 *
 * NOTE(review): the results of malloc and fread in the ".plz" branch are
 * still unchecked; a truncated file or an out-of-memory condition is not
 * detected here.
 */
void OpenPlzFile (PlzFile *plzFile, char *plzFileName)
{
  const char *dir;
  char name[256], zname[256];
  /* Room for both path names plus the fixed text of the message. */
  char msg[sizeof name + sizeof zname + 64];
  int i;
  int c;

  if (plzFile->f != (FILE *) 0)
    return;

  dir = getenv ("PLZDIR");
  if (dir == (char *) 0)
    dir = PLZDIR;

  /* "zname" is the longer of the two names:
   * dir + '/' + file + ".plz" + terminating NUL.
   */
  if (strlen (dir) + 1 + strlen (plzFileName) + strlen (".plz") + 1
      > sizeof zname)
    {
      Message ("Pfadname der Postleitzahlendatei ist zu lang.\n");
      return;
    }
  strcpy (name, dir);
  strcat (name, "/");
  strcat (name, plzFileName);
  strcpy (zname, name);
  strcat (zname, ".plz");

  plzFile->f = fopen (name, "r");
  plzFile->zip = 0;
  if (plzFile->f == (FILE *) 0)
    {
      plzFile->f = fopen (zname, "rb");
      plzFile->zip = 1;
    }
  if (plzFile->f == (FILE *) 0)
    {
      /* Built with a single call: using "msg" as both source and
       * destination of sprintf is undefined behaviour.
       */
      sprintf (msg, "Datei %s oder\n %s konnte nicht geöffnet werden.\n",
               name, zname);
      Message (msg);
      return;
    }

  if (plzFile->zip)
    {
      /* Header: record size, block size, record count, number of keys
       * and the file offset at which the index tables start.
       */
      getlong (plzFile->rsize, plzFile->f);
      getlong (plzFile->bsize, plzFile->f);
      getlong (plzFile->rnum, plzFile->f);
      getlong (plzFile->nindex, plzFile->f);
      getlong (i, plzFile->f);
      fseek (plzFile->f, i, SEEK_SET);

      /* Number of blocks, rounded up. */
      plzFile->bnum = (plzFile->rnum + plzFile->bsize - 1) / plzFile->bsize;

      plzFile->keys = (char **) malloc (plzFile->nindex * sizeof (char *));
      plzFile->keypos = (int *) malloc (plzFile->nindex * sizeof (int));
      plzFile->keylen = (int *) malloc (plzFile->nindex * sizeof (int));
      plzFile->index = (long *) malloc ((plzFile->bnum + 1) * sizeof (long));

      /* bnum + 1 file offsets: block b occupies index[b] .. index[b + 1]. */
      for (i = 0; i < plzFile->bnum + 1; i++)
        getlong (plzFile->index[i], plzFile->f);

      /* For every key: position, length and (bnum + 1) key values. */
      for (i = 0; i < plzFile->nindex; i++)
        {
          getlong (plzFile->keypos[i], plzFile->f);
          getlong (plzFile->keylen[i], plzFile->f);
          plzFile->keys[i] =
            (char *) malloc ((plzFile->bnum + 1) * plzFile->keylen[i] *
                             sizeof (char));
          fread (plzFile->keys[i], 1, (plzFile->bnum + 1) *
                 plzFile->keylen[i], plzFile->f);
        }

      plzFile->recordLength = plzFile->rsize;
      plzFile->size = plzFile->rsize * plzFile->rnum;
      plzFile->entries = plzFile->rnum;
      fseek (plzFile->f, plzFile->index[0], SEEK_SET);

      /* No block is cached yet. */
      for (i = 0; i < ZIPBUFS; i++)
        plzFile->bufstat[i] = -1;
      plzFile->nextbuf = -1;
    }
  else
    {
      /* The record length has to be determined at run time because,
       * depending on where the postal file was obtained, its lines end
       * in \r\n or \n.
       *
       * The scan also stops at end of file, so a file without any newline
       * no longer loops forever. recordLength is always >= 1, so the
       * division below is safe.
       */
      for (i = 0; (c = fgetc (plzFile->f)) != '\n' && c != EOF; i++)
        ;
      plzFile->recordLength = i + 1;
      fseek (plzFile->f, 0L, SEEK_END);
      plzFile->size = ftell (plzFile->f);
      plzFile->entries = plzFile->size / plzFile->recordLength;
      fseek (plzFile->f, 0L, SEEK_SET);
    }
}
/*
 * "isbroken (x)" is true if "x" equals one of the BROKEN_* character codes
 * (BROKEN_Ae, BROKEN_Oe, BROKEN_Ue, BROKEN_ae, BROKEN_oe, BROKEN_ue,
 * BROKEN_ss). Without HANDLE_BROKEN_DATAFILES it is always False.
 *
 * The argument is parenthesised so that any expression can be passed, but
 * it is still evaluated several times: do not pass expressions with side
 * effects.
 */
#if defined (HANDLE_BROKEN_DATAFILES)
#define isbroken(x) ((x) == BROKEN_Ae || (x) == BROKEN_Oe || \
                     (x) == BROKEN_Ue || (x) == BROKEN_ae || \
                     (x) == BROKEN_oe || (x) == BROKEN_ue || \
                     (x) == BROKEN_ss)
#else
#define isbroken(x) False
#endif
extern int memextract (char *, int, char *, int);

/*
 * "fgetr" reads record number "rec" of "plzFile" into the buffer "r".
 *
 * Plain file: seeks to rec * recordLength and reads with fgets, i.e. at
 * most recordLength - 1 characters followed by a terminating NUL.
 *
 * Compressed file (plzFile->zip): the record lives in block
 * rec / bsize. Up to ZIPBUFS decompressed blocks are cached in
 * plzFile->obuf[], with plzFile->bufstat[] holding the block number stored
 * in each slot (-1 = slot never used). On a cache miss the next slot is
 * taken in round-robin order, the compressed block is read from the file
 * and expanded with "memextract". Exactly rsize bytes are then copied to
 * "r"; no NUL is appended.
 *
 * A short read, a corrupt block index or an allocation failure terminates
 * the program with exit (1).
 *
 * NOTE(review): the return value of memextract is ignored, as before; its
 * meaning is not visible from this file.
 */
void fgetr (void *r, int rec, PlzFile *plzFile)
{
  int b, i, outsize, ilen;
  long clen, isize;
  char *ibuf;
  static char *s_obuf = NULL;
  extern char *outbuf;
  extern char *outptr;

  if (!plzFile->zip)
    {
      /* Offset computed in long to avoid int overflow on large files. */
      fseek (plzFile->f, (long) rec * plzFile->recordLength, SEEK_SET);
      fgets (r, plzFile->recordLength, plzFile->f);
      return;
    }

  /* Scratch buffer handed to the decompressor through "outbuf";
   * allocated once and kept for the life of the program.
   */
  if (s_obuf == NULL)
    {
      s_obuf = malloc (2048 + 1);
      if (s_obuf == NULL)
        {
          perror ("fgetr");
          exit (1);
        }
    }

  /* Is the block already cached? */
  b = rec / plzFile->bsize;
  for (i = 0; i < ZIPBUFS; i++)
    if (b == plzFile->bufstat[i])
      break;

  if (i == ZIPBUFS)
    {
      /* Cache miss: recycle the next slot. */
      if (++plzFile->nextbuf == ZIPBUFS)
        plzFile->nextbuf = 0;
      i = plzFile->nextbuf;
      if (plzFile->bufstat[i] < 0)
        {
          plzFile->obuf[i] =
            (char *) malloc (plzFile->rsize * plzFile->bsize);
          if (plzFile->obuf[i] == NULL)
            {
              perror ("fgetr");
              exit (1);
            }
        }
      plzFile->bufstat[i] = b;

      /* Size of the decompressed block. It must be derived from the first
       * record of the block, not from "rec": otherwise a request for a
       * record in the second half of the final, partial block would expand
       * too few bytes and leave that record (and the cached block)
       * incomplete.
       */
      outsize = (plzFile->rnum - b * plzFile->bsize) * plzFile->rsize;
      if (outsize > plzFile->rsize * plzFile->bsize)
        outsize = plzFile->rsize * plzFile->bsize;

      /* Length of the compressed block according to the index. */
      clen = plzFile->index[b + 1] - plzFile->index[b];
      if (clen < 0)
        {
          fputs ("corrupt block index\n", stderr);
          exit (1);
        }

      /* The input buffer must hold the compressed data even in the
       * unusual case that it is larger than the uncompressed block.
       */
      isize = plzFile->bsize * plzFile->rsize;
      if (isize < clen)
        isize = clen;
      ibuf = malloc (isize);
      if (ibuf == NULL)
        {
          perror ("fgetr");
          exit (1);
        }

      fseek (plzFile->f, plzFile->index[b], SEEK_SET);
      ilen = (int) fread (ibuf, 1, clen, plzFile->f);
      if (ilen != clen)
        {
          perror ("short input file");
          exit (1);
        }

      outbuf = s_obuf;
      outptr = plzFile->obuf[i];
      memextract (outptr, outsize, ibuf, ilen);
      free (ibuf);
    }

  memcpy (r, (plzFile->obuf[i]) + (rec % plzFile->bsize) * plzFile->rsize,
          plzFile->rsize);
}
/*
 * "BinarySearch" looks for a record whose key matches "searchKey" and
 * returns its record number, or -1 if there is none.
 *
 * After every access "BinarySearch" calls "NoteRecord" with the record just
 * read. If NoteRecord returns "False", "BinarySearch" stops and returns -1.
 *
 * Parameters:
 *   f            - not used; records are read through "plzFile".
 *   recordLength - size of the record buffer to allocate (one extra byte
 *                  is added).
 *   min, max     - record numbers bounding the search.
 *   keyPos       - offset of the key within a record.
 *   keyLength    - maximum number of key characters to compare.
 *   searchKey    - key to look for.
 *   unequalMin,
 *   unequalMax   - out: start as min/max; whenever a probe compares
 *                  greater (less) than the key, the lower (upper) bound in
 *                  effect before that probe is stored here.
 *   searchWhat   - SearchFirst, SearchLast or SearchAny: which of several
 *                  equal records to return. Any other value is treated
 *                  like SearchAny.
 *   refKeyPos    - offset of the reference spelling of the key within a
 *                  record, or negative to always compare keyLength
 *                  characters.
 *   NoteRecord   - callback, see above.
 *   plzFile      - file to read from.
 */
long BinarySearch (FILE *f, int recordLength, long min, long max, int keyPos,
                   int keyLength, char *searchKey, long *unequalMin,
                   long *unequalMax, int searchWhat, int refKeyPos,
                   Boolean NoteRecord (char *), PlzFile *plzFile)
{
  char *r;
  long probe = -1;
  long hit = -1;
  int compare;
  int compareLength;

  /* Zero-filled, so the buffer handed to NoteRecord is NUL-terminated
   * even when fgetr copies raw bytes without a terminator (presumably
   * recordLength bytes - confirm against the callers).
   */
  r = (char *) calloc ((size_t) recordLength + 1, 1);
  if (r == (char *) 0)
    {
      OutOfMemory ("BinarySearch");
      return -1L;
    }

  *unequalMin = min;
  *unequalMax = max;

  do
    {
      /* With two adjacent candidates the midpoint rounds down to "min"
       * again; once "min" has been probed, move on to "max".
       */
      if (probe == min && max - min == 1)
        min = max;
      probe = (min + max) / 2;

      fgetr (r, probe, plzFile);
      if (NoteRecord (r) == False)
        {
          free (r);
          return -1L;
        }

      /* As with the place name entered by the user (see umsda.c), the
       * significant length ("compareLength") must also be determined for
       * the place name read from the "umsda" file. To make things harder,
       * umlauts are expanded in the key ("key"), which has to be taken
       * into account ("specials"). The spelling with umlauts ("refKey")
       * is used for that.
       */
      if (refKeyPos < 0)
        compareLength = keyLength;
      else
        {
          int specials = 0;
          int keyUsed;
          unsigned char *c;
          unsigned char *refKey = (unsigned char *) (r + refKeyPos);
          unsigned char *key = (unsigned char *) (r + keyPos);

          /* Determine the key length. The bound is tested first so that
           * no byte beyond the key field is read.
           */
          for (c = key; c - key < keyLength && *c != ' '; c++)
            ;
          keyUsed = c - key;

          /* Count the umlauts: */
          for (c = refKey;
               c - refKey < keyUsed
               && (!(ispunct (*c)) || isbroken (*c));
               c++)
            {
              if (*c == IBM_Ae || *c == IBM_Oe || *c == IBM_Ue
                  || *c == IBM_ae || *c == IBM_oe || *c == IBM_ue
                  || *c == IBM_ss
#if defined (HANDLE_BROKEN_DATAFILES)
                  || *c == BROKEN_Ae || *c == BROKEN_Oe || *c == BROKEN_Ue
                  || *c == BROKEN_ae || *c == BROKEN_oe || *c == BROKEN_ue
                  || *c == BROKEN_ss
#endif
                  )
                specials++;
            }

          compareLength = c - refKey + specials;
          if (compareLength > keyLength)
            compareLength = keyLength;
        }

      compare = strncmp (searchKey, r + keyPos, compareLength);
      if (compare < 0)
        {
          *unequalMax = max;
          max = probe;
        }
      else if (compare > 0)
        {
          *unequalMin = min;
          min = probe;
        }
      else
        {
          hit = probe;
          switch (searchWhat)
            {
            case SearchFirst:
              max = probe;
              break;
            case SearchLast:
              min = probe;
              break;
            case SearchAny:
            default:
              /* An unknown mode must still shrink the range, otherwise
               * the loop would never terminate.
               */
              max = min = probe;
              break;
            }
        }
    }
  while (min != max);

  free (r);
  return hit;
}
/*
 * "BinarySearchAll" looks for all records with the key "searchKey". It
 * first runs a binary search on the file with "BinarySearch". On a hit it
 * then searches backwards for the first and forwards for the last
 * occurrence, again by binary search.
 *
 * Returns a newly allocated Range, or a null pointer if the allocation
 * fails. result->first and result->last are the record numbers of the
 * first and last match; both are -1 if nothing was found or if one of the
 * follow-up searches failed.
 *
 * NOTE(review): BinarySearch also returns -1 when "NoteRecord" asks to
 * stop, so the "wrong sort order" message below may appear in that case as
 * well.
 */
Range *BinarySearchAll (FILE *f, int recordLength, long min, long max,
                        int keyPos, int keyLength, char *searchKey,
                        int refKeyPos, Boolean NoteRecord (char *),
                        PlzFile *plzFile)
{
  Range *result;
  long any;
  long unequalMin, unequalMax;
  long dummy;

  result = (Range *) malloc (sizeof (Range));
  if (result == (Range *) 0)
    {
      /* Report this function, not BinarySearch, as the failing one. */
      OutOfMemory ("BinarySearchAll");
      return (Range *) 0;
    }
  result->first = -1;
  result->last = -1;

  /* Find any matching record; this also yields bounds for the next two
   * searches.
   */
  any = BinarySearch (f, recordLength, min, max, keyPos, keyLength,
                      searchKey, &unequalMin, &unequalMax, SearchAny,
                      refKeyPos, NoteRecord, plzFile);
  if (any == -1)
    return result;

  /* Look for the first record: */
  result->first = BinarySearch (f, recordLength, unequalMin, any, keyPos,
                                keyLength, searchKey, &dummy, &dummy,
                                SearchFirst, refKeyPos, NoteRecord, plzFile);

  /* Look for the last record: */
  result->last = BinarySearch (f, recordLength, any, unequalMax, keyPos,
                               keyLength, searchKey, &dummy, &dummy,
                               SearchLast, refKeyPos, NoteRecord, plzFile);

  if (result->first == -1L || result->last == -1L)
    {
      Message ("Fehler in Datenbank der Deutschen Bundespost Postdienst:\n");
      Message ("Sortierreihenfolge falsch.\n");
      result->first = -1;
      result->last = -1;
    }
  return result;
}
/* These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch. */