/*
 * plzfile.c
 *
 * (Web archive residue: "This is plzfile.c in view mode; [Download] [Up]")
 */
/* $Id: plzfile.c,v 3.1 1993/06/18 16:56:31 klute Exp klute $ */

/* 
 * Copyright 1993 Rainer Klute <klute@irb.informatik.uni-dortmund.de>
 *
 * Permission to use, copy, modify, distribute, and sell this software and
 * its documentation for any purpose is hereby granted without fee, provided
 * that the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation. The author makes no representations about the suitability
 * of this software for any purpose. It is provided "as is" without express
 * or implied warranty.
 *
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "xplz.h"
#include "message.h"
#include "plzfile.h"
#include "utils.h"




/*
 * "OpenPlzFile" opens the data file "plzFileName" and fills in "plzFile".
 *
 * The file is looked up in the directory named by the environment variable
 * PLZDIR or, if that is not set, in the compiled-in default PLZDIR. First the
 * plain file is tried (plzFile->zip = 0); if it cannot be opened, the file
 * with the suffix ".plz" is tried in binary mode (plzFile->zip = 1).
 *
 * If plzFile->f is already set the function does nothing. On any failure a
 * message is passed to "Message", everything allocated here is released and
 * plzFile->f is left as a null pointer, so callers can test plzFile->f.
 */
void OpenPlzFile (PlzFile *plzFile, char *plzFileName)
{
    char *plzDir;
    char name[256], zname[256];
    char msg[640];
    size_t keyBytes;
    int i;
    int c;

    if (plzFile->f != (FILE *) 0)
        return;

    plzDir = getenv ("PLZDIR");
    if (plzDir == (char *) 0)
        plzDir = PLZDIR;

    /* Directory + '/' + file name + ".plz" + NUL must fit into "zname";
     * the environment variable is not under our control.
     */
    if (strlen (plzDir) + 1 + strlen (plzFileName) + strlen (".plz") + 1
        > sizeof zname)
    {
        snprintf (msg, sizeof msg, "Dateiname %.250s/%.250s ist zu lang.\n",
                  plzDir, plzFileName);
        Message (msg);
        return;
    }
    snprintf (name, sizeof name, "%s/%s", plzDir, plzFileName);
    snprintf (zname, sizeof zname, "%s.plz", name);

    plzFile->f = fopen (name, "r");
    plzFile->zip = 0;
    if (plzFile->f == (FILE *) 0)
    {
        plzFile->f = fopen (zname, "rb");
        plzFile->zip = 1;
    }

    if (plzFile->f == (FILE *) 0)
    {
        /* Built in one call: using "msg" as both source and destination
         * of sprintf is undefined behaviour.
         */
        snprintf (msg, sizeof msg,
                  "Datei %s oder\n %s konnte nicht ge�ffnet werden.\n",
                  name, zname);
        Message (msg);
        return;
    }

    if (plzFile->zip)
    {
        /* Make the cleanup code at "fail" safe from the start. */
        plzFile->keys = (char **) 0;
        plzFile->keypos = (int *) 0;
        plzFile->keylen = (int *) 0;
        plzFile->index = (long *) 0;

        /* Header: record size, records per block, number of records,
         * number of key tables, file offset of the index.
         */
        getlong (plzFile->rsize, plzFile->f);
        getlong (plzFile->bsize, plzFile->f);
        getlong (plzFile->rnum, plzFile->f);
        getlong (plzFile->nindex, plzFile->f);
        getlong (i, plzFile->f);

        /* bsize is used as a divisor below; reject nonsense headers. */
        if (plzFile->rsize <= 0 || plzFile->bsize <= 0 || plzFile->rnum < 0
            || plzFile->nindex < 0 || i < 0
            || fseek (plzFile->f, (long) i, SEEK_SET) != 0)
        {
            snprintf (msg, sizeof msg,
                      "Datei %.250s hat ein ungueltiges Format.\n", zname);
            goto fail;
        }

        plzFile->bnum = (plzFile->rnum + plzFile->bsize - 1)/plzFile->bsize;

        /* calloc so that every keys[i] starts out as a null pointer. */
        plzFile->keys = (char **) calloc (plzFile->nindex, sizeof (char *));
        plzFile->keypos = (int *) malloc (plzFile->nindex*sizeof (int));
        plzFile->keylen = (int *) malloc (plzFile->nindex*sizeof (int));
        plzFile->index = (long *) malloc ((plzFile->bnum+1) * sizeof (long));

        if (plzFile->index == (long *) 0
            || (plzFile->nindex > 0
                && (plzFile->keys == (char **) 0
                    || plzFile->keypos == (int *) 0
                    || plzFile->keylen == (int *) 0)))
        {
            snprintf (msg, sizeof msg,
                      "Nicht genug Speicher zum Oeffnen von %.250s.\n",
                      zname);
            goto fail;
        }

        /* bnum + 1 block offsets: the extra one marks the end of the last
         * block.
         */
        for (i = 0; i < plzFile->bnum + 1; i++)
        {
            getlong (plzFile->index[i], plzFile->f);
        }

        /* Each key table holds bnum + 1 keys of keylen[i] bytes. */
        for (i = 0; i < plzFile->nindex; i++)
        {
            getlong (plzFile->keypos[i], plzFile->f);
            getlong (plzFile->keylen[i], plzFile->f);
            if (plzFile->keylen[i] < 0)
            {
                snprintf (msg, sizeof msg,
                          "Datei %.250s hat ein ungueltiges Format.\n",
                          zname);
                goto fail;
            }
            keyBytes = (size_t) (plzFile->bnum + 1)
                       * (size_t) plzFile->keylen[i];
            if (keyBytes == 0)
                continue;
            plzFile->keys[i] = (char *) malloc (keyBytes);
            if (plzFile->keys[i] == (char *) 0)
            {
                snprintf (msg, sizeof msg,
                          "Nicht genug Speicher zum Oeffnen von %.250s.\n",
                          zname);
                goto fail;
            }
            if (fread (plzFile->keys[i], 1, keyBytes, plzFile->f)
                != keyBytes)
            {
                snprintf (msg, sizeof msg,
                          "Datei %.250s hat ein ungueltiges Format.\n",
                          zname);
                goto fail;
            }
        }
        plzFile->recordLength = plzFile->rsize;
        plzFile->size         = plzFile->rsize * plzFile->rnum;
        plzFile->entries      = plzFile->rnum;
        fseek (plzFile->f, plzFile->index[0], SEEK_SET);

        /* No block is cached yet. */
        for (i = 0; i < ZIPBUFS; i++)
            plzFile->bufstat[i] = -1;
        plzFile->nextbuf = -1;
    }
    else
    {
        /* The record length has to be determined at run time, because
         * depending on where the postal data file was obtained the lines
         * end in \r\n or \n.
         */
        i = 0;
        while ((c = fgetc (plzFile->f)) != EOF && c != '\n')
            i++;
        if (c == EOF)
        {
            /* Empty file or no line end at all: without this check the
             * scan would never terminate.
             */
            snprintf (msg, sizeof msg,
                      "Datei %.250s hat ein ungueltiges Format.\n", name);
            goto fail;
        }
        plzFile->recordLength = i + 1;
        fseek (plzFile->f, 0L, SEEK_END);
        plzFile->size = ftell (plzFile->f);
        plzFile->entries = plzFile->size / plzFile->recordLength;
        fseek (plzFile->f, 0L, SEEK_SET);
    }
    return;

fail:
    Message (msg);
    if (plzFile->zip)
    {
        if (plzFile->keys != (char **) 0)
        {
            for (i = 0; i < plzFile->nindex; i++)
                free (plzFile->keys[i]);
        }
        free (plzFile->keys);
        free (plzFile->keypos);
        free (plzFile->keylen);
        free (plzFile->index);
        plzFile->keys = (char **) 0;
        plzFile->keypos = (int *) 0;
        plzFile->keylen = (int *) 0;
        plzFile->index = (long *) 0;
    }
    fclose (plzFile->f);
    plzFile->f = (FILE *) 0;
}




/*
 * isbroken(x) is true if the character x is one of the BROKEN_* umlaut or
 * sharp-s codes. Without HANDLE_BROKEN_DATAFILES it is always False.
 * Note: the argument is evaluated several times, so it must not have side
 * effects.
 */
#if defined (HANDLE_BROKEN_DATAFILES)
#define isbroken(x) ((x) == BROKEN_Ae || (x) == BROKEN_Oe || \
		     (x) == BROKEN_Ue || (x) == BROKEN_ae || \
		     (x) == BROKEN_oe || (x) == BROKEN_ue || \
		     (x) == BROKEN_ss)
#else
#define isbroken(x) False
#endif

extern int memextract(char *,int,char *,int);

/*
 * "fgetr" reads record number "rec" of "plzFile" into the buffer "r".
 *
 * Plain files: the record is read with fgets, i.e. at most
 * recordLength - 1 characters followed by a NUL; if nothing can be read,
 * "r" is set to the empty string.
 *
 * Packed files: the block containing the record is looked up in the
 * ZIPBUFS block buffers of "plzFile"; if it is not there, it is read from
 * the file and unpacked with "memextract" into the next buffer (round
 * robin). Then rsize bytes are copied to "r" WITHOUT a terminating NUL.
 *
 * Allocation failures and short reads are fatal (exit (1)), like the short
 * read check that was already here.
 */
void fgetr (void *r, int rec, PlzFile *plzFile)
{
    int b, i, outsize, ilen;
    long packedLen;
    char *ibuf;
    static char *s_obuf = NULL;
    extern char *outbuf;
    extern char *outptr;

    if (!plzFile->zip)
    {
        /* Multiply in long: rec * recordLength may not fit into an int. */
        fseek (plzFile->f, (long) rec * plzFile->recordLength, SEEK_SET);
        if (fgets (r, plzFile->recordLength, plzFile->f) == NULL
            && plzFile->recordLength > 0)
            *(char *) r = '\0';
        return;
    }

    /* Work buffer handed to the unpacker via "outbuf"; allocated once. */
    if (s_obuf == NULL)
    {
        s_obuf = malloc (2048 + 1);
        if (s_obuf == NULL)
        {
            perror ("fgetr");
            exit (1);
        }
    }

    b = rec / plzFile->bsize;
    for (i = 0; i < ZIPBUFS; i++)
        if (b == plzFile->bufstat[i])
            break;

    if (i == ZIPBUFS)
    {
        /* Block is not cached: reuse the next buffer in turn. */
        if (++plzFile->nextbuf == ZIPBUFS)
            plzFile->nextbuf = 0;
        i = plzFile->nextbuf;
        if (plzFile->bufstat[i] < 0)
        {
            /* bufstat < 0 marks a buffer that was never allocated. */
            plzFile->obuf[i] =
                (char*) malloc (plzFile->rsize * plzFile->bsize);
            if (plzFile->obuf[i] == NULL)
            {
                perror ("fgetr");
                exit (1);
            }
        }
        plzFile->bufstat[i] = b;

        /* Unpacked size of block b: a full block, or for the last block
         * whatever remains from the block's FIRST record. Counting from
         * "rec" would make the size too small whenever "rec" is not the
         * first record of the last block.
         */
        outsize = (plzFile->rnum - b * plzFile->bsize) * plzFile->rsize;
        if (outsize > plzFile->rsize * plzFile->bsize)
            outsize = plzFile->rsize * plzFile->bsize;

        /* The input buffer must hold the packed block, whose length comes
         * from the index and is not bounded by the unpacked block size.
         */
        packedLen = plzFile->index[b + 1] - plzFile->index[b];
        if (packedLen < 0)
        {
            fputs ("corrupt block index\n", stderr);
            exit (1);
        }
        ibuf = malloc (packedLen > 0 ? (size_t) packedLen : 1);
        if (ibuf == NULL)
        {
            perror ("fgetr");
            exit (1);
        }

        fseek (plzFile->f, plzFile->index[b], SEEK_SET);
        ilen = fread (ibuf, 1, (size_t) packedLen, plzFile->f);
        if (ilen != packedLen)
        {
            perror ("short input file");
            exit (1);
        }

        outbuf = s_obuf;
        outptr = plzFile->obuf[i];
        memextract (outptr, outsize, ibuf, ilen);

        free (ibuf);
    }

    memcpy (r, (plzFile->obuf[i]) + (rec % plzFile->bsize) * plzFile->rsize,
            plzFile->rsize);
}




/*
 * "BinarySearch" looks for a record with the key "searchKey" among the
 * records "min" .. "max" and returns its record number, or -1 if there is
 * none.
 * After every access "BinarySearch" calls the function "NoteRecord" and
 * passes the record just read. If NoteRecord returns "False",
 * "BinarySearch" stops and returns -1.
 *
 * keyPos, keyLength: position and length of the key within a record.
 * refKeyPos:         position of the spelling with umlauts, or < 0 if the
 *                    full keyLength is to be compared.
 * searchWhat:        SearchFirst, SearchLast or SearchAny; any other value
 *                    is treated like SearchAny.
 * unequalMin/Max:    start out as min/max; every time the interval is
 *                    narrowed because of an unequal comparison, the bound
 *                    that is being replaced is stored here.
 *
 * "f" is not used; records are read through "fgetr" and "plzFile".
 */

long BinarySearch (FILE *f, int recordLength, long min, long max, int keyPos,
		   int keyLength, char *searchKey, long *unequalMin, 
		   long *unequalMax, int searchWhat, int refKeyPos,
		   Boolean NoteRecord (char *), PlzFile *plzFile)
{
    char *r;
    long probe;
    long hit = -1;
    int compare;
    int compareLength;

    *unequalMin = min;
    *unequalMax = max;

    /* An empty interval cannot contain a hit, and the loop below would
     * not terminate for it.
     */
    if (min > max)
        return -1L;

    r = (char *) malloc (recordLength + 1);
    if (r == (char *) 0)
    {
        OutOfMemory ("BinarySearch");
        return -1L;
    }
    /* fgetr does not terminate records of packed files. */
    r[recordLength] = '\0';

    probe = -1;
    do
    {
        /* (min + max) / 2 rounds down, so with two records left "max"
         * would never be examined; step onto it explicitly.
         */
        if (probe == min && max - min == 1)
            min = max;
        probe = (min + max) / 2;

        fgetr (r, probe, plzFile);

        if (NoteRecord (r) == False)
        {
            free (r);
            return -1L;
        }

        /* As with the place name entered by the user (see umsda.c), the
         * significant length ("compareLength") also has to be determined
         * for the place name read from the file "umsda". To make things
         * harder, umlauts are expanded in the key ("key"), which we have
         * to take into account ("specials"). For this we use the spelling
         * with umlauts ("refKey").
         */
        if (refKeyPos < 0)
            compareLength = keyLength;
        else
        {
            int specials = 0;
            int keyChars;
            unsigned char *c;
            unsigned char *refKey = (unsigned char *) (r + refKeyPos);
            unsigned char *key = (unsigned char *) (r + keyPos);

            /* Determine the key length. The bounds test comes first so
             * that no byte behind the key field is read.
             */
            for (c = key; c - key < keyLength && *c != ' '; c++)
                ;
            keyChars = c - key;

            /* Count the umlauts: */
            for (c = refKey;
                 c - refKey < keyChars
                 && (!(ispunct (*c)) || isbroken (*c));
                 c++)
            {
                if (*c == IBM_Ae || *c == IBM_Oe || *c == IBM_Ue
                || *c == IBM_ae || *c == IBM_oe || *c == IBM_ue
                || *c == IBM_ss
#if defined (HANDLE_BROKEN_DATAFILES)
                || *c == BROKEN_Ae || *c == BROKEN_Oe || *c == BROKEN_Ue
                || *c == BROKEN_ae || *c == BROKEN_oe || *c == BROKEN_ue
                || *c == BROKEN_ss
#endif
                )
                    specials++;
            }
            /* Every umlaut takes one character more in the key. */
            compareLength = c - refKey + specials;
            if (compareLength > keyLength)
                compareLength = keyLength;
        }

        compare = strncmp (searchKey, r + keyPos, compareLength);
        if (compare < 0)
        {
            *unequalMax = max;
            max = probe;
        }
        else if (compare > 0)
        {
            *unequalMin = min;
            min = probe;
        }
        else
        {
            hit = probe;
            switch (searchWhat)
            {
                case SearchFirst:
                    /* There may be more hits before this one. */
                    max = probe;
                    break;
                case SearchLast:
                    /* There may be more hits after this one. */
                    min = probe;
                    break;
                case SearchAny:
                default:
                    /* Any hit will do: ends the loop. */
                    max = min = probe;
                    break;
            }
        }
    }
    while (min != max);

    free (r);
    return hit;
}




/*
 * "BinarySearchAll" looks for all records with the key "searchKey". To do
 * so it first runs a binary search on the file with "BinarySearch". On a
 * hit it searches backwards for the first and then forwards for the last
 * occurrence, again with binary searches.
 *
 * The result is a newly allocated Range that the caller has to free; first
 * and last are both -1 if nothing was found. A null pointer is returned if
 * the Range cannot be allocated.
 */

Range *BinarySearchAll (FILE *f, int recordLength, long min, long max, 
			int keyPos, int keyLength, char *searchKey,
			int refKeyPos, Boolean NoteRecord (char *),
			PlzFile *plzFile)
{
    Range *result;
    long any;
    long unequalMin, unequalMax;
    long dummy;

    result = (Range *) malloc (sizeof (Range));
    if (result == (Range *) 0)
    {
        /* Report this function, not "BinarySearch". */
        OutOfMemory ("BinarySearchAll");
        return (Range *) 0;
    }
    result->first = -1;
    result->last = -1;

    /* Find any hit; unequalMin/unequalMax receive bounds recorded by
     * "BinarySearch" and limit the two searches below.
     */
    any = BinarySearch (f, recordLength, min, max, keyPos, keyLength,
                        searchKey, &unequalMin, &unequalMax, SearchAny,
                        refKeyPos, NoteRecord, plzFile);
    if (any == -1)
        return result;

    /* Look for the first record: */
    result->first = BinarySearch (f, recordLength, unequalMin, any, keyPos,
                                  keyLength, searchKey, &dummy, &dummy,
                                  SearchFirst, refKeyPos, NoteRecord,
                                  plzFile);

    /* Look for the last record: */
    result->last = BinarySearch (f, recordLength, any, unequalMax, keyPos,
                                 keyLength, searchKey, &dummy, &dummy,
                                 SearchLast, refKeyPos, NoteRecord,
                                 plzFile);

    /* "any" is a hit inside both intervals, so a miss here is reported as
     * a wrongly sorted data file.
     */
    if (result->first == -1L || result->last == -1L)
    {
        Message ("Fehler in Datenbank der Deutschen Bundespost Postdienst:\n");
        Message ("Sortierreihenfolge falsch.\n");
        result->first = -1;
        result->last = -1;
    }
    return result;
}
/* These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch. */