pack.pl

This is pack.pl in view mode; [Download] [Up]
#!/usr/local/bin/perl
eval "exec /usr/local/bin/perl $0 $*"
    if $running_under_some_shell;
	# this emulates #! processing on NIH machines.

# Perl-Script zur Umstellung alter Postleitzahlen auf neue.
# Hilfsprogramm zum Packen der Postdateien und Erzeugen von Indexdateien
#
#############################################################################
#    Copyright (C) 1993
#    Andreas Koppenhoefer (koppenh@dia.informatik.uni-stuttgart.de)
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#############################################################################
# @(#)$Id: pack.pl,v 1.3 1993/07/01 13:26:25 koppenh Exp $
# $Log: pack.pl,v $
#    Revision 1.3  1993/07/01  13:26:25  koppenh
#    Korrektur fuer Datensaetze die ';' oder ':' enthalten - seithert wurden
#    sie zerstuemmelt;
#
#    Revision 1.2  1993/06/15  15:38:36  koppenh
#    Neue Release Nummer;
#
#    Revision 1.1.0.1  1993/06/08  19:12:47  koppenh
#    fehlendes '$'-Zeichen vor Variablennamen korrigiert;
#    pofada.index wird jetzt richtig aufgebaut, auch wenn pofada gepackt ist;
#
#############################################################################

# environment variblen:
#	Daten werden gelesen von $PLZHOME/../daten und nach $PLZHOME/daten
#	  geschrieben.
#	$TMPDIR bestimmt ein Verzeichnis, auf dem viel temporaerer Platz
#	  vorhanden ist (> 16 MByte).

$homedir = (getpwnam('plz'))[7];
push (@INC, $homedir) if $homedir;
$homedir = $ENV{'PLZHOME'} if $ENV{'PLZHOME'} && -d $ENV{'PLZHOME'};
$homedir =~ s,([^/])$,\1/,;	# anhaengen von '/' falls nicht schon vorhanden

require 'common.pl';
require 'abkuerzung.pl';
require 'umlaut.pl';

$tmpdir = $ENV{'TMPDIR'};	# temp-dir for sort command
$tmpdir = '/tmp' if (! $tmpdir || ! -d $tmpdir);
$tmpdir =~ s,([^/])$,\1/,;	# anhaengen von '/' falls nicht schon vorhanden

%pack_extension = ('.Z',  'compress -d -c',
		   '.z',  'gzip -d -c',
		   '.gz', 'gzip -d -c');

select(STDOUT); $| = 1;

mkdir ($localprefix, 0755)
    || die "cannot create local data directory\nmkdir($localprefix): $!"
    if (! -d $localprefix);	# 

nextfile:
foreach $file ($globalprefix.$umsdafile, $globalprefix.$ortardafile,
	       $globalprefix.$stradafile, $globalprefix.$pofadafile,
#	       $globalprefix.$sthnrarfile,
	       $localprefix.'.', $tmpdir.'.') {
    $file_reader{$file} = "<$file";
    next nextfile if (-r $file);
    if ($file !~ m,/\.$,) {
	foreach $ext (keys(%pack_extension)) {
	    if (-r $file.$ext) {
		$file_reader{$file} = "$pack_extension{$ext} $file$ext|";
		next nextfile;
	    }
	}
    }
    print "cannot access $file\n";
    $error++;
}
die 'fatal error, aborted' if ($error);

# sthnrar - Ich habe noch keine Ahnung, wie ich die Informationen aus dieser
# Datei nutzen kann.

# print "reading $sthnrarfile:\n";
# open (STHNRAR, "<$globalprefix$sthnrarfile") || die "$!"; 
# while (<STHNRAR>) {
#     chop;
#     print "  $.\r" unless $.%200;
#     y/\r://d;
#     if (!(($kennz, $plzneu, $geschlneu, $nortname, $nortsteil,
# 	   $nstrname, $nhausnr, $nhausnrzus, $geschlalt, $rest)
# 	  = m/^(\d)(\d{4})(\d{8})(.{40})(.{30})(.{40})(...)(.)(\d{8})(.*)$/)) {
# 	die "$sthnrarfile line parse error, aborted";
#     }
#     
# }
# print "  $. lines read\n";

if ($system_has_flock && $lockfile) {
    if (-r $lockfile) {
	open (LOCKFILE, "<$lockfile") || die "$lockfile: $!\naborted";
	die "database files busy (locked), aborted"
	    unless flock (LOCKFILE, &LOCK_EX|&LOCK_NB);
	unlink ($lockfile) || die "cannot unlink $lockfile: $!\naborted";
	print "lockfile '$lockfile' removed.\n";
    } else {
	print "lockfile '$lockfile' not found:\n\tassuming first run.\n";
    }
}

if (-r $globalprefix.$pofadafile) {
    print "linking $globalprefix$pofadafile\n ->\t$localprefix$pofadafile\n";
    unlink ($localprefix.$pofadafile)
	|| die "cannot unlink $localprefix$pofadafile: $!\naborted"
	    if (-r $localprefix.'pofada');
    link ($globalprefix.$pofadafile, $localprefix.$pofadafile)
	|| die 'cannot link '.
	    "$globalprefix$pofadafile -> $localprefix$pofadafile: $!\naborted";
} else {
    print "copying $globalprefix$pofadafile\n ->\t$localprefix$pofadafile\n";
    open (POFOUT, ">$localprefix$pofadafile") || die "$!";
    $copy_pofada++;
}

    
print "creating pofada index:\n";
open (POFADA, $file_reader{$globalprefix.$pofadafile}) || die "$!";
open (PINDEX, ">$localprefix$pofadafile.index") || die "$!";

$lastpos = $last = $ind_count = 0;
while (<POFADA>) {
    next if (($l = length) < 2);
    print "  $.\r" unless $.%500;
    ($ala, $npanst) = m/^(\d{8})(.{3})/;
    if ($last ne $ala) {
	die "pofada: unexpected line order ($lala:$ala),\naborted"
	    if ($last gt $ala);
	print PINDEX "$ala $lastpos\n";
	$ind_count++;
	$last = $ala;
    }
    if ($copy_pofada) {
	s/\r//;			# chop trailing ^M
	print POFOUT $_;
	$lastpos = tell(POFOUT);
    } else {
	$lastpos = tell(POFADA);
    }
    die "tell(pofada): $!" if $lastpos < 0;

}
print "  $ind_count lines index\n";
close PINDEX || die "$!";
close POFADA || die "$!";
close POFOUT || die "$!" if ($copy_pofada);

print "packing strada:\n";
open (STRADA, $file_reader{$globalprefix.$stradafile}) || die "$!";
open (SPACK,  "| sort -T $tmpdir > $localprefix$stradafile.packed") || die "$!";

while (<STRADA>) {
    next if (($l = length) < 2);
    print "  $.\r" unless $.%200;
    y/\r//d;
    warn "Datensatz enthaelt unerwartete Sonderzeichen - ersetzt durch Leerzeichen\n"
	if (y/;:/ /);
    if (!(($alort, $snamesort, $sname, $abkstr, $anzgleist, $nplz_z,
	   $hnrkenn, $hnr1von, $hnr2von, $hnr1bis, $hnr2bis, $plz_z, $rest) =
	  m/^(.{8}).{8}(.{46})..(.{46})(.{23})(\d)(\d)(.)(....)(....)(....)(....)(\d{5})(.*)/)) {
	die "strada: line parse error, aborted";
    }
    $npanst = substr($rest,61,3);
    $ortsteil = substr($rest,16,40);
    $plzalt = substr($rest,56,5);

    # Ein neues Sortierfeld - das vorgegebene stimmt nicht immer!
    $sort = &iso2umlaut(&ibm2iso(&ascii2iso($sname)));
    $sort =~ y/a-z/A-Z/;
    $sort =~ y/A-Z0-9//dc;
    $sort =~ s/STRASSE$/STR/;

    $packed = join(':', $alort, $sort, $sname, $abkstr, $anzgleist, $nplz_z,
		   $hnrkenn, $hnr1von, $hnr2von, $hnr1bis, $hnr2bis, $plz_z,
		   $npanst,$ortsteil,$plzalt);
    $packed =~ s/ +:/:/g;
    $packed =~ s/: +/:/g;
    $packed =~ s/ *$//;
    # umlaute nach iso umformen
    $packed = &ibm2iso(&ascii2iso($packed."\n"));
    $lastpos += length($packed);
    print SPACK $packed;
}
print "  $. lines read and packed.\n";

close SPACK || die "$!";
close STRADA || die "$!";

print "creating strada index:\n";
open (SPACK, "<$localprefix$stradafile.packed") || die "$!";
open (SINDEX,">$localprefix$stradafile.index") || die "$!";

$lastpos = $ind_count = 0; $lastkey = '';
while (<SPACK>) {
    chop;
    s/^(\d{8}):(\w).*/\1\2/;
    if ($lastkey ne $_) {
	$lastkey = $_;
	print SINDEX "$lastkey $lastpos\n";
	print "  $.\r" unless $ind_count++ % 100;
    }
    $lastpos = tell;
}
print "  $ind_count lines index\n";
close SINDEX || die "$!";
close SPACK || die "$!";

print "packing umsda:\n";

open (UMSDA, $file_reader{"$globalprefix$umsdafile"}) || die "$!";
open (UPACK,
      "| sort -T $tmpdir -t: +1 -2 +0 -1 +2n -3 >$localprefix$umsdafile.packed")
    || die "$!";

while (<UMSDA>) {
    y/\r//d;
    next if (($l = length) < 2);
    print "  $.\r" unless $.%200;
    die "umsda: line length error ($l), aborted"
	if ($l != 308 && $l != 303);
    warn "Datensatz enthaelt unerwartete Sonderzeichen - ersetzt durch Leerzeichen\n"
	if (y/;:/ /);
    if (!(($ala, $satzalt, $plz_w_o, $plzalt, $ortname_a, $ortzusa,
	   $oname_sort_a, $geschl, $nplzo_z, $nplzo_p, $npanst_a,
	   $zpanst_a, $nzpanst_a, $alort, $ortname_n,
	   $oname_sort_n, $oname24_n, $nplz_z, $plz_z, $nplz_p, $plz_p) =
	  m/^(.{8})(...)(.)(....)(.{40})(.{30})(.{40})(.{8}).(.)(.)(...)(.{40})(...)(.{8})(.{40})(.{40})(.{24})(.)(.{5})(.)(.*)/)) {
	die "umsda: line parse error, aborted";
    }
    $plz_p =~ y/[0-9]/[0-9]/cd;

    # Falls ein Strassenverz. existiert, aber keine Zustell-PLZ zur Postanst.
    ($nplz_z, $plz_z) = ($nplzo_z, '')
	if ($nplzo_z > 1 && $nplz_z < 1);
    # Falls ein Postfachverz. existiert, aber keine Postfach-PLZ zur Postanst.
    ($nplz_p, $plz_p) = ($nplzo_p, '')
	if ($nplzo_p > 1 && $nplz_p < 1);

    # Ortsabkuerzung weglassen, falls identisch
    $oname24_n = '' if ($oname24_n eq substr($ortname_n, $[, 24));
    # ... ebenso neuer Ortsname
    $ortname_n = '' if ($ortname_n eq $ortname_a);

    # Ein neues Sortierfeld - das vorgegebene stimmt nicht immer!
    $sort = &iso2umlaut(&ibm2iso(&ascii2iso($ortname_a)));
    $sort =~ y/a-z/A-Z/;
    $sort =~ y/A-Z0-9//dc;

    # Wortabkuerzungen in Ortszusatz
    $ortzusa = &abkuerzung($ortzusa);
    $packed = join(':',($plz_w_o.$plzalt, $sort, $npanst_a, $ortname_a,
			$ortzusa, $alort, $geschl, $nplz_z,
			$plz_z, $ortname_n, $nplz_p, $plz_p, $oname24_n));
    $packed =~ s/ +:/:/g;
    $packed =~ s/ +$//;
    # umlaute nach iso umformen
    $packed = &ibm2iso(&ascii2iso($packed));
    print UPACK $packed."\n";
}
print "  $. lines read and packed.\n";
close UMSDA || die "$!";

print "packing ortarda, appending to umsda:\n";
open (ORTARDA, $file_reader{"$globalprefix$ortardafile"}) || die "$!";
$bad_lines = 0;
while(<ORTARDA>) {
    print "  $.\r" unless $.%200;
    y/\r//d;
    warn "Datensatz enthaelt unerwartete Sonderzeichen - ersetzt durch Leerzeichen\n"
	if (y/;:/ /);
    if (!(($ala, $plz_w_o, $plzalt, $ortname_a, $ortname_sort_a, $npanst_a,
	   $zpanst_a, $nzpanst_a, $ae_datum, $alort, $ortname_n, $oname_sort_n,
	   $oname24_n, $nplz_z, $plz_z, $nplz_p, $plz_p) =
	  m/^(.{8})...(.)(\d{4})(.{40})(.{40})(...)(.{40})(...)(....)(\d{8})(.{40})(.{40})(.{24})(\d)(.{5})(.)(.*)/)) {
	warn "ignoring bad line";
	die 'too many errors, aborted' if ($bad_lines++ > 32);
	next;
    }
    $plz_p =~ y/[0-9]/[0-9]/cd;

    # Ein neues Sortierfeld - das vorgegebene stimmt nicht immer!
    $sort = &iso2umlaut(&ibm2iso(&ascii2iso($ortname_a)));
    $sort =~ y/a-z/A-Z/;
    $sort =~ y/A-Z0-9//dc;
    
    # Ortsabkuerzung weglassen, falls identisch
    $oname24_n = '' if ($oname24_n eq substr($ortname_n, $[, 24));
    $ortname_n = '' if ($ortname_n eq $ortname_a);

    $packed = join(':',($plz_w_o.$plzalt, $sort, $npanst_a, $ortname_a,
			'', $alort, '', $nplz_z,
			$plz_z, $ortname_n, $nplz_p, $plz_p, $oname24_n));
    $packed =~ s/ +:/:/g;
    $packed =~ s/: +/:/g;
    $packed =~ s/ *$//;
    # umlaute nach iso umformen
    $packed = &ibm2iso(&ascii2iso($packed."\n"));
    print UPACK $packed;
}
print "  $bad_lines bad lines removed.\n" if ($bad_lines);
print "  $. lines read and packed.\n";
close ORTARDA || die "$!";
close UPACK || die "$!";

print "creating umsda index:\n";
open (UPACK, "<$localprefix$umsdafile.packed") || die "$!";
open (UPIND, ">$localprefix$umsdafile.index") || die "$!";

$lastpos = 0; $lastkey = ''; $indexline = 0; 
$lastkey_4 = ''; $index_4 = '';
$lastkey_5 = ''; $index_5 = '';

while (<UPACK>) {
    $pos = tell;
    ($plza, $oname_sort_a, $rest) = split (/:/, $_, 3);
    if ($last ne $oname_sort_a) {
	die "umsda.packed: unexpected line order ($last-$oname_sort_a),\naborted"
	    if ($last gt $oname_sort_a);
	$last = $oname_sort_a;
    }
    $key   = substr($oname_sort_a, 0, 3);
    $key_4 = substr($oname_sort_a, 0, 4);
    $key_5 = substr($oname_sort_a, 0, 5);
    if ($lastkey ne $key) {
	if ($. > $indexline+500) {
	    print  UPIND $index_5;
	    $ind_count += $ind_count_5;
	} else {
	    if ($. > $indexline+200) {
		print  UPIND $index_4 ;
		$ind_count += $ind_count_4;
	    }
	}
	printf UPIND "%s %d\n", $key, $lastpos;
	$lastkey = $key;
	$lastkey_4 = $key_4;
	$index_4 = '';
	$ind_count_4 = 0;
	$lastkey_5 = $key_5;
	$index_5 = '';
	$ind_count_5 = 0;
	$indexline = $.;
	$ind_count++;
    } else {
	if ($lastkey_4 ne $key_4) {
	    $lastkey_4 = $key_4;
	    $index_4 .= sprintf("%s %d\n", $key_4, $lastpos);
	    $ind_count_4 ++;
	}
	if ($lastkey_5 ne $key_5) {
	    $lastkey_5 = $key_5;
	    $index_5 .= sprintf("%s %d\n", $key_5, $lastpos);
	    $ind_count_5 ++;
	}
    }
    print "  $.\r" unless $.%250;
    $lastpos = $pos;
}
print "  $ind_count lines index.\n";
close UPIND || die "$!";
close UPACK || die "$!";

if ($system_has_flock && ! -r $lockfile) {
    `touch $lockfile`;
    print "lockfile created.\n";
}

print "finished.\n";
exit(0);
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.