ftp.nice.ch/pub/next/unix/text/recode-3.4.s.tar.gz#/recode-3.4/charname.pl

This is charname.pl in view mode; [Download] [Up]

# Automatically derive charname.h from rfc1345.txt.
# Copyright (C) 1993, 1994 Free Software Foundation, Inc.
# Francois Pinard <pinard@iro.umontreal.ca>, 1993.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

# Create the generated header.  Everything below writes to HDR, so there
# is no point in going on if the file cannot be created.

open (HDR, '>', 'charname.h')
    or die "charname.pl: cannot create charname.h: $!\n";

# The banner goes through a non-interpolating here-document: it holds an
# e-mail address, and an interpolating one would expand "@iro" as an
# (empty) array, corrupting the address in the generated file.

print HDR <<'END_OF_TEXT';
/* DO NOT MODIFY THIS FILE!  It was generated by "charname.pl".  */

/* Conversion of files between different charsets and usages.
   Copyright (C) 1990, 1993 Free Software Foundation, Inc.
   Francois Pinard <pinard@iro.umontreal.ca>, 1993.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
END_OF_TEXT

# Save a few definitions added after or independently of RFC 1345.
# %charname maps a symbol to its name, %code counts the occurrences of
# each word of the names, and $max_length is the length of the longest
# name.  The latter is seeded from the name itself, so that it can never
# be smaller than a name which is actually emitted.

$charname{"f2"} = "florin";
$max_length = length ($charname{"f2"});
$code{"florin"}++;

# Read the character comments.  Count words in charnames.
#
# A useful input line holds a symbol, a lower case hexadecimal code and
# a name, separated by spaces.  The name may be continued on the next
# line, which then begins with a long run of spaces.  For each entry,
# the name is saved in %charname under its symbol, $max_length is
# raised if needed, and each word of the name is counted in %code.
# Lines which do not parse are reported on standard output, unless they
# mention code e000.

print STDERR "Reading...";

$_ = <>;
while (defined $_)
{
    # Use chomp rather than chop: a last line lacking its newline must
    # not lose its final character.

    chomp;

    # Look ahead one line and merge it if it should.  $next is undefined
    # once the input is exhausted.

    $next = <>;
    if (defined $next && $next =~ /^              ( .*)/)
    {
	$_ .= $1;
	$next = <>;
    }

    # Separate fields and save needed information.

    if (/([^ ]+) +[0-9a-f]+ +(.*)/)
    {
	$charname{$1} = $2;
	if (length ($2) > $max_length)
	{
	    $max_length = length ($2);
	}

	# Split on runs of white space, the same way names are split
	# again when they are written out, so that doubled spaces do not
	# create an empty word.

	foreach $word (split (' ', $2))
	{
	    $code{$word}++;
	}
    }
    elsif (!/ +e000/)
    {
	print "What about <<", $_, ">>?\n";
    }

    # Prepare for next line.

    $_ = $next;
}

# Establish a mild compression scheme.  Words $word[0] to
# $word[$singles-1] will be represented by a single byte running from
# 1 to $singles.  All remaining words will be represented by two
# bytes, the first one running slowly from $singles+1 to 255, the
# second cycling faster from 1 to 255.

print STDERR "Sorting words...";

@word = sort descending keys %code;
$count = 0 + @word;

# Even without any single, two bytes cannot name more than 255 * 255
# words, as none of the bytes may be zero.

if ($count > 255 * 255)
{
    die "charname.pl: too many distinct words ($count) for two bytes\n";
}

# The scheme offers $singles + (255 - $singles) * 255 codes, which has
# to reach $count; this yields the formula below.  When there are fewer
# words than the formula allows singles, every word is a single.

$singles = int ((255 * 255 - $count) / 254);
if ($singles > $count)
{
    $singles = $count;
}

# Transmit a few values for further usage by the C code.

print STDERR "and charnames...";

@symbol = sort keys %charname;

printf HDR "\n#define NUMBER_OF_SINGLES %d\n", $singles;
printf HDR "\n#define MAX_CHARNAME_LENGTH %d\n", $max_length;
printf HDR "\n#define NUMBER_OF_CHARNAMES %d\n", (0 + @symbol);

# Write the table of words, in @word order.  The octal values in the
# comment trailing each entry are the one or two bytes coding the word.
# That code also replaces the occurrence count in %code, for usage
# while writing charnames.  Words are printed in lower case, yet %code
# stays keyed by the original spelling.

print STDERR "Writing words...";

print HDR "\n";
print HDR "static const char *const word[$count] =\n";
print HDR "  {\n";

$char1 = 1;
$char2 = 1;

# Single byte codes.  Never run past the end of @word, even if $singles
# happens to be bigger than the number of words.

for ($counter = 0; $counter < $singles && $counter < $count; $counter++)
{
    $word = $word[$counter];
    $word =~ tr/A-Z/a-z/;
    printf HDR "    %-28s/* %0.3o */\n", "\"$word\",", $char1;
    $code{$word[$counter]} = $char1;
    $char1++;
}

# Two byte codes.  $char1 goes on from where the singles stopped, and
# is incremented each time $char2 wraps from 255 back to 1.

for (; $counter < $count; $counter++)
{
    # A first byte over 255 does not fit an octal escape of the C string.

    if ($char1 > 255)
    {
	die "charname.pl: too many words ($count) for a two byte code\n";
    }

    $word = $word[$counter];
    $word =~ tr/A-Z/a-z/;
    printf HDR "    %-28s/* %0.3o %0.3o */\n", "\"$word\",", $char1, $char2;
    $code{$word[$counter]} = 256 * $char1 + $char2;
    if ($char2 == 255)
    {
	$char1++;
	$char2 = 1;
    }
    else
    {
	$char2++;
    }
}
print HDR "  };\n";

# Print compressed charnames for all characters.  Each entry pairs a
# symbol with its name, the latter being written as the sequence of
# octal escapes coding its words: one escape when the code of a word is
# below 256, two escapes (high byte, then low byte) otherwise.

print STDERR "and charnames...";

print HDR "\n";
print HDR "struct charname\n";
print HDR "  {\n";
print HDR "    const char *symbol;\n";
print HDR "    const char *crypted;\n";
print HDR "  };\n";

print HDR "\n";
print HDR "static const struct charname charname[NUMBER_OF_CHARNAMES] =\n";
print HDR "  {\n";

foreach $symbol (@symbol)
{
    # The symbol lands in a C string literal, in which both the double
    # quote and the backslash need a protecting backslash.

    $string = $symbol;
    $string =~ s/(["\\])/\\$1/g;
    print HDR "    {\"$string\", \"";
    foreach $word (split (' ', $charname{$symbol}))
    {
	$code = $code{$word};
	if ($code < 256)
	{
	    printf HDR "\\%0.3o", $code;
	}
	else
	{
	    printf HDR "\\%0.3o\\%0.3o", int ($code / 256), $code % 256;
	}
    }
    print HDR "\"},\n";
}

print HDR "  };\n";

# Output is buffered, so a write error (a full disk, say) might show up
# only while closing.  Announce completion once the header is safely
# written, and not before.

close (HDR) or die "charname.pl: error while writing charname.h: $!\n";

print STDERR "done\n";
exit 0;

# Sort comparator: the key having the bigger value in %code comes first,
# and keys having equal values are ordered by string comparison.

sub descending
{
    my $delta = $code{$b} - $code{$a};

    return $delta if $delta != 0;
    return $a cmp $b;
}

These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.