charset.pl

This is charset.pl in view mode; [Download] [Up]
# Automatically derive charset.c from rfc1345.txt.
# Copyright (C) 1993, 1994 Free Software Foundation, Inc.
# Francois Pinard <pinard@iro.umontreal.ca>, 1993.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

$overall_header = <<END_OF_TEXT;
/* DO NOT MODIFY THIS FILE!  It was generated by "charset.pl".  */

/* Conversion of files between different charsets and usages.
   Copyright (C) 1990, 1993 Free Software Foundation, Inc.
   Francois Pinard <pinard@iro.umontreal.ca>, 1993.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include "recode.h"
END_OF_TEXT

$split_point = 80;

$charset_ordinal = 0;
$discard_charset = 0;
$alias_count = 0;

$header = "";

while (<>)
{

    # Recognize `&charset'.

    if (/^&charset (.*)/)
    {

	# Before beginning a new charset, process the previous one.

	$next_charset = $1;

	&complete_charset;

	$charset = $next_charset;

	# Save the charset name for further declaration.  Announce
 	# this charset in the array initialization section, and
 	# initialize its processing.

	print STDERR $charset, "...";

	$header = "\n/* $charset\n";

	$hashname = $charset;
	$hashname =~ tr/A-Z/a-z/;
	$hashname =~ s/[^a-z0-9]//g;
	if ($used{$hashname})
	{
	    printf STDERR " duplicate of %s...", $used{$hashname};
	    $discard_charset = 1;
	    next;
	}
	$used{$hashname} = $charset;

	$alias_count = 0;
	@table = ();
	$codedim = 0;
	$code = 0;
	$list = "";
	$body = "";
	next;
    }

    # Ignore discarded charsets, and character set escapes.

    next if $discard_charset;
    next if /^&g[0-4]esc/;

    # Recognize other `&' directives.

    if (/^&rem (.*)/)
    {

	# Save C comments for Texinfo.

	$body .= "$1\n";
	next;
    }
    if (/^&alias (.*)/)
    {

	# Save synonymous charset names for later declarations.

	$alias = $1;
	$header .= "   $alias\n";

	$hashname = $alias;
	$hashname =~ tr/A-Z/a-z/;
	$hashname =~ s/[^a-z0-9]//g;
	if ($used{$hashname} && $used{$hashname} ne $charset)
	{
	    printf STDERR " duplicate of %s...", $used{$hashname};
	    next;
	}
	$used{$hashname} = $charset;

	$list .= "," if $list;
	$list .= $alias;
	push (@declare_alias, "$alias,$charset");
	$alias_count++;
	next;
    }
    if (/^&code (.*)/)
    {

	# Save the code position.

	$code = $1;
	next;
    }
    if (/^&duplicate/)
    {

	# Ignore duplicates for now.

	next;
    }
    if (/^&([^ ]+)/)
    {

	# This is an unrecognized & line, discard the charset.

	print STDERR " &$1...";
	$discard_charset = 1;
	next;
    }

    # Save all other tokens into the double table.

    foreach $token (split)
    {
	if ($token ne "??" && $token ne "__")
	{
	    $table[$code] = $token;
	    if (length ($token) > $codedim)
	    {
		$codedim = length ($token);
	    }
	}
	$code++;
    }
}

# Process the last accumulated charset.

&complete_charset;
&complete_file (2);

# Print the documentation.

open (TEXI, ">charset.texi");
for $charset (sort keys %body)
{
    print TEXI "\n@item $charset\n";
    @list = sort (split (/,/, $list{$charset}));
    if (@list == 1)
    {
	print TEXI "@code{", $list[0], "} is an alias for this charset.\n";
    }
    elsif (@list > 0)
    {
	$string = "@code{" . join ("}, @code{", @list) . "}";
	$string =~ s/,([^,]+)$/ and\1/;
	print TEXI $string, " are aliases for this charset. \n";
    }
    print TEXI $body{$charset};
}
close TEXI;

exit 0;

# Routine for printing all accumulated initialization information, and
# then closing the file.  Argument is 1 or 2 depending on output file.

sub complete_file
{

    # Print the collectable initialization function.

    print SRC "\n";
    print SRC "void\n";
    printf SRC "module_charset%d (void)\n", $_[0];
    print SRC "{\n";
    $counter = $_[0] == 1 ? 0 : $split_point;
    while ($string = shift @declare_charset)
    {
	$string =~ s/(.*),/"\1", /;
	print SRC "  declare_double_step (&table_$counter, $string);\n";
	$counter++;
    }
    print SRC "\n";
    while ($string = shift @declare_alias)
    {
	$string =~ s/,/", "/;
	print SRC "  declare_alias (\"$string\");\n";
    }
    print SRC "}\n";
    close SRC;
}

# Routine for printing all accumulated information for the charset.
# If the charset should be discarded, adjust tables.

sub complete_charset
{
    if ($discard_charset)
    {
	while ($alias_count-- > 0)
	{
	    pop @declare_alias;
	}
	$discard_charset = 0;
	print STDERR " DISCARDED\n";
    }
    elsif ($header)
    {

	# Save the documentation.

	$list{$charset} = $list;
	$body{$charset} = $body;

	# Open a new source file if needed.

	if ($charset_ordinal == 0)
	{
	    print STDERR " writing on charset1.c...";
	    open (SRC, ">charset1.c");
	    print SRC $overall_header;
	}
	elsif ($charset_ordinal == $split_point)
	{
	    &complete_file (1);
	    
	    print STDERR " writing on charset2.c...";
	    open (SRC, ">charset2.c");
	    print SRC $overall_header;
	}

	# Make introductory C comments.

	print SRC $header;
	print SRC "*/\n";

	# Make the table for this charset.

	print SRC "\n";
	print SRC "static DOUBLE_TABLE table_$charset_ordinal = \n";
	print SRC "  {\n";
	$code = 0;
	for ($index1 = 0; $index1 < 8; $index1++)
	{
	    $header = "";
	    $flag = 0;
	    for ($index2 = 0; $index2 < 32; $index2++)
	    {
		$token = $table[$code++];
		$flag = 1 if $token;
		$token .= " " while length ($token) < $codedim;
		$header .= $token;
	    }
	    if ($flag)
	    {
		$header =~ s/\"/\\\"/g;
		print SRC "    \"", $header, "\",\n";
	    }
	    else
	    {
		print SRC "    NULL,\n";
	    }
	}
	print SRC "  };\n";

	# Register the table.

	push (@declare_charset, "$charset,$codedim");

	print STDERR " done\n";
	$charset_ordinal++;
    }
    $header = "";
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.