reformat.pl

This is reformat.pl in view mode; [Download] [Up]
#!/usr/bin/perl -w
#
# Written by: Sean Ahern (ahern@llnl.gov)
#
# This is a reimplementation, as a filter, of the vile paragraph reformatter.
#
# USAGE:  reformat [-w width] [prefix]
# It reads stdin and outputs a reformatted paragraph on stdout.
#
# The vile macro I use to run this is:
#
# ; Format a paragraph
# 1 store-macro
#     set-variable %savcol $curcol
#     set-variable %savline $curline
#     ~if &equal $char &ascii ">"
#         set-variable %pre @"Prefix string: "
#     ~else
#         set-variable %pre ""
#     ~endif
#     ~force filter-til forward-paragraph &cat "reformat -w78 " %pre
#     ~force %savline goto-line
#     ~force %savcol goto-column
# ~endm
# bind-key execute-macro-1 ^P
#
# It does the standard stuff of filtering a paragraph, but it will also prompt
# you for a prefix string if your cursor is on a ">".  I probably should put in
# math to calculate the width from the wrapmargin or the fill-col, rather than
# hardcoding "78", but I'll leave that as an exercise for the reader.
#
# Here it is...  Enjoy!


# getopt.pl is a Perl 4 library that is no longer shipped with perl (it was
# removed from the core distribution in 5.16), so the equivalent core module
# Getopt::Std is used instead.
use Getopt::Std;

$debug = 0;
print "Debugging messages turned on.\n" if ($debug == 1);

# Closing quotes and brackets that may follow sentence-ending punctuation.
# Extra slashes because this string is used inside regular-expression
# character classes.
$endings = "'\\\"\\\`\\)\\]}";

# Get the width.  If none is specified, or the value given is not a positive
# integer, default to 72.  A bad value only produces a warning on STDERR so
# that the filter still emits the reformatted paragraph.
$opt_w = 72;
{
    my %option;

    # "w" takes an argument; getopt() removes the switches it recognises
    # from @ARGV and leaves the remaining arguments in place.
    getopt("w", \%option);

    if (exists($option{"w"}))
    {
        if (defined($option{"w"}) &&
            ($option{"w"} =~ /^\d+$/) &&
            ($option{"w"} > 0))
        {
            $opt_w = $option{"w"};
        } else
        {
            print STDERR "reformat: ignoring invalid width; using $opt_w\n";
        }
    }
}
$save_width = $opt_w;

# If we have an argument left, use it as the prefix name, otherwise "".
$name = (scalar(@ARGV) >= 1) ? shift(@ARGV) : "";

# Split the front of a line into its indentation pieces.
#
# Takes one line of text and returns a three-element list:
#   [0] the whitespace at the very start of the line,
#   [1] the marker that follows it: a run of "#" and/or "*", a "/*", or one
#       or more ">" (each optionally preceded by whitespace); "" when the
#       line has no such marker,
#   [2] the whitespace immediately after the marker ("" when there is no
#       marker).
sub get_indent
{
    my($text) = @_;
    my(@pieces) = ("", "", "");

    if ($text =~ m{^(\s*)([#\*]+|/\*|(\s*>)+)(\s*)})
    {
        # A marker is present; group 3 is only the repeated ">" helper, so
        # the trailing whitespace is group 4.
        @pieces = ($1, $2, $4);
    } else
    {
        # No marker: only the leading whitespace is reported.
        $text =~ m{^(\s*)};
        $pieces[0] = $1;
    }

    return(@pieces);
}

# A name counts as given only if it contains something other than whitespace.
$have_name = ($name =~ /^\s*$/) ? 0 : 1;

# At the start, we haven't printed anything out.
$length = 0;
$printed_word = 0;
$first_word = 1;
$indent_characters = "";
$C_comment = 0;
$output_line = "";

# Read in all of the input
@lines = <STDIN>;

# With no input there is no paragraph to format.  Stop here instead of
# running the rest of the script on an undefined first line, which would
# raise "uninitialized value" warnings and print a stray line.
if (scalar(@lines) == 0)
{
    exit(0);
}

# Check to see if we already have something of the form "name>" on the first
# line.  If so, that name replaces any name given on the command line, and
# the name is removed from the front of every line, leaving a bare ">".
if ($lines[0] =~ /^\s*(\w+)>/)
{
    $name = $1;
    $have_name = 1;
    for my $quoted (@lines)
    {
        # Anchored at the start of the line (after optional whitespace) so
        # that a "name>" appearing later in the text is left untouched.
        $quoted =~ s/^(\s*)\Q$name\E>/$1>/;
    }
}

# Process the first line for spaces and indents.  get_indent returns the
# leading whitespace, the marker characters (if any) and the whitespace that
# follows the marker.
($first_indent[0],$indent_characters,$first_indent[1]) = &get_indent($lines[0]);
print "First indent 0: \"$first_indent[0]\"\n" if ($debug == 1);
print "First indent 1: \"$first_indent[1]\"\n" if ($debug == 1);

# Remember whether the first line carried a marker at all.
if ($indent_characters !~ /^\s*$/)
{
    print "We have an indent of \"$indent_characters\".\n" if ($debug == 1);
    $have_indent = 1;
} else
{
    $have_indent = 0;
}

# If we do have a name, the marker becomes "name>", whatever marker the
# first line started with.
if ($have_name == 1)
{
    $indent_characters = ">";
    $indent_characters = "$name$indent_characters";
    $have_indent = 1;
}

# Set up the width for use on the first line.  We'll reset it for later
# lines.
$width = $save_width;

# Adjust the width to accommodate the marker; $save_width becomes the width
# used for every line after the first.
# NOTE(review): the wrapping loop further down also adds
# length($indent_characters) to $length on each continuation line, so the
# marker appears to be charged against the width twice there -- confirm that
# this is intended.
$save_width -= length($indent_characters);

# If we have more than one line...
if (scalar(@lines) > 1)
{
    # Process the second line for spaces.  Its marker is discarded, but a
    # marker of one or two characters is replaced by the same number of
    # blanks appended to the leading indent.
    ($second_indent[0],$foobar,$second_indent[1]) = &get_indent($lines[1]);
    $second_indent[0] .= " " if (length($foobar) == 1);
    $second_indent[0] .= "  " if (length($foobar) == 2);
    $foobar = "";
} else
{
    # Only one line of input: later lines are indented like the first.
    $second_indent[0] = $first_indent[0];
    $second_indent[1] = $first_indent[1];
}

# If we have a newline for the second indent, use the first indent.  (\s*
# also matches the line terminator, so this happens when the whitespace run
# reaches the end of the line.)
$second_indent[0] = $first_indent[0] if ($second_indent[0] =~ /\n/);
$second_indent[1] = $first_indent[1] if ($second_indent[1] =~ /\n/);

print "Second indent 0: \"$second_indent[0]\"\n" if ($debug == 1);
print "Second indent 1: \"$second_indent[1]\"\n" if ($debug == 1);

# If we have a newline for the first indent, just use the second indent.
$first_indent[0] = $second_indent[0] if ($first_indent[0] =~ /\n/);
$first_indent[1] = $second_indent[1] if ($first_indent[1] =~ /\n/);

# The adjustments below only apply when a marker will be printed.
if (length($indent_characters) != 0)
{
    # Adjust the first indent if we have less of an indent on the second
    # line: the part of the first line's leading whitespace that extends past
    # the second line's indent replaces the first line's post-marker
    # whitespace, and both lines then share the shorter leading indent.
    if (length($first_indent[0]) > length($second_indent[0]))
    {
        $first_indent[1] = substr($first_indent[0],length($second_indent[0]));
        $first_indent[0] = $second_indent[0];
    }

    # Adjust the second indent.  Whatever part of the second line's leading
    # whitespace lies beyond the first indent plus the marker (not counting
    # the name) becomes the whitespace printed after the marker on
    # continuation lines.
    if (length($second_indent[0]) >=
        (length($first_indent[0]) +
         length($indent_characters) -
         length($name)
        )
       )
    {
        $test = substr($second_indent[0],
                                   length($first_indent[0])+
                                   length($indent_characters)-
                                   length($name));
        # An empty remainder leaves the existing post-marker whitespace
        # unchanged.
        if (length($test) != 0)
        {
            $second_indent[1] = $test;
        }
    }

    # Continuation lines start in the same column as the first line.
    $second_indent[0] = $first_indent[0];
}

# Start the output with the indent for the first line.  It's different from
# the rest, most likely.  $length tracks how many columns of the current
# output line are already used.
if ($have_indent == 1)
{
    $output_line .= "$first_indent[0]$indent_characters$first_indent[1]";
    $length = length($first_indent[0])+length($indent_characters)+
              length($first_indent[1]);
} else
{
    $output_line .= "$first_indent[0]";
    $length = length($first_indent[0]);
}

# Here we go: walk the input lines, strip each line's marker, and re-flow
# its words into $output_line, printing a line whenever the next word would
# not fit.
for($count = 0; defined($lines[$count]); $count++)
{
    $_ = $lines[$count];

    # Get rid of the $indent_characters if they exist at the front of the
    # string.  The marker (and the name) are quoted before being placed in
    # a pattern so that they are matched literally: markers such as "***"
    # or a name containing regex metacharacters would otherwise produce an
    # invalid or wrong pattern.
    if ($have_indent == 1)
    {
        if ($have_name == 1)
        {
            # Input lines carry the marker without the name in front.
            $temp_indent = $indent_characters;
            $temp_indent =~ s/^\Q$name\E//;
            $temp_indent = quotemeta($temp_indent);

            s/^\s*($temp_indent ?)*\s*//;
        } else
        {
            # Once inside a C comment, a line that starts with "*/" keeps
            # its text so that the terminator is not stripped.
            if (($C_comment == 0) || (! m|^\s*\*/|))
            {
                $temp_indent = $indent_characters;
                $temp_indent =~ s/^\s*//;
                $temp_indent = quotemeta($temp_indent);
                s/^\s*($temp_indent ?)*\s*//;
            }
        }
    }

    # Get rid of any excess white space in all cases.
    s/^\s*//;
    s/\s*$//;

    # Split the line into words by whitespace.
    for $word (split(/\s+/))
    {
        # If printing out the current word would push us over the $width, do
        # a line break.
        if ($length+length($word) > $width)
        {
            # Print out the line so far, after trimming trailing blanks.
            $output_line =~ s/\s*$//;
            print "$output_line\n";

            # Reset the line.  Every line after the first uses $save_width.
            $output_line = "";
            $length = 0;
            $first_word = 1;
            $width = $save_width;

            # Start the new line with the continuation indent and marker.
            $output_line .= "$second_indent[0]$indent_characters$second_indent[1]";
            $length += length($second_indent[0])+
                       length($indent_characters)+
                       length($second_indent[1]);
        }

        # It's safe to print out the next word and adjust the $length.
        # NOTE: nothing in this script ever clears $first_word, so the
        # branch below does not run; words are separated by the blank that
        # is appended after each word instead.
        if ($first_word != 1)
        {
            $output_line .= " ";
            $length++;
        }
        $output_line .= "$word ";
        $printed_word = 1;
        $length += length($word)+1;

        # If the word ends in a period, question mark, or exclamation point,
        # we might be ending a sentence.  Check for common abbreviations to
        # see.  If we *are* ending a sentence, we need to add another space
        # to the output.  We also might be right after a colon.
        if ($word =~ /[\.\?!:][$endings]*$/)
        {
            # Check for standard abbs.
            if ( ($word !~ /\bMr\.[$endings]*$/) &&
                 ($word !~ /\bMrs\.[$endings]*$/) &&
                 ($word !~ /\bMs\.[$endings]*$/) &&
                 ($word !~ /\bCo\.[$endings]*$/) &&
                 ($word !~ /\bLtd\.[$endings]*$/) &&
                 ($word !~ /\bltd\.[$endings]*$/) &&
                 ($word !~ /\bp\.m\.[$endings]*$/) &&
                 ($word !~ /\bP\.M\.[$endings]*$/) &&
                 ($word !~ /\ba\.m\.[$endings]*$/) &&
                 ($word !~ /\bA\.M\.[$endings]*$/) &&
                 ($word !~ /\bi\.e\.[$endings]*$/)
               )
            {
                $output_line .= " ";
                $length++;
            }
        }
    }

    # If we have a "/*" indent, change it to be " *" so that the lines
    # after the opening one continue the comment.
    if ($indent_characters eq "/\*")
    {
        $C_comment = 1;
        $indent_characters = " *";

        # If we don't have a second indent 1, make one.
        if (length($second_indent[1]) == 0)
        {
            $second_indent[1] = " ";
        }
    }

    # If we got to the end of an input line without ever having printed a
    # word (for example a line holding only the marker), do a line break.
    if ($printed_word == 0)
    {
        # Print out the line so far, after trimming trailing blanks.
        $output_line =~ s/\s*$//;
        print "$output_line\n";

        # Reset the line.
        $output_line = "";
        $length = 0;
        $width = $save_width;

        # Start the new line with the continuation indent and marker.
        $output_line .= "$second_indent[0]$indent_characters$second_indent[1]";
        $length += length($second_indent[0])+
                   length($indent_characters)+
                   length($second_indent[1]);
    }
}

# Print out the last line, after trimming trailing blanks.
$output_line =~ s/\s*$//;
print "$output_line\n";
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.