ftp.nice.ch/pub/next/unix/text/recode-3.4.s.tar.gz#/recode-3.4/ibmpc.c

This is ibmpc.c in view mode; [Download] [Up]

/* Conversion of files between different charsets and usages.
   Copyright (C) 1990, 1993, 1994 Free Software Foundation, Inc.
   Francois Pinard <pinard@iro.umontreal.ca>, 1988.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include "recode.h"

#define DOS_EOF 26		/* old end of file */
#define DOS_CR 13		/* carriage return */
#define DOS_LF 10		/* line feed */

/* Correspondence for IBM PC ruler graphics characters into ASCII graphics
   approximations.  The current principles are:

   - Single horizontal rulers are made up of dashes.
   - Double horizontal rulers are made up of equal signs.
   - Both single and double vertical are made up with `|'.
   - Both upper corners are rounded down with periods.
   - Lower corners are rounded up with grave/acute accent on left/right.
   - Other crossing rulers are approximated with plus signs, with exceptions
     for double horizontal ruler crossings but not at corners: they are
     equal signs inside a table, and `|' at left or right margin.
*/

/* ASCII stand-ins for IBM PC codes 176 through 223, indexed by
   (code - 176).  Each entry is annotated with the IBM PC code it
   replaces and the glyph that code displays on the PC.  */
unsigned char convert_rulers[48] = {
  '#',	/* 176  light shade */
  '#',	/* 177  medium shade */
  '#',	/* 178  dark shade */
  '|',	/* 179  single vertical */
  '+',	/* 180  single vertical, single branch to the left */
  '|',	/* 181  single vertical, double branch to the left */
  '+',	/* 182  double vertical, single branch to the left */
  '.',	/* 183  upper right corner, double down and single left */
  '.',	/* 184  upper right corner, single down and double left */
  '|',	/* 185  double vertical, double branch to the left */
  '|',	/* 186  double vertical */
  '.',	/* 187  upper right corner, double */
  '\'',	/* 188  lower right corner, double */
  '\'',	/* 189  lower right corner, double up and single left */
  '\'',	/* 190  lower right corner, single up and double left */
  '.',	/* 191  upper right corner, single */
  '`',	/* 192  lower left corner, single */
  '+',	/* 193  single horizontal, single branch up */
  '+',	/* 194  single horizontal, single branch down */
  '+',	/* 195  single vertical, single branch to the right */
  '-',	/* 196  single horizontal */
  '+',	/* 197  single vertical crossing single horizontal */
  '|',	/* 198  single vertical, double branch to the right */
  '+',	/* 199  double vertical, single branch to the right */
  '`',	/* 200  lower left corner, double */
  '.',	/* 201  upper left corner, double */
  '=',	/* 202  double horizontal, double branch up */
  '=',	/* 203  double horizontal, double branch down */
  '|',	/* 204  double vertical, double branch to the right */
  '=',	/* 205  double horizontal */
  '=',	/* 206  double vertical crossing double horizontal */
  '=',	/* 207  double horizontal, single branch up */
  '+',	/* 208  single horizontal, double branch up */
  '=',	/* 209  double horizontal, single branch down */
  '+',	/* 210  single horizontal, double branch down */
  '`',	/* 211  lower left corner, double up and single right */
  '`',	/* 212  lower left corner, single up and double right */
  '.',	/* 213  upper left corner, single down and double right */
  '.',	/* 214  upper left corner, double down and single right */
  '+',	/* 215  double vertical crossing single horizontal */
  '=',	/* 216  single vertical crossing double horizontal */
  '\'',	/* 217  lower right corner, single */
  '.',	/* 218  upper left corner, single */
  '#',	/* 219  full block */
  '#',	/* 220  lower half block */
  '#',	/* 221  left half block */
  '#',	/* 222  right half block */
  '#',	/* 223  upper half block */
};

/* Data for IBM PC to ISO Latin-1 code conversions.

   Each entry gives an IBM PC code first and the ISO Latin-1 code of the
   character named in the comment second.  Every listed pair maps a code
   to a different value; IBM PC codes absent from the list (for example
   158, 159 and 169) simply have no entry here.  Both init_latin1_ibmpc
   and init_ibmpc_latin1 hand this table to complete_pairs.
   NOTE(review): how complete_pairs fills in the codes not listed here is
   not visible in this file -- confirm against its definition.  */

static KNOWN_PAIR known_pairs[] =
  {
    { 20, 182},			/* pilcrow sign */
    { 21, 167},			/* section sign */

    {128, 199},			/* capital letter C with cedilla */
    {129, 252},			/* small letter u with diaeresis */
    {130, 233},			/* small letter e with acute accent */
    {131, 226},			/* small letter a with circumflex accent */
    {132, 228},			/* small letter a with diaeresis */
    {133, 224},			/* small letter a with grave accent */
    {134, 229},			/* small letter a with ring above */
    {135, 231},			/* small letter c with cedilla */
    {136, 234},			/* small letter e with circumflex accent */
    {137, 235},			/* small letter e with diaeresis */
    {138, 232},			/* small letter e with grave accent */
    {139, 239},			/* small letter i with diaeresis */
    {140, 238},			/* small letter i with circumflex accent */
    {141, 236},			/* small letter i with grave accent */
    {142, 196},			/* capital letter A with diaeresis */
    {143, 197},			/* capital letter A with ring above */
    {144, 201},			/* capital letter E with acute accent */
    {145, 230},			/* small ligature a with e */
    {146, 198},			/* capital ligature A with E */
    {147, 244},			/* small letter o with circumflex accent */
    {148, 246},			/* small letter o with diaeresis */
    {149, 242},			/* small letter o with grave accent */
    {150, 251},			/* small letter u with circumflex accent */
    {151, 249},			/* small letter u with grave accent */
    {152, 255},			/* small letter y with diaeresis */
    {153, 214},			/* capital letter O with diaeresis */
    {154, 220},			/* capital letter U with diaeresis */
    {155, 162},			/* cent sign */
    {156, 163},			/* pound sign */
    {157, 165},			/* yen sign */

    {160, 225},			/* small letter a with acute accent */
    {161, 237},			/* small letter i with acute accent */
    {162, 243},			/* small letter o with acute accent */
    {163, 250},			/* small letter u with acute accent */
    {164, 241},			/* small letter n with tilde */
    {165, 209},			/* capital letter N with tilde */
    {166, 170},			/* feminine ordinal indicator */
    {167, 186},			/* masculine ordinal indicator */
    {168, 191},			/* inverted question mark */

    {170, 172},			/* not sign */
    {171, 189},			/* vulgar fraction one half */
    {172, 188},			/* vulgar fraction one quarter */
    {173, 161},			/* inverted exclamation mark */
    {174, 171},			/* left angle quotation mark */
    {175, 187},			/* right angle quotation mark */

    {225, 223},			/* small german letter sharp s */

    {230, 181},			/* small Greek letter mu micro sign */

    {241, 177},			/* plus-minus sign */

    {246, 247},			/* division sign */

    {248, 176},			/* degree sign */

    {250, 183},			/* middle dot */

    {253, 178},			/* superscript two */

    {255, 160},			/* no-break space */
  };
/* Number of entries in known_pairs, computed at compile time.  */
#define NUMBER_OF_PAIRS (sizeof (known_pairs) / sizeof (KNOWN_PAIR))

/* Copy INPUT_FILE to OUTPUT_FILE, recoding Latin-1 into IBM-PC and
   writing every newline as the two characters CR LF.

   With strict_mapping set, characters are looked up in
   STEP->one_to_many: the first character of the entry is written, and a
   character without an entry is dropped and makes the result 0.
   Otherwise every character is translated through STEP->one_to_one.

   Return 1 if no character had to be dropped, 0 otherwise.  */

static int
file_latin1_ibmpc (const STEP *step, FILE *input_file, FILE *output_file)
{
  int lossless = 1;		/* cleared when a character is dropped */
  int ch;			/* character just read */

  if (strict_mapping)
    {
      const char *const *strings = step->one_to_many;

      for (;;)
	{
	  ch = getc (input_file);
	  if (ch == EOF)
	    break;

	  if (ch == '\n')
	    {
	      /* One newline becomes the DOS line terminator pair.  */
	      putc (DOS_CR, output_file);
	      putc (DOS_LF, output_file);
	      continue;
	    }

	  if (!strings[ch])
	    {
	      /* No IBM-PC counterpart: drop it and remember the loss.  */
	      lossless = 0;
	      continue;
	    }

	  putc (*strings[ch], output_file);
	}

      return lossless;
    }
  else
    {
      const unsigned char *bytes = step->one_to_one;

      for (;;)
	{
	  ch = getc (input_file);
	  if (ch == EOF)
	    break;

	  if (ch == '\n')
	    {
	      putc (DOS_CR, output_file);
	      putc (DOS_LF, output_file);
	    }
	  else
	    putc (bytes[ch], output_file);
	}

      return lossless;
    }
}

/* Copy INPUT_FILE to OUTPUT_FILE, recoding IBM-PC into Latin-1 and
   writing every CR LF pair as a single newline.

   Reading stops at end of file, or at the first DOS end-of-file
   character (code 26), in which case the result is always 0.  A CR not
   followed by LF is translated like any other character, and the
   character after it is then examined in its own right.  An LF not
   preceded by CR is translated too, but makes the result 0.

   With strict_mapping set, characters are looked up in
   STEP->one_to_many: the first character of the entry is written, and a
   character without an entry is dropped and makes the result 0.
   Otherwise every character is translated through STEP->one_to_one.

   Both loops below leave only through a return statement.  */

static int
file_ibmpc_latin1 (const STEP *step, FILE *input_file, FILE *output_file)
{
  const unsigned char *bytes;	/* one to one conversion table */
  const char *const *strings;	/* one to many conversion table */
  int lossless = 1;		/* cleared when recoding cannot be undone */
  int ch;			/* character under examination */

  if (strict_mapping)
    {
      strings = step->one_to_many;
      ch = getc (input_file);

      for (;;)
	{
	  if (ch == EOF)
	    return lossless;
	  if (ch == DOS_EOF)
	    return 0;

	  if (ch == DOS_CR)
	    {
	      /* Look one character ahead to recognize a CR LF pair.  */
	      ch = getc (input_file);
	      if (ch == DOS_LF)
		{
		  putc ('\n', output_file);
		  ch = getc (input_file);
		}
	      else if (strings[DOS_CR])
		putc (*strings[DOS_CR], output_file);
	      else
		lossless = 0;

	      /* CH already holds the next character to examine.  */
	      continue;
	    }

	  /* A bare LF is still translated below, but cannot be undone.  */
	  if (ch == DOS_LF)
	    lossless = 0;

	  if (strings[ch])
	    putc (*strings[ch], output_file);
	  else
	    lossless = 0;
	  ch = getc (input_file);
	}
    }

  bytes = step->one_to_one;
  ch = getc (input_file);

  for (;;)
    {
      if (ch == EOF)
	return lossless;
      if (ch == DOS_EOF)
	return 0;

      if (ch == DOS_CR)
	{
	  /* Look one character ahead to recognize a CR LF pair.  */
	  ch = getc (input_file);
	  if (ch == DOS_LF)
	    {
	      putc ('\n', output_file);
	      ch = getc (input_file);
	    }
	  else
	    putc (bytes[DOS_CR], output_file);

	  /* CH already holds the next character to examine.  */
	  continue;
	}

      /* A bare LF is still translated below, but cannot be undone.  */
      if (ch == DOS_LF)
	lossless = 0;

      putc (bytes[ch], output_file);
      ch = getc (input_file);
    }
}

/* Step initializer registered by module_ibmpc for the Latin-1 to IBM-PC
   direction.  It passes STEP and the known_pairs table to complete_pairs,
   then installs file_latin1_ibmpc as the step's file recoding routine.

   NOTE(review): the last complete_pairs argument is 1 here and 0 in
   init_ibmpc_latin1; presumably it selects the reverse reading of the
   table (Latin-1 to IBM PC) -- confirm against complete_pairs.  */

static void
init_latin1_ibmpc (STEP *step)
{
  complete_pairs (step, 1, known_pairs, NUMBER_OF_PAIRS, 1);
  /* Assigned after complete_pairs returns, so this routine is the one
     left in place whatever complete_pairs may have stored there.  */
  step->file_recode = file_latin1_ibmpc;
}

/* Step initializer registered by module_ibmpc for the IBM-PC to Latin-1
   direction.  It passes STEP and the known_pairs table to complete_pairs
   and installs file_ibmpc_latin1 as the step's file recoding routine.

   When ascii_graphics is set, STEP->one_to_one is then replaced by a
   freshly allocated copy in which the entries for IBM PC codes 176 and
   up are overwritten with the convert_rulers approximations; the
   previous table is freed.

   NOTE(review): under strict_mapping, file_ibmpc_latin1 reads only
   STEP->one_to_many, so the patched table is not consulted there (see
   the FIXME below).  Whether STEP->one_to_one is even valid in that mode
   depends on complete_pairs -- verify.  */

static void
init_ibmpc_latin1 (STEP *step)
{
  enum { TABLE_SIZE = 256, FIRST_RULER = 176 };
  unsigned char *patched;	/* replacement one to one table */

  complete_pairs (step, 1, known_pairs, NUMBER_OF_PAIRS, 0);
  step->file_recode = file_ibmpc_latin1;

  /* FIXME: Allow ascii_graphics even with strict_mapping.  Reported by
     David E. A. Wilson <david@osiris.cs.uow.edu.au>.  */

  if (!ascii_graphics)
    return;

  patched = (unsigned char *) xmalloc (TABLE_SIZE);
  memcpy (patched, step->one_to_one, TABLE_SIZE);
  memcpy (patched + FIRST_RULER, convert_rulers, sizeof convert_rulers);
  free ((void *) step->one_to_one);
  step->one_to_one = patched;
}

/* Register the two recoding steps of this module: Latin-1 to IBM-PC,
   always declared as ONE_TO_MANY, and IBM-PC to Latin-1, declared as
   MANY_TO_MANY when strict_mapping is set and as MANY_TO_ONE otherwise.
   The "ibm437" alias for IBM-PC is compiled out.  */

void
module_ibmpc (void)
{
  declare_step ("Latin-1", "IBM-PC", ONE_TO_MANY, init_latin1_ibmpc, NULL);

  if (strict_mapping)
    declare_step ("IBM-PC", "Latin-1", MANY_TO_MANY,
		  init_ibmpc_latin1, NULL);
  else
    declare_step ("IBM-PC", "Latin-1", MANY_TO_ONE,
		  init_ibmpc_latin1, NULL);

#if 0
  declare_alias ("IBM-PC", "ibm437");
#endif
}

These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.