/* Source: ftp.nice.ch/pub/next/unix/text/NeXT_French_Dictionary.3.1.08.I.bs.tar.gz#/NeXT_French_Dictionary3.1.08/src/ispell-3.1.08.tar.gz#/ispell-3.1/ijoin.c */

/* This is ijoin.c in view mode; [Download] [Up] */

#ifndef lint
/*
 * RCS identification string for this file.  It is compiled in only
 * when "lint" is not defined.
 */
static char Rcs_Id[] =
    "$Id: ijoin.c,v 1.5 1994/01/25 07:11:36 geoff Exp $";
#endif

/*
 * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All modifications to the source code must be clearly marked as
 *    such.  Binary redistributions based on modified source code
 *    must be clearly marked as modified versions in the documentation
 *    and/or other materials provided with the distribution.
 * 4. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgment:
 *      This product includes software developed by Geoff Kuenning and
 *      other unpaid contributors.
 * 5. The name of Geoff Kuenning may not be used to endorse or promote
 *    products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * "Join" command for ispell.
 *
 * This command is a complete reimplementation of the UNIX "join"
 * command, except that fields cannot be separated by a newline, it
 * can handle lines of unlimited length, and the preceding sort can
 * treat characters as either signed or unsigned.
 *
 * Usage:
 *
 *	ijoin [options] file1 file2
 *
 * See the UNIX "join" manual page for option descriptions.  Only
 * nonstandard options are described here.
 *
 * Either file1 or file2 may be "-", in which case the standard input
 * is used for that file.
 *
 * Normally, ijoin uses "strcmp" to compare fields.  This is the
 * correct thing to do on most systems if you are using the
 * system-provided "sort" command to sort the input files before
 * feeding them to ijoin.  In some cases, however, the sort command
 * you use will disagree with strcmp about how to handle characters
 * with the high bit set.  If this is the case, you can specify the
 * "-s" (signed comparisons) or "-u" (unsigned comparisons) option to
 * force ijoin to conform to the method used by the sort program.
 * This is only necessary if one of the input files contains 8-bit
 * characters in the field that is being joined on.
 *
 * On some older machines with non-ANSI compilers, the "-s" option
 * will be ineffective because characters default to unsigned.
 * However, this option should be unnecessary on those machines, so no
 * harm will be done.
 */

/*
 * $Log: ijoin.c,v $
 * Revision 1.5  1994/01/25  07:11:36  geoff
 * Get rid of all old RCS log lines in preparation for the 3.1 release.
 *
 */

#include <stdio.h>
#include "config.h"
#include "ispell.h"
#include "proto.h"
#include "fields.h"

/*
 * SIGNED expands to the "signed" keyword when __STDC__ is defined and
 * to nothing otherwise.  It is used in the declaration of strscmp so
 * that the file can still be compiled when "signed" is unavailable;
 * in that case strscmp compares plain "char" values.
 */
#ifdef __STDC__
#define SIGNED	signed
#else /* __STDC__ */
#define SIGNED
#endif /* __STDC__ */

/*
 * Forward declarations for every function defined in this file.  The
 * argument lists are wrapped in the P macro, which is not defined in
 * this file (presumably it comes from one of the project headers
 * included above -- confirm).
 */
int		main P ((int argc, char * argv[])); /* Join files */
static void	usage P ((void));	/* Issue a usage message */
static void	dojoin P ((void));	/* Actually perform the join */
static void	full_output P ((field_t * line1, field_t * line2));
					/* Output everything from both lines */
static void	selected_output P ((field_t * line1, field_t * line2));
					/* Output selected fields */
static int	strscmp P ((SIGNED char * a, SIGNED char * b));
					/* Signed version of strcmp */
static int	strucmp P ((unsigned char * a, unsigned char * b));
					/* Unsigned version of strcmp */

/*
 * One entry of the output list built from the arguments that follow
 * the -o switch.  main stores the file number as 1 or 2 (the digit
 * before the '.') and the field number 0-based (the number after the
 * '.' minus one).
 */
typedef struct
    {
    int		file;			/* Number of file to output from */
    int		field;			/* Number of field to output */
    }
		outlist_t;		/* Output description list */

/*
 * File-scope state shared by main, dojoin, and the output routines.
 * Most of it is set once by main while parsing the command line.
 * Field numbers are held 0-based: main subtracts 1 from the values
 * given on the command line.
 */
					/* Comparison function; -s selects */
					/* ..strscmp and -u selects strucmp */
static int		(*compare) () = strcmp; /* Comparison function */
					/* Replaced by the argument of -e */
static char *		emptyfield = ""; /* Use this to replace empty fields */
static FILE *		file1;		/* First file to join */
static FILE *		file2;		/* Second file to join */
static int		join1field = 0;	/* Field to join file 1 on */
static int		join2field = 0;	/* Field to join file 2 on */
					/* maxf is raised by -o entries and */
					/* ..passed as the last argument of */
					/* ..fieldread */
static int		maxf[2] = {0, 0}; /* Max field to parse in each file */
					/* outlist stays NULL unless -o is */
					/* ..given; NULL selects full_output */
static outlist_t *	outlist = NULL;	/* List of fields to write */
static int		outlistsize;	/* Number of elements in outlist */
					/* The FLD_RUNS bit is cleared by -t */
static int		runs = FLD_RUNS; /* Set if runs of tabchar same as 1 */
					/* Replaced by the text following -t; */
					/* ..tabchar[0] separates output fields */
static char *		tabchar = " \t"; /* Field separator character(s) */
static int		unpairable1 = 0; /* NZ if -a1 */
static int		unpairable2 = 0; /* NZ if -a2 */

extern int	strcmp ();

/*
 * Parse the command-line switches, open the two input files, and
 * perform the join.
 *
 * Switch parsing continues while more than the two file names remain
 * (argc > 3) and the next argument begins with '-'.  Either file name
 * may be "-" to select the standard input, but not both, since the
 * two inputs are read independently.  Field numbers on the command
 * line are 1-based and are stored 0-based; a field number that is not
 * positive is rejected with a usage message, because it would later
 * be used as a negative array index.
 *
 * Returns 0 after the join has run, or 1 if memory for the output
 * list cannot be allocated, both inputs are "-", or an input file
 * cannot be opened.  Bad switches terminate through usage ().
 */
int main (argc, argv)			/* Join files */
    int			argc;		/* Argument count */
    char *		argv[];		/* Argument vector */
    {
    
    while (argc > 3  &&  argv[1][0] == '-')
	{
	argc--;
	argv++;
	switch (argv[0][1])
	    {
	    case 'a':			/* produce output for unpairables */
		if (argv[0][2] == '1')
		    unpairable1 = 1;
		else if (argv[0][2] == '2')
		    unpairable2 = 1;
		else if (argv[0][2] == '\0')
		    unpairable1 = unpairable2 = 1;
		else
		    usage ();
		break;
	    case 'e':			/* Replace empty fields with this */
		argc--;
		argv++;
		emptyfield = *argv;
		break;
	    case 'j':			/* Specify field to join on */
		if (argv[0][2] == '1')
		    join1field = atoi (argv[1]) - 1;
		else if (argv[0][2] == '2')
		    join2field = atoi (argv[1]) - 1;
		else if (argv[0][2] == '\0')
		    join1field = join2field = atoi (argv[1]) - 1;
		else
		    usage ();
		/*
		 * A zero or non-numeric field number would become a
		 * negative index into the field array.
		 */
		if (join1field < 0  ||  join2field < 0)
		    usage ();
		argc--;
		argv++;
		break;
	    case 'o':			/* Specify output list */
		/*
		 * We will assume that all remaining switch arguments
		 * are used to describe the output list.  This will
		 * occasionally result in malloc'ing a few too many
		 * elements, but no real harm will be done.
		 */
		outlist =
		  (outlist_t *) malloc ((argc - 3) * sizeof (outlist_t));
		if (outlist == NULL)
		    {
		    (void) fprintf (stderr, "ijoin:  out of memory!\n");
		    return 1;
		    }
		/*
		 * Consume arguments of the form "1.n" or "2.n", always
		 * leaving the last two arguments for the file names.
		 */
		for (outlistsize = 0, argc--, argv++;
		  argc > 2  &&  (argv[0][0] == '1'  ||  argv[0][0] == '2')
		    &&  argv[0][1] == '.';
		  argc--, argv++, outlistsize++)
		    {
		    outlist[outlistsize].file = argv[0][0] - '0';
		    outlist[outlistsize].field = atoi (&argv[0][2]) - 1;
		    if (outlist[outlistsize].field < 0)
			usage ();	/* Would index before field 0 */
		    /*
		     * NOTE(review): maxf is handed to fieldread as its
		     * field limit; whether a 0-based index is the
		     * right value for that limit depends on
		     * fieldread's contract -- confirm.
		     */
		    if (maxf[outlist[outlistsize].file - 1]
		      < outlist[outlistsize].field)
			maxf[outlist[outlistsize].file - 1] =
			  outlist[outlistsize].field;
		    }
		argc++;			/* Un-do arg that stopped us */
		argv--;
		break;
	    case 't':
		tabchar = &argv[0][2];
		runs &= ~FLD_RUNS;
		break;
	    case 's':
		compare = strscmp;
		break;
	    case 'u':
		compare = strucmp;
		break;
	    default:
		usage ();
		break;
	    }
	}
    if (argc != 3)
	usage ();
    if (strcmp (argv[1], "-") == 0  &&  strcmp (argv[2], "-") == 0)
	{
	(void) fprintf (stderr,
	  "ijoin:  only one input file may be the standard input\n");
	return 1;
	}
    if (strcmp (argv[1], "-") == 0)
	file1 = stdin;
    else
	{
	file1 = fopen (argv[1], "r");
	if (file1 == NULL)
	    perror (argv[1]);
	}
    /*
     * File 2 may also be the standard input.  dojoin needs to reposition
     * file 2 and reports an error itself if ftell fails on it.
     */
    if (strcmp (argv[2], "-") == 0)
	file2 = stdin;
    else
	{
	file2 = fopen (argv[2], "r");
	if (file2 == NULL)
	    perror (argv[2]);
	}
    if (file1 == NULL  ||  file2 == NULL)
	return 1;
    dojoin ();
    return 0;
    }

/*
 * Write a one-line synopsis of the command to the standard error
 * stream and terminate the program with exit status 1.  This function
 * does not return.
 */
static void usage ()			/* Issue a usage message */
    {
    static char		synopsis[] =
      "Usage:  ijoin [-an] [-e s] [-jn m] [-o n.m ...] [-tc] file1 file2\n";

    (void) fputs (synopsis, stderr);
    exit (1);
    }

/*
 * Read file1 and file2 in parallel and write the joined lines to the
 * standard output.
 *
 * Each pass of the loop compares the join fields of the current line
 * from each file using the selected comparison function.  Equal lines
 * are written with full_output, or with selected_output when an
 * output list was given.  The file whose line compares lower is then
 * advanced; a line that is missing (end of file) or has too few
 * fields to contain its join field is treated as unpairable and
 * skipped over.
 *
 * To pair every matching line of file 1 with every matching line of
 * file 2, the position of the first line of the current run in file 2
 * is kept in file2pos, and file 2 is moved back there before file 1
 * is advanced.  skip2 counts the file-2 lines that have already been
 * paired, so that they are not reported as unpairable when they are
 * finally passed over.
 *
 * Exits with status 1 if the initial position of file 2 cannot be
 * obtained.
 */
static void dojoin ()			/* Actually perform the join */
    {
    int			comparison;	/* Result of comparing the lines */
    long		file2pos;	/* Position file 2 started at */
    register field_t *	line1;		/* Line from file 1 */
    register field_t *	line2;		/* Line from file 2 */
    int			pairable;	/* NZ if lines can be paired */
    int			skip2;		/* No. of "unpairable" 2's to skip */

    runs |= FLD_NOSHRINK;		/* Don't do excessive reallocations */
    field_line_inc = BUFSIZ;		/* Allocate line bfr in huge chunks */
    line1 = fieldread (file1, tabchar, runs, maxf[0]);
    file2pos = ftell (file2);
    skip2 = 0;
    if (file2pos == -1)
	{
	(void) fprintf (stderr, "ijoin:  Can't seek file ");
	perror ("2");
	exit (1);
	}
    line2 = fieldread (file2, tabchar, runs, maxf[1]);
    while (line1 != NULL  ||  line2 != NULL)
	{
	/*
	 * Do a little work to reduce the number of calls to realloc, at
	 * the expense of slightly-increased memory usage.
	 */
	if (line1 != NULL  &&  line1->nfields >= field_field_inc)
	    field_field_inc = line1->nfields + 1;
	if (line2 != NULL  &&  line2->nfields >= field_field_inc)
	    field_field_inc = line2->nfields + 1;
	/*
	 * Determine if the lines can be paired.  Every path below
	 * assigns "comparison" before it is tested.  When both lines
	 * are unusable, the file-2 checks come last and so decide
	 * which file advances.
	 */
	pairable = 1;
	if (line1 == NULL)
	    {
	    pairable = 0;
	    comparison = 1;		/* This causes file 2 to advance */
	    }
	else if (join1field >= line1->nfields)
	    {
	    pairable = 0;
	    comparison = -1;		/* This causes file 1 to advance */
	    }
	if (line2 == NULL)
	    {
	    pairable = 0;
	    comparison = -1;		/* This causes file 1 to advance */
	    }
	else if (join2field >= line2->nfields)
	    {
	    pairable = 0;
	    comparison = 1;		/* This causes file 2 to advance */
	    }
	if (pairable)
	    {
	    comparison = (*compare) (line1->fields[join1field],
	      line2->fields[join2field]);
	    pairable = (comparison == 0);
	    }
	if (pairable)
	    {
	    /*
	     * The two lines can be paired.  Produce output.
	     */
	    if (outlist == NULL)
		full_output (line1, line2);
	    else
		selected_output (line1, line2);
	    }
	/*
	 * Advance through the files
	 */
	if (comparison < 0)
	    {
	    if (unpairable1)
		{
		if (outlist == NULL)
		    (void) fieldwrite (stdout, line1, tabchar[0]);
		else
		    selected_output (line1, (field_t *) NULL);
		}
	    fieldfree (line1);
	    line1 = fieldread (file1, tabchar, runs, maxf[0]);
	    }
	else if (comparison > 0)
	    {
	    if (skip2 > 0)
		skip2--;		/* Line was already paired */
	    else if (unpairable2)
		{
		if (outlist == NULL)
		    (void) fieldwrite (stdout, line2, tabchar[0]);
		else
		    {
		    /*
		     * The line came from file 2, so it must be passed
		     * as selected_output's second argument; the
		     * missing file-1 line is represented by NULL.
		     */
		    selected_output ((field_t *) NULL, line2);
		    }
		}
	    fieldfree (line2);
	    file2pos = ftell (file2);	/* Remember where next line starts */
	    line2 = fieldread (file2, tabchar, runs, maxf[1]);
	    }
	else
	    {
	    /*
	     * Here's the tricky part.  We have to advance file 2
	     * until comparisons fail, and then back it up and advance
	     * file 1.
	     */
	    skip2++;
	    fieldfree (line2);
	    line2 = fieldread (file2, tabchar, runs, maxf[1]);
	    if (line2 == NULL
	      ||  join2field >= line2->nfields
	      ||  (*compare) (line1->fields[join1field],
		  line2->fields[join2field])
		!= 0)
		{
		/*
		 * The run of matching file-2 lines has ended.  Go back
		 * to its first line and move on to the next line of
		 * file 1.
		 */
		(void) fseek (file2, file2pos, 0);
		fieldfree (line2);
		line2 = fieldread (file2, tabchar, runs, maxf[1]);
		fieldfree (line1);
		line1 = fieldread (file1, tabchar, runs, maxf[0]);
		/*
		 * If the new file-1 line matches the same run, the run
		 * will be paired and counted again from its start.
		 */
		if (line1 != NULL  &&  line2 != NULL
		  &&  join1field < line1->nfields
		  &&  join2field < line2->nfields
		  &&  (*compare) (line1->fields[join1field],
			line2->fields[join2field])
		    == 0)
		    skip2 = 0;
		}
	    }
	}
    }

static void full_output (line1, line2)	/* Output everything from both lines */
    register field_t *	line1;		/* Line from file 1 */
    register field_t *	line2;		/* Line from file 2 */
    {
    register int	fieldno;	/* Number of field being handled */

    (void) fputs (line1->fields[join1field], stdout);
    for (fieldno = 0;  fieldno < line1->nfields;  fieldno++)
	{
	if (fieldno == join1field)
	    continue;
	(void) putchar (tabchar[0]);
	if (line1->fields[fieldno][0] == '\0')
	    (void) fputs (emptyfield, stdout);
	else
	    (void) fputs (line1->fields[fieldno], stdout);
	}
    for (fieldno = 0;  fieldno < line2->nfields;  fieldno++)
	{
	if (fieldno == join2field)
	    continue;
	(void) putchar (tabchar[0]);
	if (line2->fields[fieldno][0] == '\0')
	    (void) fputs (emptyfield, stdout);
	else
	    (void) fputs (line2->fields[fieldno], stdout);
	}
    (void) putchar ('\n');
    }

static void	selected_output (line1, line2) /* Output selected fields */
    field_t *		line1;		/* Line from file 1 */
    field_t *		line2;		/* Line from file 2 */
    {
    register field_t *	cline;		/* Current line being handled */
    register int	listno;		/* Number of output list entry */
    
    for (listno = 0;  listno < outlistsize;  listno++)
	{
	if (listno != 0)
	    (void) putchar (tabchar[0]);
	if (outlist[listno].file == 1)
	    cline = line1;
	else
	    cline = line2;
	if (cline == NULL
	  ||  outlist[listno].field >= cline->nfields
	  ||  cline->fields[outlist[listno].field][0] == '\0')
	    (void) fputs (emptyfield, stdout);
	else
	    (void) fputs (cline->fields[outlist[listno].field], stdout);
	}
    (void) putchar ('\n');
    }

/*
 * Compare two NUL-terminated strings, treating each character as a
 * SIGNED char.  Returns the difference between the first pair of
 * characters that differ (a's character minus b's), or zero if the
 * strings are identical.
 */
static int strscmp (a, b)		/* Compare signed strings */
    register SIGNED char * a;		/* First string to compare */
    register SIGNED char * b;		/* Second string to compare */
    {

    /* Step past the common prefix; stop at a's end or a mismatch */
    for (  ;  *a != '\0'  &&  *a == *b;  a++, b++)
	;
    return *a - *b;
    }

/*
 * Compare two NUL-terminated strings, treating each character as an
 * unsigned char.  Returns the difference between the first pair of
 * characters that differ (a's character minus b's), or zero if the
 * strings are identical.
 */
static int strucmp (a, b)		/* Compare unsigned strings */
    register unsigned char * a;		/* First string to compare */
    register unsigned char * b;		/* Second string to compare */
    {

    /* Step past the common prefix; stop at a's end or a mismatch */
    for (  ;  *a != '\0'  &&  *a == *b;  a++, b++)
	;
    return *a - *b;
    }

/* These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch. */