ftp.nice.ch/pub/next/unix/file/find.3.8.s.tar.gz#/find-3.8/locate/code.c

This is code.c in view mode; [Download] [Up]

/* code -- code filenames for locate

   Compress a sorted list.
   Works with 'find' to encode a filename database.

   Usage:

   bigram < list > bigrams
   process-bigrams > common_bigrams
   code common_bigrams < list > squeezed_list

   Uses 'front compression' (see ";login:", March 1983, p. 8).
   Output format is, per line, an offset differential count byte
   followed by a partially bigram-encoded ASCII residue.
   
   The codes are:
   
   0-28		likeliest differential counts + offset to make nonnegative
   30		escape code for out-of-range count to follow in next word
   128-255 	bigram codes (128 most common, as determined by 'updatedb')
   32-127  	single character (printable) ASCII residue

   Author: James A. Woods (jaw@riacs.edu)
   Modified by David MacKenzie (djm@ai.mit.edu)
   Public domain. */

#include <stdio.h>
#if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
#include <string.h>
#else
#include <strings.h>
#endif

#ifdef STDC_HEADERS
#include <stdlib.h>
#endif
#include <sys/types.h>
#include "pathmax.h"

/* Allocator defined outside this file.  main uses its result without a
   NULL check, so it is presumably expected never to return NULL --
   TODO confirm against its definition.  */
char *xmalloc ();
/* Helpers defined at the end of this file.  */
static int prefix_length ();
static int strindex ();

/* Escape byte written in place of the offset differential when the
   differential falls outside -14..14; the biased value then follows as a
   full word written with putw.  */
#define	RESET	30

/* The name this program was run with.  */
char *program_name;

/* Buffer holding the input line currently being encoded.  */
static char *path;

/* Buffer holding a copy of the previously encoded line; the shared prefix
   with PATH is measured against it.  */
static char *oldpath;

/* Bigram table read from the file named on the command line: up to 256
   characters plus a terminating NUL.  Zero-initialized, so unread bytes
   stay NUL.  */
static char bigrams[257] = {0};

/* Encode the pathname list on standard input into the squeezed locate
   format on standard output.

   argv[1] names the file holding the common-bigram table.  The table is
   copied to the output as a fixed 256-byte header (NUL-padded when the
   file supplies fewer bytes).  Each input line is then written as an
   offset-differential count followed by its partially bigram-encoded
   residue.

   Exit status: 0 on success; 1 if the bigram file cannot be opened or
   read, or if the output cannot be written; 2 on a usage error.  */

int
main (argc, argv)
     int argc;
     char *argv[];
{
  int count, oldcount, diffcount;
  int j, code;
  char bigram[3];
  FILE *fp;
  unsigned line_length;
  int path_max;

  program_name = argv[0];
  oldcount = 0;
  bigram[2] = '\0';

  if (argc != 2)
    {
      fprintf (stderr, "Usage: %s common_bigrams < list > coded_list\n",
               argv[0]);
      exit (2);
    }

  fp = fopen (argv[1], "r");
  if (fp == NULL)
    {
      fprintf (stderr, "%s: ", argv[0]);
      perror (argv[1]);
      exit (1);
    }

  path_max = PATH_MAX;
  path = xmalloc (path_max + 2);
  oldpath = xmalloc (path_max + 2);
  path[path_max] = '\0';
  /* Seed the "previous path" with a single space so the first line is
     compared against something that is a valid string.  */
  strcpy (oldpath, " ");

  /* Copy the bigram table to the output.  bigrams[] starts out all zero,
     so a short (or empty) table is padded with NULs; only a genuine read
     error is treated as fatal.  */
  if (fgets (bigrams, sizeof bigrams, fp) == NULL && ferror (fp))
    {
      fprintf (stderr, "%s: ", argv[0]);
      perror (argv[1]);
      exit (1);
    }
  fclose (fp);
  fwrite (bigrams, 1, sizeof bigrams - 1, stdout);

  while (fgets (path, path_max, stdin) != NULL)
    {
      line_length = strlen (path);
      if (line_length == 0)
        fprintf (stderr, "%s: null line in input\n", argv[0]);
      else if (path[line_length - 1] != '\n')
        fprintf (stderr, "%s: long line in input; truncating to `%s'\n",
                 argv[0], path);
      else
        path[line_length - 1] = '\0'; /* Remove newline. */

      /* Squelch unprintable chars so as not to botch decoding: strip the
         high bit, then replace control characters and DEL with '?'.  */
      for (j = 0; path[j] != '\0'; j++)
        {
          path[j] &= 0177;
          if (path[j] < 040 || path[j] == 0177)
            path[j] = '?';
        }

      /* Front compression: emit how the shared-prefix length changed
         relative to the previous line, biased by 14 to be nonnegative.
         Out-of-range changes are escaped with RESET and written as a
         full word.  */
      count = prefix_length (oldpath, path);
      diffcount = count - oldcount;
      if (diffcount < -14 || diffcount > 14)
        {
          putc (RESET, stdout);
          putw (diffcount + 14, stdout);
        }
      else
        putc (diffcount + 14, stdout);

      /* Encode the residue two characters at a time.  */
      for (j = count; path[j] != '\0'; j += 2)
        {
          if (path[j + 1] == '\0')
            {
              /* Odd trailing character: emit it literally.  */
              putchar (path[j]);
              break;
            }
          bigram[0] = path[j];
          bigram[1] = path[j + 1];

          /* Linear search for specific bigram in string table.  Only a
             match at an even offset is a real table entry; a match at an
             odd offset straddles two entries.  Keep searching past
             straddling matches so that a genuine entry further along the
             table is not missed.  */
          code = strindex (bigrams, bigram);
          while (code >= 0 && code % 2 != 0)
            {
              int next = strindex (bigrams + code + 1, bigram);

              code = (next < 0) ? -1 : code + 1 + next;
            }

          if (code >= 0)
            putchar ((code / 2) | 0200);
          else
            fputs (bigram, stdout);
        }
      strcpy (oldpath, path);
      oldcount = count;
    }

  /* A failed write (e.g. a full disk) would otherwise leave a silently
     truncated database behind with a zero exit status.  */
  if (fflush (stdout) != 0 || ferror (stdout))
    {
      fprintf (stderr, "%s: ", argv[0]);
      perror ("write error");
      exit (1);
    }
  return 0;
}

/* Find the first occurrence of PATTERN inside STRING.
   Returns the zero-based offset of the match, or -1 if there is none.
   An empty PATTERN never matches.  */

static int
strindex (string, pattern)
     char *string, *pattern;
{
  int start, k;

  for (start = 0; string[start] != '\0'; start++)
    {
      /* Cheap rejection on the first character.  */
      if (string[start] != pattern[0])
        continue;

      /* Walk the remainder of the pattern; a NUL in STRING can never
         equal a non-NUL pattern character, so this stops in bounds.  */
      k = 1;
      while (pattern[k] != '\0' && string[start + k] == pattern[k])
        k++;

      if (pattern[k] == '\0')
        return start;
    }
  return -1;
}

/* Count the leading characters that S1 and S2 have in common.
   The terminating NUL is not included in the count.  */

static int
prefix_length (s1, s2)
     char *s1, *s2;
{
  int shared = 0;

  while (s1[shared] != '\0' && s1[shared] == s2[shared])
    shared++;
  return shared;
}

These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.