ftp.nice.ch/pub/next/unix/file/duplink.1.1.I.bs.tar.gz#/duplink1.1/Source/dirtree.c

This is dirtree.c in view mode; [Download] [Up]

/* This file provides functions for reading a directory tree and assigning
   unique id's to files that are unique.

   This file is derived from compressHelp.m which is:

   All rights reserved. (C) 1992, 1993 Frank Thomas
   Frank Thomas <frank@glocke.robin.de>

   Copyright 1993 by Michael L.H. Brouwer (michael@urc.tue.nl)

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any later
   version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
   Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, write to the Free Software Foundation, Inc.,
   675 Mass Ave, Cambridge, MA 02139, USA.

   $Modified: Sun Apr  4 23:53:35 1993 by michael $
   $Id: dirtree.c,v 1.11 1993/04/04 22:46:03 michael Exp $  */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#ifdef HAVE_LIBC_H
#include <libc.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <assert.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/dir.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/wait.h>

#include "global.h"
#include "dirtree.h"
#include "mapfile.h"

/* For qsort.  */
typedef int (*q_cmp) (const void *, const void *);

/* Print progress information if VERBOSE is TRUE.  */
bool verbose = FALSE;
/* Print warnings if WARNINGS is TRUE.  */
bool warnings = TRUE;
/* FALSE if an unknown filetype has been encounterd.  */
bool all_ok;

static char name_prefix[MAXPATHLEN];

static char *
xstrdup (char *s)
{
  size_t len;
  char *mem;
  len = strlen (s) + 1;
  mem = xmalloc (len);
  memcpy (mem, s, len);
  return mem;
} /* xstrdup */

static dt_tree *
dt_alloc (void)
{
  dt_tree *t;
  t = xmalloc (sizeof (*t));
  t->elems = 0;
  t->max_elems = 50;
  t->files = xmalloc (sizeof (dt_file) *t->max_elems);
  return t;
} /* dt_alloc */

static dt_file *
dt_entry_alloc (dt_tree *t)
{
  if (++t->elems>t->max_elems)
    {
      t->max_elems += 100;
      t->files = xrealloc (t->files, sizeof (dt_file) *t->max_elems);
    }
  return &t->files[t->elems - 1];
} /* dt_entry_alloc */

/* Recursivly read a directory into T.  Return 0 on success and -1 on
   failure.  */
static int
dt_read_dir (dt_tree *t)
{
  struct direct *dp;
  DIR *dirp;
  struct stat st;
  int prefix_len;
  char *dir_name;

  prefix_len = strlen (name_prefix);
  dir_name = xstrdup (name_prefix);

  dirp = opendir (".");
  if (!dirp)
    return errorf ("opendir %s", name_prefix);
  for (dp = readdir (dirp); dp != NULL; dp = readdir (dirp))
    {
      if (lstat (dp->d_name, &st))
	{
	  errorf ("lstat %s", dp->d_name);
	  closedir (dirp);
	  return -1;
	}
      switch (st.st_mode & S_IFMT)
	{
	case S_IFDIR:
	  if (!strcmp (dp->d_name, ".") || !strcmp (dp->d_name, ".."))
	    break;
	  if (chdir (dp->d_name))
	    {
	      errorf ("chdir: %s%s", name_prefix, dp->d_name);
	      break;
	    }
	  strcat (strcpy (name_prefix + prefix_len, dp->d_name), "/");
	  if (dt_read_dir (t))
	    return -1;
	  if (chdir (".."))
	    return errorf ("chdir: %s%s/..", name_prefix, dp->d_name);
	  name_prefix[prefix_len] = 0;
	  break;
	case S_IFREG:
	  {
	    dt_file *f;
	    f = dt_entry_alloc (t);
	    f->name = xstrdup (dp->d_name);
	    f->dir = dir_name;
	    f->device = st.st_dev;
	    f->inode = st.st_ino;
	    f->size = st.st_size;
	  }
	  break;
	default:
	  all_ok = FALSE;
	  if (warnings)
	    fprintf (stderr, "%s: illegal filetype.\n", dp->d_name);
	  break;
	}
    }
  closedir (dirp);
  return 0;
} /* dt_read_dir */

/* Calculate a (sort-of) crc for the file called name.  Return -1 if
   something went wrong, otherwise return the crc.  */
static long
file_calc_crc (const char *name, size_t size)
{
  long crc1 = 47110815;
  long crc2 = 28091908;
  map_handle mh;
  long *p;
  char *pb;
  int s;

#ifdef DEBUG
  fprintf (stderr, "CRC: %s\n", name);
#endif

  mh.name = name;
  if (map_file (&mh))
    return -1;

  for (p = (long *) mh.mem, s = mh.size; s > sizeof (long); s -= sizeof (long))
    {
      long crc3;
      crc3 = crc1 ^ *p;
      crc1 = crc2 + *p++;
      crc2 = crc3;
    }
  for (pb = (char *) p; s > 0; s--)
    {
      long crc3;
      crc3 = ((crc1 ^* pb) << 1) + (crc1 >> 31);
      crc1 = ((crc2 +* pb++) << 1) + (crc2 >> 31);
      crc2 = crc3;
    }
  if (unmap_file (&mh))
    return -1;
  crc1 ^= crc2;
  /* Make sure we don't return -1 (which represents an error).  */
  return crc1 == -1 ? 0 : crc1;
} /* file_calc_crc */

/* Compare the named files NAME1 and NAME2.  Return -1 upon failure, 0 if
   they differ and 1 if they are identical.  */
static int
files_equal (const char *name1, const char *name2, off_t size)
{
  map_handle mh1, mh2;
  int r;

#ifdef DEBUG
  fprintf (stderr, "CMP: %s %s\n", name1, name2);
#endif

  mh1.name = name1;
  if (map_file (&mh1))
    return -1;
  mh2.name = name2;
  if (map_file (&mh2))
    {
      unmap_file (&mh1);
      return -1;
    }
  r = !memcmp (mh1.mem, mh2.mem, size);
  if (unmap_file (&mh1) & unmap_file (&mh2))
    return -1;
  return r;
} /* files_equal */

static int
compare_size_and_inode (const dt_file *e1, const dt_file *e2)
{
  int diff;

  diff = e1->size - e2->size;
  if (diff)
    return diff;
  
  diff = e1->device - e2->device;
  if (diff) 
    return diff;
  
  return e1->inode - e2->inode;
} /* compare_size_and_inode */

static int
compare_size_and_crc (const dt_file *e1, const dt_file *e2)
{
  int diff;

  diff = e1->size - e2->size;
  if (diff)
    return diff;
  return e1->crc - e2->crc;
} /* compare_size_and_crc */

static int
compare_name (const dt_file *e1, const dt_file *e2)
{
  int r;

  /* Dir pointers differ IFF dir strings differ (by construction).  */
  if (e1->dir == e2->dir)
    {
      r = strcmp (e1->name, e2->name);
      assert (r != 0);
      return r;
    }
  r = strcmp (e1->dir, e2->dir);
  assert (r != 0);
  return r;
} /* compare_name */

static int
compare_id (const dt_file *e1, const dt_file *e2)
{
  return e1->id-e2->id;
} /* compare_id */

/* Sort the tree T with (lexicgraphically) ascending names.  */
void
sort_by_name (dt_tree *t)
{
  qsort (t->files, t->elems, sizeof (dt_file), (q_cmp) compare_name);
} /* sort_by_id */

/* Sort the tree T with ascending ids.  */
void
sort_by_id (dt_tree *t)
{
  qsort (t->files, t->elems, sizeof (dt_file), (q_cmp) compare_id);
} /* sort_by_id */

/* Read the entire directory tree starting in PWD.  Return -1 if something
   went wrong, otherwise return a newly created dt_tree representing the current
   directory.  */
dt_tree *
dt_read (void)
{
  dt_tree *t;

  /* No unknow filetypes have been encounterd yet.  */
  all_ok = TRUE;
  if (verbose)
    fprintf (stderr, "Reading DirTree\n");
  t = dt_alloc ();
  name_prefix[0] = 0;
  if (dt_read_dir (t))
    return (dt_tree *) -1;
  return t;
} /* dt_read */

/* Give all the unique files in DIRTREE a unique id.  If VERBOSE is nonzero,
   print messages showing progress to stdout.  Return -1 is something went
   wrong, otherwise return the number of unique files in t.  */
int
dt_calc_ids (dt_tree *t)
{
  char name1[MAXPATHLEN], name2[MAXPATHLEN];
  dt_file *entry, *last_entry = NULL;
  long last_crc, id = 0;
  off_t last_size = -1;
  int i;

  /* Calc crc's if needed.  */
  if (verbose)
    fprintf (stderr, "Calcing CRCs\n");
  qsort (t->files, t->elems, sizeof (dt_file), (q_cmp) compare_size_and_inode);
  last_crc = 1;
  for (i = t->elems, entry = t->files; i > 0; i--, entry++)
    {
      if (entry->size != last_size)
	{
	  last_size = entry->size;
	  last_crc = 0;
	}
      else
	{
	  char name[MAXPATHLEN];
	  if (!last_crc)
	    {
	      /* If only two files have the same size don't calculate
	         there crc's, a compare takes just as long and will save
	         some IO if they have the same crc.  */
	      if (i > 1 && entry[1].size != last_size)
		continue;
	      last_crc = 1;
	      strcat (strcpy (name, entry[-1].dir), entry[-1].name);
	      entry[-1].crc = file_calc_crc (name, last_size);
	      if (entry[-1].crc == -1)
		return -1;
	    }
	  /* If two files point to the same inode, give them the same crc.  */
	  if (entry->inode == entry[-1].inode
	      && entry->device == entry[-1].device)
	    entry->crc = entry[-1].crc;
	  else
	    {
	      strcat (strcpy (name, entry->dir), entry->name);
	      entry->crc = file_calc_crc (name, entry->size);
	      if (entry->crc == -1)
		return -1;
	    }
	}
    }
  /* Compare files (if needed) and calc id.  */
  if (verbose)
    fprintf (stderr, "Comparing files\n");
  qsort (t->files, t->elems, sizeof (dt_file), (q_cmp) compare_size_and_crc);
  last_size = -1;
  for (i = t->elems, entry = t->files; i > 0; i--, entry++)
    {
      if (entry->size != last_size)
	{
	  last_size = entry->size;
	  last_crc = entry->crc;
	  last_entry = entry;
	  entry->id = id++;
	  continue;
	}
      if (entry->crc != last_crc)
	{
	  last_crc = entry->crc;
	  last_entry = entry;
	  entry->id = id++;
	  continue;
	}
      {
	dt_file *f = last_entry;
	int c;
	bool found = FALSE;

	while (f != entry)
	  {
	    /* Skip files that point to the same inode.  */
	    c = (entry->inode == f->inode && entry->device == f->device);
	    if (!c)
	      {
		strcat (strcpy (name1, entry->dir), entry->name);
		strcat (strcpy (name2, f->dir), f->name);
		c = files_equal (name1, name2, entry->size);
		if (c == -1)
		  return -1;
	      }
	    if (c)
	      {
		entry->id = f->id;
		if (entry[-1].id != entry->id)
		  {
		    /* Move entry in a way that id's are ascending.  */
		    dt_file tmp;

		    tmp =* entry;
		    for (f = entry - 1; f->id != tmp.id; f--)
		      f[1]=*f;
		    f[1] = tmp;
		  }
		found = TRUE;
		break;
	      }
	    /* Skip files with same id as the one that didn't match.  */
	    while (++f != entry && f->id == f[-1].id);
	  }
	if (!found)
	  entry->id = id++;
      }
    }
  if (verbose)
    fprintf (stderr, "Reduced %d files to %ld\n", t->elems, id);
  return id;
} /* dt_calc_ids */

#ifdef DEBUG
void
dt_print (dt_tree *t)
{
  int i;
  dt_file *f;

  for (i = t->elems, f = t->files; i > 0; i--, f++)
    {
      printf ("%08lx %6ld %s%s\n", f->crc, f->id, f->dir, f->name);
    }
} /* dt_print */
#endif

These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.