This is compress.c in view mode; [Download] [Up]
/*
* Filename: compress.c
* Created : Mon Jul 1 16:15:39 1991
* Author : Vince DeMarco
* <demarco@cpsc.ucalgary.ca>
*/
/*
* compress.c - File compression ala IEEE Computer, June 1984.
*
* Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas)
* Jim McKie (decvax!mcvax!jim)
* Steve Davies (decvax!vax135!petsd!peora!srd)
* Ken Turkowski (decvax!decwrl!turtlevax!ken)
* James A. Woods (decvax!ihnp4!ames!jaw)
* Joe Orost (decvax!vax135!petsd!joe)
* Dave Mack (csu@alembic.acs.com)
*
* Revision 4.1 91/05/26 csu@alembic.acs.com
*
* Hacked up and stuck into a nice NeXTStep interface by Vince DeMarco
*/
/* Defines *********************/
#define min(a,b) ((a>b) ? b : a)
#define USERMEM 450000 /* Max amount of physical user mem available in bytes */
#define SACREDMEM 0 /* amount of phys mem saved for others comp hogs rest */
#define PBITS 16
#define BITS PBITS
#define HSIZE 69001 /* 95% occupancy */
#define BIT_MASK 0x1f /* Defines for third byte of header */
#define BLOCK_MASK 0x80
#define INIT_BITS 9 /* initial number of bits/code */
#define MAXCODE(n_bits) ((1 << (n_bits)) - 1)
#define CHECK_GAP 50000 /* ratio check interval recommended by jaw */
#define MAXPATHLEN 1024
#define htabof(i) htab[i]
#define codetabof(i) codetab[i]
#define tab_prefixof(i) codetabof(i)
#define tab_suffixof(i) ((char_type *)(htab))[i]
#define de_stack ((char_type *)&tab_suffixof(1<<BITS))
/*
* the next two codes should not be changed lightly, as they must not
* lie within the contiguous general code space.
*/
#define FIRST 257 /* first free entry */
#define CLEAR 256 /* table clear output code */
/* typedefs *********************/
typedef long int code_int; /* a code_int must be able to hold 2**BITS values of */
typedef long int count_int; /* type int, and also -1 */
typedef unsigned char char_type;
#include <stdio.h>
#include <stdlib.h>
#ifdef NeXT
#include <libc.h>
#endif
#include <strings.h>
#include <ctype.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/dir.h>
#include <errno.h>
#include "local_compress_defs.h"
/* global variables *********************/
static char_type magic_header[] = {"\037\235"}; /* 1F 9D */
static int n_bits; /* number of bits/code */
static int maxbits = BITS; /* user settable max # bits/code */
static code_int maxcode; /* maximum code, given n_bits */
static code_int maxmaxcode = 1 << BITS; /* should NEVER generate this
* code */
static count_int htab[HSIZE];
static unsigned short codetab[HSIZE];
static code_int hsize = HSIZE; /* for dynamic table sizing */
static count_int fsize;
/*
* To save much memory, we overlay the table used by compress() with those
* used by decompress(). The tab_prefix table is the same size and type
* as the codetab. The tab_suffix table needs 2**BITS characters. We
* get this from the beginning of htab. The output stack uses the rest
* of htab, and contains characters. There is plenty of room for any
* possible stack (stack used to be 8000 characters).
*/
static code_int free_ent = 0; /* first unused entry */
int exit_stat = 0;
static int nomagic = 0; /* Use a 3-byte magic number header, unless
* old file */
static int zcat_flg = 0; /* Write output on stdout, suppress
* messages */
static int quiet = 1; /* don't tell me about compression */
/*
* block compression parameters -- after all codes are used up,
* and compression rate changes, start over.
*/
static int block_compress = BLOCK_MASK;
static int clear_flg = 0;
static long int ratio = 0;
static count_int checkpoint = CHECK_GAP;
static int force = 0;
static char ofname[MAXPATHLEN];
void (*bgnd_flag) ();
static int do_decomp = 0;
/*****************************************************************
* TAG( main )
*
* Algorithm from "A Technique for High Performance Data Compression",
* Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
*
*/
int overwrite = 0; /* Do not overwrite unless given -f
* flag */
int recursive = 0; /* compress directories */
#ifdef TEST
void main(int argc, char *argv[])
{
fprintf(stderr,"Error code %d\n",compress_decompress(0, 0, 1, argv[1]));
}
#endif
/*****************************************************************************************
* Error returns:
*
* 0 everything is okay
* 1 file doesn't exist
* 2 if file size would expand if compressed
* 3 file doesn't end with a .Z
* 4 compressed with different number of bits that i can handle
* 5 file already compressed
* 6 can't open output file
* 7 not in compressed format
* 8 pathname too long
* 9 outputfile already exists
* 10 directory unreadable
*
******************************************************************************************/
int compress_decompress( int do_decompression,
int force_overwrite,
int recursive_comp_decomp,
char *file_name)
{
if ((bgnd_flag = signal(SIGINT, SIG_IGN)) != SIG_IGN) {
signal(SIGINT, onintr);
}
signal(SIGSEGV, oops);
do_decomp = do_decompression;
overwrite = force = force_overwrite;
recursive = recursive_comp_decomp;
maxmaxcode = 1 << maxbits;
if (comprexx(file_name) < 0){
return(exit_stat);
}
return(0);
}
static int comprexx(char *tempname)
{
struct stat statbuf, insbuf;
errno = 0;
if (lstat(tempname, &insbuf) == -1) {
if (do_decomp) {
switch (errno) {
case ENOENT: /* file doesn't exist */
/*
* if the given name doesn't end with .Z, try appending one This
* is obviously the wrong thing to do if it's a directory, but it
* shouldn't do any harm.
*/
if (strcmp(tempname + strlen(tempname) - 2, ".Z") != 0) {
strcat(tempname, ".Z");
errno = 0;
if (lstat(tempname, &insbuf) == -1) {
perror(tempname);
exit_stat = 1;
return (-1);
}
} else {
perror(tempname);
exit_stat = 3;
return (-1);
}
break;
default:
perror(tempname);
return (-1);
} /* end switch */
}
/* endif */
else {
/* we can't stat the file, ignore it */
perror(tempname);
exit_stat = 1;
return (-1);
}
} /* endif */
switch (insbuf.st_mode & S_IFMT) {
case S_IFDIR: /* directory */
if (recursive){
compdir(tempname);
if (exit_stat != 0){
return(-1);
}
}
break;
case S_IFREG: /* regular file */
exit_stat = 0;
if (do_decomp != 0) {
/* DECOMPRESSION */
if (strcmp(tempname + strlen(tempname) - 2, ".Z") != 0) {
exit_stat = 3;
return (-1);
}
/* Open input file */
if ((freopen(tempname, "r", stdin)) == NULL) {
perror(tempname);
exit_stat = 1;
return(-1);
}
/* Check the magic number */
if (nomagic == 0) {
if ((getchar() != (magic_header[0] & 0xFF))
|| (getchar() != (magic_header[1] & 0xFF))) {
exit_stat = 7;
return(-1);
}
maxbits = getchar(); /* set -b from file */
block_compress = maxbits & BLOCK_MASK;
maxbits &= BIT_MASK;
maxmaxcode = 1 << maxbits;
if (maxbits > BITS) {
exit_stat = 4;
return (-1);
}
}
/*
* we have to ignore SIGINT for a while, otherwise a ^C can nuke an
* existing file with ofname
*/
signal(SIGINT, SIG_IGN);
/* Generate output filename */
strcpy(ofname, tempname);
/* Check for .Z suffix */
if (strcmp(tempname + strlen(tempname) - 2, ".Z") == 0) {
ofname[strlen(tempname) - 2] = '\0'; /* Strip off .Z */
}
} else {
/* COMPRESSION */
if (strcmp(tempname + strlen(tempname) - 2, ".Z") == 0) {
exit_stat = 5;
return(-1);
}
/* Open input file */
if ((freopen(tempname, "r", stdin)) == NULL) {
perror(tempname);
exit_stat = 1;
return(-1);
}
fsize = (long)insbuf.st_size;
/*
* tune hash table size for small files -- ad hoc, but the sizes
* match earlier #defines, which serve as upper bounds on the number
* of output codes.
*/
hsize = HSIZE;
if (fsize < (1 << 12))
hsize = min(5003, HSIZE);
else if (fsize < (1 << 13))
hsize = min(9001, HSIZE);
else if (fsize < (1 << 14))
hsize = min(18013, HSIZE);
else if (fsize < (1 << 15))
hsize = min(35023, HSIZE);
else if (fsize < 47000)
hsize = min(50021, HSIZE);
/*
* we have to ignore SIGINT for a while, otherwise a ^C can nuke an
* existing file with ofname
*/
signal(SIGINT, SIG_IGN);
/* Generate output filename */
strcpy(ofname, tempname);
strcat(ofname, ".Z");
}
/* Check for overwrite of existing file */
if (overwrite == 0 && zcat_flg == 0) {
if (stat(ofname, &statbuf) == 0) {
char response[2];
response[0] = 'n';
fprintf(stderr, "%s already exists;", ofname);
if (foreground()) {
fprintf(stderr, " do you wish to overwrite %s (y or n)? ",
ofname);
fflush(stderr);
read(2, response, 2);
while (response[1] != '\n') {
if (read(2, response + 1, 1) < 0) { /* Ack! */
perror("stderr");
break;
}
}
}
if (response[0] != 'y') {
fprintf(stderr, "\tnot overwritten\n");
signal(SIGINT, onintr);
exit_stat = 9;
return(-1);
}
}
}
signal(SIGINT, onintr);
if (zcat_flg == 0) { /* Open output file */
if (freopen(ofname, "w", stdout) == NULL) {
perror(ofname);
exit_stat = 6;
return(-1);
}
}
/* Actually do the compression/decompression */
if (do_decomp == 0)
compress();
if (exit_stat != 0){ /* if something went wrong delete the outputfile and return */
unlink(ofname);
return(-1);
}
else
decompress();
if (zcat_flg == 0) {
copystat(tempname, ofname); /* Copy stats */
if ((exit_stat == 1) || (!quiet))
putc('\n', stderr);
}
default:
break;
} /* end switch */
return(0);
} /* end comprexx */
static void compdir(char *dir)
{
DIR *dirp;
struct direct *dp;
char nbuf[MAXPATHLEN];
char *nptr = nbuf;
dirp = opendir(dir);
if (dirp == NULL) {
printf("%s unreadable\n", dir); /* not stderr! */
exit_stat = 10;
return;
}
while (dp = readdir(dirp)) {
if (dp->d_ino == 0)
continue;
if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0)
continue;
if ( (strlen(dir)+strlen(dp->d_name)+1) < (MAXPATHLEN - 1)){
strcpy(nbuf,dir);
strcat(nbuf,"/");
strcat(nbuf,dp->d_name);
comprexx(nptr);
}else{
exit_stat = 8;
}
}
closedir(dirp);
return;
} /* end compdir */
static int offset;
long int in_count = 1; /* length of input */
long int bytes_out; /* length of compressed output */
long int out_count = 0; /* # of codes output (for debugging) */
/*
* compress stdin to stdout
*
* Algorithm: use open addressing double hashing (no chaining) on the
* prefix code / next character combination. We do a variant of Knuth's
* algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
* secondary probe. Here, the modular division first probe is gives way
* to a faster exclusive-or manipulation. Also do block compression with
* an adaptive reset, whereby the code table is cleared when the compression
* ratio decreases, but after the table fills. The variable-length output
* codes are re-sized at this point, and a special CLEAR code is generated
* for the decompressor. Late addition: construct the table according to
* file size for noticeable speed improvement on small files. Please direct
* questions about this implementation to ames!jaw.
*/
static void compress(void)
{
long fcode;
code_int i = 0;
int c;
code_int ent;
int disp;
code_int hsize_reg;
int hshift;
if (nomagic == 0) {
putchar(magic_header[0]);
putchar(magic_header[1]);
putchar((char)(maxbits | block_compress));
if (ferror(stdout))
writeerr();
}
offset = 0;
bytes_out = 3; /* includes 3-byte header mojo */
out_count = 0;
clear_flg = 0;
ratio = 0;
in_count = 1;
checkpoint = CHECK_GAP;
maxcode = MAXCODE(n_bits = INIT_BITS);
free_ent = ((block_compress) ? FIRST : 256);
ent = getchar();
hshift = 0;
for (fcode = (long)hsize; fcode < 65536L; fcode *= 2L)
hshift++;
hshift = 8 - hshift; /* set hash code range bound */
hsize_reg = hsize;
cl_hash((count_int) hsize_reg); /* clear hash table */
while ((c = getchar()) != EOF) {
in_count++;
fcode = (long)(((long)c << maxbits) + ent);
i = ((c << hshift) ^ ent); /* xor hashing */
if (htabof(i) == fcode) {
ent = codetabof(i);
continue;
} else if ((long)htabof(i) < 0) /* empty slot */
goto nomatch;
disp = hsize_reg - i; /* secondary hash (after G. Knott) */
if (i == 0)
disp = 1;
probe:
if ((i -= disp) < 0)
i += hsize_reg;
if (htabof(i) == fcode) {
ent = codetabof(i);
continue;
}
if ((long)htabof(i) > 0)
goto probe;
nomatch:
output((code_int) ent);
out_count++;
ent = c;
if (free_ent < maxmaxcode) {
codetabof(i) = free_ent++; /* code -> hashtable */
htabof(i) = fcode;
} else if ((count_int) in_count >= checkpoint && block_compress)
cl_block();
}
/*
* Put out the final code.
*/
output((code_int) ent);
out_count++;
output((code_int) - 1);
/*
* Print out stats on stderr
*/
if (bytes_out > in_count) /* exit(2) if no savings */
exit_stat = 2;
return;
}
/*****************************************************************
* TAG( output )
*
* Output the given code.
* Inputs:
* code: A n_bits-bit integer. If == -1, then EOF. This assumes
* that n_bits =< (long)wordsize - 1.
* Outputs:
* Outputs code to the file.
* Assumptions:
* Chars are 8 bits long.
* Algorithm:
* Maintain a BITS character long buffer (so that 8 codes will
* fit in it exactly). Use the VAX insv instruction to insert each
* code in turn. When the buffer fills up empty it and start over.
*/
static char buf[BITS];
char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
static void output(code_int code)
{
/*
* On the VAX, it is important to have the register declarations in exactly
* the order given, or the asm will break.
*/
int r_off = offset, bits = n_bits;
char *bp = buf;
if (code >= 0) {
/*
* byte/bit numbering on the VAX is simulated by the following code
*/
/*
* Get to the first byte.
*/
bp += (r_off >> 3);
r_off &= 7;
/*
* Since code is always >= 8 bits, only need to mask the first hunk on
* the left.
*/
*bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
bp++;
bits -= (8 - r_off);
code >>= 8 - r_off;
/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
if (bits >= 8) {
*bp++ = code;
code >>= 8;
bits -= 8;
}
/* Last bits. */
if (bits)
*bp = code;
offset += n_bits;
if (offset == (n_bits << 3)) {
bp = buf;
bits = n_bits;
bytes_out += bits;
do
putchar(*bp++);
while (--bits);
offset = 0;
}
/*
* If the next entry is going to be too big for the code size, then
* increase it, if possible.
*/
if (free_ent > maxcode || (clear_flg > 0)) {
/*
* Write the whole buffer, because the input side won't discover the
* size increase until after it has read it.
*/
if (offset > 0) {
if (fwrite(buf, 1, n_bits, stdout) != n_bits)
writeerr();
bytes_out += n_bits;
}
offset = 0;
if (clear_flg) {
maxcode = MAXCODE(n_bits = INIT_BITS);
clear_flg = 0;
} else {
n_bits++;
if (n_bits == maxbits)
maxcode = maxmaxcode;
else
maxcode = MAXCODE(n_bits);
}
}
} else {
/*
* At EOF, write the rest of the buffer.
*/
if (offset > 0)
fwrite(buf, 1, (offset + 7) / 8, stdout);
bytes_out += (offset + 7) / 8;
offset = 0;
fflush(stdout);
if (ferror(stdout))
writeerr();
}
}
/*
* Decompress stdin to stdout. This routine adapts to the codes in the
* file building the "string" table on-the-fly; requiring no table to
* be stored in the compressed file. The tables used herein are shared
* with those of the compress() routine. See the definitions above.
*/
static void decompress(void)
{
char_type *stackp;
int finchar;
code_int code, oldcode, incode;
/*
* As above, initialize the first 256 entries in the table.
*/
maxcode = MAXCODE(n_bits = INIT_BITS);
for (code = 255; code >= 0; code--) {
tab_prefixof(code) = 0;
tab_suffixof(code) = (char_type) code;
}
free_ent = ((block_compress) ? FIRST : 256);
finchar = oldcode = getcode();
if (oldcode == -1) /* EOF already? */
return; /* Get out of here */
putchar((char)finchar); /* first code must be 8 bits = char */
if (ferror(stdout)) /* Crash if can't write */
writeerr();
stackp = de_stack;
while ((code = getcode()) > -1) {
if ((code == CLEAR) && block_compress) {
for (code = 255; code >= 0; code--)
tab_prefixof(code) = 0;
clear_flg = 1;
free_ent = FIRST - 1;
if ((code = getcode()) == -1) /* O, untimely death! */
break;
}
incode = code;
/*
* Special case for KwKwK string.
*/
if (code >= free_ent) {
*stackp++ = finchar;
code = oldcode;
}
/*
* Generate output characters in reverse order
*/
while (code >= 256) {
*stackp++ = tab_suffixof(code);
code = tab_prefixof(code);
}
*stackp++ = finchar = tab_suffixof(code);
/*
* And put them out in forward order
*/
do
putchar(*--stackp);
while (stackp > de_stack);
/*
* Generate the new entry.
*/
if ((code = free_ent) < maxmaxcode) {
tab_prefixof(code) = (unsigned short)oldcode;
tab_suffixof(code) = finchar;
free_ent = code + 1;
}
/*
* Remember previous code.
*/
oldcode = incode;
}
fflush(stdout);
if (ferror(stdout))
writeerr();
}
/*****************************************************************
* TAG( getcode )
*
* Read one code from the standard input. If EOF, return -1.
* Inputs:
* stdin
* Outputs:
* code or -1 is returned.
*/
static code_int getcode(void)
{
/*
* On the VAX, it is important to have the register declarations in exactly
* the order given, or the asm will break.
*/
code_int code;
static int offset = 0, size = 0;
static char_type buf[BITS];
int r_off, bits;
char_type *bp = buf;
if (clear_flg > 0 || offset >= size || free_ent > maxcode) {
/*
* If the next entry will be too big for the current code size, then we
* must increase the size. This implies reading a new buffer full, too.
*/
if (free_ent > maxcode) {
n_bits++;
if (n_bits == maxbits)
maxcode = maxmaxcode; /* won't get any bigger now */
else
maxcode = MAXCODE(n_bits);
}
if (clear_flg > 0) {
maxcode = MAXCODE(n_bits = INIT_BITS);
clear_flg = 0;
}
size = fread(buf, 1, n_bits, stdin);
if (size <= 0)
return (-1); /* end of file */
offset = 0;
/* Round size down to integral number of codes */
size = (size << 3) - (n_bits - 1);
}
r_off = offset;
bits = n_bits;
/*
* Get to the first byte.
*/
bp += (r_off >> 3);
r_off &= 7;
/* Get first part (low order bits) */
code = (*bp++ >> r_off);
bits -= (8 - r_off);
r_off = 8 - r_off; /* now, offset into code word */
/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
if (bits >= 8) {
code |= *bp++ << r_off;
r_off += 8;
bits -= 8;
}
/* high order bits. */
code |= (*bp & rmask[bits]) << r_off;
offset += n_bits;
return code;
}
static void writeerr(void)
{
perror(ofname);
unlink(ofname);
exit(1);
}
static void copystat(char *ifname, char *ofname)
{
struct stat statbuf;
int mode;
time_t timep[2];
fclose(stdout);
if (stat(ifname, &statbuf)) { /* Get stat on input file */
perror(ifname);
return;
}
if ((statbuf.st_mode & S_IFMT /* 0170000 */ ) != S_IFREG /* 0100000 */ ) {
if (quiet)
fprintf(stderr, "%s: ", ifname);
fprintf(stderr, " -- not a regular file: unchanged");
exit_stat = 1;
} else if (statbuf.st_nlink > 1 && (!force)) {
if (quiet)
fprintf(stderr, "%s: ", ifname);
fprintf(stderr, " -- has %d other links: unchanged",
statbuf.st_nlink - 1);
exit_stat = 1;
} else if (exit_stat == 2 && (!force)) { /* No compression: remove
* file.Z */
if (!quiet)
fprintf(stderr, " -- file unchanged");
} else { /* ***** Successful Compression ***** */
exit_stat = 0;
mode = statbuf.st_mode & 07777;
if (chmod(ofname, mode))/* Copy modes */
perror(ofname);
chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */
timep[0] = statbuf.st_atime;
timep[1] = statbuf.st_mtime;
utime(ofname, timep); /* Update last accessed and modified times */
if (unlink(ifname)) /* Remove input file */
perror(ifname);
if (!quiet)
fprintf(stderr, " -- replaced with %s", ofname);
return; /* Successful return */
}
/* Unsuccessful return -- one of the tests failed */
if (unlink(ofname))
perror(ofname);
}
/*
* This routine returns 1 if we are running in the foreground and stderr
* is a tty.
*/
static int foreground(void)
{
if (bgnd_flag) { /* background? */
return (0);
} else { /* foreground */
if (isatty(2)) { /* and stderr is a tty */
return (1);
} else {
return (0);
}
}
}
static int onintr(void)
{
unlink(ofname);
exit(1);
}
static int oops(void)
{ /* wild pointer -- assume bad input */
if (do_decomp == 1)
fprintf(stderr, "uncompress: corrupt input\n");
unlink(ofname);
exit(1);
}
static void cl_block(void)
{ /* table clear for block compress */
long int rat;
checkpoint = in_count + CHECK_GAP;
if (in_count > 0x007fffff) {/* shift will overflow */
rat = bytes_out >> 8;
if (rat == 0) { /* Don't divide by zero */
rat = 0x7fffffff;
} else {
rat = in_count / rat;
}
} else {
rat = (in_count << 8) / bytes_out; /* 8 fractional bits */
}
if (rat > ratio) {
ratio = rat;
} else {
ratio = 0;
cl_hash((count_int) hsize);
free_ent = FIRST;
clear_flg = 1;
output((code_int) CLEAR);
}
}
static void cl_hash(hsize) /* reset code table */
count_int hsize;
{
count_int *htab_p = htab + hsize;
long i;
long m1 = -1;
i = hsize - 16;
do { /* might use Sys V memset(3) here */
*(htab_p - 16) = m1;
*(htab_p - 15) = m1;
*(htab_p - 14) = m1;
*(htab_p - 13) = m1;
*(htab_p - 12) = m1;
*(htab_p - 11) = m1;
*(htab_p - 10) = m1;
*(htab_p - 9) = m1;
*(htab_p - 8) = m1;
*(htab_p - 7) = m1;
*(htab_p - 6) = m1;
*(htab_p - 5) = m1;
*(htab_p - 4) = m1;
*(htab_p - 3) = m1;
*(htab_p - 2) = m1;
*(htab_p - 1) = m1;
htab_p -= 16;
} while ((i -= 16) >= 0);
for (i += 16; i > 0; i--)
*--htab_p = m1;
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.