This is compress.c in view mode; [Download] [Up]
/* * Filename: compress.c * Created : Mon Jul 1 16:15:39 1991 * Author : Vince DeMarco * <demarco@cpsc.ucalgary.ca> */ /* * compress.c - File compression ala IEEE Computer, June 1984. * * Authors: Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas) * Jim McKie (decvax!mcvax!jim) * Steve Davies (decvax!vax135!petsd!peora!srd) * Ken Turkowski (decvax!decwrl!turtlevax!ken) * James A. Woods (decvax!ihnp4!ames!jaw) * Joe Orost (decvax!vax135!petsd!joe) * Dave Mack (csu@alembic.acs.com) * * Revision 4.1 91/05/26 csu@alembic.acs.com * * Hacked up and stuck into a nice NeXTStep interface by Vince DeMarco */ /* Defines *********************/ #define min(a,b) ((a>b) ? b : a) #define USERMEM 450000 /* Max amount of physical user mem available in bytes */ #define SACREDMEM 0 /* amount of phys mem saved for others comp hogs rest */ #define PBITS 16 #define BITS PBITS #define HSIZE 69001 /* 95% occupancy */ #define BIT_MASK 0x1f /* Defines for third byte of header */ #define BLOCK_MASK 0x80 #define INIT_BITS 9 /* initial number of bits/code */ #define MAXCODE(n_bits) ((1 << (n_bits)) - 1) #define CHECK_GAP 50000 /* ratio check interval recommended by jaw */ #define MAXPATHLEN 1024 #define htabof(i) htab[i] #define codetabof(i) codetab[i] #define tab_prefixof(i) codetabof(i) #define tab_suffixof(i) ((char_type *)(htab))[i] #define de_stack ((char_type *)&tab_suffixof(1<<BITS)) /* * the next two codes should not be changed lightly, as they must not * lie within the contiguous general code space. */ #define FIRST 257 /* first free entry */ #define CLEAR 256 /* table clear output code */ /* typedefs *********************/ typedef long int code_int; /* a code_int must be able to hold 2**BITS values of */ typedef long int count_int; /* type int, and also -1 */ typedef unsigned char char_type; #include <stdio.h> #include <stdlib.h> #ifdef NeXT #include <libc.h> #endif #include <strings.h> #include <ctype.h> #include <signal.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/dir.h> #include <errno.h> #include "local_compress_defs.h" /* global variables *********************/ static char_type magic_header[] = {"\037\235"}; /* 1F 9D */ static int n_bits; /* number of bits/code */ static int maxbits = BITS; /* user settable max # bits/code */ static code_int maxcode; /* maximum code, given n_bits */ static code_int maxmaxcode = 1 << BITS; /* should NEVER generate this * code */ static count_int htab[HSIZE]; static unsigned short codetab[HSIZE]; static code_int hsize = HSIZE; /* for dynamic table sizing */ static count_int fsize; /* * To save much memory, we overlay the table used by compress() with those * used by decompress(). The tab_prefix table is the same size and type * as the codetab. The tab_suffix table needs 2**BITS characters. We * get this from the beginning of htab. The output stack uses the rest * of htab, and contains characters. There is plenty of room for any * possible stack (stack used to be 8000 characters). */ static code_int free_ent = 0; /* first unused entry */ int exit_stat = 0; static int nomagic = 0; /* Use a 3-byte magic number header, unless * old file */ static int zcat_flg = 0; /* Write output on stdout, suppress * messages */ static int quiet = 1; /* don't tell me about compression */ /* * block compression parameters -- after all codes are used up, * and compression rate changes, start over. */ static int block_compress = BLOCK_MASK; static int clear_flg = 0; static long int ratio = 0; static count_int checkpoint = CHECK_GAP; static int force = 0; static char ofname[MAXPATHLEN]; void (*bgnd_flag) (); static int do_decomp = 0; /***************************************************************** * TAG( main ) * * Algorithm from "A Technique for High Performance Data Compression", * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. * */ int overwrite = 0; /* Do not overwrite unless given -f * flag */ int recursive = 0; /* compress directories */ #ifdef TEST void main(int argc, char *argv[]) { fprintf(stderr,"Error code %d\n",compress_decompress(0, 0, 1, argv[1])); } #endif /***************************************************************************************** * Error returns: * * 0 everything is okay * 1 file doesn't exist * 2 if file size would expand if compressed * 3 file doesn't end with a .Z * 4 compressed with different number of bits that i can handle * 5 file already compressed * 6 can't open output file * 7 not in compressed format * 8 pathname too long * 9 outputfile already exists * 10 directory unreadable * ******************************************************************************************/ int compress_decompress( int do_decompression, int force_overwrite, int recursive_comp_decomp, char *file_name) { if ((bgnd_flag = signal(SIGINT, SIG_IGN)) != SIG_IGN) { signal(SIGINT, onintr); } signal(SIGSEGV, oops); do_decomp = do_decompression; overwrite = force = force_overwrite; recursive = recursive_comp_decomp; maxmaxcode = 1 << maxbits; if (comprexx(file_name) < 0){ return(exit_stat); } return(0); } static int comprexx(char *tempname) { struct stat statbuf, insbuf; errno = 0; if (lstat(tempname, &insbuf) == -1) { if (do_decomp) { switch (errno) { case ENOENT: /* file doesn't exist */ /* * if the given name doesn't end with .Z, try appending one This * is obviously the wrong thing to do if it's a directory, but it * shouldn't do any harm. */ if (strcmp(tempname + strlen(tempname) - 2, ".Z") != 0) { strcat(tempname, ".Z"); errno = 0; if (lstat(tempname, &insbuf) == -1) { perror(tempname); exit_stat = 1; return (-1); } } else { perror(tempname); exit_stat = 3; return (-1); } break; default: perror(tempname); return (-1); } /* end switch */ } /* endif */ else { /* we can't stat the file, ignore it */ perror(tempname); exit_stat = 1; return (-1); } } /* endif */ switch (insbuf.st_mode & S_IFMT) { case S_IFDIR: /* directory */ if (recursive){ compdir(tempname); if (exit_stat != 0){ return(-1); } } break; case S_IFREG: /* regular file */ exit_stat = 0; if (do_decomp != 0) { /* DECOMPRESSION */ if (strcmp(tempname + strlen(tempname) - 2, ".Z") != 0) { exit_stat = 3; return (-1); } /* Open input file */ if ((freopen(tempname, "r", stdin)) == NULL) { perror(tempname); exit_stat = 1; return(-1); } /* Check the magic number */ if (nomagic == 0) { if ((getchar() != (magic_header[0] & 0xFF)) || (getchar() != (magic_header[1] & 0xFF))) { exit_stat = 7; return(-1); } maxbits = getchar(); /* set -b from file */ block_compress = maxbits & BLOCK_MASK; maxbits &= BIT_MASK; maxmaxcode = 1 << maxbits; if (maxbits > BITS) { exit_stat = 4; return (-1); } } /* * we have to ignore SIGINT for a while, otherwise a ^C can nuke an * existing file with ofname */ signal(SIGINT, SIG_IGN); /* Generate output filename */ strcpy(ofname, tempname); /* Check for .Z suffix */ if (strcmp(tempname + strlen(tempname) - 2, ".Z") == 0) { ofname[strlen(tempname) - 2] = '\0'; /* Strip off .Z */ } } else { /* COMPRESSION */ if (strcmp(tempname + strlen(tempname) - 2, ".Z") == 0) { exit_stat = 5; return(-1); } /* Open input file */ if ((freopen(tempname, "r", stdin)) == NULL) { perror(tempname); exit_stat = 1; return(-1); } fsize = (long)insbuf.st_size; /* * tune hash table size for small files -- ad hoc, but the sizes * match earlier #defines, which serve as upper bounds on the number * of output codes. */ hsize = HSIZE; if (fsize < (1 << 12)) hsize = min(5003, HSIZE); else if (fsize < (1 << 13)) hsize = min(9001, HSIZE); else if (fsize < (1 << 14)) hsize = min(18013, HSIZE); else if (fsize < (1 << 15)) hsize = min(35023, HSIZE); else if (fsize < 47000) hsize = min(50021, HSIZE); /* * we have to ignore SIGINT for a while, otherwise a ^C can nuke an * existing file with ofname */ signal(SIGINT, SIG_IGN); /* Generate output filename */ strcpy(ofname, tempname); strcat(ofname, ".Z"); } /* Check for overwrite of existing file */ if (overwrite == 0 && zcat_flg == 0) { if (stat(ofname, &statbuf) == 0) { char response[2]; response[0] = 'n'; fprintf(stderr, "%s already exists;", ofname); if (foreground()) { fprintf(stderr, " do you wish to overwrite %s (y or n)? ", ofname); fflush(stderr); read(2, response, 2); while (response[1] != '\n') { if (read(2, response + 1, 1) < 0) { /* Ack! */ perror("stderr"); break; } } } if (response[0] != 'y') { fprintf(stderr, "\tnot overwritten\n"); signal(SIGINT, onintr); exit_stat = 9; return(-1); } } } signal(SIGINT, onintr); if (zcat_flg == 0) { /* Open output file */ if (freopen(ofname, "w", stdout) == NULL) { perror(ofname); exit_stat = 6; return(-1); } } /* Actually do the compression/decompression */ if (do_decomp == 0) compress(); if (exit_stat != 0){ /* if something went wrong delete the outputfile and return */ unlink(ofname); return(-1); } else decompress(); if (zcat_flg == 0) { copystat(tempname, ofname); /* Copy stats */ if ((exit_stat == 1) || (!quiet)) putc('\n', stderr); } default: break; } /* end switch */ return(0); } /* end comprexx */ static void compdir(char *dir) { DIR *dirp; struct direct *dp; char nbuf[MAXPATHLEN]; char *nptr = nbuf; dirp = opendir(dir); if (dirp == NULL) { printf("%s unreadable\n", dir); /* not stderr! */ exit_stat = 10; return; } while (dp = readdir(dirp)) { if (dp->d_ino == 0) continue; if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0) continue; if ( (strlen(dir)+strlen(dp->d_name)+1) < (MAXPATHLEN - 1)){ strcpy(nbuf,dir); strcat(nbuf,"/"); strcat(nbuf,dp->d_name); comprexx(nptr); }else{ exit_stat = 8; } } closedir(dirp); return; } /* end compdir */ static int offset; long int in_count = 1; /* length of input */ long int bytes_out; /* length of compressed output */ long int out_count = 0; /* # of codes output (for debugging) */ /* * compress stdin to stdout * * Algorithm: use open addressing double hashing (no chaining) on the * prefix code / next character combination. We do a variant of Knuth's * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime * secondary probe. Here, the modular division first probe is gives way * to a faster exclusive-or manipulation. Also do block compression with * an adaptive reset, whereby the code table is cleared when the compression * ratio decreases, but after the table fills. The variable-length output * codes are re-sized at this point, and a special CLEAR code is generated * for the decompressor. Late addition: construct the table according to * file size for noticeable speed improvement on small files. Please direct * questions about this implementation to ames!jaw. */ static void compress(void) { long fcode; code_int i = 0; int c; code_int ent; int disp; code_int hsize_reg; int hshift; if (nomagic == 0) { putchar(magic_header[0]); putchar(magic_header[1]); putchar((char)(maxbits | block_compress)); if (ferror(stdout)) writeerr(); } offset = 0; bytes_out = 3; /* includes 3-byte header mojo */ out_count = 0; clear_flg = 0; ratio = 0; in_count = 1; checkpoint = CHECK_GAP; maxcode = MAXCODE(n_bits = INIT_BITS); free_ent = ((block_compress) ? FIRST : 256); ent = getchar(); hshift = 0; for (fcode = (long)hsize; fcode < 65536L; fcode *= 2L) hshift++; hshift = 8 - hshift; /* set hash code range bound */ hsize_reg = hsize; cl_hash((count_int) hsize_reg); /* clear hash table */ while ((c = getchar()) != EOF) { in_count++; fcode = (long)(((long)c << maxbits) + ent); i = ((c << hshift) ^ ent); /* xor hashing */ if (htabof(i) == fcode) { ent = codetabof(i); continue; } else if ((long)htabof(i) < 0) /* empty slot */ goto nomatch; disp = hsize_reg - i; /* secondary hash (after G. Knott) */ if (i == 0) disp = 1; probe: if ((i -= disp) < 0) i += hsize_reg; if (htabof(i) == fcode) { ent = codetabof(i); continue; } if ((long)htabof(i) > 0) goto probe; nomatch: output((code_int) ent); out_count++; ent = c; if (free_ent < maxmaxcode) { codetabof(i) = free_ent++; /* code -> hashtable */ htabof(i) = fcode; } else if ((count_int) in_count >= checkpoint && block_compress) cl_block(); } /* * Put out the final code. */ output((code_int) ent); out_count++; output((code_int) - 1); /* * Print out stats on stderr */ if (bytes_out > in_count) /* exit(2) if no savings */ exit_stat = 2; return; } /***************************************************************** * TAG( output ) * * Output the given code. * Inputs: * code: A n_bits-bit integer. If == -1, then EOF. This assumes * that n_bits =< (long)wordsize - 1. * Outputs: * Outputs code to the file. * Assumptions: * Chars are 8 bits long. * Algorithm: * Maintain a BITS character long buffer (so that 8 codes will * fit in it exactly). Use the VAX insv instruction to insert each * code in turn. When the buffer fills up empty it and start over. */ static char buf[BITS]; char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; static void output(code_int code) { /* * On the VAX, it is important to have the register declarations in exactly * the order given, or the asm will break. */ int r_off = offset, bits = n_bits; char *bp = buf; if (code >= 0) { /* * byte/bit numbering on the VAX is simulated by the following code */ /* * Get to the first byte. */ bp += (r_off >> 3); r_off &= 7; /* * Since code is always >= 8 bits, only need to mask the first hunk on * the left. */ *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off]; bp++; bits -= (8 - r_off); code >>= 8 - r_off; /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ if (bits >= 8) { *bp++ = code; code >>= 8; bits -= 8; } /* Last bits. */ if (bits) *bp = code; offset += n_bits; if (offset == (n_bits << 3)) { bp = buf; bits = n_bits; bytes_out += bits; do putchar(*bp++); while (--bits); offset = 0; } /* * If the next entry is going to be too big for the code size, then * increase it, if possible. */ if (free_ent > maxcode || (clear_flg > 0)) { /* * Write the whole buffer, because the input side won't discover the * size increase until after it has read it. */ if (offset > 0) { if (fwrite(buf, 1, n_bits, stdout) != n_bits) writeerr(); bytes_out += n_bits; } offset = 0; if (clear_flg) { maxcode = MAXCODE(n_bits = INIT_BITS); clear_flg = 0; } else { n_bits++; if (n_bits == maxbits) maxcode = maxmaxcode; else maxcode = MAXCODE(n_bits); } } } else { /* * At EOF, write the rest of the buffer. */ if (offset > 0) fwrite(buf, 1, (offset + 7) / 8, stdout); bytes_out += (offset + 7) / 8; offset = 0; fflush(stdout); if (ferror(stdout)) writeerr(); } } /* * Decompress stdin to stdout. This routine adapts to the codes in the * file building the "string" table on-the-fly; requiring no table to * be stored in the compressed file. The tables used herein are shared * with those of the compress() routine. See the definitions above. */ static void decompress(void) { char_type *stackp; int finchar; code_int code, oldcode, incode; /* * As above, initialize the first 256 entries in the table. */ maxcode = MAXCODE(n_bits = INIT_BITS); for (code = 255; code >= 0; code--) { tab_prefixof(code) = 0; tab_suffixof(code) = (char_type) code; } free_ent = ((block_compress) ? FIRST : 256); finchar = oldcode = getcode(); if (oldcode == -1) /* EOF already? */ return; /* Get out of here */ putchar((char)finchar); /* first code must be 8 bits = char */ if (ferror(stdout)) /* Crash if can't write */ writeerr(); stackp = de_stack; while ((code = getcode()) > -1) { if ((code == CLEAR) && block_compress) { for (code = 255; code >= 0; code--) tab_prefixof(code) = 0; clear_flg = 1; free_ent = FIRST - 1; if ((code = getcode()) == -1) /* O, untimely death! */ break; } incode = code; /* * Special case for KwKwK string. */ if (code >= free_ent) { *stackp++ = finchar; code = oldcode; } /* * Generate output characters in reverse order */ while (code >= 256) { *stackp++ = tab_suffixof(code); code = tab_prefixof(code); } *stackp++ = finchar = tab_suffixof(code); /* * And put them out in forward order */ do putchar(*--stackp); while (stackp > de_stack); /* * Generate the new entry. */ if ((code = free_ent) < maxmaxcode) { tab_prefixof(code) = (unsigned short)oldcode; tab_suffixof(code) = finchar; free_ent = code + 1; } /* * Remember previous code. */ oldcode = incode; } fflush(stdout); if (ferror(stdout)) writeerr(); } /***************************************************************** * TAG( getcode ) * * Read one code from the standard input. If EOF, return -1. * Inputs: * stdin * Outputs: * code or -1 is returned. */ static code_int getcode(void) { /* * On the VAX, it is important to have the register declarations in exactly * the order given, or the asm will break. */ code_int code; static int offset = 0, size = 0; static char_type buf[BITS]; int r_off, bits; char_type *bp = buf; if (clear_flg > 0 || offset >= size || free_ent > maxcode) { /* * If the next entry will be too big for the current code size, then we * must increase the size. This implies reading a new buffer full, too. */ if (free_ent > maxcode) { n_bits++; if (n_bits == maxbits) maxcode = maxmaxcode; /* won't get any bigger now */ else maxcode = MAXCODE(n_bits); } if (clear_flg > 0) { maxcode = MAXCODE(n_bits = INIT_BITS); clear_flg = 0; } size = fread(buf, 1, n_bits, stdin); if (size <= 0) return (-1); /* end of file */ offset = 0; /* Round size down to integral number of codes */ size = (size << 3) - (n_bits - 1); } r_off = offset; bits = n_bits; /* * Get to the first byte. */ bp += (r_off >> 3); r_off &= 7; /* Get first part (low order bits) */ code = (*bp++ >> r_off); bits -= (8 - r_off); r_off = 8 - r_off; /* now, offset into code word */ /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ if (bits >= 8) { code |= *bp++ << r_off; r_off += 8; bits -= 8; } /* high order bits. */ code |= (*bp & rmask[bits]) << r_off; offset += n_bits; return code; } static void writeerr(void) { perror(ofname); unlink(ofname); exit(1); } static void copystat(char *ifname, char *ofname) { struct stat statbuf; int mode; time_t timep[2]; fclose(stdout); if (stat(ifname, &statbuf)) { /* Get stat on input file */ perror(ifname); return; } if ((statbuf.st_mode & S_IFMT /* 0170000 */ ) != S_IFREG /* 0100000 */ ) { if (quiet) fprintf(stderr, "%s: ", ifname); fprintf(stderr, " -- not a regular file: unchanged"); exit_stat = 1; } else if (statbuf.st_nlink > 1 && (!force)) { if (quiet) fprintf(stderr, "%s: ", ifname); fprintf(stderr, " -- has %d other links: unchanged", statbuf.st_nlink - 1); exit_stat = 1; } else if (exit_stat == 2 && (!force)) { /* No compression: remove * file.Z */ if (!quiet) fprintf(stderr, " -- file unchanged"); } else { /* ***** Successful Compression ***** */ exit_stat = 0; mode = statbuf.st_mode & 07777; if (chmod(ofname, mode))/* Copy modes */ perror(ofname); chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */ timep[0] = statbuf.st_atime; timep[1] = statbuf.st_mtime; utime(ofname, timep); /* Update last accessed and modified times */ if (unlink(ifname)) /* Remove input file */ perror(ifname); if (!quiet) fprintf(stderr, " -- replaced with %s", ofname); return; /* Successful return */ } /* Unsuccessful return -- one of the tests failed */ if (unlink(ofname)) perror(ofname); } /* * This routine returns 1 if we are running in the foreground and stderr * is a tty. */ static int foreground(void) { if (bgnd_flag) { /* background? */ return (0); } else { /* foreground */ if (isatty(2)) { /* and stderr is a tty */ return (1); } else { return (0); } } } static int onintr(void) { unlink(ofname); exit(1); } static int oops(void) { /* wild pointer -- assume bad input */ if (do_decomp == 1) fprintf(stderr, "uncompress: corrupt input\n"); unlink(ofname); exit(1); } static void cl_block(void) { /* table clear for block compress */ long int rat; checkpoint = in_count + CHECK_GAP; if (in_count > 0x007fffff) {/* shift will overflow */ rat = bytes_out >> 8; if (rat == 0) { /* Don't divide by zero */ rat = 0x7fffffff; } else { rat = in_count / rat; } } else { rat = (in_count << 8) / bytes_out; /* 8 fractional bits */ } if (rat > ratio) { ratio = rat; } else { ratio = 0; cl_hash((count_int) hsize); free_ent = FIRST; clear_flg = 1; output((code_int) CLEAR); } } static void cl_hash(hsize) /* reset code table */ count_int hsize; { count_int *htab_p = htab + hsize; long i; long m1 = -1; i = hsize - 16; do { /* might use Sys V memset(3) here */ *(htab_p - 16) = m1; *(htab_p - 15) = m1; *(htab_p - 14) = m1; *(htab_p - 13) = m1; *(htab_p - 12) = m1; *(htab_p - 11) = m1; *(htab_p - 10) = m1; *(htab_p - 9) = m1; *(htab_p - 8) = m1; *(htab_p - 7) = m1; *(htab_p - 6) = m1; *(htab_p - 5) = m1; *(htab_p - 4) = m1; *(htab_p - 3) = m1; *(htab_p - 2) = m1; *(htab_p - 1) = m1; htab_p -= 16; } while ((i -= 16) >= 0); for (i += 16; i > 0; i--) *--htab_p = m1; }
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.