This is compress.c in view mode; [Download] [Up]
/*
** Copyright Michael O'Reilly. All rights reserved.
*/
#include "includes.h"
#include "debug.h"
/* Following used for debugging. */
#if 0
#define ASSERT(a) \
((a) ? 0: abort() )
#else
#define ASSERT(a)
#endif
#define BYTE_WIDTH_OUT tok_byte_width_out
#define BYTE_MASK_OUT tok_byte_mask_out
#define BYTE_MASK_IN tok_byte_mask_in
#define BYTE_WIDTH_IN tok_byte_width_in
int tok_byte_mask_out = 255,
tok_byte_width_out = 8;
int tok_byte_mask_in = 255,
tok_byte_width_in = 8;
int stat_comp_in = 0,
stat_comp_out = 0,
stat_uncomp_in = 0,
stat_uncomp_out = 0;
/*
** token 256 is reserved to signal that dict clearing should take place.
** token 257 is reserved for future use.
**
** the maz token size is 16 bits.
*/
#define DICT_SIZE 8192
#define DICT_CLEAR_2 4096
#define DICT_CLEAR_2_TSIZE 13
/* #define DEBUG **/
typedef struct node {
short ch; /* character for this node */
struct node *next_node; /* Pointer to the next link on this node. */
struct node *child; /* Pointer to a child of this node. */
int len; /* You tell me. */
int num; /* token number for this node */
} NODE;
#ifdef DEBUG
FILE * stdprn = 0;
#endif
#if 0
extern void * malloc (int);
extern void * realloc (char *, int);
#endif
void clear_table (NODE *, int);
int uncomp_dict[DICT_SIZE][2];
NODE comp_dict = {
0, 0, 0, 0, 0
};
static int c_token_mask,
c_token_bits,
unc_token_bits,
unc_token_mask,
comp_table_size,
uncomp_table_size;
static int stack[1024];
static int stk_ptr = 0;
int compress_init (void) {
int i;
if (comp_dict.child) {
for (i = 0; i < 256; ++i)
clear_table (&(comp_dict.child[i]), 255);
}
else
comp_dict.child = (NODE *) malloc (256 * sizeof (NODE));
comp_dict.len = 256;
comp_dict.ch = 0;
comp_dict.num = 0;
comp_dict.next_node = 0; /* Only one NODE in this node. */
for (i = 0; i < 256; ++i) {
uncomp_dict[i][0] = comp_dict.child[i].ch = i;
uncomp_dict[i][1] = -1;
comp_dict.child[i].num = i;
comp_dict.child[i].child = NULL;
comp_dict.child[i].len = 0;
comp_dict.child[i].next_node = &comp_dict.child[i+1];
}
comp_dict.child[255].next_node = (NODE *) 0;
comp_table_size = 258;
uncomp_table_size = 258;
c_token_bits = 9;
c_token_mask = 511;
unc_token_bits = 9;
unc_token_mask = 511;
#ifdef DEBUG
if (!stdprn) stdprn = fopen ("compress.debug", "w");
setbuf(stdprn , 0);
fprintf(stdprn,"init\n");
#endif
return 1;
}
void compress_shut (void) {
#ifdef DEBUG
fclose (stdprn);
#endif
}
/* gets size bits from starting from bit 'curr' in the bit stream data */
unsigned int get_token (un_char *data, int *curr, int max_size) {
unsigned long ret;
int byte,
bit,
read = 0,
tmp;
byte = *curr / BYTE_WIDTH_IN;
bit = *curr % BYTE_WIDTH_IN;
ret = 0;
while (read < unc_token_bits && byte < max_size) {
ret += ((int)(data[byte++] & BYTE_MASK_IN) >> bit) << read;
tmp = BYTE_WIDTH_IN - bit;
read += tmp;
bit = 0;
}
ret = ret & unc_token_mask;
*curr += unc_token_bits;
DEBUG_C(stderr, "c_got: %ld\n", ret);
return (unsigned int) ret;
}
void put_token (un_char *data, int *curr, unsigned int ch) {
int byte,
bit;
DEBUG_C(stderr, "c_put: %d\n", ch);
byte = *curr / BYTE_WIDTH_OUT;
bit = *curr % BYTE_WIDTH_OUT;
ch = ch & c_token_mask;
while (ch) {
data[byte] |= (ch&BYTE_MASK_OUT) << bit;
ch = ch >> (BYTE_WIDTH_OUT - bit);
bit = 0;
++byte;
}
*curr += c_token_bits;
}
/* Add a node to the list of free node's */
NODE * node_list = NULL;
void free_node (n)
NODE * n;
{
n -> next_node = node_list;
node_list = n;
}
/* Take a node of the list of free nodes, creating more nodes if there */
/* aren't enough to spare.. */
NODE * get_node (void) {
NODE * n;
if (!node_list) {
n = (NODE *) malloc (sizeof (*n));
if (!n) abort(); /* fatal */
return n;
}
n = node_list;
node_list = node_list -> next_node;
return n;
}
/* Free this node, and any nodes pointed to by this node. */
void free_nodes(NODE *t) {
if (!t) return;
if (t->child) free_nodes(t->child);
if (t->next_node) free_nodes(t->next_node);
free_node(t);
}
/* Clear all nodes larger than 'limit' */
/* The compression dictionary is a tree , built from a list of lists. */
/* The tree is a 1-256 tree. (each node can have up to 256 children). */
/* This is implemented as each node being a list of child pointers. */
/* Each node has a l for the child node, and an n pointer for the */
/* next node on this level. */
/* Thus, searching the tree consists of running down the 'n' list */
/* untill the first character is matched, and then following the 'l' */
/* link. Then running down the 'n' again, and so on. */
void clear_table (dict, limit)
NODE * dict;
int limit;
{
NODE * t, *t1, *tmp;
/* If we have no dictionary do nothing. */
if (!dict)
return;
/* This is fairly messy. We run across */
/* the current node, and build a list */
/* of all the nodes we still want. One */
/* of the properties of the tree we */
/* take advantage of that that all */
/* lower nodes are guarenteed to */
/* higher token numbers. */
tmp = 0;
for (t = dict -> child; t;) /* Run across the current tree node. */
if (t -> num > limit) { /* If this node is too large... */
t1 = t; /* rember current node, */
t = t->next_node; /* move loop pointer to next one */
t1->next_node = 0; /* and free just the one we had. */
free_nodes(t1);
}
else {
clear_table (t, limit); /* Ok. Recurse through and look */
/* through the lower levels. */
t1 = t->next_node; /* Move to next node, */
t->next_node = tmp; /* and put the current node on the */
/* list of nodes to remember. */
tmp = t;
t = t1;
}
dict->child = tmp; /* ok. We built the list, now store it. */
}
int compress (un_char *outpath, int maxlen, int prefix) {
int out_fd = 0;
int suffix;
int max_bytes = 2048; /* maximum 8:1 compression */
int *kludge;
NODE * prfix, *pt;
int i,
lim;
extern int stat_rare_out;
if ((prefix) < 0) /* Get a byte if we can. */
return 0; /* we can't, so we managed to compress */
/* zero bytes.. */
++stat_comp_in;
prfix = &comp_dict.child[prefix]; /* Move to right point in dictionary.. */
kludge = (int *) outpath; /* this relies on 4 byte int's */
for (i = 0; i < 65; ++i) /* clear the output buffer (need to do */
/* this, as the token out routines use */
/* OR'ing). Assumes the buffer can */
/* hold 260 characters. Thanks to */
/* Janne Sinkkonen for pointing this */
/* out. *blush* */
kludge[i] = 0;
if (comp_table_size >= DICT_SIZE - 2) { /* if table is full. */
#if 0
fprintf(stderr, "Clearing dict table\n");
#endif
for (i = 0; i < 256; ++i)
clear_table (&(comp_dict.child[i]), DICT_CLEAR_2 - 1);
comp_table_size = DICT_CLEAR_2;
put_token (outpath, &out_fd, 256); /* Clear table. */
c_token_mask = 511;
c_token_bits = 9;
}
lim = (maxlen - 4) * BYTE_WIDTH_OUT; /* The maximum number of bits to */
/* output. */
#ifdef DEBUG
fprintf(stdprn, "Setting lim to %d\n", lim);
#endif
while (out_fd < lim && max_bytes >= 0) { /* while we have output less then */
/* 'lim' bits. */
suffix = get_client_byte(); /* get the next byte. */
--max_bytes;
++stat_comp_in;
++stat_rare_out;
DEBUG_C(stderr, "Got byte == %d\n", suffix);
if (suffix < 0) /* We has nothing left to compress so exit. */
break;
suffix &= 255; /* Make it 8 bits. Not needed */
/* theoretically, but.. */
for (pt = prfix -> child; pt; pt = pt -> next_node)
if (pt -> ch == suffix)
break;
if (pt) { /* found */
#ifdef DEBUG
fprintf(stdprn, " found\n");
#endif
prfix = pt; /* Just remember new prefix. */
}
else { /* Ok. We have no token for this */
/* sequence, so we make a new token */
/* for it. */
if (comp_table_size < DICT_SIZE - 1) { /* if the dict isn't */
/* full.. */
pt = get_node (); /* Get a new node.. */
pt -> next_node = prfix -> child; /* and put it into tree. */
prfix -> child = pt;
pt -> ch = suffix;
pt -> child = NULL; /* No child strings. */
pt -> num = comp_table_size++; /* Set the token number. */
}
/* Seeing as we are useing explicit */
/* signaling to increase the token */
/* size, we might as well put it off */
/* for as long as we can. */
while (prfix->num > c_token_mask) { /* do we need to increase token */
/* size?? */
put_token(outpath, &out_fd, 257);
c_token_mask = (c_token_mask <<1) + 1;
c_token_bits ++;
}
put_token (outpath, &out_fd, prfix -> num); /* we output the */
/* token as well. */
prfix = &comp_dict.child[suffix]; /* and init prfix to the new value. */
}
}
while (prfix->num > c_token_mask) { /* Just in case we need to increase */
put_token(outpath, &out_fd, 257); /* the token size. */
c_token_mask = (c_token_mask <<1) + 1;
c_token_bits ++;
}
put_token (outpath, &out_fd, prfix -> num); /* Put the last token we */
/* had out. */
stat_comp_out += (out_fd+BYTE_WIDTH_OUT - 1) / BYTE_WIDTH_OUT;
return out_fd;
}
int uncompress(un_char *data, int len, un_char *outpath) {
int out_fd = 0, in_fd = 0;
static int token, oldtoken, newtoken, finaltoken;
int size;
static int started = 0;
#ifdef DEBUG
fprintf(stdprn, "Uncompress\n");
#endif
if (uncomp_dict == NULL) return -1;
stk_ptr = in_fd = out_fd = 0;
size = len * BYTE_WIDTH_IN ;
if (!started) {
ASSERT(in_fd == 0);
token = get_token(data , &in_fd, len);
oldtoken= token;
finaltoken = token;
outpath[out_fd++]= token;
#ifdef DEBUG
fprintf(stdprn, "[%d]", token);
#endif
}
if (started == 0) started = 2;
else started = 1;
#ifdef DEBUG
fprintf(stdprn, "Uncompress 1\n");
#endif
while (in_fd + unc_token_bits <= size) {
token = get_token(data, &in_fd, len);
ASSERT(in_fd <= size);
#ifdef DEBUG
fprintf(stdprn, "new token == %d\n", token);
#endif
if (token == 256) {
uncomp_table_size = DICT_CLEAR_2;
unc_token_bits = 9;
unc_token_mask = 511;
continue;
}
else if (token == 257) { /* increase token size by one bit. */
unc_token_bits ++;
unc_token_mask <<=1;
++unc_token_mask;
continue;
}
newtoken = token;
if (uncomp_table_size <= newtoken) {
stack[stk_ptr ++ ] = finaltoken;
ASSERT(stk_ptr > 0);
ASSERT(stk_ptr < 1024);
#ifdef DEBUG
fprintf(stdprn, "[%d]", finaltoken);
#endif
token = oldtoken;
}
while (uncomp_dict[token][1]>=0) {
stack[stk_ptr++] = uncomp_dict[token][1];
ASSERT(stk_ptr < 1024);
ASSERT(stk_ptr > 0);
token = uncomp_dict[token][0];
}
outpath[out_fd++] = token;
ASSERT(out_fd < 2048);
ASSERT(out_fd > 0);
#ifdef DEBUG
fprintf(stdprn, "[ %d]", token);
#endif
finaltoken = token;
while (stk_ptr) {
outpath[out_fd++] = stack[--stk_ptr];
ASSERT(out_fd < 2048);
ASSERT(out_fd > 0);
ASSERT(stk_ptr >= 0);
#ifdef DEBUG
fprintf(stdprn, "[%d]", stack[stk_ptr]);
#endif
}
ASSERT(uncomp_table_size < (DICT_SIZE + 1));
if (uncomp_table_size <DICT_SIZE) {
/* if (oldtoken == token) abort(); */
uncomp_dict[uncomp_table_size][0] = oldtoken;
uncomp_dict[uncomp_table_size][1] = token;
#ifdef DEBUG
fprintf(stdprn, "Adding %d %d to table as %d\n", oldtoken, token, uncomp_table_size);
#endif
if (started ==1 ) started = 2;
else ++uncomp_table_size;
}
oldtoken = newtoken;
}
return out_fd;
}
These are the contents of the former NiCE NeXT User Group NeXTSTEP/OpenStep software archive, currently hosted by Netfuture.ch.