about summary refs log tree commit diff
path: root/src/engine/e_huffman.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/engine/e_huffman.c')
-rw-r--r--src/engine/e_huffman.c507
1 files changed, 193 insertions, 314 deletions
diff --git a/src/engine/e_huffman.c b/src/engine/e_huffman.c
index dfb5c817..8adbb1f7 100644
--- a/src/engine/e_huffman.c
+++ b/src/engine/e_huffman.c
@@ -1,383 +1,262 @@
-#include <base/system.h>
-#include <stdlib.h>
-#include <string.h>
-#include <engine/e_huffman.h>
+#include <stdlib.h> /* qsort */
+#include <memory.h> /* memset */
+#include "e_huffman.h"
 
-void huffman_init(HUFFSTATE *huff)
+typedef struct HUFFMAN_CONSTRUCT_NODE
 {
-	mem_zero(huff, sizeof(HUFFSTATE));
-	huff->nodes[0].frequency = 1;
-	huff->nodes[0].symbol_size = -1;
-	huff->num_symbols++;
-	huff->num_nodes++;
-}
-
-void huffman_add_symbol(HUFFSTATE *huff, int frequency, int size, unsigned char *symbol)
-{
-	huff->nodes[huff->num_nodes].frequency = frequency;
-	huff->nodes[huff->num_nodes].symbol_size = size;
-	mem_copy(huff->nodes[huff->num_nodes].symbol, symbol, size);
-	huff->num_nodes++;
-	huff->num_symbols++;
-}
-
+	unsigned short node_id;
+ 	int frequency;
+} HUFFMAN_CONSTRUCT_NODE;
 
 static int sort_func(const void *a, const void *b)
 {
-	if((*(HUFFNODE **)a)->frequency > (*(HUFFNODE **)b)->frequency)
+	if((*(HUFFMAN_CONSTRUCT_NODE **)a)->frequency > (*(HUFFMAN_CONSTRUCT_NODE **)b)->frequency)
 		return -1;
-	if((*(HUFFNODE **)a)->frequency < (*(HUFFNODE **)b)->frequency)
+	if((*(HUFFMAN_CONSTRUCT_NODE **)a)->frequency < (*(HUFFMAN_CONSTRUCT_NODE **)b)->frequency)
 		return 1;
 	return 0;
 }
 
-void huffman_setbits_r(HUFFNODE *node, int bits, int depth)
+static void huffman_setbits_r(HUFFMAN_STATE *huff, HUFFMAN_NODE *node, int bits, int depth)
 {
-	if(node->one)
-		huffman_setbits_r(node->one, (bits<<1)|1, depth+1);
-	if(node->zero)
-		huffman_setbits_r(node->zero, (bits<<1), depth+1);
+	if(node->leafs[1] != 0xffff)
+		huffman_setbits_r(huff, &huff->nodes[node->leafs[1]], bits|(1<<depth), depth+1);
+	if(node->leafs[0] != 0xffff)
+		huffman_setbits_r(huff, &huff->nodes[node->leafs[0]], bits, depth+1);
 		
-	if(node->symbol_size)
+	if(node->num_bits)
 	{
 		node->bits = bits;
 		node->num_bits = depth;
 	}
 }
-	
-void huffman_checktree_r(HUFFNODE *node, int bits, int depth)
+
+static void huffman_construct_tree(HUFFMAN_STATE *huff, const unsigned *frequencies)
 {
-	if(node->one)
-		huffman_checktree_r(node->one, (bits<<1)|1, depth+1);
-	if(node->zero)
-		huffman_checktree_r(node->zero, (bits<<1), depth+1);
-		
-	if(node->symbol_size)
+	HUFFMAN_CONSTRUCT_NODE nodes_left_storage[HUFFMAN_MAX_NODES];
+	HUFFMAN_CONSTRUCT_NODE *nodes_left[HUFFMAN_MAX_NODES];
+	int num_nodes_left = HUFFMAN_MAX_SYMBOLS;
+	int i;
+
+	/* add the symbols */
+	for(i = 0; i < HUFFMAN_MAX_SYMBOLS; i++)
 	{
-		/*dbg_msg("", "%p %p %d %d %d", node->one, node->zero, node->symbol[0], node->bits, node->num_bits);*/
-		
-		if(node->bits != bits || node->num_bits != depth)
-		{
-			dbg_msg("", "crap!   %d %d=%d   %d!=%d", node->bits>>1, node->bits, bits, node->num_bits , depth);
-			/*dbg_msg("", "%p %p %d", node->one, node->zero, node->symbol[0]);*/
-		}
-	}
-}
+		huff->nodes[i].num_bits = -1;
+		huff->nodes[i].symbol = i;
+		huff->nodes[i].leafs[0] = -1;
+		huff->nodes[i].leafs[1] = -1;
 
-void huffman_construct_tree(HUFFSTATE *huff)
-{
-	HUFFNODE *nodes_left[MAX_NODES];
-	int num_nodes_left = huff->num_nodes;
-	int i, k;
+		if(i == HUFFMAN_EOF_SYMBOL)
+			nodes_left_storage[i].frequency = 1;
+		else
+			nodes_left_storage[i].frequency = frequencies[i];
+		nodes_left_storage[i].node_id = i;
+		nodes_left[i] = &nodes_left_storage[i];
+	}
 
-	for(i = 0; i < num_nodes_left; i++)
-		nodes_left[i] = &huff->nodes[i];
+	huff->num_nodes = HUFFMAN_MAX_SYMBOLS;
 	
 	/* construct the table */
 	while(num_nodes_left > 1)
 	{
-		qsort(nodes_left, num_nodes_left, sizeof(HUFFNODE *), sort_func);
+		qsort(nodes_left, num_nodes_left, sizeof(HUFFMAN_CONSTRUCT_NODE *), sort_func);
 		
-		huff->nodes[huff->num_nodes].symbol_size = 0;
-		huff->nodes[huff->num_nodes].frequency = nodes_left[num_nodes_left-1]->frequency + nodes_left[num_nodes_left-2]->frequency;
-		huff->nodes[huff->num_nodes].zero = nodes_left[num_nodes_left-1];
-		huff->nodes[huff->num_nodes].one = nodes_left[num_nodes_left-2];
-		nodes_left[num_nodes_left-1]->parent = &huff->nodes[huff->num_nodes];
-		nodes_left[num_nodes_left-2]->parent = &huff->nodes[huff->num_nodes];
-		nodes_left[num_nodes_left-2] = &huff->nodes[huff->num_nodes];
+		huff->nodes[huff->num_nodes].num_bits = 0;
+		huff->nodes[huff->num_nodes].leafs[0] = nodes_left[num_nodes_left-1]->node_id;
+		huff->nodes[huff->num_nodes].leafs[1] = nodes_left[num_nodes_left-2]->node_id;
+		nodes_left[num_nodes_left-2]->node_id = huff->num_nodes;
+		nodes_left[num_nodes_left-2]->frequency = nodes_left[num_nodes_left-1]->frequency + nodes_left[num_nodes_left-2]->frequency;
 		huff->num_nodes++;
 		num_nodes_left--;
 	}
 
-	dbg_msg("", "%d", huff->num_nodes);
-	for(i = 0; i < huff->num_nodes; i++)
-	{
-		if(huff->nodes[i].symbol_size && (huff->nodes[i].one || huff->nodes[i].zero))
-			dbg_msg("", "tree strangeness");
-			
-		if(!huff->nodes[i].parent)
-		{
-			dbg_msg("", "root %p %p", huff->nodes[i].one, huff->nodes[i].zero);
-			huff->start_node = &huff->nodes[i];
-		}
-	}
+	/* set start node */
+	huff->start_node = &huff->nodes[huff->num_nodes-1];
 	
-	huffman_setbits_r(huff->start_node, 0, 0);
-	
-	/*
-	for(i = 0; i < huff->num_symbols; i++)
-	{
-		unsigned bits = 0;
-		int num_bits = 0;
-		HUFFNODE *n = &huff->nodes[i];
-		HUFFNODE *p = n;
-		HUFFNODE *c = n->parent;
-		
-		while(c)
-		{
-			num_bits++;
-			if(c->one == p)
-				bits |= 1;
-			bits <<= 1;
-			p = c;
-			c = c->parent;
-		}
+	/* build symbol bits */
+	huffman_setbits_r(huff, huff->start_node, 0, 0);
+}
 
-		n->bits = bits;
-		n->num_bits = num_bits;
-	}*/
-	
-	huffman_checktree_r(huff->start_node, 0, 0);
+void huffman_init(HUFFMAN_STATE *huff, const unsigned *frequencies)
+{
+	int i;
 
-	for(i = 0; i < huff->num_symbols; i++)
+	/* make sure to cleanout every thing */
+	memset(huff, 0, sizeof(HUFFMAN_STATE));
+
+	/* construct the tree */
+	huffman_construct_tree(huff, frequencies);
+
+	/* build decode LUT */
+	for(i = 0; i < HUFFMAN_LUTSIZE; i++)
 	{
-		for(k = 0; k < huff->num_symbols; k++)
+		unsigned bits = i;
+		int k;
+		HUFFMAN_NODE *node = huff->start_node;
+		for(k = 0; k < HUFFMAN_LUTBITS; k++)
 		{
-			if(k == i)
-				continue;
-			if(huff->nodes[i].num_bits == huff->nodes[k].num_bits && huff->nodes[i].bits == huff->nodes[k].bits)
-				dbg_msg("", "tree error %d %d %d", i, k, huff->nodes[i].num_bits);
-		}
-	}
+			node = &huff->nodes[node->leafs[bits&1]];
+			bits >>= 1;
 
-}
+			if(!node)
+				break;
 
-typedef struct
-{
-	unsigned char *data;
-	unsigned char current_bits;
-	int num;
-} HUFFBITIO;
+			if(node->num_bits)
+			{
+				huff->decode_lut[i] = node;
+				break;
+			}
+		}
 
-int debug_count = 0;
+		if(k == HUFFMAN_LUTBITS)
+			huff->decode_lut[i] = node;
+	}
 
-static void bitio_init(HUFFBITIO *bitio, unsigned char *data)
-{
-	bitio->data = data;
-	bitio->num = 0;
-	bitio->current_bits = 0;
 }
 
-static void bitio_flush(HUFFBITIO *bitio)
+/*****************************************************************/
+int huffman_compress(HUFFMAN_STATE *huff, const void *input, int input_size, void *output, int output_size)
 {
-	if(bitio->num == 8)
-		*bitio->data = bitio->current_bits << (8-bitio->num);
-	else
-		*bitio->data = bitio->current_bits;
-	bitio->data++;
-	bitio->num = 0;
-	bitio->current_bits = 0;
-}
+	/* this macro loads a symbol for a byte into bits and bitcount */
+#define HUFFMAN_MACRO_LOADSYMBOL(sym) \
+	bits |= huff->nodes[sym].bits << bitcount; \
+	bitcount += huff->nodes[sym].num_bits;
 
-static void bitio_write(HUFFBITIO *bitio, int bit)
-{
-	bitio->current_bits = (bitio->current_bits<<1)|bit;
-	bitio->num++;
-	if(bitio->num == 8)
-		bitio_flush(bitio);
-		
-	if(debug_count)
-	{
-		debug_count--;
-		dbg_msg("", "out %d", bit);
+	/* this macro writes the symbol stored in bits and bitcount to the dst pointer */
+#define HUFFMAN_MACRO_WRITE() \
+	while(bitcount >= 8) \
+	{ \
+		*dst++ = (unsigned char)(bits&0xff); \
+		if(dst == dst_end) \
+			return -1; \
+		bits >>= 8; \
+		bitcount -= 8; \
 	}
-}
 
-static int bitio_read(HUFFBITIO *bitio)
-{
-	int bit;
-	
-	if(!bitio->num)
-	{
-		bitio->current_bits = *bitio->data;
-		bitio->data++;
-		bitio->num = 8;
-	}
-	
-	bitio->num--;
-	bit = (bitio->current_bits>>bitio->num)&1;
+	/* setup buffer pointers */
+	const unsigned char *src = (const unsigned char *)input;
+	const unsigned char *src_end = src + input_size;
+	unsigned char *dst = (unsigned char *)output;
+	unsigned char *dst_end = dst + output_size;
 
-	if(debug_count)
-	{
-		debug_count--;
-		dbg_msg("", "in %d", bit);
-	}
-	return bit;
-}
+	/* symbol variables */
+	unsigned bits = 0;
+	unsigned bitcount = 0;
 
-int huffman_compress(HUFFSTATE *huff, const void *input, int input_size, void *output, int output_size)
-{
-	const unsigned char *src = (const unsigned char *)input;
-	int ret = 0;
-	int quit = 0;
-	HUFFBITIO io;
-	
-	bitio_init(&io, (unsigned char *)output);
-	
-	while(!quit)
+	/* make sure that we have data that we want to compress */
+	if(input_size)
 	{
-		int i;
-		int best_match = -1;
-		int best_match_size = 0;
-		
-		if(input_size)
-		{
-			for(i = 0; i < huff->num_symbols; i++)
-			{
-				if(huff->nodes[i].symbol_size <= input_size && huff->nodes[i].symbol_size > best_match_size)
-				{
-					if(memcmp(src, huff->nodes[i].symbol, huff->nodes[i].symbol_size) == 0)
-					{
-						best_match = i;
-						best_match_size = huff->nodes[i].symbol_size;
-					}
-				}
-			}
-		}
-		else
-		{
-			best_match = 0;
-			best_match_size = 0;
-			quit = 1;
-		}
-		
-		
-		if(best_match == -1)
+		/* {A} load the first symbol */
+		int symbol = *src++;
+
+		while(src != src_end)
 		{
-			dbg_msg("huffman", "couldn't find symbol! %d left", input_size);
-			return -1;
+			/* {B} load the symbol */
+			HUFFMAN_MACRO_LOADSYMBOL(symbol)
+
+			/* {C} fetch next symbol, this is done here because it will reduce dependency in the code */
+			symbol = *src++;
+
+			/* {B} write the symbol loaded at */
+			HUFFMAN_MACRO_WRITE()
 		}
-		
-		i = huff->nodes[best_match].num_bits;
-		for(i = huff->nodes[best_match].num_bits-1; i >= 0; i--)
-			bitio_write(&io, (huff->nodes[best_match].bits>>i)&1);
 
-		if(debug_count)		
-			dbg_msg("", "--");
-		
-		ret += huff->nodes[best_match].num_bits;
-		input_size -= best_match_size;
-		src += best_match_size;
+		/* write the last symbol loaded from {C} or {A} in the case of only 1 byte input buffer */
+		HUFFMAN_MACRO_LOADSYMBOL(symbol)
+		HUFFMAN_MACRO_WRITE()
 	}
-	
-	bitio_flush(&io);
-	return ret;
+
+	/* write EOF symbol */
+	HUFFMAN_MACRO_LOADSYMBOL(HUFFMAN_EOF_SYMBOL)
+	HUFFMAN_MACRO_WRITE()
+
+	/* write out the last bits */
+	*dst++ = bits;
+
+	/* return the size of the output */
+	return (int)(dst - (const unsigned char *)output);
+
+	/* remove macros */
+#undef HUFFMAN_MACRO_LOADSYMBOL
+#undef HUFFMAN_MACRO_WRITE
 }
 
-int huffman_decompress(HUFFSTATE *huff, const void *input, int input_size, void *output, int output_size)
+/*****************************************************************/
+int huffman_decompress(HUFFMAN_STATE *huff, const void *input, int input_size, void *output, int output_size)
 {
+	/* setup buffer pointers */
 	unsigned char *dst = (unsigned char *)output;
-	HUFFBITIO io;
-	int size = 0;
-	
-	bitio_init(&io, (unsigned char *)input);
-	
-	while(size < 1401)
+	unsigned char *src = (unsigned char *)input;
+	unsigned char *dst_end = dst + output_size;
+	unsigned char *src_end = src + input_size;
+
+	unsigned bits = 0;
+	unsigned bitcount = 0;
+
+	HUFFMAN_NODE *eof = &huff->nodes[HUFFMAN_EOF_SYMBOL];
+	HUFFMAN_NODE *node = 0;
+
+	while(1)
 	{
-		HUFFNODE *node = huff->start_node;
-		int i;
-		
-		while(node)
+		/* {A} try to load a node now, this will reduce dependency at location {D} */
+		node = 0;
+		if(bitcount >= HUFFMAN_LUTBITS)
+			node = huff->decode_lut[bits&HUFFMAN_LUTMASK];
+
+		/* {B} fill with new bits */
+		while(bitcount < 24 && src != src_end)
 		{
-			if(node->symbol_size)
-				break;
-				
-			if(bitio_read(&io))
-				node = node->one;
-			else
-				node = node->zero;
+			bits |= (*src++) << bitcount;
+			bitcount += 8;
 		}
 
-		if(debug_count)		
-			dbg_msg("", "-- %d %d", node->bits, node->num_bits);
-		
-		/* check for eof */
-		if(node == &huff->nodes[0])
-			break;
-		
-		for(i = 0; i < node->symbol_size; i++)
+		/* {C} load symbol now if we didn't that earlier at location {A} */
+		if(!node)
+			node = huff->decode_lut[bits&HUFFMAN_LUTMASK];
+
+		/* {D} check if we hit a symbol already */
+		if(node->num_bits)
 		{
-			*dst++ = node->symbol[i];
-			size++;
+			/* remove the bits for that symbol */
+			bits >>= node->num_bits;
+			bitcount -= node->num_bits;
 		}
-	}
-	
-	return size;
-}
-
-unsigned char test_data[1024*64];
-int test_data_size;
+		else
+		{
+			/* remove the bits that the lut checked up for us */
+			bits >>= HUFFMAN_LUTBITS;
+			bitcount -= HUFFMAN_LUTBITS;
 
-unsigned char compressed_data[1024*64];
-int compressed_data_size;
+			/* walk the tree bit by bit */
+			while(1)
+			{
+				/* traverse tree */
+				node = &huff->nodes[node->leafs[bits&1]];
 
-unsigned char output_data[1024*64];
-int output_data_size;
+				/* remove bit */
+				bitcount--;
+				bits >>= 1;
 
-HUFFSTATE state;
+				/* check if we hit a symbol */
+				if(node->num_bits)
+					break;
 
-int huffman_test()
-{
-	huffman_init(&state);
-	
-	dbg_msg("", "test test");
-	
-	/* bitio testing */
-	{
-		char bits[] = {1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1,1,1,1,1,1,1,0};
-		unsigned char buf[64];
-		int i;
-		HUFFBITIO io;
-		
-		bitio_init(&io, buf);
-		for(i = 0; i < sizeof(bits); i++)
-			bitio_write(&io, bits[i]);
-			
-		bitio_flush(&io);
-		bitio_init(&io, buf);
-		for(i = 0; i < sizeof(bits); i++)
-		{
-			if(bitio_read(&io) != bits[i])
-				dbg_msg("", "bitio failed at %d", i);
-		}
-	}
-	
-	/* read test data */
-	{
-		IOHANDLE io = io_open("license.txt", IOFLAG_READ);
-		test_data_size = io_read(io, test_data, sizeof(test_data));
-		io_close(io);
-	}
-	
-	/* add symbols */
-	{
-		int counts[256] = {0};
-		int i;
-		for(i = 0; i < test_data_size; i++)
-			counts[test_data[i]]++;
-		
-		for(i = 0; i < 256; i++)
-		{
-			unsigned char symbol = (unsigned char )i;
-			if(counts[i])
-				huffman_add_symbol(&state, counts[i], 1, &symbol);
+				/* no more bits, decoding error */
+				if(bitcount == 0)
+					return -1;
+			}
 		}
+
+		/* check for eof */
+		if(node == eof)
+			break;
+
+		/* output character */
+		if(dst == dst_end)
+			return -1;
+		*dst++ = node->symbol;
 	}
-	
-	huffman_construct_tree(&state);
-	/*debug_count = 20;*/
-	compressed_data_size = huffman_compress(&state, test_data, test_data_size, compressed_data, sizeof(compressed_data));
-	/*debug_count = 20;*/
-	output_data_size = huffman_decompress(&state, compressed_data, compressed_data_size, output_data, sizeof(output_data));
-	
-	dbg_msg("huffman", "%d -> %d -> %d", test_data_size, compressed_data_size/8, output_data_size);
-	
-	/* write test data */
-	{
-		IOHANDLE io = io_open("out.txt", IOFLAG_WRITE);
-		io_write(io, output_data, output_data_size);
-		io_close(io);
-	}	
-	
-	return 0;
+
+	/* return the size of the decompressed buffer */
+	return (int)(dst - (const unsigned char *)output);
 }