about summary refs log tree commit diff
path: root/src/engine/e_huffman.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/engine/e_huffman.cpp')
-rw-r--r--src/engine/e_huffman.cpp276
1 files changed, 276 insertions, 0 deletions
diff --git a/src/engine/e_huffman.cpp b/src/engine/e_huffman.cpp
new file mode 100644
index 00000000..43914010
--- /dev/null
+++ b/src/engine/e_huffman.cpp
@@ -0,0 +1,276 @@
+#include <memory.h> /* memset */
+#include "e_huffman.h"
+
+typedef struct HUFFMAN_CONSTRUCT_NODE
+{
+	unsigned short node_id;
+ 	int frequency;
+} HUFFMAN_CONSTRUCT_NODE;
+
+static void huffman_setbits_r(HUFFMAN_STATE *huff, HUFFMAN_NODE *node, int bits, int depth)
+{
+	if(node->leafs[1] != 0xffff)
+		huffman_setbits_r(huff, &huff->nodes[node->leafs[1]], bits|(1<<depth), depth+1);
+	if(node->leafs[0] != 0xffff)
+		huffman_setbits_r(huff, &huff->nodes[node->leafs[0]], bits, depth+1);
+		
+	if(node->num_bits)
+	{
+		node->bits = bits;
+		node->num_bits = depth;
+	}
+}
+
+/* TODO: this should be something faster, but it's enough for now */
+void bubblesort(HUFFMAN_CONSTRUCT_NODE **list, int size)
+{
+	int i, changed = 1;
+	HUFFMAN_CONSTRUCT_NODE *temp;
+	
+	while(changed)
+	{
+		changed = 0;
+		for(i = 0; i < size-1; i++)
+		{
+			if(list[i]->frequency < list[i+1]->frequency)
+			{
+				temp = list[i];
+				list[i] = list[i+1];
+				list[i+1] = temp;
+				changed = 1;
+			}
+		}
+	}
+}
+
+static void huffman_construct_tree(HUFFMAN_STATE *huff, const unsigned *frequencies)
+{
+	HUFFMAN_CONSTRUCT_NODE nodes_left_storage[HUFFMAN_MAX_SYMBOLS];
+	HUFFMAN_CONSTRUCT_NODE *nodes_left[HUFFMAN_MAX_SYMBOLS];
+	int num_nodes_left = HUFFMAN_MAX_SYMBOLS;
+	int i;
+
+	/* add the symbols */
+	for(i = 0; i < HUFFMAN_MAX_SYMBOLS; i++)
+	{
+		huff->nodes[i].num_bits = -1;
+		huff->nodes[i].symbol = i;
+		huff->nodes[i].leafs[0] = -1;
+		huff->nodes[i].leafs[1] = -1;
+
+		if(i == HUFFMAN_EOF_SYMBOL)
+			nodes_left_storage[i].frequency = 1;
+		else
+			nodes_left_storage[i].frequency = frequencies[i];
+		nodes_left_storage[i].node_id = i;
+		nodes_left[i] = &nodes_left_storage[i];
+
+	}
+	huff->num_nodes = HUFFMAN_MAX_SYMBOLS;
+	
+	/* construct the table */
+	while(num_nodes_left > 1)
+	{
+		/* we can't rely on stdlib's qsort for this, it can generate different results on different implementations */
+		bubblesort(nodes_left, num_nodes_left);
+		
+		huff->nodes[huff->num_nodes].num_bits = 0;
+		huff->nodes[huff->num_nodes].leafs[0] = nodes_left[num_nodes_left-1]->node_id;
+		huff->nodes[huff->num_nodes].leafs[1] = nodes_left[num_nodes_left-2]->node_id;
+		nodes_left[num_nodes_left-2]->node_id = huff->num_nodes;
+		nodes_left[num_nodes_left-2]->frequency = nodes_left[num_nodes_left-1]->frequency + nodes_left[num_nodes_left-2]->frequency;
+
+		huff->num_nodes++;
+		num_nodes_left--;
+	}
+
+	/* set start node */
+	huff->start_node = &huff->nodes[huff->num_nodes-1];
+	
+	/* build symbol bits */
+	huffman_setbits_r(huff, huff->start_node, 0, 0);
+}
+
+void huffman_init(HUFFMAN_STATE *huff, const unsigned *frequencies)
+{
+	int i;
+
+	/* make sure to cleanout every thing */
+	memset(huff, 0, sizeof(HUFFMAN_STATE));
+
+	/* construct the tree */
+	huffman_construct_tree(huff, frequencies);
+
+	/* build decode LUT */
+	for(i = 0; i < HUFFMAN_LUTSIZE; i++)
+	{
+		unsigned bits = i;
+		int k;
+		HUFFMAN_NODE *node = huff->start_node;
+		for(k = 0; k < HUFFMAN_LUTBITS; k++)
+		{
+			node = &huff->nodes[node->leafs[bits&1]];
+			bits >>= 1;
+
+			if(!node)
+				break;
+
+			if(node->num_bits)
+			{
+				huff->decode_lut[i] = node;
+				break;
+			}
+		}
+
+		if(k == HUFFMAN_LUTBITS)
+			huff->decode_lut[i] = node;
+	}
+
+}
+
+/*****************************************************************/
+int huffman_compress(HUFFMAN_STATE *huff, const void *input, int input_size, void *output, int output_size)
+{
+	/* this macro loads a symbol for a byte into bits and bitcount */
+#define HUFFMAN_MACRO_LOADSYMBOL(sym) \
+	bits |= huff->nodes[sym].bits << bitcount; \
+	bitcount += huff->nodes[sym].num_bits;
+
+	/* this macro writes the symbol stored in bits and bitcount to the dst pointer */
+#define HUFFMAN_MACRO_WRITE() \
+	while(bitcount >= 8) \
+	{ \
+		*dst++ = (unsigned char)(bits&0xff); \
+		if(dst == dst_end) \
+			return -1; \
+		bits >>= 8; \
+		bitcount -= 8; \
+	}
+
+	/* setup buffer pointers */
+	const unsigned char *src = (const unsigned char *)input;
+	const unsigned char *src_end = src + input_size;
+	unsigned char *dst = (unsigned char *)output;
+	unsigned char *dst_end = dst + output_size;
+
+	/* symbol variables */
+	unsigned bits = 0;
+	unsigned bitcount = 0;
+
+	/* make sure that we have data that we want to compress */
+	if(input_size)
+	{
+		/* {A} load the first symbol */
+		int symbol = *src++;
+
+		while(src != src_end)
+		{
+			/* {B} load the symbol */
+			HUFFMAN_MACRO_LOADSYMBOL(symbol)
+
+			/* {C} fetch next symbol, this is done here because it will reduce dependency in the code */
+			symbol = *src++;
+
+			/* {B} write the symbol loaded at */
+			HUFFMAN_MACRO_WRITE()
+		}
+
+		/* write the last symbol loaded from {C} or {A} in the case of only 1 byte input buffer */
+		HUFFMAN_MACRO_LOADSYMBOL(symbol)
+		HUFFMAN_MACRO_WRITE()
+	}
+
+	/* write EOF symbol */
+	HUFFMAN_MACRO_LOADSYMBOL(HUFFMAN_EOF_SYMBOL)
+	HUFFMAN_MACRO_WRITE()
+
+	/* write out the last bits */
+	*dst++ = bits;
+
+	/* return the size of the output */
+	return (int)(dst - (const unsigned char *)output);
+
+	/* remove macros */
+#undef HUFFMAN_MACRO_LOADSYMBOL
+#undef HUFFMAN_MACRO_WRITE
+}
+
+/*****************************************************************/
+int huffman_decompress(HUFFMAN_STATE *huff, const void *input, int input_size, void *output, int output_size)
+{
+	/* setup buffer pointers */
+	unsigned char *dst = (unsigned char *)output;
+	unsigned char *src = (unsigned char *)input;
+	unsigned char *dst_end = dst + output_size;
+	unsigned char *src_end = src + input_size;
+
+	unsigned bits = 0;
+	unsigned bitcount = 0;
+
+	HUFFMAN_NODE *eof = &huff->nodes[HUFFMAN_EOF_SYMBOL];
+	HUFFMAN_NODE *node = 0;
+
+	while(1)
+	{
+		/* {A} try to load a node now, this will reduce dependency at location {D} */
+		node = 0;
+		if(bitcount >= HUFFMAN_LUTBITS)
+			node = huff->decode_lut[bits&HUFFMAN_LUTMASK];
+
+		/* {B} fill with new bits */
+		while(bitcount < 24 && src != src_end)
+		{
+			bits |= (*src++) << bitcount;
+			bitcount += 8;
+		}
+
+		/* {C} load symbol now if we didn't that earlier at location {A} */
+		if(!node)
+			node = huff->decode_lut[bits&HUFFMAN_LUTMASK];
+
+		/* {D} check if we hit a symbol already */
+		if(node->num_bits)
+		{
+			/* remove the bits for that symbol */
+			bits >>= node->num_bits;
+			bitcount -= node->num_bits;
+		}
+		else
+		{
+			/* remove the bits that the lut checked up for us */
+			bits >>= HUFFMAN_LUTBITS;
+			bitcount -= HUFFMAN_LUTBITS;
+
+			/* walk the tree bit by bit */
+			while(1)
+			{
+				/* traverse tree */
+				node = &huff->nodes[node->leafs[bits&1]];
+
+				/* remove bit */
+				bitcount--;
+				bits >>= 1;
+
+				/* check if we hit a symbol */
+				if(node->num_bits)
+					break;
+
+				/* no more bits, decoding error */
+				if(bitcount == 0)
+					return -1;
+			}
+		}
+
+		/* check for eof */
+		if(node == eof)
+			break;
+
+		/* output character */
+		if(dst == dst_end)
+			return -1;
+		*dst++ = node->symbol;
+	}
+
+	/* return the size of the decompressed buffer */
+	return (int)(dst - (const unsigned char *)output);
+}