diff options
Diffstat (limited to 'src/engine/e_huffman.c')
| -rw-r--r-- | src/engine/e_huffman.c | 507 |
1 files changed, 193 insertions, 314 deletions
diff --git a/src/engine/e_huffman.c b/src/engine/e_huffman.c index dfb5c817..8adbb1f7 100644 --- a/src/engine/e_huffman.c +++ b/src/engine/e_huffman.c @@ -1,383 +1,262 @@ -#include <base/system.h> -#include <stdlib.h> -#include <string.h> -#include <engine/e_huffman.h> +#include <stdlib.h> /* qsort */ +#include <memory.h> /* memset */ +#include "e_huffman.h" -void huffman_init(HUFFSTATE *huff) +typedef struct HUFFMAN_CONSTRUCT_NODE { - mem_zero(huff, sizeof(HUFFSTATE)); - huff->nodes[0].frequency = 1; - huff->nodes[0].symbol_size = -1; - huff->num_symbols++; - huff->num_nodes++; -} - -void huffman_add_symbol(HUFFSTATE *huff, int frequency, int size, unsigned char *symbol) -{ - huff->nodes[huff->num_nodes].frequency = frequency; - huff->nodes[huff->num_nodes].symbol_size = size; - mem_copy(huff->nodes[huff->num_nodes].symbol, symbol, size); - huff->num_nodes++; - huff->num_symbols++; -} - + unsigned short node_id; + int frequency; +} HUFFMAN_CONSTRUCT_NODE; static int sort_func(const void *a, const void *b) { - if((*(HUFFNODE **)a)->frequency > (*(HUFFNODE **)b)->frequency) + if((*(HUFFMAN_CONSTRUCT_NODE **)a)->frequency > (*(HUFFMAN_CONSTRUCT_NODE **)b)->frequency) return -1; - if((*(HUFFNODE **)a)->frequency < (*(HUFFNODE **)b)->frequency) + if((*(HUFFMAN_CONSTRUCT_NODE **)a)->frequency < (*(HUFFMAN_CONSTRUCT_NODE **)b)->frequency) return 1; return 0; } -void huffman_setbits_r(HUFFNODE *node, int bits, int depth) +static void huffman_setbits_r(HUFFMAN_STATE *huff, HUFFMAN_NODE *node, int bits, int depth) { - if(node->one) - huffman_setbits_r(node->one, (bits<<1)|1, depth+1); - if(node->zero) - huffman_setbits_r(node->zero, (bits<<1), depth+1); + if(node->leafs[1] != 0xffff) + huffman_setbits_r(huff, &huff->nodes[node->leafs[1]], bits|(1<<depth), depth+1); + if(node->leafs[0] != 0xffff) + huffman_setbits_r(huff, &huff->nodes[node->leafs[0]], bits, depth+1); - if(node->symbol_size) + if(node->num_bits) { node->bits = bits; node->num_bits = depth; } } - -void huffman_checktree_r(HUFFNODE *node, int bits, int depth) + +static void huffman_construct_tree(HUFFMAN_STATE *huff, const unsigned *frequencies) { - if(node->one) - huffman_checktree_r(node->one, (bits<<1)|1, depth+1); - if(node->zero) - huffman_checktree_r(node->zero, (bits<<1), depth+1); - - if(node->symbol_size) + HUFFMAN_CONSTRUCT_NODE nodes_left_storage[HUFFMAN_MAX_NODES]; + HUFFMAN_CONSTRUCT_NODE *nodes_left[HUFFMAN_MAX_NODES]; + int num_nodes_left = HUFFMAN_MAX_SYMBOLS; + int i; + + /* add the symbols */ + for(i = 0; i < HUFFMAN_MAX_SYMBOLS; i++) { - /*dbg_msg("", "%p %p %d %d %d", node->one, node->zero, node->symbol[0], node->bits, node->num_bits);*/ - - if(node->bits != bits || node->num_bits != depth) - { - dbg_msg("", "crap! %d %d=%d %d!=%d", node->bits>>1, node->bits, bits, node->num_bits , depth); - /*dbg_msg("", "%p %p %d", node->one, node->zero, node->symbol[0]);*/ - } - } -} + huff->nodes[i].num_bits = -1; + huff->nodes[i].symbol = i; + huff->nodes[i].leafs[0] = -1; + huff->nodes[i].leafs[1] = -1; -void huffman_construct_tree(HUFFSTATE *huff) -{ - HUFFNODE *nodes_left[MAX_NODES]; - int num_nodes_left = huff->num_nodes; - int i, k; + if(i == HUFFMAN_EOF_SYMBOL) + nodes_left_storage[i].frequency = 1; + else + nodes_left_storage[i].frequency = frequencies[i]; + nodes_left_storage[i].node_id = i; + nodes_left[i] = &nodes_left_storage[i]; + } - for(i = 0; i < num_nodes_left; i++) - nodes_left[i] = &huff->nodes[i]; + huff->num_nodes = HUFFMAN_MAX_SYMBOLS; /* construct the table */ while(num_nodes_left > 1) { - qsort(nodes_left, num_nodes_left, sizeof(HUFFNODE *), sort_func); + qsort(nodes_left, num_nodes_left, sizeof(HUFFMAN_CONSTRUCT_NODE *), sort_func); - huff->nodes[huff->num_nodes].symbol_size = 0; - huff->nodes[huff->num_nodes].frequency = nodes_left[num_nodes_left-1]->frequency + nodes_left[num_nodes_left-2]->frequency; - huff->nodes[huff->num_nodes].zero = nodes_left[num_nodes_left-1]; - huff->nodes[huff->num_nodes].one = nodes_left[num_nodes_left-2]; - nodes_left[num_nodes_left-1]->parent = &huff->nodes[huff->num_nodes]; - nodes_left[num_nodes_left-2]->parent = &huff->nodes[huff->num_nodes]; - nodes_left[num_nodes_left-2] = &huff->nodes[huff->num_nodes]; + huff->nodes[huff->num_nodes].num_bits = 0; + huff->nodes[huff->num_nodes].leafs[0] = nodes_left[num_nodes_left-1]->node_id; + huff->nodes[huff->num_nodes].leafs[1] = nodes_left[num_nodes_left-2]->node_id; + nodes_left[num_nodes_left-2]->node_id = huff->num_nodes; + nodes_left[num_nodes_left-2]->frequency = nodes_left[num_nodes_left-1]->frequency + nodes_left[num_nodes_left-2]->frequency; huff->num_nodes++; num_nodes_left--; } - dbg_msg("", "%d", huff->num_nodes); - for(i = 0; i < huff->num_nodes; i++) - { - if(huff->nodes[i].symbol_size && (huff->nodes[i].one || huff->nodes[i].zero)) - dbg_msg("", "tree strangeness"); - - if(!huff->nodes[i].parent) - { - dbg_msg("", "root %p %p", huff->nodes[i].one, huff->nodes[i].zero); - huff->start_node = &huff->nodes[i]; - } - } + /* set start node */ + huff->start_node = &huff->nodes[huff->num_nodes-1]; - huffman_setbits_r(huff->start_node, 0, 0); - - /* - for(i = 0; i < huff->num_symbols; i++) - { - unsigned bits = 0; - int num_bits = 0; - HUFFNODE *n = &huff->nodes[i]; - HUFFNODE *p = n; - HUFFNODE *c = n->parent; - - while(c) - { - num_bits++; - if(c->one == p) - bits |= 1; - bits <<= 1; - p = c; - c = c->parent; - } + /* build symbol bits */ + huffman_setbits_r(huff, huff->start_node, 0, 0); +} - n->bits = bits; - n->num_bits = num_bits; - }*/ - - huffman_checktree_r(huff->start_node, 0, 0); +void huffman_init(HUFFMAN_STATE *huff, const unsigned *frequencies) +{ + int i; - for(i = 0; i < huff->num_symbols; i++) + /* make sure to cleanout every thing */ + memset(huff, 0, sizeof(HUFFMAN_STATE)); + + /* construct the tree */ + huffman_construct_tree(huff, frequencies); + + /* build decode LUT */ + for(i = 0; i < HUFFMAN_LUTSIZE; i++) { - for(k = 0; k < huff->num_symbols; k++) + unsigned bits = i; + int k; + HUFFMAN_NODE *node = huff->start_node; + for(k = 0; k < HUFFMAN_LUTBITS; k++) { - if(k == i) - continue; - if(huff->nodes[i].num_bits == huff->nodes[k].num_bits && huff->nodes[i].bits == huff->nodes[k].bits) - dbg_msg("", "tree error %d %d %d", i, k, huff->nodes[i].num_bits); - } - } + node = &huff->nodes[node->leafs[bits&1]]; + bits >>= 1; -} + if(!node) + break; -typedef struct -{ - unsigned char *data; - unsigned char current_bits; - int num; -} HUFFBITIO; + if(node->num_bits) + { + huff->decode_lut[i] = node; + break; + } + } -int debug_count = 0; + if(k == HUFFMAN_LUTBITS) + huff->decode_lut[i] = node; + } -static void bitio_init(HUFFBITIO *bitio, unsigned char *data) -{ - bitio->data = data; - bitio->num = 0; - bitio->current_bits = 0; } -static void bitio_flush(HUFFBITIO *bitio) +/*****************************************************************/ +int huffman_compress(HUFFMAN_STATE *huff, const void *input, int input_size, void *output, int output_size) { - if(bitio->num == 8) - *bitio->data = bitio->current_bits << (8-bitio->num); - else - *bitio->data = bitio->current_bits; - bitio->data++; - bitio->num = 0; - bitio->current_bits = 0; -} + /* this macro loads a symbol for a byte into bits and bitcount */ +#define HUFFMAN_MACRO_LOADSYMBOL(sym) \ + bits |= huff->nodes[sym].bits << bitcount; \ + bitcount += huff->nodes[sym].num_bits; -static void bitio_write(HUFFBITIO *bitio, int bit) -{ - bitio->current_bits = (bitio->current_bits<<1)|bit; - bitio->num++; - if(bitio->num == 8) - bitio_flush(bitio); - - if(debug_count) - { - debug_count--; - dbg_msg("", "out %d", bit); + /* this macro writes the symbol stored in bits and bitcount to the dst pointer */ +#define HUFFMAN_MACRO_WRITE() \ + while(bitcount >= 8) \ + { \ + *dst++ = (unsigned char)(bits&0xff); \ + if(dst == dst_end) \ + return -1; \ + bits >>= 8; \ + bitcount -= 8; \ } -} -static int bitio_read(HUFFBITIO *bitio) -{ - int bit; - - if(!bitio->num) - { - bitio->current_bits = *bitio->data; - bitio->data++; - bitio->num = 8; - } - - bitio->num--; - bit = (bitio->current_bits>>bitio->num)&1; + /* setup buffer pointers */ + const unsigned char *src = (const unsigned char *)input; + const unsigned char *src_end = src + input_size; + unsigned char *dst = (unsigned char *)output; + unsigned char *dst_end = dst + output_size; - if(debug_count) - { - debug_count--; - dbg_msg("", "in %d", bit); - } - return bit; -} + /* symbol variables */ + unsigned bits = 0; + unsigned bitcount = 0; -int huffman_compress(HUFFSTATE *huff, const void *input, int input_size, void *output, int output_size) -{ - const unsigned char *src = (const unsigned char *)input; - int ret = 0; - int quit = 0; - HUFFBITIO io; - - bitio_init(&io, (unsigned char *)output); - - while(!quit) + /* make sure that we have data that we want to compress */ + if(input_size) { - int i; - int best_match = -1; - int best_match_size = 0; - - if(input_size) - { - for(i = 0; i < huff->num_symbols; i++) - { - if(huff->nodes[i].symbol_size <= input_size && huff->nodes[i].symbol_size > best_match_size) - { - if(memcmp(src, huff->nodes[i].symbol, huff->nodes[i].symbol_size) == 0) - { - best_match = i; - best_match_size = huff->nodes[i].symbol_size; - } - } - } - } - else - { - best_match = 0; - best_match_size = 0; - quit = 1; - } - - - if(best_match == -1) + /* {A} load the first symbol */ + int symbol = *src++; + + while(src != src_end) { - dbg_msg("huffman", "couldn't find symbol! %d left", input_size); - return -1; + /* {B} load the symbol */ + HUFFMAN_MACRO_LOADSYMBOL(symbol) + + /* {C} fetch next symbol, this is done here because it will reduce dependency in the code */ + symbol = *src++; + + /* {B} write the symbol loaded at */ + HUFFMAN_MACRO_WRITE() } - - i = huff->nodes[best_match].num_bits; - for(i = huff->nodes[best_match].num_bits-1; i >= 0; i--) - bitio_write(&io, (huff->nodes[best_match].bits>>i)&1); - if(debug_count) - dbg_msg("", "--"); - - ret += huff->nodes[best_match].num_bits; - input_size -= best_match_size; - src += best_match_size; + /* write the last symbol loaded from {C} or {A} in the case of only 1 byte input buffer */ + HUFFMAN_MACRO_LOADSYMBOL(symbol) + HUFFMAN_MACRO_WRITE() } - - bitio_flush(&io); - return ret; + + /* write EOF symbol */ + HUFFMAN_MACRO_LOADSYMBOL(HUFFMAN_EOF_SYMBOL) + HUFFMAN_MACRO_WRITE() + + /* write out the last bits */ + *dst++ = bits; + + /* return the size of the output */ + return (int)(dst - (const unsigned char *)output); + + /* remove macros */ +#undef HUFFMAN_MACRO_LOADSYMBOL +#undef HUFFMAN_MACRO_WRITE } -int huffman_decompress(HUFFSTATE *huff, const void *input, int input_size, void *output, int output_size) +/*****************************************************************/ +int huffman_decompress(HUFFMAN_STATE *huff, const void *input, int input_size, void *output, int output_size) { + /* setup buffer pointers */ unsigned char *dst = (unsigned char *)output; - HUFFBITIO io; - int size = 0; - - bitio_init(&io, (unsigned char *)input); - - while(size < 1401) + unsigned char *src = (unsigned char *)input; + unsigned char *dst_end = dst + output_size; + unsigned char *src_end = src + input_size; + + unsigned bits = 0; + unsigned bitcount = 0; + + HUFFMAN_NODE *eof = &huff->nodes[HUFFMAN_EOF_SYMBOL]; + HUFFMAN_NODE *node = 0; + + while(1) { - HUFFNODE *node = huff->start_node; - int i; - - while(node) + /* {A} try to load a node now, this will reduce dependency at location {D} */ + node = 0; + if(bitcount >= HUFFMAN_LUTBITS) + node = huff->decode_lut[bits&HUFFMAN_LUTMASK]; + + /* {B} fill with new bits */ + while(bitcount < 24 && src != src_end) { - if(node->symbol_size) - break; - - if(bitio_read(&io)) - node = node->one; - else - node = node->zero; + bits |= (*src++) << bitcount; + bitcount += 8; } - if(debug_count) - dbg_msg("", "-- %d %d", node->bits, node->num_bits); - - /* check for eof */ - if(node == &huff->nodes[0]) - break; - - for(i = 0; i < node->symbol_size; i++) + /* {C} load symbol now if we didn't that earlier at location {A} */ + if(!node) + node = huff->decode_lut[bits&HUFFMAN_LUTMASK]; + + /* {D} check if we hit a symbol already */ + if(node->num_bits) { - *dst++ = node->symbol[i]; - size++; + /* remove the bits for that symbol */ + bits >>= node->num_bits; + bitcount -= node->num_bits; } - } - - return size; -} - -unsigned char test_data[1024*64]; -int test_data_size; + else + { + /* remove the bits that the lut checked up for us */ + bits >>= HUFFMAN_LUTBITS; + bitcount -= HUFFMAN_LUTBITS; -unsigned char compressed_data[1024*64]; -int compressed_data_size; + /* walk the tree bit by bit */ + while(1) + { + /* traverse tree */ + node = &huff->nodes[node->leafs[bits&1]]; -unsigned char output_data[1024*64]; -int output_data_size; + /* remove bit */ + bitcount--; + bits >>= 1; -HUFFSTATE state; + /* check if we hit a symbol */ + if(node->num_bits) + break; -int huffman_test() -{ - huffman_init(&state); - - dbg_msg("", "test test"); - - /* bitio testing */ - { - char bits[] = {1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1,1,1,1,1,1,1,0}; - unsigned char buf[64]; - int i; - HUFFBITIO io; - - bitio_init(&io, buf); - for(i = 0; i < sizeof(bits); i++) - bitio_write(&io, bits[i]); - - bitio_flush(&io); - bitio_init(&io, buf); - for(i = 0; i < sizeof(bits); i++) - { - if(bitio_read(&io) != bits[i]) - dbg_msg("", "bitio failed at %d", i); - } - } - - /* read test data */ - { - IOHANDLE io = io_open("license.txt", IOFLAG_READ); - test_data_size = io_read(io, test_data, sizeof(test_data)); - io_close(io); - } - - /* add symbols */ - { - int counts[256] = {0}; - int i; - for(i = 0; i < test_data_size; i++) - counts[test_data[i]]++; - - for(i = 0; i < 256; i++) - { - unsigned char symbol = (unsigned char )i; - if(counts[i]) - huffman_add_symbol(&state, counts[i], 1, &symbol); + /* no more bits, decoding error */ + if(bitcount == 0) + return -1; + } } + + /* check for eof */ + if(node == eof) + break; + + /* output character */ + if(dst == dst_end) + return -1; + *dst++ = node->symbol; } - - huffman_construct_tree(&state); - /*debug_count = 20;*/ - compressed_data_size = huffman_compress(&state, test_data, test_data_size, compressed_data, sizeof(compressed_data)); - /*debug_count = 20;*/ - output_data_size = huffman_decompress(&state, compressed_data, compressed_data_size, output_data, sizeof(output_data)); - - dbg_msg("huffman", "%d -> %d -> %d", test_data_size, compressed_data_size/8, output_data_size); - - /* write test data */ - { - IOHANDLE io = io_open("out.txt", IOFLAG_WRITE); - io_write(io, output_data, output_data_size); - io_close(io); - } - - return 0; + + /* return the size of the decompressed buffer */ + return (int)(dst - (const unsigned char *)output); } |