diff options
| author | Magnus Auvinen <magnus.auvinen@gmail.com> | 2008-11-29 06:39:22 +0000 |
|---|---|---|
| committer | Magnus Auvinen <magnus.auvinen@gmail.com> | 2008-11-29 06:39:22 +0000 |
| commit | 4004d13b48fd303c982dbcf7c432add64d128637 (patch) | |
| tree | fb67a18724bb8e1435b0938abe56c24d68fa2dce /src/engine/external | |
| parent | 61d18702f63cc4731c923e9ca8cd8d98cdaca4ad (diff) | |
| download | zcatch-4004d13b48fd303c982dbcf7c432add64d128637.tar.gz zcatch-4004d13b48fd303c982dbcf7c432add64d128637.zip | |
applied patch from Richie Nyhus to convert CRLF to LF, as it should be
Diffstat (limited to 'src/engine/external')
| -rw-r--r-- | src/engine/external/wavpack/arm.S | 922 | ||||
| -rw-r--r-- | src/engine/external/wavpack/arml.S | 982 | ||||
| -rw-r--r-- | src/engine/external/wavpack/bits.c | 280 | ||||
| -rw-r--r-- | src/engine/external/wavpack/coldfire.S | 1050 | ||||
| -rw-r--r-- | src/engine/external/wavpack/float.c | 100 | ||||
| -rw-r--r-- | src/engine/external/wavpack/license.txt | 50 | ||||
| -rw-r--r-- | src/engine/external/wavpack/metadata.c | 210 | ||||
| -rw-r--r-- | src/engine/external/wavpack/readme.txt | 136 | ||||
| -rw-r--r-- | src/engine/external/wavpack/unpack.c | 1570 | ||||
| -rw-r--r-- | src/engine/external/wavpack/wavpack.h | 576 | ||||
| -rw-r--r-- | src/engine/external/wavpack/words.c | 1120 | ||||
| -rw-r--r-- | src/engine/external/wavpack/wputils.c | 702 |
12 files changed, 3849 insertions, 3849 deletions
diff --git a/src/engine/external/wavpack/arm.S b/src/engine/external/wavpack/arm.S index ab882181..4f299892 100644 --- a/src/engine/external/wavpack/arm.S +++ b/src/engine/external/wavpack/arm.S @@ -1,461 +1,461 @@ -//////////////////////////////////////////////////////////////////////////// -// **** WAVPACK **** // -// Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // -// All Rights Reserved. // -// Distributed under the BSD Software License (see license.txt) // -//////////////////////////////////////////////////////////////////////////// - -/* This is an assembly optimized version of the following WavPack function: - * - * void decorr_stereo_pass_cont (struct decorr_pass *dpp, - * long *buffer, long sample_count); - * - * It performs a single pass of stereo decorrelation on the provided buffer. - * Note that this version of the function requires that the 8 previous stereo - * samples are visible and correct. In other words, it ignores the "samples_*" - * fields in the decorr_pass structure and gets the history data directly - * from the buffer. It does, however, return the appropriate history samples - * to the decorr_pass structure before returning. - * - * This is written to work on a ARM7TDMI processor. This version only uses the - * 32-bit multiply-accumulate instruction and so will overflow with 24-bit - * WavPack files. 
- */ - .text - .align - .global decorr_stereo_pass_cont_arm - -/* - * on entry: - * - * r0 = struct decorr_pass *dpp - * r1 = long *buffer - * r2 = long sample_count - */ - -decorr_stereo_pass_cont_arm: - - stmfd sp!, {r4 - r8, r10, r11, lr} - mov r5, r0 @ r5 = dpp - mov r11, #512 @ r11 = 512 for rounding - ldrsh r6, [r0, #2] @ r6 = dpp->delta - ldrsh r4, [r0, #4] @ r4 = dpp->weight_A - ldrsh r0, [r0, #6] @ r0 = dpp->weight_B - cmp r2, #0 @ exit if no samples to process - beq common_exit - - add r7, r1, r2, asl #3 @ r7 = buffer ending position - ldrsh r2, [r5, #0] @ r2 = dpp->term - cmp r2, #0 - bmi minus_term - - ldr lr, [r1, #-16] @ load 2 sample history from buffer - ldr r10, [r1, #-12] @ for terms 2, 17, and 18 - ldr r8, [r1, #-8] - ldr r3, [r1, #-4] - cmp r2, #17 - beq term_17_loop - cmp r2, #18 - beq term_18_loop - cmp r2, #2 - beq term_2_loop - b term_default_loop @ else handle default (1-8, except 2) - -minus_term: - mov r10, #1024 @ r10 = -1024 for weight clipping - rsb r10, r10, #0 @ (only used for negative terms) - cmn r2, #1 - beq term_minus_1 - cmn r2, #2 - beq term_minus_2 - cmn r2, #3 - beq term_minus_3 - b common_exit - -/* - ****************************************************************************** - * Loop to handle term = 17 condition - * - * r0 = dpp->weight_B r8 = previous left sample - * r1 = bptr r9 = - * r2 = current sample r10 = second previous left sample - * r3 = previous right sample r11 = 512 (for rounding) - * r4 = dpp->weight_A ip = current decorrelation value - * r5 = dpp sp = - * r6 = dpp->delta lr = second previous right sample - * r7 = eptr pc = - ******************************************************************************* - */ - -term_17_loop: - rsbs ip, lr, r8, asl #1 @ decorr value = (2 * prev) - 2nd prev - mov lr, r8 @ previous becomes 2nd previous - ldr r2, [r1], #4 @ get sample & update pointer - mla r8, ip, r4, r11 @ mult decorr value by weight, round, - add r8, r2, r8, asr #10 @ shift, and add to new sample - strne 
r8, [r1, #-4] @ if change possible, store sample back - cmpne r2, #0 - beq .L325 - teq ip, r2 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - -.L325: rsbs ip, r10, r3, asl #1 @ do same thing for right channel - mov r10, r3 - ldr r2, [r1], #4 - mla r3, ip, r0, r11 - add r3, r2, r3, asr #10 - strne r3, [r1, #-4] - cmpne r2, #0 - beq .L329 - teq ip, r2 - submi r0, r0, r6 - addpl r0, r0, r6 - -.L329: cmp r7, r1 @ loop back if more samples to do - bhi term_17_loop - b store_1718 @ common exit for terms 17 & 18 - -/* - ****************************************************************************** - * Loop to handle term = 18 condition - * - * r0 = dpp->weight_B r8 = previous left sample - * r1 = bptr r9 = - * r2 = current sample r10 = second previous left sample - * r3 = previous right sample r11 = 512 (for rounding) - * r4 = dpp->weight_A ip = decorrelation value - * r5 = dpp sp = - * r6 = dpp->delta lr = second previous right sample - * r7 = eptr pc = - ******************************************************************************* - */ - -term_18_loop: - sub ip, r8, lr @ decorr value = - mov lr, r8 @ ((3 * prev) - 2nd prev) >> 1 - adds ip, r8, ip, asr #1 - ldr r2, [r1], #4 @ get sample & update pointer - mla r8, ip, r4, r11 @ mult decorr value by weight, round, - add r8, r2, r8, asr #10 @ shift, and add to new sample - strne r8, [r1, #-4] @ if change possible, store sample back - cmpne r2, #0 - beq .L337 - teq ip, r2 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - -.L337: sub ip, r3, r10 @ do same thing for right channel - mov r10, r3 - adds ip, r3, ip, asr #1 - ldr r2, [r1], #4 - mla r3, ip, r0, r11 - add r3, r2, r3, asr #10 - strne r3, [r1, #-4] - cmpne r2, #0 - beq .L341 - teq ip, r2 - submi r0, r0, r6 - addpl r0, r0, r6 - -.L341: cmp r7, r1 @ loop back if more samples to do - bhi term_18_loop - -/* common exit for terms 17 & 18 */ - -store_1718: - str r3, [r5, #40] @ store sample history into struct - str r8, [r5, #8] 
- str r10, [r5, #44] - str lr, [r5, #12] - b common_exit @ and return - -/* - ****************************************************************************** - * Loop to handle term = 2 condition - * (note that this case can be handled by the default term handler (1-8), but - * this special case is faster because it doesn't have to read memory twice) - * - * r0 = dpp->weight_B r8 = previous left sample - * r1 = bptr r9 = - * r2 = current sample r10 = second previous left sample - * r3 = previous right sample r11 = 512 (for rounding) - * r4 = dpp->weight_A ip = decorrelation value - * r5 = dpp sp = - * r6 = dpp->delta lr = second previous right sample - * r7 = eptr pc = - ******************************************************************************* - */ - -term_2_loop: - movs ip, lr @ get decorrelation value & test - mov lr, r8 @ previous becomes 2nd previous - ldr r2, [r1], #4 @ get sample & update pointer - mla r8, ip, r4, r11 @ mult decorr value by weight, round, - add r8, r2, r8, asr #10 @ shift, and add to new sample - strne r8, [r1, #-4] @ if change possible, store sample back - cmpne r2, #0 - beq .L225 - teq ip, r2 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - -.L225: movs ip, r10 @ do same thing for right channel - mov r10, r3 - ldr r2, [r1], #4 - mla r3, ip, r0, r11 - add r3, r2, r3, asr #10 - strne r3, [r1, #-4] - cmpne r2, #0 - beq .L229 - teq ip, r2 - submi r0, r0, r6 - addpl r0, r0, r6 - -.L229: cmp r7, r1 @ loop back if more samples to do - bhi term_2_loop - b default_term_exit @ this exit updates all dpp->samples - -/* - ****************************************************************************** - * Loop to handle default term condition - * - * r0 = dpp->weight_B r8 = result accumulator - * r1 = bptr r9 = - * r2 = dpp->term r10 = - * r3 = decorrelation value r11 = 512 (for rounding) - * r4 = dpp->weight_A ip = current sample - * r5 = dpp sp = - * r6 = dpp->delta lr = - * r7 = eptr pc = - 
******************************************************************************* - */ - -term_default_loop: - ldr ip, [r1] @ get original sample - ldr r3, [r1, -r2, asl #3] @ get decorrelation value based on term - mla r8, r3, r4, r11 @ mult decorr value by weight, round, - add r8, ip, r8, asr #10 @ shift and add to new sample - str r8, [r1], #4 @ store update sample - cmp r3, #0 - cmpne ip, #0 - beq .L350 - teq ip, r3 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - -.L350: ldr ip, [r1] @ do the same thing for right channel - ldr r3, [r1, -r2, asl #3] - mla r8, r3, r0, r11 - add r8, ip, r8, asr #10 - str r8, [r1], #4 - cmp r3, #0 - cmpne ip, #0 - beq .L354 - teq ip, r3 - submi r0, r0, r6 - addpl r0, r0, r6 - -.L354: cmp r7, r1 @ loop back if more samples to do - bhi term_default_loop - -/* - * This exit is used by terms 1-8 to store the previous 8 samples into the decorr - * structure (even if they are not all used for the given term) - */ - -default_term_exit: - ldrsh r3, [r5, #0] - sub ip, r3, #1 - mov lr, #7 - -.L358: and r3, ip, #7 - add r3, r5, r3, asl #2 - ldr r2, [r1, #-4] - str r2, [r3, #40] - ldr r2, [r1, #-8]! 
- str r2, [r3, #8] - sub ip, ip, #1 - sub lr, lr, #1 - cmn lr, #1 - bne .L358 - b common_exit - -/* - ****************************************************************************** - * Loop to handle term = -1 condition - * - * r0 = dpp->weight_B r8 = - * r1 = bptr r9 = - * r2 = intermediate result r10 = -1024 (for clipping) - * r3 = previous right sample r11 = 512 (for rounding) - * r4 = dpp->weight_A ip = current sample - * r5 = dpp sp = - * r6 = dpp->delta lr = updated left sample - * r7 = eptr pc = - ******************************************************************************* - */ - -term_minus_1: - ldr r3, [r1, #-4] - -term_minus_1_loop: - ldr ip, [r1] @ for left channel the decorrelation value - mla r2, r3, r4, r11 @ is the previous right sample (in r3) - add lr, ip, r2, asr #10 - str lr, [r1], #8 - cmp r3, #0 - cmpne ip, #0 - beq .L361 - teq ip, r3 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - cmp r4, #1024 - movgt r4, #1024 - cmp r4, r10 - movlt r4, r10 - -.L361: ldr r2, [r1, #-4] @ for right channel the decorrelation value - mla r3, lr, r0, r11 @ is the just updated right sample (in lr) - add r3, r2, r3, asr #10 - str r3, [r1, #-4] - cmp lr, #0 - cmpne r2, #0 - beq .L369 - teq r2, lr - submi r0, r0, r6 - addpl r0, r0, r6 - cmp r0, #1024 @ then clip weight to +/-1024 - movgt r0, #1024 - cmp r0, r10 - movlt r0, r10 - -.L369: cmp r7, r1 @ loop back if more samples to do - bhi term_minus_1_loop - - str r3, [r5, #8] @ else store right sample and exit - b common_exit - -/* - ****************************************************************************** - * Loop to handle term = -2 condition - * (note that the channels are processed in the reverse order here) - * - * r0 = dpp->weight_B r8 = - * r1 = bptr r9 = - * r2 = intermediate result r10 = -1024 (for clipping) - * r3 = previous left sample r11 = 512 (for rounding) - * r4 = dpp->weight_A ip = current sample - * r5 = dpp sp = - * r6 = dpp->delta lr = updated right sample - * r7 = 
eptr pc = - ******************************************************************************* - */ - -term_minus_2: - ldr r3, [r1, #-8] - -term_minus_2_loop: - ldr ip, [r1, #4] @ for right channel the decorrelation value - mla r2, r3, r0, r11 @ is the previous left sample (in r3) - add lr, ip, r2, asr #10 - str lr, [r1, #4] - cmp r3, #0 - cmpne ip, #0 - beq .L380 - teq ip, r3 @ update weight based on signs - submi r0, r0, r6 - addpl r0, r0, r6 - cmp r0, #1024 @ then clip weight to +/-1024 - movgt r0, #1024 - cmp r0, r10 - movlt r0, r10 - -.L380: ldr r2, [r1, #0] @ for left channel the decorrelation value - mla r3, lr, r4, r11 @ is the just updated left sample (in lr) - add r3, r2, r3, asr #10 - str r3, [r1], #8 - cmp lr, #0 - cmpne r2, #0 - beq .L388 - teq r2, lr - submi r4, r4, r6 - addpl r4, r4, r6 - cmp r4, #1024 - movgt r4, #1024 - cmp r4, r10 - movlt r4, r10 - -.L388: cmp r7, r1 @ loop back if more samples to do - bhi term_minus_2_loop - - str r3, [r5, #40] @ else store left channel and exit - b common_exit - -/* - ****************************************************************************** - * Loop to handle term = -3 condition - * - * r0 = dpp->weight_B r8 = previous left sample - * r1 = bptr r9 = - * r2 = current left sample r10 = -1024 (for clipping) - * r3 = previous right sample r11 = 512 (for rounding) - * r4 = dpp->weight_A ip = intermediate result - * r5 = dpp sp = - * r6 = dpp->delta lr = - * r7 = eptr pc = - ******************************************************************************* - */ - -term_minus_3: - ldr r3, [r1, #-4] @ load previous samples - ldr r8, [r1, #-8] - -term_minus_3_loop: - ldr ip, [r1] - mla r2, r3, r4, r11 - add r2, ip, r2, asr #10 - str r2, [r1], #4 - cmp r3, #0 - cmpne ip, #0 - beq .L399 - teq ip, r3 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - cmp r4, #1024 @ then clip weight to +/-1024 - movgt r4, #1024 - cmp r4, r10 - movlt r4, r10 - -.L399: movs ip, r8 @ ip = previous left we use now - mov r8, 
r2 @ r8 = current left we use next time - ldr r2, [r1], #4 - mla r3, ip, r0, r11 - add r3, r2, r3, asr #10 - strne r3, [r1, #-4] - cmpne r2, #0 - beq .L407 - teq ip, r2 - submi r0, r0, r6 - addpl r0, r0, r6 - cmp r0, #1024 - movgt r0, #1024 - cmp r0, r10 - movlt r0, r10 - -.L407: cmp r7, r1 @ loop back if more samples to do - bhi term_minus_3_loop - - str r3, [r5, #8] @ else store previous samples & exit - str r8, [r5, #40] - -/* - * Before finally exiting we must store weights back for next time - */ - -common_exit: - strh r4, [r5, #4] - strh r0, [r5, #6] - ldmfd sp!, {r4 - r8, r10, r11, pc} - +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2006 Conifer Software. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +/* This is an assembly optimized version of the following WavPack function: + * + * void decorr_stereo_pass_cont (struct decorr_pass *dpp, + * long *buffer, long sample_count); + * + * It performs a single pass of stereo decorrelation on the provided buffer. + * Note that this version of the function requires that the 8 previous stereo + * samples are visible and correct. In other words, it ignores the "samples_*" + * fields in the decorr_pass structure and gets the history data directly + * from the buffer. It does, however, return the appropriate history samples + * to the decorr_pass structure before returning. + * + * This is written to work on a ARM7TDMI processor. This version only uses the + * 32-bit multiply-accumulate instruction and so will overflow with 24-bit + * WavPack files. 
+ */ + .text + .align + .global decorr_stereo_pass_cont_arm + +/* + * on entry: + * + * r0 = struct decorr_pass *dpp + * r1 = long *buffer + * r2 = long sample_count + */ + +decorr_stereo_pass_cont_arm: + + stmfd sp!, {r4 - r8, r10, r11, lr} + mov r5, r0 @ r5 = dpp + mov r11, #512 @ r11 = 512 for rounding + ldrsh r6, [r0, #2] @ r6 = dpp->delta + ldrsh r4, [r0, #4] @ r4 = dpp->weight_A + ldrsh r0, [r0, #6] @ r0 = dpp->weight_B + cmp r2, #0 @ exit if no samples to process + beq common_exit + + add r7, r1, r2, asl #3 @ r7 = buffer ending position + ldrsh r2, [r5, #0] @ r2 = dpp->term + cmp r2, #0 + bmi minus_term + + ldr lr, [r1, #-16] @ load 2 sample history from buffer + ldr r10, [r1, #-12] @ for terms 2, 17, and 18 + ldr r8, [r1, #-8] + ldr r3, [r1, #-4] + cmp r2, #17 + beq term_17_loop + cmp r2, #18 + beq term_18_loop + cmp r2, #2 + beq term_2_loop + b term_default_loop @ else handle default (1-8, except 2) + +minus_term: + mov r10, #1024 @ r10 = -1024 for weight clipping + rsb r10, r10, #0 @ (only used for negative terms) + cmn r2, #1 + beq term_minus_1 + cmn r2, #2 + beq term_minus_2 + cmn r2, #3 + beq term_minus_3 + b common_exit + +/* + ****************************************************************************** + * Loop to handle term = 17 condition + * + * r0 = dpp->weight_B r8 = previous left sample + * r1 = bptr r9 = + * r2 = current sample r10 = second previous left sample + * r3 = previous right sample r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = current decorrelation value + * r5 = dpp sp = + * r6 = dpp->delta lr = second previous right sample + * r7 = eptr pc = + ******************************************************************************* + */ + +term_17_loop: + rsbs ip, lr, r8, asl #1 @ decorr value = (2 * prev) - 2nd prev + mov lr, r8 @ previous becomes 2nd previous + ldr r2, [r1], #4 @ get sample & update pointer + mla r8, ip, r4, r11 @ mult decorr value by weight, round, + add r8, r2, r8, asr #10 @ shift, and add to new sample + strne 
r8, [r1, #-4] @ if change possible, store sample back + cmpne r2, #0 + beq .L325 + teq ip, r2 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + +.L325: rsbs ip, r10, r3, asl #1 @ do same thing for right channel + mov r10, r3 + ldr r2, [r1], #4 + mla r3, ip, r0, r11 + add r3, r2, r3, asr #10 + strne r3, [r1, #-4] + cmpne r2, #0 + beq .L329 + teq ip, r2 + submi r0, r0, r6 + addpl r0, r0, r6 + +.L329: cmp r7, r1 @ loop back if more samples to do + bhi term_17_loop + b store_1718 @ common exit for terms 17 & 18 + +/* + ****************************************************************************** + * Loop to handle term = 18 condition + * + * r0 = dpp->weight_B r8 = previous left sample + * r1 = bptr r9 = + * r2 = current sample r10 = second previous left sample + * r3 = previous right sample r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = decorrelation value + * r5 = dpp sp = + * r6 = dpp->delta lr = second previous right sample + * r7 = eptr pc = + ******************************************************************************* + */ + +term_18_loop: + sub ip, r8, lr @ decorr value = + mov lr, r8 @ ((3 * prev) - 2nd prev) >> 1 + adds ip, r8, ip, asr #1 + ldr r2, [r1], #4 @ get sample & update pointer + mla r8, ip, r4, r11 @ mult decorr value by weight, round, + add r8, r2, r8, asr #10 @ shift, and add to new sample + strne r8, [r1, #-4] @ if change possible, store sample back + cmpne r2, #0 + beq .L337 + teq ip, r2 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + +.L337: sub ip, r3, r10 @ do same thing for right channel + mov r10, r3 + adds ip, r3, ip, asr #1 + ldr r2, [r1], #4 + mla r3, ip, r0, r11 + add r3, r2, r3, asr #10 + strne r3, [r1, #-4] + cmpne r2, #0 + beq .L341 + teq ip, r2 + submi r0, r0, r6 + addpl r0, r0, r6 + +.L341: cmp r7, r1 @ loop back if more samples to do + bhi term_18_loop + +/* common exit for terms 17 & 18 */ + +store_1718: + str r3, [r5, #40] @ store sample history into struct + str r8, [r5, #8] 
+ str r10, [r5, #44] + str lr, [r5, #12] + b common_exit @ and return + +/* + ****************************************************************************** + * Loop to handle term = 2 condition + * (note that this case can be handled by the default term handler (1-8), but + * this special case is faster because it doesn't have to read memory twice) + * + * r0 = dpp->weight_B r8 = previous left sample + * r1 = bptr r9 = + * r2 = current sample r10 = second previous left sample + * r3 = previous right sample r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = decorrelation value + * r5 = dpp sp = + * r6 = dpp->delta lr = second previous right sample + * r7 = eptr pc = + ******************************************************************************* + */ + +term_2_loop: + movs ip, lr @ get decorrelation value & test + mov lr, r8 @ previous becomes 2nd previous + ldr r2, [r1], #4 @ get sample & update pointer + mla r8, ip, r4, r11 @ mult decorr value by weight, round, + add r8, r2, r8, asr #10 @ shift, and add to new sample + strne r8, [r1, #-4] @ if change possible, store sample back + cmpne r2, #0 + beq .L225 + teq ip, r2 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + +.L225: movs ip, r10 @ do same thing for right channel + mov r10, r3 + ldr r2, [r1], #4 + mla r3, ip, r0, r11 + add r3, r2, r3, asr #10 + strne r3, [r1, #-4] + cmpne r2, #0 + beq .L229 + teq ip, r2 + submi r0, r0, r6 + addpl r0, r0, r6 + +.L229: cmp r7, r1 @ loop back if more samples to do + bhi term_2_loop + b default_term_exit @ this exit updates all dpp->samples + +/* + ****************************************************************************** + * Loop to handle default term condition + * + * r0 = dpp->weight_B r8 = result accumulator + * r1 = bptr r9 = + * r2 = dpp->term r10 = + * r3 = decorrelation value r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = current sample + * r5 = dpp sp = + * r6 = dpp->delta lr = + * r7 = eptr pc = + 
******************************************************************************* + */ + +term_default_loop: + ldr ip, [r1] @ get original sample + ldr r3, [r1, -r2, asl #3] @ get decorrelation value based on term + mla r8, r3, r4, r11 @ mult decorr value by weight, round, + add r8, ip, r8, asr #10 @ shift and add to new sample + str r8, [r1], #4 @ store update sample + cmp r3, #0 + cmpne ip, #0 + beq .L350 + teq ip, r3 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + +.L350: ldr ip, [r1] @ do the same thing for right channel + ldr r3, [r1, -r2, asl #3] + mla r8, r3, r0, r11 + add r8, ip, r8, asr #10 + str r8, [r1], #4 + cmp r3, #0 + cmpne ip, #0 + beq .L354 + teq ip, r3 + submi r0, r0, r6 + addpl r0, r0, r6 + +.L354: cmp r7, r1 @ loop back if more samples to do + bhi term_default_loop + +/* + * This exit is used by terms 1-8 to store the previous 8 samples into the decorr + * structure (even if they are not all used for the given term) + */ + +default_term_exit: + ldrsh r3, [r5, #0] + sub ip, r3, #1 + mov lr, #7 + +.L358: and r3, ip, #7 + add r3, r5, r3, asl #2 + ldr r2, [r1, #-4] + str r2, [r3, #40] + ldr r2, [r1, #-8]! 
+ str r2, [r3, #8] + sub ip, ip, #1 + sub lr, lr, #1 + cmn lr, #1 + bne .L358 + b common_exit + +/* + ****************************************************************************** + * Loop to handle term = -1 condition + * + * r0 = dpp->weight_B r8 = + * r1 = bptr r9 = + * r2 = intermediate result r10 = -1024 (for clipping) + * r3 = previous right sample r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = current sample + * r5 = dpp sp = + * r6 = dpp->delta lr = updated left sample + * r7 = eptr pc = + ******************************************************************************* + */ + +term_minus_1: + ldr r3, [r1, #-4] + +term_minus_1_loop: + ldr ip, [r1] @ for left channel the decorrelation value + mla r2, r3, r4, r11 @ is the previous right sample (in r3) + add lr, ip, r2, asr #10 + str lr, [r1], #8 + cmp r3, #0 + cmpne ip, #0 + beq .L361 + teq ip, r3 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + cmp r4, #1024 + movgt r4, #1024 + cmp r4, r10 + movlt r4, r10 + +.L361: ldr r2, [r1, #-4] @ for right channel the decorrelation value + mla r3, lr, r0, r11 @ is the just updated right sample (in lr) + add r3, r2, r3, asr #10 + str r3, [r1, #-4] + cmp lr, #0 + cmpne r2, #0 + beq .L369 + teq r2, lr + submi r0, r0, r6 + addpl r0, r0, r6 + cmp r0, #1024 @ then clip weight to +/-1024 + movgt r0, #1024 + cmp r0, r10 + movlt r0, r10 + +.L369: cmp r7, r1 @ loop back if more samples to do + bhi term_minus_1_loop + + str r3, [r5, #8] @ else store right sample and exit + b common_exit + +/* + ****************************************************************************** + * Loop to handle term = -2 condition + * (note that the channels are processed in the reverse order here) + * + * r0 = dpp->weight_B r8 = + * r1 = bptr r9 = + * r2 = intermediate result r10 = -1024 (for clipping) + * r3 = previous left sample r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = current sample + * r5 = dpp sp = + * r6 = dpp->delta lr = updated right sample + * r7 = 
eptr pc = + ******************************************************************************* + */ + +term_minus_2: + ldr r3, [r1, #-8] + +term_minus_2_loop: + ldr ip, [r1, #4] @ for right channel the decorrelation value + mla r2, r3, r0, r11 @ is the previous left sample (in r3) + add lr, ip, r2, asr #10 + str lr, [r1, #4] + cmp r3, #0 + cmpne ip, #0 + beq .L380 + teq ip, r3 @ update weight based on signs + submi r0, r0, r6 + addpl r0, r0, r6 + cmp r0, #1024 @ then clip weight to +/-1024 + movgt r0, #1024 + cmp r0, r10 + movlt r0, r10 + +.L380: ldr r2, [r1, #0] @ for left channel the decorrelation value + mla r3, lr, r4, r11 @ is the just updated left sample (in lr) + add r3, r2, r3, asr #10 + str r3, [r1], #8 + cmp lr, #0 + cmpne r2, #0 + beq .L388 + teq r2, lr + submi r4, r4, r6 + addpl r4, r4, r6 + cmp r4, #1024 + movgt r4, #1024 + cmp r4, r10 + movlt r4, r10 + +.L388: cmp r7, r1 @ loop back if more samples to do + bhi term_minus_2_loop + + str r3, [r5, #40] @ else store left channel and exit + b common_exit + +/* + ****************************************************************************** + * Loop to handle term = -3 condition + * + * r0 = dpp->weight_B r8 = previous left sample + * r1 = bptr r9 = + * r2 = current left sample r10 = -1024 (for clipping) + * r3 = previous right sample r11 = 512 (for rounding) + * r4 = dpp->weight_A ip = intermediate result + * r5 = dpp sp = + * r6 = dpp->delta lr = + * r7 = eptr pc = + ******************************************************************************* + */ + +term_minus_3: + ldr r3, [r1, #-4] @ load previous samples + ldr r8, [r1, #-8] + +term_minus_3_loop: + ldr ip, [r1] + mla r2, r3, r4, r11 + add r2, ip, r2, asr #10 + str r2, [r1], #4 + cmp r3, #0 + cmpne ip, #0 + beq .L399 + teq ip, r3 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + cmp r4, #1024 @ then clip weight to +/-1024 + movgt r4, #1024 + cmp r4, r10 + movlt r4, r10 + +.L399: movs ip, r8 @ ip = previous left we use now + mov r8, 
r2 @ r8 = current left we use next time + ldr r2, [r1], #4 + mla r3, ip, r0, r11 + add r3, r2, r3, asr #10 + strne r3, [r1, #-4] + cmpne r2, #0 + beq .L407 + teq ip, r2 + submi r0, r0, r6 + addpl r0, r0, r6 + cmp r0, #1024 + movgt r0, #1024 + cmp r0, r10 + movlt r0, r10 + +.L407: cmp r7, r1 @ loop back if more samples to do + bhi term_minus_3_loop + + str r3, [r5, #8] @ else store previous samples & exit + str r8, [r5, #40] + +/* + * Before finally exiting we must store weights back for next time + */ + +common_exit: + strh r4, [r5, #4] + strh r0, [r5, #6] + ldmfd sp!, {r4 - r8, r10, r11, pc} + diff --git a/src/engine/external/wavpack/arml.S b/src/engine/external/wavpack/arml.S index 39de5383..7da00513 100644 --- a/src/engine/external/wavpack/arml.S +++ b/src/engine/external/wavpack/arml.S @@ -1,491 +1,491 @@ -//////////////////////////////////////////////////////////////////////////// -// **** WAVPACK **** // -// Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // -// All Rights Reserved. // -// Distributed under the BSD Software License (see license.txt) // -//////////////////////////////////////////////////////////////////////////// - -/* This is an assembly optimized version of the following WavPack function: - * - * void decorr_stereo_pass_cont (struct decorr_pass *dpp, - * long *buffer, long sample_count); - * - * It performs a single pass of stereo decorrelation on the provided buffer. - * Note that this version of the function requires that the 8 previous stereo - * samples are visible and correct. In other words, it ignores the "samples_*" - * fields in the decorr_pass structure and gets the history data directly - * from the buffer. It does, however, return the appropriate history samples - * to the decorr_pass structure before returning. - * - * This is written to work on a ARM7TDMI processor. This version uses the - * 64-bit multiply-accumulate instruction and so can be used with all - * WavPack files. 
However, for optimum performance with 16-bit WavPack - * files, there is a faster version that only uses the 32-bit MLA - * instruction. - */ - - .text - .align - .global decorr_stereo_pass_cont_arml - -/* - * on entry: - * - * r0 = struct decorr_pass *dpp - * r1 = long *buffer - * r2 = long sample_count - */ - -decorr_stereo_pass_cont_arml: - - stmfd sp!, {r4 - r8, r10, r11, lr} - mov r5, r0 @ r5 = dpp - mov r11, #512 @ r11 = 512 for rounding - ldrsh r6, [r0, #2] @ r6 = dpp->delta - ldrsh r4, [r0, #4] @ r4 = dpp->weight_A - ldrsh r0, [r0, #6] @ r0 = dpp->weight_B - cmp r2, #0 @ exit if no samples to process - beq common_exit - - mov r0, r0, asl #18 @ for 64-bit math we use weights << 18 - mov r4, r4, asl #18 - mov r6, r6, asl #18 - add r7, r1, r2, asl #3 @ r7 = buffer ending position - ldrsh r2, [r5, #0] @ r2 = dpp->term - cmp r2, #0 - blt minus_term - - ldr lr, [r1, #-16] @ load 2 sample history from buffer - ldr r10, [r1, #-12] @ for terms 2, 17, and 18 - ldr r8, [r1, #-8] - ldr r3, [r1, #-4] - - cmp r2, #18 - beq term_18_loop - mov lr, lr, asl #4 - mov r10, r10, asl #4 - cmp r2, #2 - beq term_2_loop - cmp r2, #17 - beq term_17_loop - b term_default_loop - -minus_term: - mov r10, #(1024 << 18) @ r10 = -1024 << 18 for weight clipping - rsb r10, r10, #0 @ (only used for negative terms) - cmn r2, #1 - beq term_minus_1 - cmn r2, #2 - beq term_minus_2 - cmn r2, #3 - beq term_minus_3 - b common_exit - -/* - ****************************************************************************** - * Loop to handle term = 17 condition - * - * r0 = dpp->weight_B r8 = previous left sample - * r1 = bptr r9 = - * r2 = current sample r10 = second previous left sample << 4 - * r3 = previous right sample r11 = lo accumulator (for rounding) - * r4 = dpp->weight_A ip = current decorrelation value - * r5 = dpp sp = - * r6 = dpp->delta lr = second previous right sample << 4 - * r7 = eptr pc = - ******************************************************************************* - */ - 
-term_17_loop: - rsbs ip, lr, r8, asl #5 @ decorr value = (2 * prev) - 2nd prev - mov lr, r8, asl #4 @ previous becomes 2nd previous - ldr r2, [r1], #4 @ get sample & update pointer - mov r11, #0x80000000 - mov r8, r2 - smlalne r11, r8, r4, ip - strne r8, [r1, #-4] @ if change possible, store sample back - cmpne r2, #0 - beq .L325 - teq ip, r2 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - -.L325: rsbs ip, r10, r3, asl #5 @ do same thing for right channel - mov r10, r3, asl #4 - ldr r2, [r1], #4 - mov r11, #0x80000000 - mov r3, r2 - smlalne r11, r3, r0, ip - strne r3, [r1, #-4] - cmpne r2, #0 - beq .L329 - teq ip, r2 - submi r0, r0, r6 - addpl r0, r0, r6 - -.L329: cmp r7, r1 @ loop back if more samples to do - bhi term_17_loop - mov lr, lr, asr #4 - mov r10, r10, asr #4 - b store_1718 @ common exit for terms 17 & 18 - -/* - ****************************************************************************** - * Loop to handle term = 18 condition - * - * r0 = dpp->weight_B r8 = previous left sample - * r1 = bptr r9 = - * r2 = current sample r10 = second previous left sample - * r3 = previous right sample r11 = lo accumulator (for rounding) - * r4 = dpp->weight_A ip = decorrelation value - * r5 = dpp sp = - * r6 = dpp->delta lr = second previous right sample - * r7 = eptr pc = - ******************************************************************************* - */ - -term_18_loop: - rsb ip, lr, r8 @ decorr value = - mov lr, r8 @ ((3 * prev) - 2nd prev) >> 1 - add ip, lr, ip, asr #1 - movs ip, ip, asl #4 - ldr r2, [r1], #4 @ get sample & update pointer - mov r11, #0x80000000 - mov r8, r2 - smlalne r11, r8, r4, ip - strne r8, [r1, #-4] @ if change possible, store sample back - cmpne r2, #0 - beq .L337 - teq ip, r2 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - -.L337: rsb ip, r10, r3 @ do same thing for right channel - mov r10, r3 - add ip, r10, ip, asr #1 - movs ip, ip, asl #4 - ldr r2, [r1], #4 - mov r11, #0x80000000 - mov r3, 
r2 - smlalne r11, r3, r0, ip - strne r3, [r1, #-4] - cmpne r2, #0 - beq .L341 - teq ip, r2 - submi r0, r0, r6 - addpl r0, r0, r6 - -.L341: cmp r7, r1 @ loop back if more samples to do - bhi term_18_loop - -/* common exit for terms 17 & 18 */ - -store_1718: - str r3, [r5, #40] @ store sample history into struct - str r8, [r5, #8] - str r10, [r5, #44] - str lr, [r5, #12] - b common_exit @ and return - -/* - ****************************************************************************** - * Loop to handle term = 2 condition - * (note that this case can be handled by the default term handler (1-8), but - * this special case is faster because it doesn't have to read memory twice) - * - * r0 = dpp->weight_B r8 = previous left sample - * r1 = bptr r9 = - * r2 = current sample r10 = second previous left sample << 4 - * r3 = previous right sample r11 = lo accumulator (for rounding) - * r4 = dpp->weight_A ip = decorrelation value - * r5 = dpp sp = - * r6 = dpp->delta lr = second previous right sample << 4 - * r7 = eptr pc = - ******************************************************************************* - */ - -term_2_loop: - movs ip, lr @ get decorrelation value & test - ldr r2, [r1], #4 @ get sample & update pointer - mov lr, r8, asl #4 @ previous becomes 2nd previous - mov r11, #0x80000000 - mov r8, r2 - smlalne r11, r8, r4, ip - strne r8, [r1, #-4] @ if change possible, store sample back - cmpne r2, #0 - beq .L225 - teq ip, r2 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - -.L225: movs ip, r10 @ do same thing for right channel - ldr r2, [r1], #4 - mov r10, r3, asl #4 - mov r11, #0x80000000 - mov r3, r2 - smlalne r11, r3, r0, ip - strne r3, [r1, #-4] - cmpne r2, #0 - beq .L229 - teq ip, r2 - submi r0, r0, r6 - addpl r0, r0, r6 - -.L229: cmp r7, r1 @ loop back if more samples to do - bhi term_2_loop - - b default_term_exit @ this exit updates all dpp->samples - -/* - ****************************************************************************** - * 
Loop to handle default term condition - * - * r0 = dpp->weight_B r8 = result accumulator - * r1 = bptr r9 = - * r2 = dpp->term r10 = - * r3 = decorrelation value r11 = lo accumulator (for rounding) - * r4 = dpp->weight_A ip = current sample - * r5 = dpp sp = - * r6 = dpp->delta lr = - * r7 = eptr pc = - ******************************************************************************* - */ - -term_default_loop: - ldr r3, [r1, -r2, asl #3] @ get decorrelation value based on term - ldr ip, [r1], #4 @ get original sample and bump ptr - movs r3, r3, asl #4 - mov r11, #0x80000000 - mov r8, ip - smlalne r11, r8, r4, r3 - strne r8, [r1, #-4] @ if possibly changed, store updated sample - cmpne ip, #0 - beq .L350 - teq ip, r3 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - -.L350: ldr r3, [r1, -r2, asl #3] @ do the same thing for right channel - ldr ip, [r1], #4 - movs r3, r3, asl #4 - mov r11, #0x80000000 - mov r8, ip - smlalne r11, r8, r0, r3 - strne r8, [r1, #-4] - cmpne ip, #0 - beq .L354 - teq ip, r3 - submi r0, r0, r6 - addpl r0, r0, r6 - -.L354: cmp r7, r1 @ loop back if more samples to do - bhi term_default_loop - -/* - * This exit is used by terms 1-8 to store the previous 8 samples into the decorr - * structure (even if they are not all used for the given term) - */ - -default_term_exit: - ldrsh r3, [r5, #0] - sub ip, r3, #1 - mov lr, #7 - -.L358: and r3, ip, #7 - add r3, r5, r3, asl #2 - ldr r2, [r1, #-4] - str r2, [r3, #40] - ldr r2, [r1, #-8]! 
- str r2, [r3, #8] - sub ip, ip, #1 - sub lr, lr, #1 - cmn lr, #1 - bne .L358 - b common_exit - -/* - ****************************************************************************** - * Loop to handle term = -1 condition - * - * r0 = dpp->weight_B r8 = - * r1 = bptr r9 = - * r2 = intermediate result r10 = -1024 (for clipping) - * r3 = previous right sample r11 = lo accumulator (for rounding) - * r4 = dpp->weight_A ip = current sample - * r5 = dpp sp = - * r6 = dpp->delta lr = updated left sample - * r7 = eptr pc = - ******************************************************************************* - */ - -term_minus_1: - ldr r3, [r1, #-4] - -term_minus_1_loop: - ldr ip, [r1], #8 @ for left channel the decorrelation value - movs r3, r3, asl #4 @ is the previous right sample (in r3) - mov r11, #0x80000000 - mov lr, ip - smlalne r11, lr, r4, r3 - strne lr, [r1, #-8] - cmpne ip, #0 - beq .L361 - teq ip, r3 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - cmp r4, #(1024 << 18) - movgt r4, #(1024 << 18) - cmp r4, r10 - movlt r4, r10 - -.L361: ldr r2, [r1, #-4] @ for right channel the decorrelation value - movs lr, lr, asl #4 - mov r11, #0x80000000 - mov r3, r2 - smlalne r11, r3, r0, lr - strne r3, [r1, #-4] - cmpne r2, #0 - beq .L369 - teq r2, lr - submi r0, r0, r6 - addpl r0, r0, r6 - cmp r0, #(1024 << 18) @ then clip weight to +/-1024 - movgt r0, #(1024 << 18) - cmp r0, r10 - movlt r0, r10 - -.L369: cmp r7, r1 @ loop back if more samples to do - bhi term_minus_1_loop - - str r3, [r5, #8] @ else store right sample and exit - b common_exit - -/* - ****************************************************************************** - * Loop to handle term = -2 condition - * (note that the channels are processed in the reverse order here) - * - * r0 = dpp->weight_B r8 = - * r1 = bptr r9 = - * r2 = intermediate result r10 = -1024 (for clipping) - * r3 = previous left sample r11 = lo accumulator (for rounding) - * r4 = dpp->weight_A ip = current sample - * r5 = 
dpp sp = - * r6 = dpp->delta lr = updated right sample - * r7 = eptr pc = - ******************************************************************************* - */ - -term_minus_2: - ldr r3, [r1, #-8] - -term_minus_2_loop: - ldr ip, [r1, #4] @ for right channel the decorrelation value - movs r3, r3, asl #4 @ is the previous left sample (in r3) - mov r11, #0x80000000 - mov lr, ip - smlalne r11, lr, r0, r3 - strne lr, [r1, #4] - cmpne ip, #0 - beq .L380 - teq ip, r3 @ update weight based on signs - submi r0, r0, r6 - addpl r0, r0, r6 - cmp r0, #(1024 << 18) @ then clip weight to +/-1024 - movgt r0, #(1024 << 18) - cmp r0, r10 - movlt r0, r10 - -.L380: ldr r2, [r1], #8 @ for left channel the decorrelation value - movs lr, lr, asl #4 - mov r11, #0x80000000 - mov r3, r2 - smlalne r11, r3, r4, lr - strne r3, [r1, #-8] - cmpne r2, #0 - beq .L388 - teq r2, lr - submi r4, r4, r6 - addpl r4, r4, r6 - cmp r4, #(1024 << 18) - movgt r4, #(1024 << 18) - cmp r4, r10 - movlt r4, r10 - -.L388: cmp r7, r1 @ loop back if more samples to do - bhi term_minus_2_loop - - str r3, [r5, #40] @ else store left channel and exit - b common_exit - -/* - ****************************************************************************** - * Loop to handle term = -3 condition - * - * r0 = dpp->weight_B r8 = previous left sample - * r1 = bptr r9 = - * r2 = current left sample r10 = -1024 (for clipping) - * r3 = previous right sample r11 = lo accumulator (for rounding) - * r4 = dpp->weight_A ip = intermediate result - * r5 = dpp sp = - * r6 = dpp->delta lr = - * r7 = eptr pc = - ******************************************************************************* - */ - -term_minus_3: - ldr r3, [r1, #-4] @ load previous samples - ldr r8, [r1, #-8] - -term_minus_3_loop: - ldr ip, [r1], #4 - movs r3, r3, asl #4 - mov r11, #0x80000000 - mov r2, ip - smlalne r11, r2, r4, r3 - strne r2, [r1, #-4] - cmpne ip, #0 - beq .L399 - teq ip, r3 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - cmp r4, 
#(1024 << 18) @ then clip weight to +/-1024 - movgt r4, #(1024 << 18) - cmp r4, r10 - movlt r4, r10 - -.L399: movs ip, r8, asl #4 @ ip = previous left we use now - mov r8, r2 @ r8 = current left we use next time - ldr r2, [r1], #4 - mov r11, #0x80000000 - mov r3, r2 - smlalne r11, r3, r0, ip - strne r3, [r1, #-4] - cmpne r2, #0 - beq .L407 - teq ip, r2 - submi r0, r0, r6 - addpl r0, r0, r6 - cmp r0, #(1024 << 18) - movgt r0, #(1024 << 18) - cmp r0, r10 - movlt r0, r10 - -.L407: cmp r7, r1 @ loop back if more samples to do - bhi term_minus_3_loop - - str r3, [r5, #8] @ else store previous samples & exit - str r8, [r5, #40] - -/* - * Before finally exiting we must store weights back for next time - */ - -common_exit: - mov r0, r0, asr #18 @ restore weights to real magnitude - mov r4, r4, asr #18 - strh r4, [r5, #4] - strh r0, [r5, #6] - ldmfd sp!, {r4 - r8, r10, r11, pc} - +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2006 Conifer Software. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +/* This is an assembly optimized version of the following WavPack function: + * + * void decorr_stereo_pass_cont (struct decorr_pass *dpp, + * long *buffer, long sample_count); + * + * It performs a single pass of stereo decorrelation on the provided buffer. + * Note that this version of the function requires that the 8 previous stereo + * samples are visible and correct. In other words, it ignores the "samples_*" + * fields in the decorr_pass structure and gets the history data directly + * from the buffer. It does, however, return the appropriate history samples + * to the decorr_pass structure before returning. + * + * This is written to work on a ARM7TDMI processor. 
This version uses the + * 64-bit multiply-accumulate instruction and so can be used with all + * WavPack files. However, for optimum performance with 16-bit WavPack + * files, there is a faster version that only uses the 32-bit MLA + * instruction. + */ + + .text + .align + .global decorr_stereo_pass_cont_arml + +/* + * on entry: + * + * r0 = struct decorr_pass *dpp + * r1 = long *buffer + * r2 = long sample_count + */ + +decorr_stereo_pass_cont_arml: + + stmfd sp!, {r4 - r8, r10, r11, lr} + mov r5, r0 @ r5 = dpp + mov r11, #512 @ r11 = 512 for rounding + ldrsh r6, [r0, #2] @ r6 = dpp->delta + ldrsh r4, [r0, #4] @ r4 = dpp->weight_A + ldrsh r0, [r0, #6] @ r0 = dpp->weight_B + cmp r2, #0 @ exit if no samples to process + beq common_exit + + mov r0, r0, asl #18 @ for 64-bit math we use weights << 18 + mov r4, r4, asl #18 + mov r6, r6, asl #18 + add r7, r1, r2, asl #3 @ r7 = buffer ending position + ldrsh r2, [r5, #0] @ r2 = dpp->term + cmp r2, #0 + blt minus_term + + ldr lr, [r1, #-16] @ load 2 sample history from buffer + ldr r10, [r1, #-12] @ for terms 2, 17, and 18 + ldr r8, [r1, #-8] + ldr r3, [r1, #-4] + + cmp r2, #18 + beq term_18_loop + mov lr, lr, asl #4 + mov r10, r10, asl #4 + cmp r2, #2 + beq term_2_loop + cmp r2, #17 + beq term_17_loop + b term_default_loop + +minus_term: + mov r10, #(1024 << 18) @ r10 = -1024 << 18 for weight clipping + rsb r10, r10, #0 @ (only used for negative terms) + cmn r2, #1 + beq term_minus_1 + cmn r2, #2 + beq term_minus_2 + cmn r2, #3 + beq term_minus_3 + b common_exit + +/* + ****************************************************************************** + * Loop to handle term = 17 condition + * + * r0 = dpp->weight_B r8 = previous left sample + * r1 = bptr r9 = + * r2 = current sample r10 = second previous left sample << 4 + * r3 = previous right sample r11 = lo accumulator (for rounding) + * r4 = dpp->weight_A ip = current decorrelation value + * r5 = dpp sp = + * r6 = dpp->delta lr = second previous right sample << 4 + * r7 
= eptr pc = + ******************************************************************************* + */ + +term_17_loop: + rsbs ip, lr, r8, asl #5 @ decorr value = (2 * prev) - 2nd prev + mov lr, r8, asl #4 @ previous becomes 2nd previous + ldr r2, [r1], #4 @ get sample & update pointer + mov r11, #0x80000000 + mov r8, r2 + smlalne r11, r8, r4, ip + strne r8, [r1, #-4] @ if change possible, store sample back + cmpne r2, #0 + beq .L325 + teq ip, r2 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + +.L325: rsbs ip, r10, r3, asl #5 @ do same thing for right channel + mov r10, r3, asl #4 + ldr r2, [r1], #4 + mov r11, #0x80000000 + mov r3, r2 + smlalne r11, r3, r0, ip + strne r3, [r1, #-4] + cmpne r2, #0 + beq .L329 + teq ip, r2 + submi r0, r0, r6 + addpl r0, r0, r6 + +.L329: cmp r7, r1 @ loop back if more samples to do + bhi term_17_loop + mov lr, lr, asr #4 + mov r10, r10, asr #4 + b store_1718 @ common exit for terms 17 & 18 + +/* + ****************************************************************************** + * Loop to handle term = 18 condition + * + * r0 = dpp->weight_B r8 = previous left sample + * r1 = bptr r9 = + * r2 = current sample r10 = second previous left sample + * r3 = previous right sample r11 = lo accumulator (for rounding) + * r4 = dpp->weight_A ip = decorrelation value + * r5 = dpp sp = + * r6 = dpp->delta lr = second previous right sample + * r7 = eptr pc = + ******************************************************************************* + */ + +term_18_loop: + rsb ip, lr, r8 @ decorr value = + mov lr, r8 @ ((3 * prev) - 2nd prev) >> 1 + add ip, lr, ip, asr #1 + movs ip, ip, asl #4 + ldr r2, [r1], #4 @ get sample & update pointer + mov r11, #0x80000000 + mov r8, r2 + smlalne r11, r8, r4, ip + strne r8, [r1, #-4] @ if change possible, store sample back + cmpne r2, #0 + beq .L337 + teq ip, r2 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + +.L337: rsb ip, r10, r3 @ do same thing for right channel + mov r10, r3 
+ add ip, r10, ip, asr #1 + movs ip, ip, asl #4 + ldr r2, [r1], #4 + mov r11, #0x80000000 + mov r3, r2 + smlalne r11, r3, r0, ip + strne r3, [r1, #-4] + cmpne r2, #0 + beq .L341 + teq ip, r2 + submi r0, r0, r6 + addpl r0, r0, r6 + +.L341: cmp r7, r1 @ loop back if more samples to do + bhi term_18_loop + +/* common exit for terms 17 & 18 */ + +store_1718: + str r3, [r5, #40] @ store sample history into struct + str r8, [r5, #8] + str r10, [r5, #44] + str lr, [r5, #12] + b common_exit @ and return + +/* + ****************************************************************************** + * Loop to handle term = 2 condition + * (note that this case can be handled by the default term handler (1-8), but + * this special case is faster because it doesn't have to read memory twice) + * + * r0 = dpp->weight_B r8 = previous left sample + * r1 = bptr r9 = + * r2 = current sample r10 = second previous left sample << 4 + * r3 = previous right sample r11 = lo accumulator (for rounding) + * r4 = dpp->weight_A ip = decorrelation value + * r5 = dpp sp = + * r6 = dpp->delta lr = second previous right sample << 4 + * r7 = eptr pc = + ******************************************************************************* + */ + +term_2_loop: + movs ip, lr @ get decorrelation value & test + ldr r2, [r1], #4 @ get sample & update pointer + mov lr, r8, asl #4 @ previous becomes 2nd previous + mov r11, #0x80000000 + mov r8, r2 + smlalne r11, r8, r4, ip + strne r8, [r1, #-4] @ if change possible, store sample back + cmpne r2, #0 + beq .L225 + teq ip, r2 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + +.L225: movs ip, r10 @ do same thing for right channel + ldr r2, [r1], #4 + mov r10, r3, asl #4 + mov r11, #0x80000000 + mov r3, r2 + smlalne r11, r3, r0, ip + strne r3, [r1, #-4] + cmpne r2, #0 + beq .L229 + teq ip, r2 + submi r0, r0, r6 + addpl r0, r0, r6 + +.L229: cmp r7, r1 @ loop back if more samples to do + bhi term_2_loop + + b default_term_exit @ this exit updates all 
dpp->samples + +/* + ****************************************************************************** + * Loop to handle default term condition + * + * r0 = dpp->weight_B r8 = result accumulator + * r1 = bptr r9 = + * r2 = dpp->term r10 = + * r3 = decorrelation value r11 = lo accumulator (for rounding) + * r4 = dpp->weight_A ip = current sample + * r5 = dpp sp = + * r6 = dpp->delta lr = + * r7 = eptr pc = + ******************************************************************************* + */ + +term_default_loop: + ldr r3, [r1, -r2, asl #3] @ get decorrelation value based on term + ldr ip, [r1], #4 @ get original sample and bump ptr + movs r3, r3, asl #4 + mov r11, #0x80000000 + mov r8, ip + smlalne r11, r8, r4, r3 + strne r8, [r1, #-4] @ if possibly changed, store updated sample + cmpne ip, #0 + beq .L350 + teq ip, r3 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + +.L350: ldr r3, [r1, -r2, asl #3] @ do the same thing for right channel + ldr ip, [r1], #4 + movs r3, r3, asl #4 + mov r11, #0x80000000 + mov r8, ip + smlalne r11, r8, r0, r3 + strne r8, [r1, #-4] + cmpne ip, #0 + beq .L354 + teq ip, r3 + submi r0, r0, r6 + addpl r0, r0, r6 + +.L354: cmp r7, r1 @ loop back if more samples to do + bhi term_default_loop + +/* + * This exit is used by terms 1-8 to store the previous 8 samples into the decorr + * structure (even if they are not all used for the given term) + */ + +default_term_exit: + ldrsh r3, [r5, #0] + sub ip, r3, #1 + mov lr, #7 + +.L358: and r3, ip, #7 + add r3, r5, r3, asl #2 + ldr r2, [r1, #-4] + str r2, [r3, #40] + ldr r2, [r1, #-8]! 
+ str r2, [r3, #8] + sub ip, ip, #1 + sub lr, lr, #1 + cmn lr, #1 + bne .L358 + b common_exit + +/* + ****************************************************************************** + * Loop to handle term = -1 condition + * + * r0 = dpp->weight_B r8 = + * r1 = bptr r9 = + * r2 = intermediate result r10 = -1024 (for clipping) + * r3 = previous right sample r11 = lo accumulator (for rounding) + * r4 = dpp->weight_A ip = current sample + * r5 = dpp sp = + * r6 = dpp->delta lr = updated left sample + * r7 = eptr pc = + ******************************************************************************* + */ + +term_minus_1: + ldr r3, [r1, #-4] + +term_minus_1_loop: + ldr ip, [r1], #8 @ for left channel the decorrelation value + movs r3, r3, asl #4 @ is the previous right sample (in r3) + mov r11, #0x80000000 + mov lr, ip + smlalne r11, lr, r4, r3 + strne lr, [r1, #-8] + cmpne ip, #0 + beq .L361 + teq ip, r3 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + cmp r4, #(1024 << 18) + movgt r4, #(1024 << 18) + cmp r4, r10 + movlt r4, r10 + +.L361: ldr r2, [r1, #-4] @ for right channel the decorrelation value + movs lr, lr, asl #4 + mov r11, #0x80000000 + mov r3, r2 + smlalne r11, r3, r0, lr + strne r3, [r1, #-4] + cmpne r2, #0 + beq .L369 + teq r2, lr + submi r0, r0, r6 + addpl r0, r0, r6 + cmp r0, #(1024 << 18) @ then clip weight to +/-1024 + movgt r0, #(1024 << 18) + cmp r0, r10 + movlt r0, r10 + +.L369: cmp r7, r1 @ loop back if more samples to do + bhi term_minus_1_loop + + str r3, [r5, #8] @ else store right sample and exit + b common_exit + +/* + ****************************************************************************** + * Loop to handle term = -2 condition + * (note that the channels are processed in the reverse order here) + * + * r0 = dpp->weight_B r8 = + * r1 = bptr r9 = + * r2 = intermediate result r10 = -1024 (for clipping) + * r3 = previous left sample r11 = lo accumulator (for rounding) + * r4 = dpp->weight_A ip = current sample + * r5 = 
dpp sp = + * r6 = dpp->delta lr = updated right sample + * r7 = eptr pc = + ******************************************************************************* + */ + +term_minus_2: + ldr r3, [r1, #-8] + +term_minus_2_loop: + ldr ip, [r1, #4] @ for right channel the decorrelation value + movs r3, r3, asl #4 @ is the previous left sample (in r3) + mov r11, #0x80000000 + mov lr, ip + smlalne r11, lr, r0, r3 + strne lr, [r1, #4] + cmpne ip, #0 + beq .L380 + teq ip, r3 @ update weight based on signs + submi r0, r0, r6 + addpl r0, r0, r6 + cmp r0, #(1024 << 18) @ then clip weight to +/-1024 + movgt r0, #(1024 << 18) + cmp r0, r10 + movlt r0, r10 + +.L380: ldr r2, [r1], #8 @ for left channel the decorrelation value + movs lr, lr, asl #4 + mov r11, #0x80000000 + mov r3, r2 + smlalne r11, r3, r4, lr + strne r3, [r1, #-8] + cmpne r2, #0 + beq .L388 + teq r2, lr + submi r4, r4, r6 + addpl r4, r4, r6 + cmp r4, #(1024 << 18) + movgt r4, #(1024 << 18) + cmp r4, r10 + movlt r4, r10 + +.L388: cmp r7, r1 @ loop back if more samples to do + bhi term_minus_2_loop + + str r3, [r5, #40] @ else store left channel and exit + b common_exit + +/* + ****************************************************************************** + * Loop to handle term = -3 condition + * + * r0 = dpp->weight_B r8 = previous left sample + * r1 = bptr r9 = + * r2 = current left sample r10 = -1024 (for clipping) + * r3 = previous right sample r11 = lo accumulator (for rounding) + * r4 = dpp->weight_A ip = intermediate result + * r5 = dpp sp = + * r6 = dpp->delta lr = + * r7 = eptr pc = + ******************************************************************************* + */ + +term_minus_3: + ldr r3, [r1, #-4] @ load previous samples + ldr r8, [r1, #-8] + +term_minus_3_loop: + ldr ip, [r1], #4 + movs r3, r3, asl #4 + mov r11, #0x80000000 + mov r2, ip + smlalne r11, r2, r4, r3 + strne r2, [r1, #-4] + cmpne ip, #0 + beq .L399 + teq ip, r3 @ update weight based on signs + submi r4, r4, r6 + addpl r4, r4, r6 + cmp r4, 
#(1024 << 18) @ then clip weight to +/-1024 + movgt r4, #(1024 << 18) + cmp r4, r10 + movlt r4, r10 + +.L399: movs ip, r8, asl #4 @ ip = previous left we use now + mov r8, r2 @ r8 = current left we use next time + ldr r2, [r1], #4 + mov r11, #0x80000000 + mov r3, r2 + smlalne r11, r3, r0, ip + strne r3, [r1, #-4] + cmpne r2, #0 + beq .L407 + teq ip, r2 + submi r0, r0, r6 + addpl r0, r0, r6 + cmp r0, #(1024 << 18) + movgt r0, #(1024 << 18) + cmp r0, r10 + movlt r0, r10 + +.L407: cmp r7, r1 @ loop back if more samples to do + bhi term_minus_3_loop + + str r3, [r5, #8] @ else store previous samples & exit + str r8, [r5, #40] + +/* + * Before finally exiting we must store weights back for next time + */ + +common_exit: + mov r0, r0, asr #18 @ restore weights to real magnitude + mov r4, r4, asr #18 + strh r4, [r5, #4] + strh r0, [r5, #6] + ldmfd sp!, {r4 - r8, r10, r11, pc} + diff --git a/src/engine/external/wavpack/bits.c b/src/engine/external/wavpack/bits.c index dbfa0cae..b4f03385 100644 --- a/src/engine/external/wavpack/bits.c +++ b/src/engine/external/wavpack/bits.c @@ -1,140 +1,140 @@ -//////////////////////////////////////////////////////////////////////////// -// **** WAVPACK **** // -// Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // -// All Rights Reserved. // -// Distributed under the BSD Software License (see license.txt) // -//////////////////////////////////////////////////////////////////////////// - -// bits.c - -// This module provides utilities to support the BitStream structure which is -// used to read and write all WavPack audio data streams. It also contains a -// wrapper for the stream I/O functions and a set of functions dealing with -// endian-ness, both for enhancing portability. Finally, a debug wrapper for -// the malloc() system is provided. 
- -#include "wavpack.h" - -#include <string.h> -#include <ctype.h> - -////////////////////////// Bitstream functions //////////////////////////////// - -// Open the specified BitStream and associate with the specified buffer. - -static void bs_read (Bitstream *bs); - -void bs_open_read (Bitstream *bs, uchar *buffer_start, uchar *buffer_end, read_stream file, uint32_t file_bytes) -{ - CLEAR (*bs); - bs->buf = buffer_start; - bs->end = buffer_end; - - if (file) { - bs->ptr = bs->end - 1; - bs->file_bytes = file_bytes; - bs->file = file; - } - else - bs->ptr = bs->buf - 1; - - bs->wrap = bs_read; -} - -// This function is only called from the getbit() and getbits() macros when -// the BitStream has been exhausted and more data is required. Sinve these -// bistreams no longer access files, this function simple sets an error and -// resets the buffer. - -static void bs_read (Bitstream *bs) -{ - if (bs->file && bs->file_bytes) { - uint32_t bytes_read, bytes_to_read = bs->end - bs->buf; - - if (bytes_to_read > bs->file_bytes) - bytes_to_read = bs->file_bytes; - - bytes_read = bs->file (bs->buf, bytes_to_read); - - if (bytes_read) { - bs->end = bs->buf + bytes_read; - bs->file_bytes -= bytes_read; - } - else { - memset (bs->buf, -1, bs->end - bs->buf); - bs->error = 1; - } - } - else - bs->error = 1; - - if (bs->error) - memset (bs->buf, -1, bs->end - bs->buf); - - bs->ptr = bs->buf; -} - -/////////////////////// Endian Correction Routines //////////////////////////// - -void little_endian_to_native (void *data, char *format) -{ - uchar *cp = (uchar *) data; - int32_t temp; - - while (*format) { - switch (*format) { - case 'L': - temp = cp [0] + ((int32_t) cp [1] << 8) + ((int32_t) cp [2] << 16) + ((int32_t) cp [3] << 24); - * (int32_t *) cp = temp; - cp += 4; - break; - - case 'S': - temp = cp [0] + (cp [1] << 8); - * (short *) cp = (short) temp; - cp += 2; - break; - - default: - if (isdigit (*format)) - cp += *format - '0'; - - break; - } - - format++; - } -} - -void 
native_to_little_endian (void *data, char *format) -{ - uchar *cp = (uchar *) data; - int32_t temp; - - while (*format) { - switch (*format) { - case 'L': - temp = * (int32_t *) cp; - *cp++ = (uchar) temp; - *cp++ = (uchar) (temp >> 8); - *cp++ = (uchar) (temp >> 16); - *cp++ = (uchar) (temp >> 24); - break; - - case 'S': - temp = * (short *) cp; - *cp++ = (uchar) temp; - *cp++ = (uchar) (temp >> 8); - break; - - default: - if (isdigit (*format)) - cp += *format - '0'; - - break; - } - - format++; - } -} +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2006 Conifer Software. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// bits.c + +// This module provides utilities to support the BitStream structure which is +// used to read and write all WavPack audio data streams. It also contains a +// wrapper for the stream I/O functions and a set of functions dealing with +// endian-ness, both for enhancing portability. Finally, a debug wrapper for +// the malloc() system is provided. + +#include "wavpack.h" + +#include <string.h> +#include <ctype.h> + +////////////////////////// Bitstream functions //////////////////////////////// + +// Open the specified BitStream and associate with the specified buffer. + +static void bs_read (Bitstream *bs); + +void bs_open_read (Bitstream *bs, uchar *buffer_start, uchar *buffer_end, read_stream file, uint32_t file_bytes) +{ + CLEAR (*bs); + bs->buf = buffer_start; + bs->end = buffer_end; + + if (file) { + bs->ptr = bs->end - 1; + bs->file_bytes = file_bytes; + bs->file = file; + } + else + bs->ptr = bs->buf - 1; + + bs->wrap = bs_read; +} + +// This function is only called from the getbit() and getbits() macros when +// the BitStream has been exhausted and more data is required. 
Sinve these +// bistreams no longer access files, this function simple sets an error and +// resets the buffer. + +static void bs_read (Bitstream *bs) +{ + if (bs->file && bs->file_bytes) { + uint32_t bytes_read, bytes_to_read = bs->end - bs->buf; + + if (bytes_to_read > bs->file_bytes) + bytes_to_read = bs->file_bytes; + + bytes_read = bs->file (bs->buf, bytes_to_read); + + if (bytes_read) { + bs->end = bs->buf + bytes_read; + bs->file_bytes -= bytes_read; + } + else { + memset (bs->buf, -1, bs->end - bs->buf); + bs->error = 1; + } + } + else + bs->error = 1; + + if (bs->error) + memset (bs->buf, -1, bs->end - bs->buf); + + bs->ptr = bs->buf; +} + +/////////////////////// Endian Correction Routines //////////////////////////// + +void little_endian_to_native (void *data, char *format) +{ + uchar *cp = (uchar *) data; + int32_t temp; + + while (*format) { + switch (*format) { + case 'L': + temp = cp [0] + ((int32_t) cp [1] << 8) + ((int32_t) cp [2] << 16) + ((int32_t) cp [3] << 24); + * (int32_t *) cp = temp; + cp += 4; + break; + + case 'S': + temp = cp [0] + (cp [1] << 8); + * (short *) cp = (short) temp; + cp += 2; + break; + + default: + if (isdigit (*format)) + cp += *format - '0'; + + break; + } + + format++; + } +} + +void native_to_little_endian (void *data, char *format) +{ + uchar *cp = (uchar *) data; + int32_t temp; + + while (*format) { + switch (*format) { + case 'L': + temp = * (int32_t *) cp; + *cp++ = (uchar) temp; + *cp++ = (uchar) (temp >> 8); + *cp++ = (uchar) (temp >> 16); + *cp++ = (uchar) (temp >> 24); + break; + + case 'S': + temp = * (short *) cp; + *cp++ = (uchar) temp; + *cp++ = (uchar) (temp >> 8); + break; + + default: + if (isdigit (*format)) + cp += *format - '0'; + + break; + } + + format++; + } +} diff --git a/src/engine/external/wavpack/coldfire.S b/src/engine/external/wavpack/coldfire.S index 93df9d82..83530900 100644 --- a/src/engine/external/wavpack/coldfire.S +++ b/src/engine/external/wavpack/coldfire.S @@ -1,525 +1,525 @@ 
-//////////////////////////////////////////////////////////////////////////// -// **** WAVPACK **** // -// Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // -// All Rights Reserved. // -// Distributed under the BSD Software License (see license.txt) // -//////////////////////////////////////////////////////////////////////////// - -/* This is an assembly optimized version of the following WavPack function: - * - * void decorr_stereo_pass_cont (struct decorr_pass *dpp, - * long *buffer, long sample_count); - * - * It performs a single pass of stereo decorrelation on the provided buffer. - * Note that this version of the function requires that the 8 previous stereo - * samples are visible and correct. In other words, it ignores the "samples_*" - * fields in the decorr_pass structure and gets the history data directly - * from the buffer. It does, however, return the appropriate history samples - * to the decorr_pass structure before returning. - * - * This is written to work on a MCF5249 processor, or any processor based on - * the ColdFire V2 core with an EMAC unit. The EMAC is perfectly suited for - * the "apply_weight" function of WavPack decorrelation because it provides - * the requires 40-bit product. The fractional rounding mode of the EMAC is not - * configurable and uses "round to even" while WavPack uses "round to larger", - * so the rounding has to be done manually. 
- */ - - .text - .align 2 - .global decorr_stereo_pass_cont_mcf5249 - -decorr_stereo_pass_cont_mcf5249: - - lea (-44, %sp), %sp - movem.l %d2-%d7/%a2-%a6, (%sp) - move.l 44+4(%sp), %a2 | a2 = dpp-> - move.l 44+8(%sp), %a1 | a1 = bptr - move.w 2(%a2), %a3 | a3 = dpp->delta - move.w 4(%a2), %d3 | d3 = dpp->weight_A (sign extended) - ext.l %d3 - move.w 6(%a2), %d4 | d4 = dpp->weight_B (sign extended) - ext.l %d4 - move.l 44+12(%sp), %d0 | d0 = sample_count - jbeq return_only | if zero, nothing to do - - lsl.l #3, %d0 | d5 = bptr + (sample_count * 8) - move.l %d0, %d5 - add.l %a1, %d5 - - moveq.l #17, %d0 | left shift weights & delta 17 places - asl.l %d0, %d3 - asl.l %d0, %d4 - move.l %a3, %d1 - asl.l %d0, %d1 - move.l %d1, %a3 - - moveq.l #0x20, %d6 - move.l %d6, %macsr | set fractional mode for MAC - move.l #0, %acc1 | acc1 = 0x00 0000 80 (for rounding) - move.l #0x800000, %accext01 - - move.l #1024<<17, %d6 | d6 & d7 are weight clipping limits - move.l #-1024<<17, %d7 | (only used by negative terms) - - move.w (%a2), %d0 | d0 = term - ext.l %d0 - cmp.l #17, %d0 - jbeq term_17 | term = 17 - cmp.l #18, %d0 - jbeq term_18 | term = 18 - addq.l #1, %d0 - jbeq term_minus_1 | term = -1 - addq.l #1, %d0 - jbeq term_minus_2 | term = -2 - addq.l #1, %d0 - jbeq term_minus_3 | term = -3 - jbra term_default | default term = 1 - 8 - -|------------------------------------------------------------------------------ -| Loop to handle term = 17 condition -| -| a0 = d0 = (2 * bptr [-1]) - bptr [-2] -| a1 = bptr d1 = initial bptr [0] -| a2 = dpp-> d2 = updated bptr [0] -| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 -| a4 = d4 = dpp->weight_B << 17 -| a5 = d5 = eptr -| macsr = 0x20 acc1 = 0x00 0000 80 -|------------------------------------------------------------------------------ - -term_17: - move.l -8(%a1), %d0 | d0 = 2 * bptr [-1] - bptr [-2] - add.l %d0, %d0 - sub.l -16(%a1), %d0 - beq .L251 | if zero, skip calculation - move.l %acc1, %acc0 - asl.l #4, %d0 | acc0 = acc1 + (d0 << 
4) * weight_A - mac.l %d0, %d3, %acc0 - move.l (%a1), %d1 - beq .L255 - eor.l %d1, %d0 | else compare signs - bge .L256 | if same, add delta to weight - sub.l %a3, %d3 | else subtract delta from weight - sub.l %a3, %d3 | subtract again instead of branch -.L256: add.l %a3, %d3 | add delta to weight - -.L255: move.l %acc0, %d2 | d2 = rounded product - add.l %d1, %d2 | update bptr [0] and store - move.l %d2, (%a1)+ - -.L253: move.l -8(%a1), %d0 | d0 = 2 * bptr [-1] - bptr [-2] - add.l %d0, %d0 - sub.l -16(%a1), %d0 - beq .L257 | if zero, skip calculations - move.l %acc1, %acc0 - asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B - mac.l %d0, %d4, %acc0 - move.l (%a1), %d1 - beq .L254 - eor.l %d1, %d0 | else compare signs - bge .L259 | if same, add delta to weight - sub.l %a3, %d4 | else subtract delta from weight - sub.l %a3, %d4 | subtract again instead of branch -.L259: add.l %a3, %d4 | add delta to weight - -.L254: move.l %acc0, %d2 | d2 = rounded product - add.l %d1, %d2 | update bptr [0] and store - move.l %d2, (%a1)+ - -.L252: cmp.l %a1, %d5 | loop if bptr < eptr - jbhi term_17 - bra term_17_18_finish | exit through common path - -.L251: addq.l #4, %a1 | update point and jump back into loop - bra .L253 - -.L257: addq.l #4, %a1 | update point and jump back into loop - bra .L252 - -|------------------------------------------------------------------------------ -| Loop to handle term = 18 condition -| -| a0 = d0 = ((3 * bptr [-1]) - bptr [-2]) >> 1 -| a1 = bptr d1 = initial bptr [0] -| a2 = dpp-> d2 = updated bptr [0] -| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 -| a4 = d4 = dpp->weight_B << 17 -| a5 = d5 = eptr -| macsr = 0x20 acc1 = 0x00 0000 80 -|------------------------------------------------------------------------------ - -term_18: - move.l -8(%a1), %a0 | d0 = (3 * bptr [-1] - bptr [-2]) >> 1 - lea (%a0,%a0.l*2), %a0 - move.l %a0, %d0 - sub.l -16(%a1), %d0 - asr.l #1, %d0 - beq .L260 - move.l %acc1, %acc0 - asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * 
weight_A - mac.l %d0, %d3, %acc0 - move.l (%a1), %d1 - beq .L266 - eor.l %d1, %d0 | else compare signs - bge .L267 | if same, add delta to weight - sub.l %a3, %d3 | else subtract delta from weight - sub.l %a3, %d3 | subtract again instead of branch -.L267: add.l %a3, %d3 | add delta to weight - -.L266: move.l %acc0, %d2 | d2 = rounded product - add.l %d1, %d2 | add applied weight to bptr [0], store - move.l %d2, (%a1)+ - -.L268: move.l -8(%a1), %a0 | d0 = (3 * bptr [-1] - bptr [-2]) >> 1 - lea (%a0,%a0.l*2), %a0 - move.l %a0, %d0 - sub.l -16(%a1), %d0 - asr.l #1, %d0 - beq .L261 - move.l %acc1, %acc0 - asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B - mac.l %d0, %d4, %acc0 - move.l (%a1), %d1 - beq .L265 - eor.l %d1, %d0 | else compare signs - bge .L270 | if same, add delta to weight - sub.l %a3, %d4 | else subtract delta from weight - sub.l %a3, %d4 | subtract again instead of branch -.L270: add.l %a3, %d4 | add delta to weight - -.L265: move.l %acc0, %d2 | d2 = rounded product - add.l %d1, %d2 | add applied weight to bptr [0], store - move.l %d2, (%a1)+ - -.L269: cmp.l %a1, %d5 | loop if bptr < eptr - jbhi term_18 - bra term_17_18_finish | exit through common path - -.L260: addq.l #4, %a1 | bump pointer and jump back into loop - bra .L268 - -.L261: addq.l #4, %a1 | bump pointer and jump back into loop - bra .L269 - -term_17_18_finish: - move.l -4(%a1), 40(%a2) | restore dpp->samples_A [0-1], B [0-1] - move.l -8(%a1), 8(%a2) - move.l -12(%a1), 44(%a2) - move.l -16(%a1), 12(%a2) - jbra finish_up - -|------------------------------------------------------------------------------ -| Loop to handle default terms (i.e. 
1 - 8) -| -| a0 = tptr d0 = tptr [0] -| a1 = bptr d1 = initial bptr [0] -| a2 = dpp-> d2 = updated bptr [0] -| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 -| a4 = d4 = dpp->weight_B << 17 -| a5 = d5 = eptr -| macsr = 0x20 acc1 = 0x00 0000 80 -|------------------------------------------------------------------------------ - -term_default: - move.w (%a2), %d0 | a0 = a1 - (dpp->term * 8) - ext.l %d0 - lsl.l #3, %d0 - move.l %a1, %a0 - sub.l %d0, %a0 - -term_default_loop: - move.l (%a0)+, %d0 | d0 = tptr [0], skip ahead if zero - beq .L271 - move.l %acc1, %acc0 - asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A - mac.l %d0, %d3, %acc0 - move.l (%a1), %d1 - beq .L277 - eor.l %d1, %d0 | else compare signs - bge .L278 | if same, add delta to weight - sub.l %a3, %d3 | else subtract delta from weight - sub.l %a3, %d3 | subtract again instead of branch -.L278: add.l %a3, %d3 | add delta to weight - -.L277: move.l %acc0, %d2 | d2 = rounded product - add.l %d1, %d2 | add applied weight to bptr [0], store - move.l %d2, (%a1)+ - -.L275: move.l (%a0)+, %d0 | d0 = tptr [0], skip ahead if zero - beq .L272 - move.l %acc1, %acc0 - asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B - mac.l %d0, %d4, %acc0 - move.l (%a1), %d1 - beq .L276 - eor.l %d1, %d0 | else compare signs - bge .L281 | if same, add delta to weight - sub.l %a3, %d4 | else subtract delta from weight - sub.l %a3, %d4 | subtract again instead of branch -.L281: add.l %a3, %d4 | add delta to weight - -.L276: move.l %acc0, %d2 | d2 = rounded product - add.l %d1, %d2 | add applied weight to bptr [0], store - move.l %d2, (%a1)+ - -.L274: cmp.l %a1, %d5 | loop back if bptr < eptr - jbhi term_default_loop - move.w (%a2), %d0 | d0 = term - 1 - moveq.l #8, %d1 | d1 = loop counter - -.L323: subq.l #1, %d0 | back up & mask index - and.l #7, %d0 - move.l -(%a1), 40(%a2,%d0.l*4) | store dpp->samples_B [d0] - move.l -(%a1), 8(%a2,%d0.l*4) | store dpp->samples_A [d0] - subq.l #1, %d1 | loop on count - jbne .L323 - jbra finish_up 
- -.L271: addq.l #4, %a1 | bump pointer and jump back into loop - bra .L275 - -.L272: addq.l #4, %a1 | bump pointer and jump back into loop - bra .L274 - - -|------------------------------------------------------------------------------ -| Loop to handle term = -1 condition -| -| a0 = d0 = decorrelation sample -| a1 = bptr d1 = initial bptr [0] -| a2 = dpp-> d2 = updated bptr [0] -| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 -| a4 = d4 = dpp->weight_B << 17 -| a5 = d5 = eptr -| a6 = d6 = 1024 << 17 -| a7 = d7 = -1024 << 17 -| macsr = 0x20 acc1 = 0x00 0000 80 -|------------------------------------------------------------------------------ - -term_minus_1: - move.l -4(%a1), %d0 | d0 = bptr [-1] - beq .L402 - move.l %acc1, %acc0 - asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A) - mac.l %d0, %d3, %acc0 - move.l (%a1), %d1 - beq .L405 - eor.l %d1, %d0 | else compare signs - bge .L404 | if same, add delta to weight - sub.l %a3, %d3 | else subtract delta from weight - cmp.l %d7, %d3 | check for negative clip limit - bge .L405 - move.l %d7, %d3 - bra .L405 - -.L404: add.l %a3, %d3 | add delta to weight - cmp.l %d6, %d3 | check for positive clip limit - ble .L405 - move.l %d6, %d3 - -.L405: move.l %acc0, %d0 | d2 = rounded product - add.l %d1, %d0 | add applied weight to bptr [0], store - move.l %d0, (%a1)+ - beq .L401 - -.L410: move.l %acc1, %acc0 - asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B) - mac.l %d0, %d4, %acc0 - move.l (%a1), %d1 - beq .L403 - eor.l %d1, %d0 | else compare signs - bge .L407 | if same, add delta to weight - sub.l %a3, %d4 | else subtract delta from weight - cmp.l %d7, %d4 | check for negative clip limit - bge .L403 - move.l %d7, %d4 - bra .L403 - -.L407: add.l %a3, %d4 | add delta to weight - cmp.l %d6, %d4 | check for positive clip limit - ble .L403 - move.l %d6, %d4 - -.L403: move.l %acc0, %d2 | d2 = rounded product - add.l %d1, %d2 | add applied weight to bptr [1], store - move.l %d2, (%a1)+ - -.L411: cmp.l %a1, %d5 | loop back if 
bptr < eptr - jbhi term_minus_1 - move.l -4(%a1), 8(%a2) | dpp->samples_A [0] = bptr [-1] - jbra finish_up - -.L402: move.l (%a1)+, %d0 - bne .L410 - -.L401: addq.l #4, %a1 - bra .L411 - - -|------------------------------------------------------------------------------ -| Loop to handle term = -2 condition -| -| a0 = d0 = decorrelation sample -| a1 = bptr d1 = initial bptr [0] -| a2 = dpp-> d2 = updated bptr [0] -| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 -| a4 = d4 = dpp->weight_B << 17 -| a5 = d5 = eptr -| a6 = d6 = 1024 << 17 -| a7 = d7 = -1024 << 17 -| macsr = 0x20 acc1 = 0x00 0000 80 -|------------------------------------------------------------------------------ - -term_minus_2: - move.l -8(%a1), %d0 | d0 = bptr [-2] - beq .L511 - move.l %acc1, %acc0 - asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B) - mac.l %d0, %d4, %acc0 - move.l 4(%a1), %d1 - beq .L505 - eor.l %d1, %d0 | else compare signs - bge .L504 | if same, add delta to weight - sub.l %a3, %d4 | else subtract delta from weight - cmp.l %d7, %d4 | ckeck for negative clip limit - bge .L505 - move.l %d7, %d4 - bra .L505 - -.L504: add.l %a3, %d4 | add delta to weight - cmp.l %d6, %d4 | check for positive clip limit - ble .L505 - move.l %d6, %d4 - -.L505: move.l %acc0, %d0 | d2 = rounded product - add.l %d1, %d0 | add applied weight to bptr [0], store - move.l %d0, 4(%a1) - beq .L512 - -.L510: move.l %acc1, %acc0 - asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A) - mac.l %d0, %d3, %acc0 - move.l (%a1), %d1 - beq .L503 - eor.l %d1, %d0 | else compare signs - bge .L507 | if same, add delta to weight - sub.l %a3, %d3 | else subtract delta from weight - cmp.l %d7, %d3 | check for negative clip limit - bge .L503 - move.l %d7, %d3 - bra .L503 - -.L507: add.l %a3, %d3 | add delta to weight - cmp.l %d6, %d3 | check for negative clip limit - ble .L503 - move.l %d6, %d3 - -.L503: move.l %acc0, %d2 | d2 = rounded product - add.l %d1, %d2 | add applied weight to bptr [1], store - move.l %d2, (%a1) - 
-.L512: addq.l #8, %a1 - cmp.l %a1, %d5 | loop if bptr < eptr - jbhi term_minus_2 - move.l -8(%a1), 40(%a2) | dpp->samples_B [0] = bptr [-4] - jbra finish_up - -.L511: move.l 4(%a1), %d0 - beq .L512 - bra .L510 - - -|------------------------------------------------------------------------------ -| Loop to handle term = -3 condition -| -| a0 = d0 = decorrelation sample -| a1 = bptr d1 = initial bptr [0] -| a2 = dpp-> d2 = updated bptr [0] -| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 -| a4 = d4 = dpp->weight_B << 17 -| a5 = d5 = eptr -| a6 = d6 = 1024 << 17 -| a7 = d7 = -1024 << 17 -| macsr = 0x20 acc1 = 0x00 0000 80 -|------------------------------------------------------------------------------ - -term_minus_3: - move.l -4(%a1), %d0 | d0 = bptr [-1] - beq .L301 - move.l %acc1, %acc0 - asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A) - mac.l %d0, %d3, %acc0 - move.l (%a1), %d1 - beq .L320 - eor.l %d1, %d0 | else compare signs - bge .L319 | if same, add delta to weight - sub.l %a3, %d3 | else subtract delta from weight - cmp.l %d7, %d3 | check for negative clip limit - bge .L320 - move.l %d7, %d3 - bra .L320 - -.L319: add.l %a3, %d3 | add delta to weight - cmp.l %d6, %d3 | check for positive clip limit - ble .L320 - move.l %d6, %d3 - -.L320: move.l %acc0, %d2 | d2 = rounded product - add.l %d1, %d2 | add applied weight to bptr [0], store - move.l %d2, (%a1)+ - -.L330: move.l -12(%a1), %d0 | d0 = bptr [-2] - beq .L302 - move.l %acc1, %acc0 - asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B) - mac.l %d0, %d4, %acc0 - move.l (%a1), %d1 - beq .L318 - eor.l %d1, %d0 | else compare signs - bge .L322 | if same, add delta to weight - sub.l %a3, %d4 | else subtract delta from weight - cmp.l %d7, %d4 | check for negative clip limit - bge .L318 - move.l %d7, %d4 - bra .L318 - -.L322: add.l %a3, %d4 | add delta to weight - cmp.l %d6, %d4 | check for positive clip limit - ble .L318 - move.l %d6, %d4 - -.L318: move.l %acc0, %d2 | d2 = rounded product - add.l %d1, %d2 | 
add applied weight to bptr [1], store - move.l %d2, (%a1)+ - -.L331: cmp.l %a1, %d5 | bptr, eptr - jbhi term_minus_3 - move.l -4(%a1), 8(%a2) | dpp->samples_A [0] = bptr [-1] - move.l -8(%a1), 40(%a2) | dpp->samples_B [0] = bptr [-2] - jbra finish_up - -.L301: addq.l #4, %a1 - bra .L330 - -.L302: addq.l #4, %a1 - bra .L331 - -| finish and return - -finish_up: - moveq.l #17, %d0 - asr.l %d0, %d3 - asr.l %d0, %d4 - move.w %d3, 4(%a2) | weight_A, dpp->weight_A - move.w %d4, 6(%a2) | weight_B, dpp->weight_B - - clr.l %d0 | clear up EMAC - move.l %d0, %acc0 - move.l %d0, %acc1 - -return_only: - movem.l (%sp), %d2-%d7/%a2-%a6 - lea (44,%sp), %sp - rts +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2006 Conifer Software. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +/* This is an assembly optimized version of the following WavPack function: + * + * void decorr_stereo_pass_cont (struct decorr_pass *dpp, + * long *buffer, long sample_count); + * + * It performs a single pass of stereo decorrelation on the provided buffer. + * Note that this version of the function requires that the 8 previous stereo + * samples are visible and correct. In other words, it ignores the "samples_*" + * fields in the decorr_pass structure and gets the history data directly + * from the buffer. It does, however, return the appropriate history samples + * to the decorr_pass structure before returning. + * + * This is written to work on a MCF5249 processor, or any processor based on + * the ColdFire V2 core with an EMAC unit. The EMAC is perfectly suited for + * the "apply_weight" function of WavPack decorrelation because it provides + * the required 40-bit product.
The fractional rounding mode of the EMAC is not + * configurable and uses "round to even" while WavPack uses "round to larger", + * so the rounding has to be done manually. + */ + + .text + .align 2 + .global decorr_stereo_pass_cont_mcf5249 + +decorr_stereo_pass_cont_mcf5249: + + lea (-44, %sp), %sp + movem.l %d2-%d7/%a2-%a6, (%sp) + move.l 44+4(%sp), %a2 | a2 = dpp-> + move.l 44+8(%sp), %a1 | a1 = bptr + move.w 2(%a2), %a3 | a3 = dpp->delta + move.w 4(%a2), %d3 | d3 = dpp->weight_A (sign extended) + ext.l %d3 + move.w 6(%a2), %d4 | d4 = dpp->weight_B (sign extended) + ext.l %d4 + move.l 44+12(%sp), %d0 | d0 = sample_count + jbeq return_only | if zero, nothing to do + + lsl.l #3, %d0 | d5 = bptr + (sample_count * 8) + move.l %d0, %d5 + add.l %a1, %d5 + + moveq.l #17, %d0 | left shift weights & delta 17 places + asl.l %d0, %d3 + asl.l %d0, %d4 + move.l %a3, %d1 + asl.l %d0, %d1 + move.l %d1, %a3 + + moveq.l #0x20, %d6 + move.l %d6, %macsr | set fractional mode for MAC + move.l #0, %acc1 | acc1 = 0x00 0000 80 (for rounding) + move.l #0x800000, %accext01 + + move.l #1024<<17, %d6 | d6 & d7 are weight clipping limits + move.l #-1024<<17, %d7 | (only used by negative terms) + + move.w (%a2), %d0 | d0 = term + ext.l %d0 + cmp.l #17, %d0 + jbeq term_17 | term = 17 + cmp.l #18, %d0 + jbeq term_18 | term = 18 + addq.l #1, %d0 + jbeq term_minus_1 | term = -1 + addq.l #1, %d0 + jbeq term_minus_2 | term = -2 + addq.l #1, %d0 + jbeq term_minus_3 | term = -3 + jbra term_default | default term = 1 - 8 + +|------------------------------------------------------------------------------ +| Loop to handle term = 17 condition +| +| a0 = d0 = (2 * bptr [-1]) - bptr [-2] +| a1 = bptr d1 = initial bptr [0] +| a2 = dpp-> d2 = updated bptr [0] +| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 +| a4 = d4 = dpp->weight_B << 17 +| a5 = d5 = eptr +| macsr = 0x20 acc1 = 0x00 0000 80 +|------------------------------------------------------------------------------ + +term_17: + move.l -8(%a1), %d0 
| d0 = 2 * bptr [-1] - bptr [-2] + add.l %d0, %d0 + sub.l -16(%a1), %d0 + beq .L251 | if zero, skip calculation + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A + mac.l %d0, %d3, %acc0 + move.l (%a1), %d1 + beq .L255 + eor.l %d1, %d0 | else compare signs + bge .L256 | if same, add delta to weight + sub.l %a3, %d3 | else subtract delta from weight + sub.l %a3, %d3 | subtract again instead of branch +.L256: add.l %a3, %d3 | add delta to weight + +.L255: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | update bptr [0] and store + move.l %d2, (%a1)+ + +.L253: move.l -8(%a1), %d0 | d0 = 2 * bptr [-1] - bptr [-2] + add.l %d0, %d0 + sub.l -16(%a1), %d0 + beq .L257 | if zero, skip calculations + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B + mac.l %d0, %d4, %acc0 + move.l (%a1), %d1 + beq .L254 + eor.l %d1, %d0 | else compare signs + bge .L259 | if same, add delta to weight + sub.l %a3, %d4 | else subtract delta from weight + sub.l %a3, %d4 | subtract again instead of branch +.L259: add.l %a3, %d4 | add delta to weight + +.L254: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | update bptr [0] and store + move.l %d2, (%a1)+ + +.L252: cmp.l %a1, %d5 | loop if bptr < eptr + jbhi term_17 + bra term_17_18_finish | exit through common path + +.L251: addq.l #4, %a1 | update point and jump back into loop + bra .L253 + +.L257: addq.l #4, %a1 | update point and jump back into loop + bra .L252 + +|------------------------------------------------------------------------------ +| Loop to handle term = 18 condition +| +| a0 = d0 = ((3 * bptr [-1]) - bptr [-2]) >> 1 +| a1 = bptr d1 = initial bptr [0] +| a2 = dpp-> d2 = updated bptr [0] +| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 +| a4 = d4 = dpp->weight_B << 17 +| a5 = d5 = eptr +| macsr = 0x20 acc1 = 0x00 0000 80 +|------------------------------------------------------------------------------ + +term_18: + move.l -8(%a1), %a0 | d0 = (3 * bptr [-1] - bptr 
[-2]) >> 1 + lea (%a0,%a0.l*2), %a0 + move.l %a0, %d0 + sub.l -16(%a1), %d0 + asr.l #1, %d0 + beq .L260 + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A + mac.l %d0, %d3, %acc0 + move.l (%a1), %d1 + beq .L266 + eor.l %d1, %d0 | else compare signs + bge .L267 | if same, add delta to weight + sub.l %a3, %d3 | else subtract delta from weight + sub.l %a3, %d3 | subtract again instead of branch +.L267: add.l %a3, %d3 | add delta to weight + +.L266: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | add applied weight to bptr [0], store + move.l %d2, (%a1)+ + +.L268: move.l -8(%a1), %a0 | d0 = (3 * bptr [-1] - bptr [-2]) >> 1 + lea (%a0,%a0.l*2), %a0 + move.l %a0, %d0 + sub.l -16(%a1), %d0 + asr.l #1, %d0 + beq .L261 + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B + mac.l %d0, %d4, %acc0 + move.l (%a1), %d1 + beq .L265 + eor.l %d1, %d0 | else compare signs + bge .L270 | if same, add delta to weight + sub.l %a3, %d4 | else subtract delta from weight + sub.l %a3, %d4 | subtract again instead of branch +.L270: add.l %a3, %d4 | add delta to weight + +.L265: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | add applied weight to bptr [0], store + move.l %d2, (%a1)+ + +.L269: cmp.l %a1, %d5 | loop if bptr < eptr + jbhi term_18 + bra term_17_18_finish | exit through common path + +.L260: addq.l #4, %a1 | bump pointer and jump back into loop + bra .L268 + +.L261: addq.l #4, %a1 | bump pointer and jump back into loop + bra .L269 + +term_17_18_finish: + move.l -4(%a1), 40(%a2) | restore dpp->samples_A [0-1], B [0-1] + move.l -8(%a1), 8(%a2) + move.l -12(%a1), 44(%a2) + move.l -16(%a1), 12(%a2) + jbra finish_up + +|------------------------------------------------------------------------------ +| Loop to handle default terms (i.e. 
1 - 8) +| +| a0 = tptr d0 = tptr [0] +| a1 = bptr d1 = initial bptr [0] +| a2 = dpp-> d2 = updated bptr [0] +| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 +| a4 = d4 = dpp->weight_B << 17 +| a5 = d5 = eptr +| macsr = 0x20 acc1 = 0x00 0000 80 +|------------------------------------------------------------------------------ + +term_default: + move.w (%a2), %d0 | a0 = a1 - (dpp->term * 8) + ext.l %d0 + lsl.l #3, %d0 + move.l %a1, %a0 + sub.l %d0, %a0 + +term_default_loop: + move.l (%a0)+, %d0 | d0 = tptr [0], skip ahead if zero + beq .L271 + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_A + mac.l %d0, %d3, %acc0 + move.l (%a1), %d1 + beq .L277 + eor.l %d1, %d0 | else compare signs + bge .L278 | if same, add delta to weight + sub.l %a3, %d3 | else subtract delta from weight + sub.l %a3, %d3 | subtract again instead of branch +.L278: add.l %a3, %d3 | add delta to weight + +.L277: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | add applied weight to bptr [0], store + move.l %d2, (%a1)+ + +.L275: move.l (%a0)+, %d0 | d0 = tptr [0], skip ahead if zero + beq .L272 + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + (d0 << 4) * weight_B + mac.l %d0, %d4, %acc0 + move.l (%a1), %d1 + beq .L276 + eor.l %d1, %d0 | else compare signs + bge .L281 | if same, add delta to weight + sub.l %a3, %d4 | else subtract delta from weight + sub.l %a3, %d4 | subtract again instead of branch +.L281: add.l %a3, %d4 | add delta to weight + +.L276: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | add applied weight to bptr [0], store + move.l %d2, (%a1)+ + +.L274: cmp.l %a1, %d5 | loop back if bptr < eptr + jbhi term_default_loop + move.w (%a2), %d0 | d0 = term - 1 + moveq.l #8, %d1 | d1 = loop counter + +.L323: subq.l #1, %d0 | back up & mask index + and.l #7, %d0 + move.l -(%a1), 40(%a2,%d0.l*4) | store dpp->samples_B [d0] + move.l -(%a1), 8(%a2,%d0.l*4) | store dpp->samples_A [d0] + subq.l #1, %d1 | loop on count + jbne .L323 + jbra finish_up 
+ +.L271: addq.l #4, %a1 | bump pointer and jump back into loop + bra .L275 + +.L272: addq.l #4, %a1 | bump pointer and jump back into loop + bra .L274 + + +|------------------------------------------------------------------------------ +| Loop to handle term = -1 condition +| +| a0 = d0 = decorrelation sample +| a1 = bptr d1 = initial bptr [0] +| a2 = dpp-> d2 = updated bptr [0] +| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 +| a4 = d4 = dpp->weight_B << 17 +| a5 = d5 = eptr +| a6 = d6 = 1024 << 17 +| a7 = d7 = -1024 << 17 +| macsr = 0x20 acc1 = 0x00 0000 80 +|------------------------------------------------------------------------------ + +term_minus_1: + move.l -4(%a1), %d0 | d0 = bptr [-1] + beq .L402 + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A) + mac.l %d0, %d3, %acc0 + move.l (%a1), %d1 + beq .L405 + eor.l %d1, %d0 | else compare signs + bge .L404 | if same, add delta to weight + sub.l %a3, %d3 | else subtract delta from weight + cmp.l %d7, %d3 | check for negative clip limit + bge .L405 + move.l %d7, %d3 + bra .L405 + +.L404: add.l %a3, %d3 | add delta to weight + cmp.l %d6, %d3 | check for positive clip limit + ble .L405 + move.l %d6, %d3 + +.L405: move.l %acc0, %d0 | d2 = rounded product + add.l %d1, %d0 | add applied weight to bptr [0], store + move.l %d0, (%a1)+ + beq .L401 + +.L410: move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B) + mac.l %d0, %d4, %acc0 + move.l (%a1), %d1 + beq .L403 + eor.l %d1, %d0 | else compare signs + bge .L407 | if same, add delta to weight + sub.l %a3, %d4 | else subtract delta from weight + cmp.l %d7, %d4 | check for negative clip limit + bge .L403 + move.l %d7, %d4 + bra .L403 + +.L407: add.l %a3, %d4 | add delta to weight + cmp.l %d6, %d4 | check for positive clip limit + ble .L403 + move.l %d6, %d4 + +.L403: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | add applied weight to bptr [1], store + move.l %d2, (%a1)+ + +.L411: cmp.l %a1, %d5 | loop back if 
bptr < eptr + jbhi term_minus_1 + move.l -4(%a1), 8(%a2) | dpp->samples_A [0] = bptr [-1] + jbra finish_up + +.L402: move.l (%a1)+, %d0 + bne .L410 + +.L401: addq.l #4, %a1 + bra .L411 + + +|------------------------------------------------------------------------------ +| Loop to handle term = -2 condition +| +| a0 = d0 = decorrelation sample +| a1 = bptr d1 = initial bptr [0] +| a2 = dpp-> d2 = updated bptr [0] +| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 +| a4 = d4 = dpp->weight_B << 17 +| a5 = d5 = eptr +| a6 = d6 = 1024 << 17 +| a7 = d7 = -1024 << 17 +| macsr = 0x20 acc1 = 0x00 0000 80 +|------------------------------------------------------------------------------ + +term_minus_2: + move.l -8(%a1), %d0 | d0 = bptr [-2] + beq .L511 + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B) + mac.l %d0, %d4, %acc0 + move.l 4(%a1), %d1 + beq .L505 + eor.l %d1, %d0 | else compare signs + bge .L504 | if same, add delta to weight + sub.l %a3, %d4 | else subtract delta from weight + cmp.l %d7, %d4 | check for negative clip limit + bge .L505 + move.l %d7, %d4 + bra .L505 + +.L504: add.l %a3, %d4 | add delta to weight + cmp.l %d6, %d4 | check for positive clip limit + ble .L505 + move.l %d6, %d4 + +.L505: move.l %acc0, %d0 | d0 = rounded product + add.l %d1, %d0 | add applied weight to bptr [1], store + move.l %d0, 4(%a1) + beq .L512 + +.L510: move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A) + mac.l %d0, %d3, %acc0 + move.l (%a1), %d1 + beq .L503 + eor.l %d1, %d0 | else compare signs + bge .L507 | if same, add delta to weight + sub.l %a3, %d3 | else subtract delta from weight + cmp.l %d7, %d3 | check for negative clip limit + bge .L503 + move.l %d7, %d3 + bra .L503 + +.L507: add.l %a3, %d3 | add delta to weight + cmp.l %d6, %d3 | check for positive clip limit + ble .L503 + move.l %d6, %d3 + +.L503: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | add applied weight to bptr [0], store + move.l %d2, (%a1) +
+.L512: addq.l #8, %a1 + cmp.l %a1, %d5 | loop if bptr < eptr + jbhi term_minus_2 + move.l -8(%a1), 40(%a2) | dpp->samples_B [0] = bptr [-2] + jbra finish_up + +.L511: move.l 4(%a1), %d0 + beq .L512 + bra .L510 + + +|------------------------------------------------------------------------------ +| Loop to handle term = -3 condition +| +| a0 = d0 = decorrelation sample +| a1 = bptr d1 = initial bptr [0] +| a2 = dpp-> d2 = updated bptr [0] +| a3 = dpp->delta << 17 d3 = dpp->weight_A << 17 +| a4 = d4 = dpp->weight_B << 17 +| a5 = d5 = eptr +| a6 = d6 = 1024 << 17 +| a7 = d7 = -1024 << 17 +| macsr = 0x20 acc1 = 0x00 0000 80 +|------------------------------------------------------------------------------ + +term_minus_3: + move.l -4(%a1), %d0 | d0 = bptr [-1] + beq .L301 + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_A) + mac.l %d0, %d3, %acc0 + move.l (%a1), %d1 + beq .L320 + eor.l %d1, %d0 | else compare signs + bge .L319 | if same, add delta to weight + sub.l %a3, %d3 | else subtract delta from weight + cmp.l %d7, %d3 | check for negative clip limit + bge .L320 + move.l %d7, %d3 + bra .L320 + +.L319: add.l %a3, %d3 | add delta to weight + cmp.l %d6, %d3 | check for positive clip limit + ble .L320 + move.l %d6, %d3 + +.L320: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 | add applied weight to bptr [0], store + move.l %d2, (%a1)+ + +.L330: move.l -12(%a1), %d0 | d0 = bptr [-2] + beq .L302 + move.l %acc1, %acc0 + asl.l #4, %d0 | acc0 = acc1 + ((d0 << 4) * weight_B) + mac.l %d0, %d4, %acc0 + move.l (%a1), %d1 + beq .L318 + eor.l %d1, %d0 | else compare signs + bge .L322 | if same, add delta to weight + sub.l %a3, %d4 | else subtract delta from weight + cmp.l %d7, %d4 | check for negative clip limit + bge .L318 + move.l %d7, %d4 + bra .L318 + +.L322: add.l %a3, %d4 | add delta to weight + cmp.l %d6, %d4 | check for positive clip limit + ble .L318 + move.l %d6, %d4 + +.L318: move.l %acc0, %d2 | d2 = rounded product + add.l %d1, %d2 |
add applied weight to bptr [1], store + move.l %d2, (%a1)+ + +.L331: cmp.l %a1, %d5 | bptr, eptr + jbhi term_minus_3 + move.l -4(%a1), 8(%a2) | dpp->samples_A [0] = bptr [-1] + move.l -8(%a1), 40(%a2) | dpp->samples_B [0] = bptr [-2] + jbra finish_up + +.L301: addq.l #4, %a1 + bra .L330 + +.L302: addq.l #4, %a1 + bra .L331 + +| finish and return + +finish_up: + moveq.l #17, %d0 + asr.l %d0, %d3 + asr.l %d0, %d4 + move.w %d3, 4(%a2) | weight_A, dpp->weight_A + move.w %d4, 6(%a2) | weight_B, dpp->weight_B + + clr.l %d0 | clear up EMAC + move.l %d0, %acc0 + move.l %d0, %acc1 + +return_only: + movem.l (%sp), %d2-%d7/%a2-%a6 + lea (44,%sp), %sp + rts diff --git a/src/engine/external/wavpack/float.c b/src/engine/external/wavpack/float.c index 4b9b44ee..09f0e4c3 100644 --- a/src/engine/external/wavpack/float.c +++ b/src/engine/external/wavpack/float.c @@ -1,50 +1,50 @@ -//////////////////////////////////////////////////////////////////////////// -// **** WAVPACK **** // -// Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // -// All Rights Reserved. 
// -// Distributed under the BSD Software License (see license.txt) // -//////////////////////////////////////////////////////////////////////////// - -// float.c - -#include "wavpack.h" - -int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd) -{ - int bytecnt = wpmd->byte_length; - char *byteptr = wpmd->data; - - if (bytecnt != 4) - return FALSE; - - wps->float_flags = *byteptr++; - wps->float_shift = *byteptr++; - wps->float_max_exp = *byteptr++; - wps->float_norm_exp = *byteptr; - return TRUE; -} - -void float_values (WavpackStream *wps, int32_t *values, int32_t num_values) -{ - int shift = wps->float_max_exp - wps->float_norm_exp + wps->float_shift; - - if (shift > 32) - shift = 32; - else if (shift < -32) - shift = -32; - - while (num_values--) { - if (shift > 0) - *values <<= shift; - else if (shift < 0) - *values >>= -shift; - - if (*values > 8388607L) - *values = 8388607L; - else if (*values < -8388608L) - *values = -8388608L; - - values++; - } -} +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2006 Conifer Software. // +// All Rights Reserved. 
// +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// float.c + +#include "wavpack.h" + +int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd) +{ + int bytecnt = wpmd->byte_length; + char *byteptr = wpmd->data; + + if (bytecnt != 4) + return FALSE; + + wps->float_flags = *byteptr++; + wps->float_shift = *byteptr++; + wps->float_max_exp = *byteptr++; + wps->float_norm_exp = *byteptr; + return TRUE; +} + +void float_values (WavpackStream *wps, int32_t *values, int32_t num_values) +{ + int shift = wps->float_max_exp - wps->float_norm_exp + wps->float_shift; + + if (shift > 32) + shift = 32; + else if (shift < -32) + shift = -32; + + while (num_values--) { + if (shift > 0) + *values <<= shift; + else if (shift < 0) + *values >>= -shift; + + if (*values > 8388607L) + *values = 8388607L; + else if (*values < -8388608L) + *values = -8388608L; + + values++; + } +} diff --git a/src/engine/external/wavpack/license.txt b/src/engine/external/wavpack/license.txt index 98f6e6b1..d2ee90fc 100644 --- a/src/engine/external/wavpack/license.txt +++ b/src/engine/external/wavpack/license.txt @@ -1,25 +1,25 @@ - Copyright (c) 1998 - 2006 Conifer Software - All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of Conifer Software nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + Copyright (c) 1998 - 2006 Conifer Software + All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Conifer Software nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/engine/external/wavpack/metadata.c b/src/engine/external/wavpack/metadata.c index 578b17f9..6a19ac9b 100644 --- a/src/engine/external/wavpack/metadata.c +++ b/src/engine/external/wavpack/metadata.c @@ -1,105 +1,105 @@ -//////////////////////////////////////////////////////////////////////////// -// **** WAVPACK **** // -// Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // -// All Rights Reserved. // -// Distributed under the BSD Software License (see license.txt) // -//////////////////////////////////////////////////////////////////////////// - -// metadata.c - -// This module handles the metadata structure introduced in WavPack 4.0 - -#include "wavpack.h" - -int read_metadata_buff (WavpackContext *wpc, WavpackMetadata *wpmd) -{ - uchar tchar; - - if (!wpc->infile (&wpmd->id, 1) || !wpc->infile (&tchar, 1)) - return FALSE; - - wpmd->byte_length = tchar << 1; - - if (wpmd->id & ID_LARGE) { - wpmd->id &= ~ID_LARGE; - - if (!wpc->infile (&tchar, 1)) - return FALSE; - - wpmd->byte_length += (int32_t) tchar << 9; - - if (!wpc->infile (&tchar, 1)) - return FALSE; - - wpmd->byte_length += (int32_t) tchar << 17; - } - - if (wpmd->id & ID_ODD_SIZE) { - wpmd->id &= ~ID_ODD_SIZE; - wpmd->byte_length--; - } - - if (wpmd->byte_length && wpmd->byte_length <= sizeof (wpc->read_buffer)) { - uint32_t bytes_to_read = wpmd->byte_length + (wpmd->byte_length & 1); - - if (wpc->infile (wpc->read_buffer, bytes_to_read) != 
(int32_t) bytes_to_read) { - wpmd->data = NULL; - return FALSE; - } - - wpmd->data = wpc->read_buffer; - } - else - wpmd->data = NULL; - - return TRUE; -} - -int process_metadata (WavpackContext *wpc, WavpackMetadata *wpmd) -{ - WavpackStream *wps = &wpc->stream; - - switch (wpmd->id) { - case ID_DUMMY: - return TRUE; - - case ID_DECORR_TERMS: - return read_decorr_terms (wps, wpmd); - - case ID_DECORR_WEIGHTS: - return read_decorr_weights (wps, wpmd); - - case ID_DECORR_SAMPLES: - return read_decorr_samples (wps, wpmd); - - case ID_ENTROPY_VARS: - return read_entropy_vars (wps, wpmd); - - case ID_HYBRID_PROFILE: - return read_hybrid_profile (wps, wpmd); - - case ID_FLOAT_INFO: - return read_float_info (wps, wpmd); - - case ID_INT32_INFO: - return read_int32_info (wps, wpmd); - - case ID_CHANNEL_INFO: - return read_channel_info (wpc, wpmd); - - case ID_CONFIG_BLOCK: - return read_config_info (wpc, wpmd); - - case ID_WV_BITSTREAM: - return init_wv_bitstream (wpc, wpmd); - - case ID_SHAPING_WEIGHTS: - case ID_WVC_BITSTREAM: - case ID_WVX_BITSTREAM: - return TRUE; - - default: - return (wpmd->id & ID_OPTIONAL_DATA) ? TRUE : FALSE; - } -} +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2006 Conifer Software. // +// All Rights Reserved. 
// +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// metadata.c + +// This module handles the metadata structure introduced in WavPack 4.0 + +#include "wavpack.h" + +int read_metadata_buff (WavpackContext *wpc, WavpackMetadata *wpmd) +{ + uchar tchar; + + if (!wpc->infile (&wpmd->id, 1) || !wpc->infile (&tchar, 1)) + return FALSE; + + wpmd->byte_length = tchar << 1; + + if (wpmd->id & ID_LARGE) { + wpmd->id &= ~ID_LARGE; + + if (!wpc->infile (&tchar, 1)) + return FALSE; + + wpmd->byte_length += (int32_t) tchar << 9; + + if (!wpc->infile (&tchar, 1)) + return FALSE; + + wpmd->byte_length += (int32_t) tchar << 17; + } + + if (wpmd->id & ID_ODD_SIZE) { + wpmd->id &= ~ID_ODD_SIZE; + wpmd->byte_length--; + } + + if (wpmd->byte_length && wpmd->byte_length <= sizeof (wpc->read_buffer)) { + uint32_t bytes_to_read = wpmd->byte_length + (wpmd->byte_length & 1); + + if (wpc->infile (wpc->read_buffer, bytes_to_read) != (int32_t) bytes_to_read) { + wpmd->data = NULL; + return FALSE; + } + + wpmd->data = wpc->read_buffer; + } + else + wpmd->data = NULL; + + return TRUE; +} + +int process_metadata (WavpackContext *wpc, WavpackMetadata *wpmd) +{ + WavpackStream *wps = &wpc->stream; + + switch (wpmd->id) { + case ID_DUMMY: + return TRUE; + + case ID_DECORR_TERMS: + return read_decorr_terms (wps, wpmd); + + case ID_DECORR_WEIGHTS: + return read_decorr_weights (wps, wpmd); + + case ID_DECORR_SAMPLES: + return read_decorr_samples (wps, wpmd); + + case ID_ENTROPY_VARS: + return read_entropy_vars (wps, wpmd); + + case ID_HYBRID_PROFILE: + return read_hybrid_profile (wps, wpmd); + + case ID_FLOAT_INFO: + return read_float_info (wps, wpmd); + + case ID_INT32_INFO: + return read_int32_info (wps, wpmd); + + case ID_CHANNEL_INFO: + return read_channel_info (wpc, wpmd); + + case ID_CONFIG_BLOCK: + return read_config_info (wpc, wpmd); + + case ID_WV_BITSTREAM: + return init_wv_bitstream (wpc, 
wpmd); + + case ID_SHAPING_WEIGHTS: + case ID_WVC_BITSTREAM: + case ID_WVX_BITSTREAM: + return TRUE; + + default: + return (wpmd->id & ID_OPTIONAL_DATA) ? TRUE : FALSE; + } +} diff --git a/src/engine/external/wavpack/readme.txt b/src/engine/external/wavpack/readme.txt index 4ccbdf42..07b7a261 100644 --- a/src/engine/external/wavpack/readme.txt +++ b/src/engine/external/wavpack/readme.txt @@ -1,68 +1,68 @@ -//////////////////////////////////////////////////////////////////////////// -// **** WAVPACK **** // -// Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // -// All Rights Reserved. // -// Distributed under the BSD Software License (see license.txt) // -//////////////////////////////////////////////////////////////////////////// - -This package contains a tiny version of the WavPack 4.40 decoder that might -be used in a "resource limited" CPU environment or form the basis for a -hardware decoding implementation. It is packaged with a demo command-line -program that accepts a WavPack audio file on stdin and outputs a RIFF wav -file to stdout. The program is standard C, and a win32 executable is -included which was compiled under MS Visual C++ 6.0 using this command: - -cl /O1 /DWIN32 wvfilter.c wputils.c unpack.c float.c metadata.c words.c bits.c - -WavPack data is read with a stream reading callback. No direct seeking is -provided for, but it is possible to start decoding anywhere in a WavPack -stream. In this case, WavPack will be able to provide the sample-accurate -position when it synchs with the data and begins decoding. The WIN32 macro -is used for Windows to force the stdin and stdout streams to be binary mode. - -Compared to the previous version, this library has been optimized somewhat -for improved performance in exchange for slightly larger code size. The -library also now includes hand-optimized assembly language versions of the -decorrelation functions for both the ColdFire (w/EMAC) and ARM processors. 
- -For demonstration purposes this uses a single static copy of the -WavpackContext structure, so obviously it cannot be used for more than one -file at a time. Also, this decoder will not handle "correction" files, plays -only the first two channels of multi-channel files, and is limited in -resolution in some large integer or floating point files (but always -provides at least 24 bits of resolution). It also will not accept WavPack -files from before version 4.0. - -The previous version of this library would handle float files by returning -32-bit floating-point data (even though no floating point math was used). -Because this library would normally be used for simply playing WavPack -files where lossless performance (beyond 24-bits) is not relevant, I have -changed this behavior. Now, these files will generate clipped 24-bit data. -The MODE_FLOAT flag will still be returned by WavpackGetMode(), but the -BitsPerSample and BytesPerSample queries will be 24 and 3, respectfully. -What this means is that an application that can handle 24-bit data will -now be able to handle floating point data (assuming that the MODE_FLOAT -flag is ignored). 
- -To make this code viable on the greatest number of hardware platforms, the -following are true: - - speed is about 5x realtime on an AMD K6 300 MHz - ("high" mode 16/44 stereo; normal mode is about twice that fast) - - no floating-point math required; just 32b * 32b = 32b int multiply - - large data areas are static and less than 4K total - executable code and tables are less than 40K - no malloc / free usage - -To maintain compatibility on various platforms, the following conventions -are used: - - a "char" must be exactly 8-bits - a "short" must be exactly 16-bits - an "int" must be at least 16-bits, but may be larger - the "long" type is not used to avoid problems with 64-bit compilers - -Questions or comments should be directed to david@wavpack.com +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2006 Conifer Software. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +This package contains a tiny version of the WavPack 4.40 decoder that might +be used in a "resource limited" CPU environment or form the basis for a +hardware decoding implementation. It is packaged with a demo command-line +program that accepts a WavPack audio file on stdin and outputs a RIFF wav +file to stdout. The program is standard C, and a win32 executable is +included which was compiled under MS Visual C++ 6.0 using this command: + +cl /O1 /DWIN32 wvfilter.c wputils.c unpack.c float.c metadata.c words.c bits.c + +WavPack data is read with a stream reading callback. No direct seeking is +provided for, but it is possible to start decoding anywhere in a WavPack +stream. In this case, WavPack will be able to provide the sample-accurate +position when it synchs with the data and begins decoding. 
The WIN32 macro +is used for Windows to force the stdin and stdout streams to be binary mode. + +Compared to the previous version, this library has been optimized somewhat +for improved performance in exchange for slightly larger code size. The +library also now includes hand-optimized assembly language versions of the +decorrelation functions for both the ColdFire (w/EMAC) and ARM processors. + +For demonstration purposes this uses a single static copy of the +WavpackContext structure, so obviously it cannot be used for more than one +file at a time. Also, this decoder will not handle "correction" files, plays +only the first two channels of multi-channel files, and is limited in +resolution in some large integer or floating point files (but always +provides at least 24 bits of resolution). It also will not accept WavPack +files from before version 4.0. + +The previous version of this library would handle float files by returning +32-bit floating-point data (even though no floating point math was used). +Because this library would normally be used for simply playing WavPack +files where lossless performance (beyond 24-bits) is not relevant, I have +changed this behavior. Now, these files will generate clipped 24-bit data. +The MODE_FLOAT flag will still be returned by WavpackGetMode(), but the +BitsPerSample and BytesPerSample queries will be 24 and 3, respectfully. +What this means is that an application that can handle 24-bit data will +now be able to handle floating point data (assuming that the MODE_FLOAT +flag is ignored). 
+ +To make this code viable on the greatest number of hardware platforms, the +following are true: + + speed is about 5x realtime on an AMD K6 300 MHz + ("high" mode 16/44 stereo; normal mode is about twice that fast) + + no floating-point math required; just 32b * 32b = 32b int multiply + + large data areas are static and less than 4K total + executable code and tables are less than 40K + no malloc / free usage + +To maintain compatibility on various platforms, the following conventions +are used: + + a "char" must be exactly 8-bits + a "short" must be exactly 16-bits + an "int" must be at least 16-bits, but may be larger + the "long" type is not used to avoid problems with 64-bit compilers + +Questions or comments should be directed to david@wavpack.com diff --git a/src/engine/external/wavpack/unpack.c b/src/engine/external/wavpack/unpack.c index 2bed5a0c..317fe222 100644 --- a/src/engine/external/wavpack/unpack.c +++ b/src/engine/external/wavpack/unpack.c @@ -1,785 +1,785 @@ -//////////////////////////////////////////////////////////////////////////// -// **** WAVPACK **** // -// Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // -// All Rights Reserved. // -// Distributed under the BSD Software License (see license.txt) // -//////////////////////////////////////////////////////////////////////////// - -// unpack.c - -// This module actually handles the decompression of the audio data, except -// for the entropy decoding which is handled by the words.c module. For -// maximum efficiency, the conversion is isolated to tight loops that handle -// an entire buffer. - -#include "wavpack.h" - -#include <stdlib.h> -#include <string.h> - -#define LOSSY_MUTE - -///////////////////////////// executable code //////////////////////////////// - -// This function initializes everything required to unpack a WavPack block -// and must be called before unpack_samples() is called to obtain audio data. 
-// It is assumed that the WavpackHeader has been read into the wps->wphdr -// (in the current WavpackStream). This is where all the metadata blocks are -// scanned up to the one containing the audio bitstream. - -int unpack_init (WavpackContext *wpc) -{ - WavpackStream *wps = &wpc->stream; - WavpackMetadata wpmd; - - if (wps->wphdr.block_samples && wps->wphdr.block_index != (uint32_t) -1) - wps->sample_index = wps->wphdr.block_index; - - wps->mute_error = FALSE; - wps->crc = 0xffffffff; - CLEAR (wps->wvbits); - CLEAR (wps->decorr_passes); - CLEAR (wps->w); - - while (read_metadata_buff (wpc, &wpmd)) { - if (!process_metadata (wpc, &wpmd)) { - strcpy (wpc->error_message, "invalid metadata!"); - return FALSE; - } - - if (wpmd.id == ID_WV_BITSTREAM) - break; - } - - if (wps->wphdr.block_samples && !bs_is_open (&wps->wvbits)) { - strcpy (wpc->error_message, "invalid WavPack file!"); - return FALSE; - } - - if (wps->wphdr.block_samples) { - if ((wps->wphdr.flags & INT32_DATA) && wps->int32_sent_bits) - wpc->lossy_blocks = TRUE; - - if ((wps->wphdr.flags & FLOAT_DATA) && - wps->float_flags & (FLOAT_EXCEPTIONS | FLOAT_ZEROS_SENT | FLOAT_SHIFT_SENT | FLOAT_SHIFT_SAME)) - wpc->lossy_blocks = TRUE; - } - - return TRUE; -} - -// This function initialzes the main bitstream for audio samples, which must -// be in the "wv" file. - -int init_wv_bitstream (WavpackContext *wpc, WavpackMetadata *wpmd) -{ - WavpackStream *wps = &wpc->stream; - - if (wpmd->data) - bs_open_read (&wps->wvbits, wpmd->data, (unsigned char *) wpmd->data + wpmd->byte_length, NULL, 0); - else if (wpmd->byte_length) - bs_open_read (&wps->wvbits, wpc->read_buffer, wpc->read_buffer + sizeof (wpc->read_buffer), - wpc->infile, wpmd->byte_length + (wpmd->byte_length & 1)); - - return TRUE; -} - -// Read decorrelation terms from specified metadata block into the -// decorr_passes array. The terms range from -3 to 8, plus 17 & 18; -// other values are reserved and generate errors for now. 
The delta -// ranges from 0 to 7 with all values valid. Note that the terms are -// stored in the opposite order in the decorr_passes array compared -// to packing. - -int read_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd) -{ - int termcnt = wpmd->byte_length; - uchar *byteptr = wpmd->data; - struct decorr_pass *dpp; - - if (termcnt > MAX_NTERMS) - return FALSE; - - wps->num_terms = termcnt; - - for (dpp = wps->decorr_passes + termcnt - 1; termcnt--; dpp--) { - dpp->term = (int)(*byteptr & 0x1f) - 5; - dpp->delta = (*byteptr++ >> 5) & 0x7; - - if (!dpp->term || dpp->term < -3 || (dpp->term > MAX_TERM && dpp->term < 17) || dpp->term > 18) - return FALSE; - } - - return TRUE; -} - -// Read decorrelation weights from specified metadata block into the -// decorr_passes array. The weights range +/-1024, but are rounded and -// truncated to fit in signed chars for metadata storage. Weights are -// separate for the two channels and are specified from the "last" term -// (first during encode). Unspecified weights are set to zero. - -int read_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd) -{ - int termcnt = wpmd->byte_length, tcount; - signed char *byteptr = wpmd->data; - struct decorr_pass *dpp; - - if (!(wps->wphdr.flags & MONO_DATA)) - termcnt /= 2; - - if (termcnt > wps->num_terms) - return FALSE; - - for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) - dpp->weight_A = dpp->weight_B = 0; - - while (--dpp >= wps->decorr_passes && termcnt--) { - dpp->weight_A = restore_weight (*byteptr++); - - if (!(wps->wphdr.flags & MONO_DATA)) - dpp->weight_B = restore_weight (*byteptr++); - } - - return TRUE; -} - -// Read decorrelation samples from specified metadata block into the -// decorr_passes array. The samples are signed 32-bit values, but are -// converted to signed log2 values for storage in metadata. 
Values are -// stored for both channels and are specified from the "last" term -// (first during encode) with unspecified samples set to zero. The -// number of samples stored varies with the actual term value, so -// those must obviously come first in the metadata. - -int read_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd) -{ - uchar *byteptr = wpmd->data; - uchar *endptr = byteptr + wpmd->byte_length; - struct decorr_pass *dpp; - int tcount; - - for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) { - CLEAR (dpp->samples_A); - CLEAR (dpp->samples_B); - } - - if (wps->wphdr.version == 0x402 && (wps->wphdr.flags & HYBRID_FLAG)) { - byteptr += 2; - - if (!(wps->wphdr.flags & MONO_DATA)) - byteptr += 2; - } - - while (dpp-- > wps->decorr_passes && byteptr < endptr) - if (dpp->term > MAX_TERM) { - dpp->samples_A [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - dpp->samples_A [1] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8))); - byteptr += 4; - - if (!(wps->wphdr.flags & MONO_DATA)) { - dpp->samples_B [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - dpp->samples_B [1] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8))); - byteptr += 4; - } - } - else if (dpp->term < 0) { - dpp->samples_A [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - dpp->samples_B [0] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8))); - byteptr += 4; - } - else { - int m = 0, cnt = dpp->term; - - while (cnt--) { - dpp->samples_A [m] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - byteptr += 2; - - if (!(wps->wphdr.flags & MONO_DATA)) { - dpp->samples_B [m] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - byteptr += 2; - } - - m++; - } - } - - return byteptr == endptr; -} - -// Read the int32 data from the specified metadata into the specified stream. -// This data is used for integer data that has more than 24 bits of magnitude -// or, in some cases, used to eliminate redundant bits from any audio stream. 
- -int read_int32_info (WavpackStream *wps, WavpackMetadata *wpmd) -{ - int bytecnt = wpmd->byte_length; - char *byteptr = wpmd->data; - - if (bytecnt != 4) - return FALSE; - - wps->int32_sent_bits = *byteptr++; - wps->int32_zeros = *byteptr++; - wps->int32_ones = *byteptr++; - wps->int32_dups = *byteptr; - return TRUE; -} - -// Read multichannel information from metadata. The first byte is the total -// number of channels and the following bytes represent the channel_mask -// as described for Microsoft WAVEFORMATEX. - -int read_channel_info (WavpackContext *wpc, WavpackMetadata *wpmd) -{ - int bytecnt = wpmd->byte_length, shift = 0; - char *byteptr = wpmd->data; - uint32_t mask = 0; - - if (!bytecnt || bytecnt > 5) - return FALSE; - - wpc->config.num_channels = *byteptr++; - - while (--bytecnt) { - mask |= (uint32_t) *byteptr++ << shift; - shift += 8; - } - - wpc->config.channel_mask = mask; - return TRUE; -} - -// Read configuration information from metadata. - -int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd) -{ - int bytecnt = wpmd->byte_length; - uchar *byteptr = wpmd->data; - - if (bytecnt >= 3) { - wpc->config.flags &= 0xff; - wpc->config.flags |= (int32_t) *byteptr++ << 8; - wpc->config.flags |= (int32_t) *byteptr++ << 16; - wpc->config.flags |= (int32_t) *byteptr << 24; - } - - return TRUE; -} - -// This monster actually unpacks the WavPack bitstream(s) into the specified -// buffer as 32-bit integers or floats (depending on orignal data). Lossy -// samples will be clipped to their original limits (i.e. 8-bit samples are -// clipped to -128/+127) but are still returned in int32_ts. It is up to the -// caller to potentially reformat this for the final output including any -// multichannel distribution, block alignment or endian compensation. The -// function unpack_init() must have been called and the entire WavPack block -// must still be visible (although wps->blockbuff will not be accessed again). 
-// For maximum clarity, the function is broken up into segments that handle -// various modes. This makes for a few extra infrequent flag checks, but -// makes the code easier to follow because the nesting does not become so -// deep. For maximum efficiency, the conversion is isolated to tight loops -// that handle an entire buffer. The function returns the total number of -// samples unpacked, which can be less than the number requested if an error -// occurs or the end of the block is reached. - -#if defined(CPU_COLDFIRE) && !defined(SIMULATOR) -extern void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); -#elif defined(CPU_ARM) && !defined(SIMULATOR) -extern void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); -extern void decorr_stereo_pass_cont_arml (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); -#else -static void decorr_stereo_pass_cont (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); -#endif - -static void decorr_mono_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); -static void decorr_stereo_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); -static void fixup_samples (WavpackStream *wps, int32_t *buffer, uint32_t sample_count); - -int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count) -{ - WavpackStream *wps = &wpc->stream; - uint32_t flags = wps->wphdr.flags, crc = wps->crc, i; - int32_t mute_limit = (1L << ((flags & MAG_MASK) >> MAG_LSB)) + 2; - struct decorr_pass *dpp; - int32_t *bptr, *eptr; - int tcount; - - if (wps->sample_index + sample_count > wps->wphdr.block_index + wps->wphdr.block_samples) - sample_count = wps->wphdr.block_index + wps->wphdr.block_samples - wps->sample_index; - - if (wps->mute_error) { - memset (buffer, 0, sample_count * (flags & MONO_FLAG ? 
4 : 8)); - wps->sample_index += sample_count; - return sample_count; - } - - if (flags & HYBRID_FLAG) - mute_limit *= 2; - - ///////////////////// handle version 4 mono data ///////////////////////// - - if (flags & MONO_DATA) { - eptr = buffer + sample_count; - i = get_words (buffer, sample_count, flags, &wps->w, &wps->wvbits); - - for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) - decorr_mono_pass (dpp, buffer, sample_count); - - for (bptr = buffer; bptr < eptr; ++bptr) { - if (labs (bptr [0]) > mute_limit) { - i = bptr - buffer; - break; - } - - crc = crc * 3 + bptr [0]; - } - } - - //////////////////// handle version 4 stereo data //////////////////////// - - else { - eptr = buffer + (sample_count * 2); - i = get_words (buffer, sample_count, flags, &wps->w, &wps->wvbits); - - if (sample_count < 16) - for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) - decorr_stereo_pass (dpp, buffer, sample_count); - else - for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) { - decorr_stereo_pass (dpp, buffer, 8); -#if defined(CPU_COLDFIRE) && !defined(SIMULATOR) - decorr_stereo_pass_cont_mcf5249 (dpp, buffer + 16, sample_count - 8); -#elif defined(CPU_ARM) && !defined(SIMULATOR) - if (((flags & MAG_MASK) >> MAG_LSB) > 15) - decorr_stereo_pass_cont_arml (dpp, buffer + 16, sample_count - 8); - else - decorr_stereo_pass_cont_arm (dpp, buffer + 16, sample_count - 8); -#else - decorr_stereo_pass_cont (dpp, buffer + 16, sample_count - 8); -#endif - } - - if (flags & JOINT_STEREO) - for (bptr = buffer; bptr < eptr; bptr += 2) { - bptr [0] += (bptr [1] -= (bptr [0] >> 1)); - - if (labs (bptr [0]) > mute_limit || labs (bptr [1]) > mute_limit) { - i = (bptr - buffer) / 2; - break; - } - - crc = (crc * 3 + bptr [0]) * 3 + bptr [1]; - } - else - for (bptr = buffer; bptr < eptr; bptr += 2) { - if (labs (bptr [0]) > mute_limit || labs (bptr [1]) > mute_limit) { - i = (bptr - buffer) / 2; - break; - } - - crc = (crc * 3 + 
bptr [0]) * 3 + bptr [1]; - } - } - - if (i != sample_count) { - memset (buffer, 0, sample_count * (flags & MONO_FLAG ? 4 : 8)); - wps->mute_error = TRUE; - i = sample_count; - } - - fixup_samples (wps, buffer, i); - - if (flags & FALSE_STEREO) { - int32_t *dptr = buffer + i * 2; - int32_t *sptr = buffer + i; - int32_t c = i; - - while (c--) { - *--dptr = *--sptr; - *--dptr = *sptr; - } - } - - wps->sample_index += i; - wps->crc = crc; - - return i; -} - -static void decorr_stereo_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count) -{ - int32_t delta = dpp->delta, weight_A = dpp->weight_A, weight_B = dpp->weight_B; - int32_t *bptr, *eptr = buffer + (sample_count * 2), sam_A, sam_B; - int m, k; - - switch (dpp->term) { - - case 17: - for (bptr = buffer; bptr < eptr; bptr += 2) { - sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1]; - dpp->samples_A [1] = dpp->samples_A [0]; - dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0]; - update_weight (weight_A, delta, sam_A, bptr [0]); - bptr [0] = dpp->samples_A [0]; - - sam_A = 2 * dpp->samples_B [0] - dpp->samples_B [1]; - dpp->samples_B [1] = dpp->samples_B [0]; - dpp->samples_B [0] = apply_weight (weight_B, sam_A) + bptr [1]; - update_weight (weight_B, delta, sam_A, bptr [1]); - bptr [1] = dpp->samples_B [0]; - } - - break; - - case 18: - for (bptr = buffer; bptr < eptr; bptr += 2) { - sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1; - dpp->samples_A [1] = dpp->samples_A [0]; - dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0]; - update_weight (weight_A, delta, sam_A, bptr [0]); - bptr [0] = dpp->samples_A [0]; - - sam_A = (3 * dpp->samples_B [0] - dpp->samples_B [1]) >> 1; - dpp->samples_B [1] = dpp->samples_B [0]; - dpp->samples_B [0] = apply_weight (weight_B, sam_A) + bptr [1]; - update_weight (weight_B, delta, sam_A, bptr [1]); - bptr [1] = dpp->samples_B [0]; - } - - break; - - default: - for (m = 0, k = dpp->term & (MAX_TERM - 1), bptr = buffer; bptr < eptr; 
bptr += 2) { - sam_A = dpp->samples_A [m]; - dpp->samples_A [k] = apply_weight (weight_A, sam_A) + bptr [0]; - update_weight (weight_A, delta, sam_A, bptr [0]); - bptr [0] = dpp->samples_A [k]; - - sam_A = dpp->samples_B [m]; - dpp->samples_B [k] = apply_weight (weight_B, sam_A) + bptr [1]; - update_weight (weight_B, delta, sam_A, bptr [1]); - bptr [1] = dpp->samples_B [k]; - - m = (m + 1) & (MAX_TERM - 1); - k = (k + 1) & (MAX_TERM - 1); - } - - if (m) { - int32_t temp_samples [MAX_TERM]; - - memcpy (temp_samples, dpp->samples_A, sizeof (dpp->samples_A)); - - for (k = 0; k < MAX_TERM; k++, m++) - dpp->samples_A [k] = temp_samples [m & (MAX_TERM - 1)]; - - memcpy (temp_samples, dpp->samples_B, sizeof (dpp->samples_B)); - - for (k = 0; k < MAX_TERM; k++, m++) - dpp->samples_B [k] = temp_samples [m & (MAX_TERM - 1)]; - } - - break; - - case -1: - for (bptr = buffer; bptr < eptr; bptr += 2) { - sam_A = bptr [0] + apply_weight (weight_A, dpp->samples_A [0]); - update_weight_clip (weight_A, delta, dpp->samples_A [0], bptr [0]); - bptr [0] = sam_A; - dpp->samples_A [0] = bptr [1] + apply_weight (weight_B, sam_A); - update_weight_clip (weight_B, delta, sam_A, bptr [1]); - bptr [1] = dpp->samples_A [0]; - } - - break; - - case -2: - for (bptr = buffer; bptr < eptr; bptr += 2) { - sam_B = bptr [1] + apply_weight (weight_B, dpp->samples_B [0]); - update_weight_clip (weight_B, delta, dpp->samples_B [0], bptr [1]); - bptr [1] = sam_B; - dpp->samples_B [0] = bptr [0] + apply_weight (weight_A, sam_B); - update_weight_clip (weight_A, delta, sam_B, bptr [0]); - bptr [0] = dpp->samples_B [0]; - } - - break; - - case -3: - for (bptr = buffer; bptr < eptr; bptr += 2) { - sam_A = bptr [0] + apply_weight (weight_A, dpp->samples_A [0]); - update_weight_clip (weight_A, delta, dpp->samples_A [0], bptr [0]); - sam_B = bptr [1] + apply_weight (weight_B, dpp->samples_B [0]); - update_weight_clip (weight_B, delta, dpp->samples_B [0], bptr [1]); - bptr [0] = dpp->samples_B [0] = sam_A; - bptr 
[1] = dpp->samples_A [0] = sam_B; - } - - break; - } - - dpp->weight_A = weight_A; - dpp->weight_B = weight_B; -} - -#if (!defined(CPU_COLDFIRE) && !defined(CPU_ARM)) || defined(SIMULATOR) - -static void decorr_stereo_pass_cont (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count) -{ - int32_t delta = dpp->delta, weight_A = dpp->weight_A, weight_B = dpp->weight_B; - int32_t *bptr, *tptr, *eptr = buffer + (sample_count * 2), sam_A, sam_B; - int k, i; - - switch (dpp->term) { - - case 17: - for (bptr = buffer; bptr < eptr; bptr += 2) { - sam_A = 2 * bptr [-2] - bptr [-4]; - bptr [0] = apply_weight (weight_A, sam_A) + (sam_B = bptr [0]); - update_weight (weight_A, delta, sam_A, sam_B); - - sam_A = 2 * bptr [-1] - bptr [-3]; - bptr [1] = apply_weight (weight_B, sam_A) + (sam_B = bptr [1]); - update_weight (weight_B, delta, sam_A, sam_B); - } - - dpp->samples_B [0] = bptr [-1]; - dpp->samples_A [0] = bptr [-2]; - dpp->samples_B [1] = bptr [-3]; - dpp->samples_A [1] = bptr [-4]; - break; - - case 18: - for (bptr = buffer; bptr < eptr; bptr += 2) { - sam_A = (3 * bptr [-2] - bptr [-4]) >> 1; - bptr [0] = apply_weight (weight_A, sam_A) + (sam_B = bptr [0]); - update_weight (weight_A, delta, sam_A, sam_B); - - sam_A = (3 * bptr [-1] - bptr [-3]) >> 1; - bptr [1] = apply_weight (weight_B, sam_A) + (sam_B = bptr [1]); - update_weight (weight_B, delta, sam_A, sam_B); - } - - dpp->samples_B [0] = bptr [-1]; - dpp->samples_A [0] = bptr [-2]; - dpp->samples_B [1] = bptr [-3]; - dpp->samples_A [1] = bptr [-4]; - break; - - default: - for (bptr = buffer, tptr = buffer - (dpp->term * 2); bptr < eptr; bptr += 2, tptr += 2) { - bptr [0] = apply_weight (weight_A, tptr [0]) + (sam_A = bptr [0]); - update_weight (weight_A, delta, tptr [0], sam_A); - - bptr [1] = apply_weight (weight_B, tptr [1]) + (sam_A = bptr [1]); - update_weight (weight_B, delta, tptr [1], sam_A); - } - - for (k = dpp->term - 1, i = 8; i--; k--) { - dpp->samples_B [k & (MAX_TERM - 1)] = *--bptr; - 
dpp->samples_A [k & (MAX_TERM - 1)] = *--bptr; - } - - break; - - case -1: - for (bptr = buffer; bptr < eptr; bptr += 2) { - bptr [0] = apply_weight (weight_A, bptr [-1]) + (sam_A = bptr [0]); - update_weight_clip (weight_A, delta, bptr [-1], sam_A); - bptr [1] = apply_weight (weight_B, bptr [0]) + (sam_A = bptr [1]); - update_weight_clip (weight_B, delta, bptr [0], sam_A); - } - - dpp->samples_A [0] = bptr [-1]; - break; - - case -2: - for (bptr = buffer; bptr < eptr; bptr += 2) { - bptr [1] = apply_weight (weight_B, bptr [-2]) + (sam_A = bptr [1]); - update_weight_clip (weight_B, delta, bptr [-2], sam_A); - bptr [0] = apply_weight (weight_A, bptr [1]) + (sam_A = bptr [0]); - update_weight_clip (weight_A, delta, bptr [1], sam_A); - } - - dpp->samples_B [0] = bptr [-2]; - break; - - case -3: - for (bptr = buffer; bptr < eptr; bptr += 2) { - bptr [0] = apply_weight (weight_A, bptr [-1]) + (sam_A = bptr [0]); - update_weight_clip (weight_A, delta, bptr [-1], sam_A); - bptr [1] = apply_weight (weight_B, bptr [-2]) + (sam_A = bptr [1]); - update_weight_clip (weight_B, delta, bptr [-2], sam_A); - } - - dpp->samples_A [0] = bptr [-1]; - dpp->samples_B [0] = bptr [-2]; - break; - } - - dpp->weight_A = weight_A; - dpp->weight_B = weight_B; -} - -#endif - -static void decorr_mono_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count) -{ - int32_t delta = dpp->delta, weight_A = dpp->weight_A; - int32_t *bptr, *eptr = buffer + sample_count, sam_A; - int m, k; - - switch (dpp->term) { - - case 17: - for (bptr = buffer; bptr < eptr; bptr++) { - sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1]; - dpp->samples_A [1] = dpp->samples_A [0]; - dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0]; - update_weight (weight_A, delta, sam_A, bptr [0]); - bptr [0] = dpp->samples_A [0]; - } - - break; - - case 18: - for (bptr = buffer; bptr < eptr; bptr++) { - sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1; - dpp->samples_A [1] = dpp->samples_A [0]; - 
dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0]; - update_weight (weight_A, delta, sam_A, bptr [0]); - bptr [0] = dpp->samples_A [0]; - } - - break; - - default: - for (m = 0, k = dpp->term & (MAX_TERM - 1), bptr = buffer; bptr < eptr; bptr++) { - sam_A = dpp->samples_A [m]; - dpp->samples_A [k] = apply_weight (weight_A, sam_A) + bptr [0]; - update_weight (weight_A, delta, sam_A, bptr [0]); - bptr [0] = dpp->samples_A [k]; - m = (m + 1) & (MAX_TERM - 1); - k = (k + 1) & (MAX_TERM - 1); - } - - if (m) { - int32_t temp_samples [MAX_TERM]; - - memcpy (temp_samples, dpp->samples_A, sizeof (dpp->samples_A)); - - for (k = 0; k < MAX_TERM; k++, m++) - dpp->samples_A [k] = temp_samples [m & (MAX_TERM - 1)]; - } - - break; - } - - dpp->weight_A = weight_A; -} - - -// This is a helper function for unpack_samples() that applies several final -// operations. First, if the data is 32-bit float data, then that conversion -// is done in the float.c module (whether lossy or lossless) and we return. -// Otherwise, if the extended integer data applies, then that operation is -// executed first. If the unpacked data is lossy (and not corrected) then -// it is clipped and shifted in a single operation. Otherwise, if it's -// lossless then the last step is to apply the final shift (if any). - -static void fixup_samples (WavpackStream *wps, int32_t *buffer, uint32_t sample_count) -{ - uint32_t flags = wps->wphdr.flags; - int shift = (flags & SHIFT_MASK) >> SHIFT_LSB; - - if (flags & FLOAT_DATA) { - float_values (wps, buffer, (flags & MONO_FLAG) ? sample_count : sample_count * 2); - return; - } - - if (flags & INT32_DATA) { - uint32_t count = (flags & MONO_FLAG) ? 
sample_count : sample_count * 2; - int sent_bits = wps->int32_sent_bits, zeros = wps->int32_zeros; - int ones = wps->int32_ones, dups = wps->int32_dups; - int32_t *dptr = buffer; - - if (!(flags & HYBRID_FLAG) && !sent_bits && (zeros + ones + dups)) - while (count--) { - if (zeros) - *dptr <<= zeros; - else if (ones) - *dptr = ((*dptr + 1) << ones) - 1; - else if (dups) - *dptr = ((*dptr + (*dptr & 1)) << dups) - (*dptr & 1); - - dptr++; - } - else - shift += zeros + sent_bits + ones + dups; - } - - if (flags & HYBRID_FLAG) { - int32_t min_value, max_value, min_shifted, max_shifted; - - switch (flags & BYTES_STORED) { - case 0: - min_shifted = (min_value = -128 >> shift) << shift; - max_shifted = (max_value = 127 >> shift) << shift; - break; - - case 1: - min_shifted = (min_value = -32768 >> shift) << shift; - max_shifted = (max_value = 32767 >> shift) << shift; - break; - - case 2: - min_shifted = (min_value = -8388608 >> shift) << shift; - max_shifted = (max_value = 8388607 >> shift) << shift; - break; - - case 3: - default: - min_shifted = (min_value = (int32_t) 0x80000000 >> shift) << shift; - max_shifted = (max_value = (int32_t) 0x7FFFFFFF >> shift) << shift; - break; - } - - if (!(flags & MONO_FLAG)) - sample_count *= 2; - - while (sample_count--) { - if (*buffer < min_value) - *buffer++ = min_shifted; - else if (*buffer > max_value) - *buffer++ = max_shifted; - else - *buffer++ <<= shift; - } - } - else if (shift) { - if (!(flags & MONO_FLAG)) - sample_count *= 2; - - while (sample_count--) - *buffer++ <<= shift; - } -} - -// This function checks the crc value(s) for an unpacked block, returning the -// number of actual crc errors detected for the block. The block must be -// completely unpacked before this test is valid. For losslessly unpacked -// blocks of float or extended integer data the extended crc is also checked. 
-// Note that WavPack's crc is not a CCITT approved polynomial algorithm, but -// is a much simpler method that is virtually as robust for real world data. - -int check_crc_error (WavpackContext *wpc) -{ - WavpackStream *wps = &wpc->stream; - int result = 0; - - if (wps->crc != wps->wphdr.crc) - ++result; - - return result; -} +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2006 Conifer Software. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// unpack.c + +// This module actually handles the decompression of the audio data, except +// for the entropy decoding which is handled by the words.c module. For +// maximum efficiency, the conversion is isolated to tight loops that handle +// an entire buffer. + +#include "wavpack.h" + +#include <stdlib.h> +#include <string.h> + +#define LOSSY_MUTE + +///////////////////////////// executable code //////////////////////////////// + +// This function initializes everything required to unpack a WavPack block +// and must be called before unpack_samples() is called to obtain audio data. +// It is assumed that the WavpackHeader has been read into the wps->wphdr +// (in the current WavpackStream). This is where all the metadata blocks are +// scanned up to the one containing the audio bitstream. 
+ +int unpack_init (WavpackContext *wpc) +{ + WavpackStream *wps = &wpc->stream; + WavpackMetadata wpmd; + + if (wps->wphdr.block_samples && wps->wphdr.block_index != (uint32_t) -1) + wps->sample_index = wps->wphdr.block_index; + + wps->mute_error = FALSE; + wps->crc = 0xffffffff; + CLEAR (wps->wvbits); + CLEAR (wps->decorr_passes); + CLEAR (wps->w); + + while (read_metadata_buff (wpc, &wpmd)) { + if (!process_metadata (wpc, &wpmd)) { + strcpy (wpc->error_message, "invalid metadata!"); + return FALSE; + } + + if (wpmd.id == ID_WV_BITSTREAM) + break; + } + + if (wps->wphdr.block_samples && !bs_is_open (&wps->wvbits)) { + strcpy (wpc->error_message, "invalid WavPack file!"); + return FALSE; + } + + if (wps->wphdr.block_samples) { + if ((wps->wphdr.flags & INT32_DATA) && wps->int32_sent_bits) + wpc->lossy_blocks = TRUE; + + if ((wps->wphdr.flags & FLOAT_DATA) && + wps->float_flags & (FLOAT_EXCEPTIONS | FLOAT_ZEROS_SENT | FLOAT_SHIFT_SENT | FLOAT_SHIFT_SAME)) + wpc->lossy_blocks = TRUE; + } + + return TRUE; +} + +// This function initialzes the main bitstream for audio samples, which must +// be in the "wv" file. + +int init_wv_bitstream (WavpackContext *wpc, WavpackMetadata *wpmd) +{ + WavpackStream *wps = &wpc->stream; + + if (wpmd->data) + bs_open_read (&wps->wvbits, wpmd->data, (unsigned char *) wpmd->data + wpmd->byte_length, NULL, 0); + else if (wpmd->byte_length) + bs_open_read (&wps->wvbits, wpc->read_buffer, wpc->read_buffer + sizeof (wpc->read_buffer), + wpc->infile, wpmd->byte_length + (wpmd->byte_length & 1)); + + return TRUE; +} + +// Read decorrelation terms from specified metadata block into the +// decorr_passes array. The terms range from -3 to 8, plus 17 & 18; +// other values are reserved and generate errors for now. The delta +// ranges from 0 to 7 with all values valid. Note that the terms are +// stored in the opposite order in the decorr_passes array compared +// to packing. 
+ +int read_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd) +{ + int termcnt = wpmd->byte_length; + uchar *byteptr = wpmd->data; + struct decorr_pass *dpp; + + if (termcnt > MAX_NTERMS) + return FALSE; + + wps->num_terms = termcnt; + + for (dpp = wps->decorr_passes + termcnt - 1; termcnt--; dpp--) { + dpp->term = (int)(*byteptr & 0x1f) - 5; + dpp->delta = (*byteptr++ >> 5) & 0x7; + + if (!dpp->term || dpp->term < -3 || (dpp->term > MAX_TERM && dpp->term < 17) || dpp->term > 18) + return FALSE; + } + + return TRUE; +} + +// Read decorrelation weights from specified metadata block into the +// decorr_passes array. The weights range +/-1024, but are rounded and +// truncated to fit in signed chars for metadata storage. Weights are +// separate for the two channels and are specified from the "last" term +// (first during encode). Unspecified weights are set to zero. + +int read_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd) +{ + int termcnt = wpmd->byte_length, tcount; + signed char *byteptr = wpmd->data; + struct decorr_pass *dpp; + + if (!(wps->wphdr.flags & MONO_DATA)) + termcnt /= 2; + + if (termcnt > wps->num_terms) + return FALSE; + + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) + dpp->weight_A = dpp->weight_B = 0; + + while (--dpp >= wps->decorr_passes && termcnt--) { + dpp->weight_A = restore_weight (*byteptr++); + + if (!(wps->wphdr.flags & MONO_DATA)) + dpp->weight_B = restore_weight (*byteptr++); + } + + return TRUE; +} + +// Read decorrelation samples from specified metadata block into the +// decorr_passes array. The samples are signed 32-bit values, but are +// converted to signed log2 values for storage in metadata. Values are +// stored for both channels and are specified from the "last" term +// (first during encode) with unspecified samples set to zero. The +// number of samples stored varies with the actual term value, so +// those must obviously come first in the metadata. 
+ +int read_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd) +{ + uchar *byteptr = wpmd->data; + uchar *endptr = byteptr + wpmd->byte_length; + struct decorr_pass *dpp; + int tcount; + + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) { + CLEAR (dpp->samples_A); + CLEAR (dpp->samples_B); + } + + if (wps->wphdr.version == 0x402 && (wps->wphdr.flags & HYBRID_FLAG)) { + byteptr += 2; + + if (!(wps->wphdr.flags & MONO_DATA)) + byteptr += 2; + } + + while (dpp-- > wps->decorr_passes && byteptr < endptr) + if (dpp->term > MAX_TERM) { + dpp->samples_A [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); + dpp->samples_A [1] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8))); + byteptr += 4; + + if (!(wps->wphdr.flags & MONO_DATA)) { + dpp->samples_B [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); + dpp->samples_B [1] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8))); + byteptr += 4; + } + } + else if (dpp->term < 0) { + dpp->samples_A [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); + dpp->samples_B [0] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8))); + byteptr += 4; + } + else { + int m = 0, cnt = dpp->term; + + while (cnt--) { + dpp->samples_A [m] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); + byteptr += 2; + + if (!(wps->wphdr.flags & MONO_DATA)) { + dpp->samples_B [m] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); + byteptr += 2; + } + + m++; + } + } + + return byteptr == endptr; +} + +// Read the int32 data from the specified metadata into the specified stream. +// This data is used for integer data that has more than 24 bits of magnitude +// or, in some cases, used to eliminate redundant bits from any audio stream. 
+ +int read_int32_info (WavpackStream *wps, WavpackMetadata *wpmd) +{ + int bytecnt = wpmd->byte_length; + char *byteptr = wpmd->data; + + if (bytecnt != 4) + return FALSE; + + wps->int32_sent_bits = *byteptr++; + wps->int32_zeros = *byteptr++; + wps->int32_ones = *byteptr++; + wps->int32_dups = *byteptr; + return TRUE; +} + +// Read multichannel information from metadata. The first byte is the total +// number of channels and the following bytes represent the channel_mask +// as described for Microsoft WAVEFORMATEX. + +int read_channel_info (WavpackContext *wpc, WavpackMetadata *wpmd) +{ + int bytecnt = wpmd->byte_length, shift = 0; + char *byteptr = wpmd->data; + uint32_t mask = 0; + + if (!bytecnt || bytecnt > 5) + return FALSE; + + wpc->config.num_channels = *byteptr++; + + while (--bytecnt) { + mask |= (uint32_t) *byteptr++ << shift; + shift += 8; + } + + wpc->config.channel_mask = mask; + return TRUE; +} + +// Read configuration information from metadata. + +int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd) +{ + int bytecnt = wpmd->byte_length; + uchar *byteptr = wpmd->data; + + if (bytecnt >= 3) { + wpc->config.flags &= 0xff; + wpc->config.flags |= (int32_t) *byteptr++ << 8; + wpc->config.flags |= (int32_t) *byteptr++ << 16; + wpc->config.flags |= (int32_t) *byteptr << 24; + } + + return TRUE; +} + +// This monster actually unpacks the WavPack bitstream(s) into the specified +// buffer as 32-bit integers or floats (depending on orignal data). Lossy +// samples will be clipped to their original limits (i.e. 8-bit samples are +// clipped to -128/+127) but are still returned in int32_ts. It is up to the +// caller to potentially reformat this for the final output including any +// multichannel distribution, block alignment or endian compensation. The +// function unpack_init() must have been called and the entire WavPack block +// must still be visible (although wps->blockbuff will not be accessed again). 
+// For maximum clarity, the function is broken up into segments that handle +// various modes. This makes for a few extra infrequent flag checks, but +// makes the code easier to follow because the nesting does not become so +// deep. For maximum efficiency, the conversion is isolated to tight loops +// that handle an entire buffer. The function returns the total number of +// samples unpacked, which can be less than the number requested if an error +// occurs or the end of the block is reached. + +#if defined(CPU_COLDFIRE) && !defined(SIMULATOR) +extern void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); +#elif defined(CPU_ARM) && !defined(SIMULATOR) +extern void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); +extern void decorr_stereo_pass_cont_arml (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); +#else +static void decorr_stereo_pass_cont (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); +#endif + +static void decorr_mono_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); +static void decorr_stereo_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count); +static void fixup_samples (WavpackStream *wps, int32_t *buffer, uint32_t sample_count); + +int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count) +{ + WavpackStream *wps = &wpc->stream; + uint32_t flags = wps->wphdr.flags, crc = wps->crc, i; + int32_t mute_limit = (1L << ((flags & MAG_MASK) >> MAG_LSB)) + 2; + struct decorr_pass *dpp; + int32_t *bptr, *eptr; + int tcount; + + if (wps->sample_index + sample_count > wps->wphdr.block_index + wps->wphdr.block_samples) + sample_count = wps->wphdr.block_index + wps->wphdr.block_samples - wps->sample_index; + + if (wps->mute_error) { + memset (buffer, 0, sample_count * (flags & MONO_FLAG ? 
4 : 8)); + wps->sample_index += sample_count; + return sample_count; + } + + if (flags & HYBRID_FLAG) + mute_limit *= 2; + + ///////////////////// handle version 4 mono data ///////////////////////// + + if (flags & MONO_DATA) { + eptr = buffer + sample_count; + i = get_words (buffer, sample_count, flags, &wps->w, &wps->wvbits); + + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) + decorr_mono_pass (dpp, buffer, sample_count); + + for (bptr = buffer; bptr < eptr; ++bptr) { + if (labs (bptr [0]) > mute_limit) { + i = bptr - buffer; + break; + } + + crc = crc * 3 + bptr [0]; + } + } + + //////////////////// handle version 4 stereo data //////////////////////// + + else { + eptr = buffer + (sample_count * 2); + i = get_words (buffer, sample_count, flags, &wps->w, &wps->wvbits); + + if (sample_count < 16) + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) + decorr_stereo_pass (dpp, buffer, sample_count); + else + for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) { + decorr_stereo_pass (dpp, buffer, 8); +#if defined(CPU_COLDFIRE) && !defined(SIMULATOR) + decorr_stereo_pass_cont_mcf5249 (dpp, buffer + 16, sample_count - 8); +#elif defined(CPU_ARM) && !defined(SIMULATOR) + if (((flags & MAG_MASK) >> MAG_LSB) > 15) + decorr_stereo_pass_cont_arml (dpp, buffer + 16, sample_count - 8); + else + decorr_stereo_pass_cont_arm (dpp, buffer + 16, sample_count - 8); +#else + decorr_stereo_pass_cont (dpp, buffer + 16, sample_count - 8); +#endif + } + + if (flags & JOINT_STEREO) + for (bptr = buffer; bptr < eptr; bptr += 2) { + bptr [0] += (bptr [1] -= (bptr [0] >> 1)); + + if (labs (bptr [0]) > mute_limit || labs (bptr [1]) > mute_limit) { + i = (bptr - buffer) / 2; + break; + } + + crc = (crc * 3 + bptr [0]) * 3 + bptr [1]; + } + else + for (bptr = buffer; bptr < eptr; bptr += 2) { + if (labs (bptr [0]) > mute_limit || labs (bptr [1]) > mute_limit) { + i = (bptr - buffer) / 2; + break; + } + + crc = (crc * 3 + 
bptr [0]) * 3 + bptr [1]; + } + } + + if (i != sample_count) { + memset (buffer, 0, sample_count * (flags & MONO_FLAG ? 4 : 8)); + wps->mute_error = TRUE; + i = sample_count; + } + + fixup_samples (wps, buffer, i); + + if (flags & FALSE_STEREO) { + int32_t *dptr = buffer + i * 2; + int32_t *sptr = buffer + i; + int32_t c = i; + + while (c--) { + *--dptr = *--sptr; + *--dptr = *sptr; + } + } + + wps->sample_index += i; + wps->crc = crc; + + return i; +} + +static void decorr_stereo_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count) +{ + int32_t delta = dpp->delta, weight_A = dpp->weight_A, weight_B = dpp->weight_B; + int32_t *bptr, *eptr = buffer + (sample_count * 2), sam_A, sam_B; + int m, k; + + switch (dpp->term) { + + case 17: + for (bptr = buffer; bptr < eptr; bptr += 2) { + sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1]; + dpp->samples_A [1] = dpp->samples_A [0]; + dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0]; + update_weight (weight_A, delta, sam_A, bptr [0]); + bptr [0] = dpp->samples_A [0]; + + sam_A = 2 * dpp->samples_B [0] - dpp->samples_B [1]; + dpp->samples_B [1] = dpp->samples_B [0]; + dpp->samples_B [0] = apply_weight (weight_B, sam_A) + bptr [1]; + update_weight (weight_B, delta, sam_A, bptr [1]); + bptr [1] = dpp->samples_B [0]; + } + + break; + + case 18: + for (bptr = buffer; bptr < eptr; bptr += 2) { + sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1; + dpp->samples_A [1] = dpp->samples_A [0]; + dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0]; + update_weight (weight_A, delta, sam_A, bptr [0]); + bptr [0] = dpp->samples_A [0]; + + sam_A = (3 * dpp->samples_B [0] - dpp->samples_B [1]) >> 1; + dpp->samples_B [1] = dpp->samples_B [0]; + dpp->samples_B [0] = apply_weight (weight_B, sam_A) + bptr [1]; + update_weight (weight_B, delta, sam_A, bptr [1]); + bptr [1] = dpp->samples_B [0]; + } + + break; + + default: + for (m = 0, k = dpp->term & (MAX_TERM - 1), bptr = buffer; bptr < eptr; 
bptr += 2) { + sam_A = dpp->samples_A [m]; + dpp->samples_A [k] = apply_weight (weight_A, sam_A) + bptr [0]; + update_weight (weight_A, delta, sam_A, bptr [0]); + bptr [0] = dpp->samples_A [k]; + + sam_A = dpp->samples_B [m]; + dpp->samples_B [k] = apply_weight (weight_B, sam_A) + bptr [1]; + update_weight (weight_B, delta, sam_A, bptr [1]); + bptr [1] = dpp->samples_B [k]; + + m = (m + 1) & (MAX_TERM - 1); + k = (k + 1) & (MAX_TERM - 1); + } + + if (m) { + int32_t temp_samples [MAX_TERM]; + + memcpy (temp_samples, dpp->samples_A, sizeof (dpp->samples_A)); + + for (k = 0; k < MAX_TERM; k++, m++) + dpp->samples_A [k] = temp_samples [m & (MAX_TERM - 1)]; + + memcpy (temp_samples, dpp->samples_B, sizeof (dpp->samples_B)); + + for (k = 0; k < MAX_TERM; k++, m++) + dpp->samples_B [k] = temp_samples [m & (MAX_TERM - 1)]; + } + + break; + + case -1: + for (bptr = buffer; bptr < eptr; bptr += 2) { + sam_A = bptr [0] + apply_weight (weight_A, dpp->samples_A [0]); + update_weight_clip (weight_A, delta, dpp->samples_A [0], bptr [0]); + bptr [0] = sam_A; + dpp->samples_A [0] = bptr [1] + apply_weight (weight_B, sam_A); + update_weight_clip (weight_B, delta, sam_A, bptr [1]); + bptr [1] = dpp->samples_A [0]; + } + + break; + + case -2: + for (bptr = buffer; bptr < eptr; bptr += 2) { + sam_B = bptr [1] + apply_weight (weight_B, dpp->samples_B [0]); + update_weight_clip (weight_B, delta, dpp->samples_B [0], bptr [1]); + bptr [1] = sam_B; + dpp->samples_B [0] = bptr [0] + apply_weight (weight_A, sam_B); + update_weight_clip (weight_A, delta, sam_B, bptr [0]); + bptr [0] = dpp->samples_B [0]; + } + + break; + + case -3: + for (bptr = buffer; bptr < eptr; bptr += 2) { + sam_A = bptr [0] + apply_weight (weight_A, dpp->samples_A [0]); + update_weight_clip (weight_A, delta, dpp->samples_A [0], bptr [0]); + sam_B = bptr [1] + apply_weight (weight_B, dpp->samples_B [0]); + update_weight_clip (weight_B, delta, dpp->samples_B [0], bptr [1]); + bptr [0] = dpp->samples_B [0] = sam_A; + bptr 
[1] = dpp->samples_A [0] = sam_B; + } + + break; + } + + dpp->weight_A = weight_A; + dpp->weight_B = weight_B; +} + +#if (!defined(CPU_COLDFIRE) && !defined(CPU_ARM)) || defined(SIMULATOR) + +static void decorr_stereo_pass_cont (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count) +{ + int32_t delta = dpp->delta, weight_A = dpp->weight_A, weight_B = dpp->weight_B; + int32_t *bptr, *tptr, *eptr = buffer + (sample_count * 2), sam_A, sam_B; + int k, i; + + switch (dpp->term) { + + case 17: + for (bptr = buffer; bptr < eptr; bptr += 2) { + sam_A = 2 * bptr [-2] - bptr [-4]; + bptr [0] = apply_weight (weight_A, sam_A) + (sam_B = bptr [0]); + update_weight (weight_A, delta, sam_A, sam_B); + + sam_A = 2 * bptr [-1] - bptr [-3]; + bptr [1] = apply_weight (weight_B, sam_A) + (sam_B = bptr [1]); + update_weight (weight_B, delta, sam_A, sam_B); + } + + dpp->samples_B [0] = bptr [-1]; + dpp->samples_A [0] = bptr [-2]; + dpp->samples_B [1] = bptr [-3]; + dpp->samples_A [1] = bptr [-4]; + break; + + case 18: + for (bptr = buffer; bptr < eptr; bptr += 2) { + sam_A = (3 * bptr [-2] - bptr [-4]) >> 1; + bptr [0] = apply_weight (weight_A, sam_A) + (sam_B = bptr [0]); + update_weight (weight_A, delta, sam_A, sam_B); + + sam_A = (3 * bptr [-1] - bptr [-3]) >> 1; + bptr [1] = apply_weight (weight_B, sam_A) + (sam_B = bptr [1]); + update_weight (weight_B, delta, sam_A, sam_B); + } + + dpp->samples_B [0] = bptr [-1]; + dpp->samples_A [0] = bptr [-2]; + dpp->samples_B [1] = bptr [-3]; + dpp->samples_A [1] = bptr [-4]; + break; + + default: + for (bptr = buffer, tptr = buffer - (dpp->term * 2); bptr < eptr; bptr += 2, tptr += 2) { + bptr [0] = apply_weight (weight_A, tptr [0]) + (sam_A = bptr [0]); + update_weight (weight_A, delta, tptr [0], sam_A); + + bptr [1] = apply_weight (weight_B, tptr [1]) + (sam_A = bptr [1]); + update_weight (weight_B, delta, tptr [1], sam_A); + } + + for (k = dpp->term - 1, i = 8; i--; k--) { + dpp->samples_B [k & (MAX_TERM - 1)] = *--bptr; + 
dpp->samples_A [k & (MAX_TERM - 1)] = *--bptr; + } + + break; + + case -1: + for (bptr = buffer; bptr < eptr; bptr += 2) { + bptr [0] = apply_weight (weight_A, bptr [-1]) + (sam_A = bptr [0]); + update_weight_clip (weight_A, delta, bptr [-1], sam_A); + bptr [1] = apply_weight (weight_B, bptr [0]) + (sam_A = bptr [1]); + update_weight_clip (weight_B, delta, bptr [0], sam_A); + } + + dpp->samples_A [0] = bptr [-1]; + break; + + case -2: + for (bptr = buffer; bptr < eptr; bptr += 2) { + bptr [1] = apply_weight (weight_B, bptr [-2]) + (sam_A = bptr [1]); + update_weight_clip (weight_B, delta, bptr [-2], sam_A); + bptr [0] = apply_weight (weight_A, bptr [1]) + (sam_A = bptr [0]); + update_weight_clip (weight_A, delta, bptr [1], sam_A); + } + + dpp->samples_B [0] = bptr [-2]; + break; + + case -3: + for (bptr = buffer; bptr < eptr; bptr += 2) { + bptr [0] = apply_weight (weight_A, bptr [-1]) + (sam_A = bptr [0]); + update_weight_clip (weight_A, delta, bptr [-1], sam_A); + bptr [1] = apply_weight (weight_B, bptr [-2]) + (sam_A = bptr [1]); + update_weight_clip (weight_B, delta, bptr [-2], sam_A); + } + + dpp->samples_A [0] = bptr [-1]; + dpp->samples_B [0] = bptr [-2]; + break; + } + + dpp->weight_A = weight_A; + dpp->weight_B = weight_B; +} + +#endif + +static void decorr_mono_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count) +{ + int32_t delta = dpp->delta, weight_A = dpp->weight_A; + int32_t *bptr, *eptr = buffer + sample_count, sam_A; + int m, k; + + switch (dpp->term) { + + case 17: + for (bptr = buffer; bptr < eptr; bptr++) { + sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1]; + dpp->samples_A [1] = dpp->samples_A [0]; + dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0]; + update_weight (weight_A, delta, sam_A, bptr [0]); + bptr [0] = dpp->samples_A [0]; + } + + break; + + case 18: + for (bptr = buffer; bptr < eptr; bptr++) { + sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1; + dpp->samples_A [1] = dpp->samples_A [0]; + 
dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0]; + update_weight (weight_A, delta, sam_A, bptr [0]); + bptr [0] = dpp->samples_A [0]; + } + + break; + + default: + for (m = 0, k = dpp->term & (MAX_TERM - 1), bptr = buffer; bptr < eptr; bptr++) { + sam_A = dpp->samples_A [m]; + dpp->samples_A [k] = apply_weight (weight_A, sam_A) + bptr [0]; + update_weight (weight_A, delta, sam_A, bptr [0]); + bptr [0] = dpp->samples_A [k]; + m = (m + 1) & (MAX_TERM - 1); + k = (k + 1) & (MAX_TERM - 1); + } + + if (m) { + int32_t temp_samples [MAX_TERM]; + + memcpy (temp_samples, dpp->samples_A, sizeof (dpp->samples_A)); + + for (k = 0; k < MAX_TERM; k++, m++) + dpp->samples_A [k] = temp_samples [m & (MAX_TERM - 1)]; + } + + break; + } + + dpp->weight_A = weight_A; +} + + +// This is a helper function for unpack_samples() that applies several final +// operations. First, if the data is 32-bit float data, then that conversion +// is done in the float.c module (whether lossy or lossless) and we return. +// Otherwise, if the extended integer data applies, then that operation is +// executed first. If the unpacked data is lossy (and not corrected) then +// it is clipped and shifted in a single operation. Otherwise, if it's +// lossless then the last step is to apply the final shift (if any). + +static void fixup_samples (WavpackStream *wps, int32_t *buffer, uint32_t sample_count) +{ + uint32_t flags = wps->wphdr.flags; + int shift = (flags & SHIFT_MASK) >> SHIFT_LSB; + + if (flags & FLOAT_DATA) { + float_values (wps, buffer, (flags & MONO_FLAG) ? sample_count : sample_count * 2); + return; + } + + if (flags & INT32_DATA) { + uint32_t count = (flags & MONO_FLAG) ? 
sample_count : sample_count * 2; + int sent_bits = wps->int32_sent_bits, zeros = wps->int32_zeros; + int ones = wps->int32_ones, dups = wps->int32_dups; + int32_t *dptr = buffer; + + if (!(flags & HYBRID_FLAG) && !sent_bits && (zeros + ones + dups)) + while (count--) { + if (zeros) + *dptr <<= zeros; + else if (ones) + *dptr = ((*dptr + 1) << ones) - 1; + else if (dups) + *dptr = ((*dptr + (*dptr & 1)) << dups) - (*dptr & 1); + + dptr++; + } + else + shift += zeros + sent_bits + ones + dups; + } + + if (flags & HYBRID_FLAG) { + int32_t min_value, max_value, min_shifted, max_shifted; + + switch (flags & BYTES_STORED) { + case 0: + min_shifted = (min_value = -128 >> shift) << shift; + max_shifted = (max_value = 127 >> shift) << shift; + break; + + case 1: + min_shifted = (min_value = -32768 >> shift) << shift; + max_shifted = (max_value = 32767 >> shift) << shift; + break; + + case 2: + min_shifted = (min_value = -8388608 >> shift) << shift; + max_shifted = (max_value = 8388607 >> shift) << shift; + break; + + case 3: + default: + min_shifted = (min_value = (int32_t) 0x80000000 >> shift) << shift; + max_shifted = (max_value = (int32_t) 0x7FFFFFFF >> shift) << shift; + break; + } + + if (!(flags & MONO_FLAG)) + sample_count *= 2; + + while (sample_count--) { + if (*buffer < min_value) + *buffer++ = min_shifted; + else if (*buffer > max_value) + *buffer++ = max_shifted; + else + *buffer++ <<= shift; + } + } + else if (shift) { + if (!(flags & MONO_FLAG)) + sample_count *= 2; + + while (sample_count--) + *buffer++ <<= shift; + } +} + +// This function checks the crc value(s) for an unpacked block, returning the +// number of actual crc errors detected for the block. The block must be +// completely unpacked before this test is valid. For losslessly unpacked +// blocks of float or extended integer data the extended crc is also checked. 
+// Note that WavPack's crc is not a CCITT approved polynomial algorithm, but +// is a much simpler method that is virtually as robust for real world data. + +int check_crc_error (WavpackContext *wpc) +{ + WavpackStream *wps = &wpc->stream; + int result = 0; + + if (wps->crc != wps->wphdr.crc) + ++result; + + return result; +} diff --git a/src/engine/external/wavpack/wavpack.h b/src/engine/external/wavpack/wavpack.h index 29e6af75..2e22fc51 100644 --- a/src/engine/external/wavpack/wavpack.h +++ b/src/engine/external/wavpack/wavpack.h @@ -5,56 +5,56 @@ /* All Rights Reserved. // */ /* Distributed under the BSD Software License (see license.txt) // */ /*////////////////////////////////////////////////////////////////////////// */ - + /* wavpack.h */ - -#include <sys/types.h> - + +#include <sys/types.h> + /* This header file contains all the definitions required by WavPack. */ - -#ifdef __BORLANDC__ -typedef unsigned long uint32_t; -typedef long int32_t; -#elif defined(_WIN32) && !defined(__MINGW32__) -#include <stdlib.h> -typedef unsigned __int64 uint64_t; -typedef unsigned __int32 uint32_t; -typedef __int64 int64_t; -typedef __int32 int32_t; -#else -#include <inttypes.h> -#endif - -typedef unsigned char uchar; - -#if !defined(__GNUC__) || defined(WIN32) -typedef unsigned short ushort; -typedef unsigned int uint; -#endif - -#include <stdio.h> - -#define FALSE 0 -#define TRUE 1 - + +#ifdef __BORLANDC__ +typedef unsigned long uint32_t; +typedef long int32_t; +#elif defined(_WIN32) && !defined(__MINGW32__) +#include <stdlib.h> +typedef unsigned __int64 uint64_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef __int32 int32_t; +#else +#include <inttypes.h> +#endif + +typedef unsigned char uchar; + +#if !defined(__GNUC__) || defined(WIN32) +typedef unsigned short ushort; +typedef unsigned int uint; +#endif + +#include <stdio.h> + +#define FALSE 0 +#define TRUE 1 + /*//////////////////////////// WavPack Header ///////////////////////////////// */ - + 
/* Note that this is the ONLY structure that is written to (or read from) */ /* WavPack 4.0 files, and is the preamble to every block in both the .wv */ /* and .wvc files. */ - -typedef struct { - char ckID [4]; - uint32_t ckSize; - short version; - uchar track_no, index_no; - uint32_t total_samples, block_index, block_samples, flags, crc; -} WavpackHeader; - -#define WavpackHeaderFormat "4LS2LLLLL" - + +typedef struct { + char ckID [4]; + uint32_t ckSize; + short version; + uchar track_no, index_no; + uint32_t total_samples, block_index, block_samples, flags, crc; +} WavpackHeader; + +#define WavpackHeaderFormat "4LS2LLLLL" + /* or-values for "flags" */ - + #define BYTES_STORED 3 /* 1-4 bytes/sample */ #define MONO_FLAG 4 /* not stereo */ #define HYBRID_FLAG 8 /* hybrid mode */ @@ -62,83 +62,83 @@ typedef struct { #define CROSS_DECORR 0x20 /* no-delay cross decorrelation */ #define HYBRID_SHAPE 0x40 /* noise shape (hybrid mode only) */ #define FLOAT_DATA 0x80 /* ieee 32-bit floating point data */ - + #define INT32_DATA 0x100 /* special extended int handling */ #define HYBRID_BITRATE 0x200 /* bitrate noise (hybrid mode only) */ #define HYBRID_BALANCE 0x400 /* balance noise (hybrid stereo mode only) */ - + #define INITIAL_BLOCK 0x800 /* initial block of multichannel segment */ #define FINAL_BLOCK 0x1000 /* final block of multichannel segment */ - -#define SHIFT_LSB 13 -#define SHIFT_MASK (0x1fL << SHIFT_LSB) - -#define MAG_LSB 18 -#define MAG_MASK (0x1fL << MAG_LSB) - -#define SRATE_LSB 23 -#define SRATE_MASK (0xfL << SRATE_LSB) - + +#define SHIFT_LSB 13 +#define SHIFT_MASK (0x1fL << SHIFT_LSB) + +#define MAG_LSB 18 +#define MAG_MASK (0x1fL << MAG_LSB) + +#define SRATE_LSB 23 +#define SRATE_MASK (0xfL << SRATE_LSB) + #define FALSE_STEREO 0x40000000 /* block is stereo, but data is mono */ - + #define IGNORED_FLAGS 0x18000000 /* reserved, but ignore if encountered */ #define NEW_SHAPING 0x20000000 /* use IIR filter for negative shaping */ #define UNKNOWN_FLAGS 
0x80000000 /* also reserved, but refuse decode if */ /* encountered */ - -#define MONO_DATA (MONO_FLAG | FALSE_STEREO) - + +#define MONO_DATA (MONO_FLAG | FALSE_STEREO) + #define MIN_STREAM_VERS 0x402 /* lowest stream version we'll decode */ #define MAX_STREAM_VERS 0x410 /* highest stream version we'll decode */ - + /*////////////////////////// WavPack Metadata ///////////////////////////////// */ - + /* This is an internal representation of metadata. */ - -typedef struct { - int32_t byte_length; - void *data; - uchar id; -} WavpackMetadata; - -#define ID_OPTIONAL_DATA 0x20 -#define ID_ODD_SIZE 0x40 -#define ID_LARGE 0x80 - -#define ID_DUMMY 0x0 -#define ID_ENCODER_INFO 0x1 -#define ID_DECORR_TERMS 0x2 -#define ID_DECORR_WEIGHTS 0x3 -#define ID_DECORR_SAMPLES 0x4 -#define ID_ENTROPY_VARS 0x5 -#define ID_HYBRID_PROFILE 0x6 -#define ID_SHAPING_WEIGHTS 0x7 -#define ID_FLOAT_INFO 0x8 -#define ID_INT32_INFO 0x9 -#define ID_WV_BITSTREAM 0xa -#define ID_WVC_BITSTREAM 0xb -#define ID_WVX_BITSTREAM 0xc -#define ID_CHANNEL_INFO 0xd - -#define ID_RIFF_HEADER (ID_OPTIONAL_DATA | 0x1) -#define ID_RIFF_TRAILER (ID_OPTIONAL_DATA | 0x2) -#define ID_REPLAY_GAIN (ID_OPTIONAL_DATA | 0x3) -#define ID_CUESHEET (ID_OPTIONAL_DATA | 0x4) -#define ID_CONFIG_BLOCK (ID_OPTIONAL_DATA | 0x5) -#define ID_MD5_CHECKSUM (ID_OPTIONAL_DATA | 0x6) - + +typedef struct { + int32_t byte_length; + void *data; + uchar id; +} WavpackMetadata; + +#define ID_OPTIONAL_DATA 0x20 +#define ID_ODD_SIZE 0x40 +#define ID_LARGE 0x80 + +#define ID_DUMMY 0x0 +#define ID_ENCODER_INFO 0x1 +#define ID_DECORR_TERMS 0x2 +#define ID_DECORR_WEIGHTS 0x3 +#define ID_DECORR_SAMPLES 0x4 +#define ID_ENTROPY_VARS 0x5 +#define ID_HYBRID_PROFILE 0x6 +#define ID_SHAPING_WEIGHTS 0x7 +#define ID_FLOAT_INFO 0x8 +#define ID_INT32_INFO 0x9 +#define ID_WV_BITSTREAM 0xa +#define ID_WVC_BITSTREAM 0xb +#define ID_WVX_BITSTREAM 0xc +#define ID_CHANNEL_INFO 0xd + +#define ID_RIFF_HEADER (ID_OPTIONAL_DATA | 0x1) +#define ID_RIFF_TRAILER 
(ID_OPTIONAL_DATA | 0x2) +#define ID_REPLAY_GAIN (ID_OPTIONAL_DATA | 0x3) +#define ID_CUESHEET (ID_OPTIONAL_DATA | 0x4) +#define ID_CONFIG_BLOCK (ID_OPTIONAL_DATA | 0x5) +#define ID_MD5_CHECKSUM (ID_OPTIONAL_DATA | 0x6) + /*/////////////////////// WavPack Configuration /////////////////////////////// */ - + /* This internal structure is used during encode to provide configuration to */ /* the encoding engine and during decoding to provide fle information back to */ /* the higher level functions. Not all fields are used in both modes. */ - -typedef struct { - int bits_per_sample, bytes_per_sample; - int num_channels, float_norm_exp; - uint32_t flags, sample_rate, channel_mask; -} WavpackConfig; - + +typedef struct { + int bits_per_sample, bytes_per_sample; + int num_channels, float_norm_exp; + uint32_t flags, sample_rate, channel_mask; +} WavpackConfig; + #define CONFIG_BYTES_STORED 3 /* 1-4 bytes/sample */ #define CONFIG_MONO_FLAG 4 /* not stereo */ #define CONFIG_HYBRID_FLAG 8 /* hybrid mode */ @@ -146,7 +146,7 @@ typedef struct { #define CONFIG_CROSS_DECORR 0x20 /* no-delay cross decorrelation */ #define CONFIG_HYBRID_SHAPE 0x40 /* noise shape (hybrid mode only) */ #define CONFIG_FLOAT_DATA 0x80 /* ieee 32-bit floating point data */ - + #define CONFIG_FAST_FLAG 0x200 /* fast mode */ #define CONFIG_HIGH_FLAG 0x800 /* high quality mode */ #define CONFIG_VERY_HIGH_FLAG 0x1000 /* very high */ @@ -163,127 +163,127 @@ typedef struct { #define CONFIG_SKIP_WVX 0x4000000 /* no wvx stream w/ floats & big ints */ #define CONFIG_MD5_CHECKSUM 0x8000000 /* compute & store MD5 signature */ #define CONFIG_OPTIMIZE_MONO 0x80000000 /* optimize for mono streams posing as stereo */ - + /*////////////////////////////// WavPack Stream /////////////////////////////// */ - + /* This internal structure contains everything required to handle a WavPack */ /* "stream", which is defined as a stereo or mono stream of audio samples. 
For */ /* multichannel audio several of these would be required. Each stream contains */ /* pointers to hold a complete allocated block of WavPack data, although it's */ /* possible to decode WavPack blocks without buffering an entire block. */ - -typedef int32_t (*read_stream)(void *, int32_t); - -typedef struct bs { - uchar *buf, *end, *ptr; - void (*wrap)(struct bs *bs); - uint32_t file_bytes, sr; - int error, bc; - read_stream file; -} Bitstream; - -#define MAX_NTERMS 16 -#define MAX_TERM 8 - -struct decorr_pass { - short term, delta, weight_A, weight_B; - int32_t samples_A [MAX_TERM], samples_B [MAX_TERM]; -}; - -struct entropy_data { - uint32_t median [3], slow_level, error_limit; -}; - -struct words_data { - uint32_t bitrate_delta [2], bitrate_acc [2]; - uint32_t pend_data, holding_one, zeros_acc; - int holding_zero, pend_count; - struct entropy_data c [2]; -}; - -typedef struct { - WavpackHeader wphdr; - Bitstream wvbits; - - struct words_data w; - - int num_terms, mute_error; - uint32_t sample_index, crc; - - uchar int32_sent_bits, int32_zeros, int32_ones, int32_dups; - uchar float_flags, float_shift, float_max_exp, float_norm_exp; - - struct decorr_pass decorr_passes [MAX_NTERMS]; - -} WavpackStream; - + +typedef int32_t (*read_stream)(void *, int32_t); + +typedef struct bs { + uchar *buf, *end, *ptr; + void (*wrap)(struct bs *bs); + uint32_t file_bytes, sr; + int error, bc; + read_stream file; +} Bitstream; + +#define MAX_NTERMS 16 +#define MAX_TERM 8 + +struct decorr_pass { + short term, delta, weight_A, weight_B; + int32_t samples_A [MAX_TERM], samples_B [MAX_TERM]; +}; + +struct entropy_data { + uint32_t median [3], slow_level, error_limit; +}; + +struct words_data { + uint32_t bitrate_delta [2], bitrate_acc [2]; + uint32_t pend_data, holding_one, zeros_acc; + int holding_zero, pend_count; + struct entropy_data c [2]; +}; + +typedef struct { + WavpackHeader wphdr; + Bitstream wvbits; + + struct words_data w; + + int num_terms, mute_error; + uint32_t 
sample_index, crc; + + uchar int32_sent_bits, int32_zeros, int32_ones, int32_dups; + uchar float_flags, float_shift, float_max_exp, float_norm_exp; + + struct decorr_pass decorr_passes [MAX_NTERMS]; + +} WavpackStream; + /* flags for float_flags: */ - + #define FLOAT_SHIFT_ONES 1 /* bits left-shifted into float = '1' */ #define FLOAT_SHIFT_SAME 2 /* bits left-shifted into float are the same */ #define FLOAT_SHIFT_SENT 4 /* bits shifted into float are sent literally */ #define FLOAT_ZEROS_SENT 8 /* "zeros" are not all real zeros */ #define FLOAT_NEG_ZEROS 0x10 /* contains negative zeros */ #define FLOAT_EXCEPTIONS 0x20 /* contains exceptions (inf, nan, etc.) */ - + /*///////////////////////////// WavPack Context /////////////////////////////// */ - + /* This internal structure holds everything required to encode or decode WavPack */ /* files. It is recommended that direct access to this structure be minimized */ /* and the provided utilities used instead. */ - -typedef struct { - WavpackConfig config; - WavpackStream stream; - - uchar read_buffer [1024]; - char error_message [80]; - - read_stream infile; - uint32_t total_samples, crc_errors, first_flags; - int open_flags, norm_offset, reduced_channels, lossy_blocks; - -} WavpackContext; - + +typedef struct { + WavpackConfig config; + WavpackStream stream; + + uchar read_buffer [1024]; + char error_message [80]; + + read_stream infile; + uint32_t total_samples, crc_errors, first_flags; + int open_flags, norm_offset, reduced_channels, lossy_blocks; + +} WavpackContext; + /*////////////////////// function prototypes and macros ////////////////////// */ - -#define CLEAR(destin) memset (&destin, 0, sizeof (destin)); - + +#define CLEAR(destin) memset (&destin, 0, sizeof (destin)); + /* bits.c */ - -void bs_open_read (Bitstream *bs, uchar *buffer_start, uchar *buffer_end, read_stream file, uint32_t file_bytes); - -#define bs_is_open(bs) ((bs)->ptr != NULL) - -#define getbit(bs) ( \ - (((bs)->bc) ? 
\ - ((bs)->bc--, (bs)->sr & 1) : \ - (((++((bs)->ptr) != (bs)->end) ? (void) 0 : (bs)->wrap (bs)), (bs)->bc = 7, ((bs)->sr = *((bs)->ptr)) & 1) \ - ) ? \ - ((bs)->sr >>= 1, 1) : \ - ((bs)->sr >>= 1, 0) \ -) - -#define getbits(value, nbits, bs) { \ - while ((nbits) > (bs)->bc) { \ - if (++((bs)->ptr) == (bs)->end) (bs)->wrap (bs); \ - (bs)->sr |= (int32_t)*((bs)->ptr) << (bs)->bc; \ - (bs)->bc += 8; \ - } \ - *(value) = (bs)->sr; \ - if ((bs)->bc > 32) { \ - (bs)->bc -= (nbits); \ - (bs)->sr = *((bs)->ptr) >> (8 - (bs)->bc); \ - } \ - else { \ - (bs)->bc -= (nbits); \ - (bs)->sr >>= (nbits); \ - } \ -} - -void little_endian_to_native (void *data, char *format); -void native_to_little_endian (void *data, char *format); - + +void bs_open_read (Bitstream *bs, uchar *buffer_start, uchar *buffer_end, read_stream file, uint32_t file_bytes); + +#define bs_is_open(bs) ((bs)->ptr != NULL) + +#define getbit(bs) ( \ + (((bs)->bc) ? \ + ((bs)->bc--, (bs)->sr & 1) : \ + (((++((bs)->ptr) != (bs)->end) ? (void) 0 : (bs)->wrap (bs)), (bs)->bc = 7, ((bs)->sr = *((bs)->ptr)) & 1) \ + ) ? \ + ((bs)->sr >>= 1, 1) : \ + ((bs)->sr >>= 1, 0) \ +) + +#define getbits(value, nbits, bs) { \ + while ((nbits) > (bs)->bc) { \ + if (++((bs)->ptr) == (bs)->end) (bs)->wrap (bs); \ + (bs)->sr |= (int32_t)*((bs)->ptr) << (bs)->bc; \ + (bs)->bc += 8; \ + } \ + *(value) = (bs)->sr; \ + if ((bs)->bc > 32) { \ + (bs)->bc -= (nbits); \ + (bs)->sr = *((bs)->ptr) >> (8 - (bs)->bc); \ + } \ + else { \ + (bs)->bc -= (nbits); \ + (bs)->sr >>= (nbits); \ + } \ +} + +void little_endian_to_native (void *data, char *format); +void native_to_little_endian (void *data, char *format); + /* These macros implement the weight application and update operations */ /* that are at the heart of the decorrelation loops. 
Note that when there */ /* are several alternative versions of the same macro (marked with PERFCOND) */ @@ -291,94 +291,94 @@ void native_to_little_endian (void *data, char *format); /* decoding and the user should choose the one that provides the best */ /* performance. This may be easier to check when NOT using the assembly */ /* language optimizations. */ - + #if 1 /* PERFCOND */ -#define apply_weight_i(weight, sample) ((weight * sample + 512) >> 10) -#else -#define apply_weight_i(weight, sample) ((((weight * sample) >> 8) + 2) >> 2) -#endif - -#define apply_weight_f(weight, sample) (((((sample & 0xffffL) * weight) >> 9) + \ - (((sample & ~0xffffL) >> 9) * weight) + 1) >> 1) - +#define apply_weight_i(weight, sample) ((weight * sample + 512) >> 10) +#else +#define apply_weight_i(weight, sample) ((((weight * sample) >> 8) + 2) >> 2) +#endif + +#define apply_weight_f(weight, sample) (((((sample & 0xffffL) * weight) >> 9) + \ + (((sample & ~0xffffL) >> 9) * weight) + 1) >> 1) + #if 1 /* PERFCOND */ -#define apply_weight(weight, sample) (sample != (short) sample ? \ - apply_weight_f (weight, sample) : apply_weight_i (weight, sample)) -#else -#define apply_weight(weight, sample) ((int32_t)((weight * (int64_t) sample + 512) >> 10)) -#endif - +#define apply_weight(weight, sample) (sample != (short) sample ? \ + apply_weight_f (weight, sample) : apply_weight_i (weight, sample)) +#else +#define apply_weight(weight, sample) ((int32_t)((weight * (int64_t) sample + 512) >> 10)) +#endif + #if 0 /* PERFCOND */ -#define update_weight(weight, delta, source, result) \ - if (source && result) { int32_t s = (int32_t) (source ^ result) >> 31; weight = (delta ^ s) + (weight - s); } -#elif 1 -#define update_weight(weight, delta, source, result) \ - if (source && result) weight += (((source ^ result) >> 30) | 1) * delta -#else -#define update_weight(weight, delta, source, result) \ - if (source && result) (source ^ result) < 0 ? 
(weight -= delta) : (weight += delta) -#endif - -#define update_weight_clip(weight, delta, source, result) \ - if (source && result && ((source ^ result) < 0 ? (weight -= delta) < -1024 : (weight += delta) > 1024)) \ - weight = weight < 0 ? -1024 : 1024 - +#define update_weight(weight, delta, source, result) \ + if (source && result) { int32_t s = (int32_t) (source ^ result) >> 31; weight = (delta ^ s) + (weight - s); } +#elif 1 +#define update_weight(weight, delta, source, result) \ + if (source && result) weight += (((source ^ result) >> 30) | 1) * delta +#else +#define update_weight(weight, delta, source, result) \ + if (source && result) (source ^ result) < 0 ? (weight -= delta) : (weight += delta) +#endif + +#define update_weight_clip(weight, delta, source, result) \ + if (source && result && ((source ^ result) < 0 ? (weight -= delta) < -1024 : (weight += delta) > 1024)) \ + weight = weight < 0 ? -1024 : 1024 + /* unpack.c */ - -int unpack_init (WavpackContext *wpc); -int init_wv_bitstream (WavpackContext *wpc, WavpackMetadata *wpmd); -int read_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd); -int read_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd); -int read_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd); -int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd); -int read_int32_info (WavpackStream *wps, WavpackMetadata *wpmd); -int read_channel_info (WavpackContext *wpc, WavpackMetadata *wpmd); -int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd); -int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count); -int check_crc_error (WavpackContext *wpc); - + +int unpack_init (WavpackContext *wpc); +int init_wv_bitstream (WavpackContext *wpc, WavpackMetadata *wpmd); +int read_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd); +int read_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd); +int read_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd); +int 
read_float_info (WavpackStream *wps, WavpackMetadata *wpmd); +int read_int32_info (WavpackStream *wps, WavpackMetadata *wpmd); +int read_channel_info (WavpackContext *wpc, WavpackMetadata *wpmd); +int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd); +int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count); +int check_crc_error (WavpackContext *wpc); + /* metadata.c stuff */ - -int read_metadata_buff (WavpackContext *wpc, WavpackMetadata *wpmd); -int process_metadata (WavpackContext *wpc, WavpackMetadata *wpmd); - + +int read_metadata_buff (WavpackContext *wpc, WavpackMetadata *wpmd); +int process_metadata (WavpackContext *wpc, WavpackMetadata *wpmd); + /* words.c stuff */ - -int read_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd); -int read_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd); -int32_t get_words (int32_t *buffer, int nsamples, uint32_t flags, - struct words_data *w, Bitstream *bs); -int32_t exp2s (int log); -int restore_weight (signed char weight); - -#define WORD_EOF (1L << 31) - + +int read_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd); +int read_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd); +int32_t get_words (int32_t *buffer, int nsamples, uint32_t flags, + struct words_data *w, Bitstream *bs); +int32_t exp2s (int log); +int restore_weight (signed char weight); + +#define WORD_EOF (1L << 31) + /* float.c */ - -int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd); -void float_values (WavpackStream *wps, int32_t *values, int32_t num_values); - + +int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd); +void float_values (WavpackStream *wps, int32_t *values, int32_t num_values); + /* wputils.c */ - -WavpackContext *WavpackOpenFileInput (read_stream infile, char *error); - -int WavpackGetMode (WavpackContext *wpc); - -#define MODE_WVC 0x1 -#define MODE_LOSSLESS 0x2 -#define MODE_HYBRID 0x4 -#define MODE_FLOAT 0x8 -#define MODE_VALID_TAG 0x10 
-#define MODE_HIGH 0x20 -#define MODE_FAST 0x40 - -uint32_t WavpackUnpackSamples (WavpackContext *wpc, int32_t *buffer, uint32_t samples); -uint32_t WavpackGetNumSamples (WavpackContext *wpc); -uint32_t WavpackGetSampleIndex (WavpackContext *wpc); -int WavpackGetNumErrors (WavpackContext *wpc); -int WavpackLossyBlocks (WavpackContext *wpc); -uint32_t WavpackGetSampleRate (WavpackContext *wpc); -int WavpackGetBitsPerSample (WavpackContext *wpc); -int WavpackGetBytesPerSample (WavpackContext *wpc); -int WavpackGetNumChannels (WavpackContext *wpc); -int WavpackGetReducedChannels (WavpackContext *wpc); + +WavpackContext *WavpackOpenFileInput (read_stream infile, char *error); + +int WavpackGetMode (WavpackContext *wpc); + +#define MODE_WVC 0x1 +#define MODE_LOSSLESS 0x2 +#define MODE_HYBRID 0x4 +#define MODE_FLOAT 0x8 +#define MODE_VALID_TAG 0x10 +#define MODE_HIGH 0x20 +#define MODE_FAST 0x40 + +uint32_t WavpackUnpackSamples (WavpackContext *wpc, int32_t *buffer, uint32_t samples); +uint32_t WavpackGetNumSamples (WavpackContext *wpc); +uint32_t WavpackGetSampleIndex (WavpackContext *wpc); +int WavpackGetNumErrors (WavpackContext *wpc); +int WavpackLossyBlocks (WavpackContext *wpc); +uint32_t WavpackGetSampleRate (WavpackContext *wpc); +int WavpackGetBitsPerSample (WavpackContext *wpc); +int WavpackGetBytesPerSample (WavpackContext *wpc); +int WavpackGetNumChannels (WavpackContext *wpc); +int WavpackGetReducedChannels (WavpackContext *wpc); diff --git a/src/engine/external/wavpack/words.c b/src/engine/external/wavpack/words.c index 0e5a3db7..d39563b0 100644 --- a/src/engine/external/wavpack/words.c +++ b/src/engine/external/wavpack/words.c @@ -1,560 +1,560 @@ -//////////////////////////////////////////////////////////////////////////// -// **** WAVPACK **** // -// Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // -// All Rights Reserved. 
// -// Distributed under the BSD Software License (see license.txt) // -//////////////////////////////////////////////////////////////////////////// - -// words.c - -// This module provides entropy word encoding and decoding functions using -// a variation on the Rice method. This was introduced in version 3.93 -// because it allows splitting the data into a "lossy" stream and a -// "correction" stream in a very efficient manner and is therefore ideal -// for the "hybrid" mode. For 4.0, the efficiency of this method was -// significantly improved by moving away from the normal Rice restriction of -// using powers of two for the modulus divisions and now the method can be -// used for both hybrid and pure lossless encoding. - -// Samples are divided by median probabilities at 5/7 (71.43%), 10/49 (20.41%), -// and 20/343 (5.83%). Each zone has 3.5 times fewer samples than the -// previous. Using standard Rice coding on this data would result in 1.4 -// bits per sample average (not counting sign bit). However, there is a -// very simple encoding that is over 99% efficient with this data and -// results in about 1.22 bits per sample. - -#include "wavpack.h" - -#include <string.h> - -//////////////////////////////// local macros ///////////////////////////////// - -#define LIMIT_ONES 16 // maximum consecutive 1s sent for "div" data - -// these control the time constant "slow_level" which is used for hybrid mode -// that controls bitrate as a function of residual level (HYBRID_BITRATE). -#define SLS 8 -#define SLO ((1 << (SLS - 1))) - -// these control the time constant of the 3 median level breakpoints -#define DIV0 128 // 5/7 of samples -#define DIV1 64 // 10/49 of samples -#define DIV2 32 // 20/343 of samples - -// this macro retrieves the specified median breakpoint (without frac; min = 1) -#define GET_MED(med) (((c->median [med]) >> 4) + 1) - -// These macros update the specified median breakpoints. 
Note that the median -// is incremented when the sample is higher than the median, else decremented. -// They are designed so that the median will never drop below 1 and the value -// is essentially stationary if there are 2 increments for every 5 decrements. - -#define INC_MED0() (c->median [0] += ((c->median [0] + DIV0) / DIV0) * 5) -#define DEC_MED0() (c->median [0] -= ((c->median [0] + (DIV0-2)) / DIV0) * 2) -#define INC_MED1() (c->median [1] += ((c->median [1] + DIV1) / DIV1) * 5) -#define DEC_MED1() (c->median [1] -= ((c->median [1] + (DIV1-2)) / DIV1) * 2) -#define INC_MED2() (c->median [2] += ((c->median [2] + DIV2) / DIV2) * 5) -#define DEC_MED2() (c->median [2] -= ((c->median [2] + (DIV2-2)) / DIV2) * 2) - -#define count_bits(av) ( \ - (av) < (1 << 8) ? nbits_table [av] : \ - ( \ - (av) < (1L << 16) ? nbits_table [(av) >> 8] + 8 : \ - ((av) < (1L << 24) ? nbits_table [(av) >> 16] + 16 : nbits_table [(av) >> 24] + 24) \ - ) \ -) - -///////////////////////////// local table storage //////////////////////////// - -const char nbits_table [] = { - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, // 0 - 15 - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, // 16 - 31 - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, // 32 - 47 - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, // 48 - 63 - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 64 - 79 - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 80 - 95 - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 96 - 111 - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 112 - 127 - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 128 - 143 - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 144 - 159 - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 160 - 175 - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 176 - 191 - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 192 - 207 - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 208 - 223 - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 224 - 239 - 
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 // 240 - 255 -}; - -static const uchar log2_table [] = { - 0x00, 0x01, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, 0x10, 0x11, 0x12, 0x14, 0x15, - 0x16, 0x18, 0x19, 0x1a, 0x1c, 0x1d, 0x1e, 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a, - 0x2c, 0x2d, 0x2e, 0x2f, 0x31, 0x32, 0x33, 0x34, 0x36, 0x37, 0x38, 0x39, 0x3b, 0x3c, 0x3d, 0x3e, - 0x3f, 0x41, 0x42, 0x43, 0x44, 0x45, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4d, 0x4e, 0x4f, 0x50, 0x51, - 0x52, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, - 0x64, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x74, 0x75, - 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, - 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, - 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, - 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb2, - 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc0, - 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcb, 0xcc, 0xcd, 0xce, - 0xcf, 0xd0, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd8, 0xd9, 0xda, 0xdb, - 0xdc, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe4, 0xe5, 0xe6, 0xe7, 0xe7, - 0xe8, 0xe9, 0xea, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xee, 0xef, 0xf0, 0xf1, 0xf1, 0xf2, 0xf3, 0xf4, - 0xf4, 0xf5, 0xf6, 0xf7, 0xf7, 0xf8, 0xf9, 0xf9, 0xfa, 0xfb, 0xfc, 0xfc, 0xfd, 0xfe, 0xff, 0xff -}; - -static const uchar exp2_table [] = { - 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b, - 0x0b, 0x0c, 0x0d, 0x0e, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x12, 0x13, 0x13, 0x14, 0x15, 0x16, 0x16, - 0x17, 0x18, 0x19, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1d, 0x1e, 0x1f, 0x20, 0x20, 0x21, 
0x22, 0x23, - 0x24, 0x24, 0x25, 0x26, 0x27, 0x28, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3a, 0x3b, 0x3c, 0x3d, - 0x3e, 0x3f, 0x40, 0x41, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x48, 0x49, 0x4a, 0x4b, - 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, - 0x5b, 0x5c, 0x5d, 0x5e, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, - 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, - 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x87, 0x88, 0x89, 0x8a, - 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, - 0x9c, 0x9d, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, - 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, - 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc8, 0xc9, 0xca, 0xcb, 0xcd, 0xce, 0xcf, 0xd0, 0xd2, 0xd3, 0xd4, - 0xd6, 0xd7, 0xd8, 0xd9, 0xdb, 0xdc, 0xdd, 0xde, 0xe0, 0xe1, 0xe2, 0xe4, 0xe5, 0xe6, 0xe8, 0xe9, - 0xea, 0xec, 0xed, 0xee, 0xf0, 0xf1, 0xf2, 0xf4, 0xf5, 0xf6, 0xf8, 0xf9, 0xfa, 0xfc, 0xfd, 0xff -}; - -static const char ones_count_table [] = { - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, - 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8 -}; - -///////////////////////////// executable code //////////////////////////////// - -void init_words (WavpackStream *wps) -{ - 
CLEAR (wps->w); -} - -static int mylog2 (uint32_t avalue); - -// Read the median log2 values from the specifed metadata structure, convert -// them back to 32-bit unsigned values and store them. If length is not -// exactly correct then we flag and return an error. - -int read_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd) -{ - uchar *byteptr = wpmd->data; - - if (wpmd->byte_length != ((wps->wphdr.flags & MONO_DATA) ? 6 : 12)) - return FALSE; - - wps->w.c [0].median [0] = exp2s (byteptr [0] + (byteptr [1] << 8)); - wps->w.c [0].median [1] = exp2s (byteptr [2] + (byteptr [3] << 8)); - wps->w.c [0].median [2] = exp2s (byteptr [4] + (byteptr [5] << 8)); - - if (!(wps->wphdr.flags & MONO_DATA)) { - wps->w.c [1].median [0] = exp2s (byteptr [6] + (byteptr [7] << 8)); - wps->w.c [1].median [1] = exp2s (byteptr [8] + (byteptr [9] << 8)); - wps->w.c [1].median [2] = exp2s (byteptr [10] + (byteptr [11] << 8)); - } - - return TRUE; -} - -// Read the hybrid related values from the specifed metadata structure, convert -// them back to their internal formats and store them. The extended profile -// stuff is not implemented yet, so return an error if we get more data than -// we know what to do with. 
- -int read_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd) -{ - uchar *byteptr = wpmd->data; - uchar *endptr = byteptr + wpmd->byte_length; - - if (wps->wphdr.flags & HYBRID_BITRATE) { - wps->w.c [0].slow_level = exp2s (byteptr [0] + (byteptr [1] << 8)); - byteptr += 2; - - if (!(wps->wphdr.flags & MONO_DATA)) { - wps->w.c [1].slow_level = exp2s (byteptr [0] + (byteptr [1] << 8)); - byteptr += 2; - } - } - - wps->w.bitrate_acc [0] = (int32_t)(byteptr [0] + (byteptr [1] << 8)) << 16; - byteptr += 2; - - if (!(wps->wphdr.flags & MONO_DATA)) { - wps->w.bitrate_acc [1] = (int32_t)(byteptr [0] + (byteptr [1] << 8)) << 16; - byteptr += 2; - } - - if (byteptr < endptr) { - wps->w.bitrate_delta [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - byteptr += 2; - - if (!(wps->wphdr.flags & MONO_DATA)) { - wps->w.bitrate_delta [1] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); - byteptr += 2; - } - - if (byteptr < endptr) - return FALSE; - } - else - wps->w.bitrate_delta [0] = wps->w.bitrate_delta [1] = 0; - - return TRUE; -} - -// This function is called during both encoding and decoding of hybrid data to -// update the "error_limit" variable which determines the maximum sample error -// allowed in the main bitstream. In the HYBRID_BITRATE mode (which is the only -// currently implemented) this is calculated from the slow_level values and the -// bitrate accumulators. Note that the bitrate accumulators can be changing. 
- -void update_error_limit (struct words_data *w, uint32_t flags) -{ - int bitrate_0 = (w->bitrate_acc [0] += w->bitrate_delta [0]) >> 16; - - if (flags & MONO_DATA) { - if (flags & HYBRID_BITRATE) { - int slow_log_0 = (w->c [0].slow_level + SLO) >> SLS; - - if (slow_log_0 - bitrate_0 > -0x100) - w->c [0].error_limit = exp2s (slow_log_0 - bitrate_0 + 0x100); - else - w->c [0].error_limit = 0; - } - else - w->c [0].error_limit = exp2s (bitrate_0); - } - else { - int bitrate_1 = (w->bitrate_acc [1] += w->bitrate_delta [1]) >> 16; - - if (flags & HYBRID_BITRATE) { - int slow_log_0 = (w->c [0].slow_level + SLO) >> SLS; - int slow_log_1 = (w->c [1].slow_level + SLO) >> SLS; - - if (flags & HYBRID_BALANCE) { - int balance = (slow_log_1 - slow_log_0 + bitrate_1 + 1) >> 1; - - if (balance > bitrate_0) { - bitrate_1 = bitrate_0 * 2; - bitrate_0 = 0; - } - else if (-balance > bitrate_0) { - bitrate_0 = bitrate_0 * 2; - bitrate_1 = 0; - } - else { - bitrate_1 = bitrate_0 + balance; - bitrate_0 = bitrate_0 - balance; - } - } - - if (slow_log_0 - bitrate_0 > -0x100) - w->c [0].error_limit = exp2s (slow_log_0 - bitrate_0 + 0x100); - else - w->c [0].error_limit = 0; - - if (slow_log_1 - bitrate_1 > -0x100) - w->c [1].error_limit = exp2s (slow_log_1 - bitrate_1 + 0x100); - else - w->c [1].error_limit = 0; - } - else { - w->c [0].error_limit = exp2s (bitrate_0); - w->c [1].error_limit = exp2s (bitrate_1); - } - } -} - -static uint32_t read_code (Bitstream *bs, uint32_t maxcode); - -// Read the next word from the bitstream "wvbits" and return the value. This -// function can be used for hybrid or lossless streams, but since an -// optimized version is available for lossless this function would normally -// be used for hybrid only. If a hybrid lossless stream is being read then -// the "correction" offset is written at the specified pointer. A return value -// of WORD_EOF indicates that the end of the bitstream was reached (all 1s) or -// some other error occurred. 
- -int32_t get_words (int32_t *buffer, int nsamples, uint32_t flags, - struct words_data *w, Bitstream *bs) -{ - register struct entropy_data *c = w->c; - int csamples; - - if (!(flags & MONO_DATA)) - nsamples *= 2; - - for (csamples = 0; csamples < nsamples; ++csamples) { - uint32_t ones_count, low, mid, high; - - if (!(flags & MONO_DATA)) - c = w->c + (csamples & 1); - - if (!(w->c [0].median [0] & ~1) && !w->holding_zero && !w->holding_one && !(w->c [1].median [0] & ~1)) { - uint32_t mask; - int cbits; - - if (w->zeros_acc) { - if (--w->zeros_acc) { - c->slow_level -= (c->slow_level + SLO) >> SLS; - *buffer++ = 0; - continue; - } - } - else { - for (cbits = 0; cbits < 33 && getbit (bs); ++cbits); - - if (cbits == 33) - break; - - if (cbits < 2) - w->zeros_acc = cbits; - else { - for (mask = 1, w->zeros_acc = 0; --cbits; mask <<= 1) - if (getbit (bs)) - w->zeros_acc |= mask; - - w->zeros_acc |= mask; - } - - if (w->zeros_acc) { - c->slow_level -= (c->slow_level + SLO) >> SLS; - CLEAR (w->c [0].median); - CLEAR (w->c [1].median); - *buffer++ = 0; - continue; - } - } - } - - if (w->holding_zero) - ones_count = w->holding_zero = 0; - else { - int next8; - - if (bs->bc < 8) { - if (++(bs->ptr) == bs->end) - bs->wrap (bs); - - next8 = (bs->sr |= *(bs->ptr) << bs->bc) & 0xff; - bs->bc += 8; - } - else - next8 = bs->sr & 0xff; - - if (next8 == 0xff) { - bs->bc -= 8; - bs->sr >>= 8; - - for (ones_count = 8; ones_count < (LIMIT_ONES + 1) && getbit (bs); ++ones_count); - - if (ones_count == (LIMIT_ONES + 1)) - break; - - if (ones_count == LIMIT_ONES) { - uint32_t mask; - int cbits; - - for (cbits = 0; cbits < 33 && getbit (bs); ++cbits); - - if (cbits == 33) - break; - - if (cbits < 2) - ones_count = cbits; - else { - for (mask = 1, ones_count = 0; --cbits; mask <<= 1) - if (getbit (bs)) - ones_count |= mask; - - ones_count |= mask; - } - - ones_count += LIMIT_ONES; - } - } - else { - bs->bc -= (ones_count = ones_count_table [next8]) + 1; - bs->sr >>= ones_count + 1; - } - 
- if (w->holding_one) { - w->holding_one = ones_count & 1; - ones_count = (ones_count >> 1) + 1; - } - else { - w->holding_one = ones_count & 1; - ones_count >>= 1; - } - - w->holding_zero = ~w->holding_one & 1; - } - - if ((flags & HYBRID_FLAG) && ((flags & MONO_DATA) || !(csamples & 1))) - update_error_limit (w, flags); - - if (ones_count == 0) { - low = 0; - high = GET_MED (0) - 1; - DEC_MED0 (); - } - else { - low = GET_MED (0); - INC_MED0 (); - - if (ones_count == 1) { - high = low + GET_MED (1) - 1; - DEC_MED1 (); - } - else { - low += GET_MED (1); - INC_MED1 (); - - if (ones_count == 2) { - high = low + GET_MED (2) - 1; - DEC_MED2 (); - } - else { - low += (ones_count - 2) * GET_MED (2); - high = low + GET_MED (2) - 1; - INC_MED2 (); - } - } - } - - mid = (high + low + 1) >> 1; - - if (!c->error_limit) - mid = read_code (bs, high - low) + low; - else while (high - low > c->error_limit) { - if (getbit (bs)) - mid = (high + (low = mid) + 1) >> 1; - else - mid = ((high = mid - 1) + low + 1) >> 1; - } - - *buffer++ = getbit (bs) ? ~mid : mid; - - if (flags & HYBRID_BITRATE) - c->slow_level = c->slow_level - ((c->slow_level + SLO) >> SLS) + mylog2 (mid); - } - - return (flags & MONO_DATA) ? csamples : (csamples / 2); -} - -// Read a single unsigned value from the specified bitstream with a value -// from 0 to maxcode. If there are exactly a power of two number of possible -// codes then this will read a fixed number of bits; otherwise it reads the -// minimum number of bits and then determines whether another bit is needed -// to define the code. 
- -static uint32_t read_code (Bitstream *bs, uint32_t maxcode) -{ - int bitcount = count_bits (maxcode); - uint32_t extras = (1L << bitcount) - maxcode - 1, code; - - if (!bitcount) - return 0; - - getbits (&code, bitcount - 1, bs); - code &= (1L << (bitcount - 1)) - 1; - - if (code >= extras) { - code = (code << 1) - extras; - - if (getbit (bs)) - ++code; - } - - return code; -} - -// The concept of a base 2 logarithm is used in many parts of WavPack. It is -// a way of sufficiently accurately representing 32-bit signed and unsigned -// values storing only 16 bits (actually fewer). It is also used in the hybrid -// mode for quickly comparing the relative magnitude of large values (i.e. -// division) and providing smooth exponentials using only addition. - -// These are not strict logarithms in that they become linear around zero and -// can therefore represent both zero and negative values. They have 8 bits -// of precision and in "roundtrip" conversions the total error never exceeds 1 -// part in 225 except for the cases of +/-115 and +/-195 (which error by 1). - - -// This function returns the log2 for the specified 32-bit unsigned value. -// The maximum value allowed is about 0xff800000 and returns 8447. - -static int mylog2 (uint32_t avalue) -{ - int dbits; - - if ((avalue += avalue >> 9) < (1 << 8)) { - dbits = nbits_table [avalue]; - return (dbits << 8) + log2_table [(avalue << (9 - dbits)) & 0xff]; - } - else { - if (avalue < (1L << 16)) - dbits = nbits_table [avalue >> 8] + 8; - else if (avalue < (1L << 24)) - dbits = nbits_table [avalue >> 16] + 16; - else - dbits = nbits_table [avalue >> 24] + 24; - - return (dbits << 8) + log2_table [(avalue >> (dbits - 9)) & 0xff]; - } -} - -// This function returns the log2 for the specified 32-bit signed value. -// All input values are valid and the return values are in the range of -// +/- 8192. - -int log2s (int32_t value) -{ - return (value < 0) ? 
-mylog2 (-value) : mylog2 (value); -} - -// This function returns the original integer represented by the supplied -// logarithm (at least within the provided accuracy). The log is signed, -// but since a full 32-bit value is returned this can be used for unsigned -// conversions as well (i.e. the input range is -8192 to +8447). - -int32_t exp2s (int log) -{ - uint32_t value; - - if (log < 0) - return -exp2s (-log); - - value = exp2_table [log & 0xff] | 0x100; - - if ((log >>= 8) <= 9) - return value >> (9 - log); - else - return value << (log - 9); -} - -// These two functions convert internal weights (which are normally +/-1024) -// to and from an 8-bit signed character version for storage in metadata. The -// weights are clipped here in the case that they are outside that range. - -int restore_weight (signed char weight) -{ - int result; - - if ((result = (int) weight << 3) > 0) - result += (result + 64) >> 7; - - return result; -} +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 1998 - 2006 Conifer Software. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// words.c + +// This module provides entropy word encoding and decoding functions using +// a variation on the Rice method. This was introduced in version 3.93 +// because it allows splitting the data into a "lossy" stream and a +// "correction" stream in a very efficient manner and is therefore ideal +// for the "hybrid" mode. For 4.0, the efficiency of this method was +// significantly improved by moving away from the normal Rice restriction of +// using powers of two for the modulus divisions and now the method can be +// used for both hybrid and pure lossless encoding. 
+ +// Samples are divided by median probabilities at 5/7 (71.43%), 10/49 (20.41%), +// and 20/343 (5.83%). Each zone has 3.5 times fewer samples than the +// previous. Using standard Rice coding on this data would result in 1.4 +// bits per sample average (not counting sign bit). However, there is a +// very simple encoding that is over 99% efficient with this data and +// results in about 1.22 bits per sample. + +#include "wavpack.h" + +#include <string.h> + +//////////////////////////////// local macros ///////////////////////////////// + +#define LIMIT_ONES 16 // maximum consecutive 1s sent for "div" data + +// these control the time constant "slow_level" which is used for hybrid mode +// that controls bitrate as a function of residual level (HYBRID_BITRATE). +#define SLS 8 +#define SLO ((1 << (SLS - 1))) + +// these control the time constant of the 3 median level breakpoints +#define DIV0 128 // 5/7 of samples +#define DIV1 64 // 10/49 of samples +#define DIV2 32 // 20/343 of samples + +// this macro retrieves the specified median breakpoint (without frac; min = 1) +#define GET_MED(med) (((c->median [med]) >> 4) + 1) + +// These macros update the specified median breakpoints. Note that the median +// is incremented when the sample is higher than the median, else decremented. +// They are designed so that the median will never drop below 1 and the value +// is essentially stationary if there are 2 increments for every 5 decrements. + +#define INC_MED0() (c->median [0] += ((c->median [0] + DIV0) / DIV0) * 5) +#define DEC_MED0() (c->median [0] -= ((c->median [0] + (DIV0-2)) / DIV0) * 2) +#define INC_MED1() (c->median [1] += ((c->median [1] + DIV1) / DIV1) * 5) +#define DEC_MED1() (c->median [1] -= ((c->median [1] + (DIV1-2)) / DIV1) * 2) +#define INC_MED2() (c->median [2] += ((c->median [2] + DIV2) / DIV2) * 5) +#define DEC_MED2() (c->median [2] -= ((c->median [2] + (DIV2-2)) / DIV2) * 2) + +#define count_bits(av) ( \ + (av) < (1 << 8) ? 
nbits_table [av] : \ + ( \ + (av) < (1L << 16) ? nbits_table [(av) >> 8] + 8 : \ + ((av) < (1L << 24) ? nbits_table [(av) >> 16] + 16 : nbits_table [(av) >> 24] + 24) \ + ) \ +) + +///////////////////////////// local table storage //////////////////////////// + +const char nbits_table [] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, // 0 - 15 + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, // 16 - 31 + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, // 32 - 47 + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, // 48 - 63 + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 64 - 79 + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 80 - 95 + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 96 - 111 + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 112 - 127 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 128 - 143 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 144 - 159 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 160 - 175 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 176 - 191 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 192 - 207 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 208 - 223 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 224 - 239 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 // 240 - 255 +}; + +static const uchar log2_table [] = { + 0x00, 0x01, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, 0x10, 0x11, 0x12, 0x14, 0x15, + 0x16, 0x18, 0x19, 0x1a, 0x1c, 0x1d, 0x1e, 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a, + 0x2c, 0x2d, 0x2e, 0x2f, 0x31, 0x32, 0x33, 0x34, 0x36, 0x37, 0x38, 0x39, 0x3b, 0x3c, 0x3d, 0x3e, + 0x3f, 0x41, 0x42, 0x43, 0x44, 0x45, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4d, 0x4e, 0x4f, 0x50, 0x51, + 0x52, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, + 0x64, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x74, 0x75, + 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 
0x83, 0x84, 0x85, + 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, + 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, + 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb2, + 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc0, + 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcb, 0xcc, 0xcd, 0xce, + 0xcf, 0xd0, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd8, 0xd9, 0xda, 0xdb, + 0xdc, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe4, 0xe5, 0xe6, 0xe7, 0xe7, + 0xe8, 0xe9, 0xea, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xee, 0xef, 0xf0, 0xf1, 0xf1, 0xf2, 0xf3, 0xf4, + 0xf4, 0xf5, 0xf6, 0xf7, 0xf7, 0xf8, 0xf9, 0xf9, 0xfa, 0xfb, 0xfc, 0xfc, 0xfd, 0xfe, 0xff, 0xff +}; + +static const uchar exp2_table [] = { + 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b, + 0x0b, 0x0c, 0x0d, 0x0e, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x12, 0x13, 0x13, 0x14, 0x15, 0x16, 0x16, + 0x17, 0x18, 0x19, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1d, 0x1e, 0x1f, 0x20, 0x20, 0x21, 0x22, 0x23, + 0x24, 0x24, 0x25, 0x26, 0x27, 0x28, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3a, 0x3b, 0x3c, 0x3d, + 0x3e, 0x3f, 0x40, 0x41, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x48, 0x49, 0x4a, 0x4b, + 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, + 0x5b, 0x5c, 0x5d, 0x5e, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x87, 0x88, 0x89, 0x8a, + 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x95, 0x96, 0x97, 
0x98, 0x99, 0x9a, 0x9b, + 0x9c, 0x9d, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, + 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, + 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc8, 0xc9, 0xca, 0xcb, 0xcd, 0xce, 0xcf, 0xd0, 0xd2, 0xd3, 0xd4, + 0xd6, 0xd7, 0xd8, 0xd9, 0xdb, 0xdc, 0xdd, 0xde, 0xe0, 0xe1, 0xe2, 0xe4, 0xe5, 0xe6, 0xe8, 0xe9, + 0xea, 0xec, 0xed, 0xee, 0xf0, 0xf1, 0xf2, 0xf4, 0xf5, 0xf6, 0xf8, 0xf9, 0xfa, 0xfc, 0xfd, 0xff +}; + +static const char ones_count_table [] = { + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5, + 0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8 +}; + +///////////////////////////// executable code //////////////////////////////// + +void init_words (WavpackStream *wps) +{ + CLEAR (wps->w); +} + +static int mylog2 (uint32_t avalue); + +// Read the median log2 values from the specifed metadata structure, convert +// them back to 32-bit unsigned values and store them. If length is not +// exactly correct then we flag and return an error. + +int read_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd) +{ + uchar *byteptr = wpmd->data; + + if (wpmd->byte_length != ((wps->wphdr.flags & MONO_DATA) ? 
6 : 12)) + return FALSE; + + wps->w.c [0].median [0] = exp2s (byteptr [0] + (byteptr [1] << 8)); + wps->w.c [0].median [1] = exp2s (byteptr [2] + (byteptr [3] << 8)); + wps->w.c [0].median [2] = exp2s (byteptr [4] + (byteptr [5] << 8)); + + if (!(wps->wphdr.flags & MONO_DATA)) { + wps->w.c [1].median [0] = exp2s (byteptr [6] + (byteptr [7] << 8)); + wps->w.c [1].median [1] = exp2s (byteptr [8] + (byteptr [9] << 8)); + wps->w.c [1].median [2] = exp2s (byteptr [10] + (byteptr [11] << 8)); + } + + return TRUE; +} + +// Read the hybrid related values from the specifed metadata structure, convert +// them back to their internal formats and store them. The extended profile +// stuff is not implemented yet, so return an error if we get more data than +// we know what to do with. + +int read_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd) +{ + uchar *byteptr = wpmd->data; + uchar *endptr = byteptr + wpmd->byte_length; + + if (wps->wphdr.flags & HYBRID_BITRATE) { + wps->w.c [0].slow_level = exp2s (byteptr [0] + (byteptr [1] << 8)); + byteptr += 2; + + if (!(wps->wphdr.flags & MONO_DATA)) { + wps->w.c [1].slow_level = exp2s (byteptr [0] + (byteptr [1] << 8)); + byteptr += 2; + } + } + + wps->w.bitrate_acc [0] = (int32_t)(byteptr [0] + (byteptr [1] << 8)) << 16; + byteptr += 2; + + if (!(wps->wphdr.flags & MONO_DATA)) { + wps->w.bitrate_acc [1] = (int32_t)(byteptr [0] + (byteptr [1] << 8)) << 16; + byteptr += 2; + } + + if (byteptr < endptr) { + wps->w.bitrate_delta [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); + byteptr += 2; + + if (!(wps->wphdr.flags & MONO_DATA)) { + wps->w.bitrate_delta [1] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8))); + byteptr += 2; + } + + if (byteptr < endptr) + return FALSE; + } + else + wps->w.bitrate_delta [0] = wps->w.bitrate_delta [1] = 0; + + return TRUE; +} + +// This function is called during both encoding and decoding of hybrid data to +// update the "error_limit" variable which determines the maximum sample 
error +// allowed in the main bitstream. In the HYBRID_BITRATE mode (which is the only +// currently implemented) this is calculated from the slow_level values and the +// bitrate accumulators. Note that the bitrate accumulators can be changing. + +void update_error_limit (struct words_data *w, uint32_t flags) +{ + int bitrate_0 = (w->bitrate_acc [0] += w->bitrate_delta [0]) >> 16; + + if (flags & MONO_DATA) { + if (flags & HYBRID_BITRATE) { + int slow_log_0 = (w->c [0].slow_level + SLO) >> SLS; + + if (slow_log_0 - bitrate_0 > -0x100) + w->c [0].error_limit = exp2s (slow_log_0 - bitrate_0 + 0x100); + else + w->c [0].error_limit = 0; + } + else + w->c [0].error_limit = exp2s (bitrate_0); + } + else { + int bitrate_1 = (w->bitrate_acc [1] += w->bitrate_delta [1]) >> 16; + + if (flags & HYBRID_BITRATE) { + int slow_log_0 = (w->c [0].slow_level + SLO) >> SLS; + int slow_log_1 = (w->c [1].slow_level + SLO) >> SLS; + + if (flags & HYBRID_BALANCE) { + int balance = (slow_log_1 - slow_log_0 + bitrate_1 + 1) >> 1; + + if (balance > bitrate_0) { + bitrate_1 = bitrate_0 * 2; + bitrate_0 = 0; + } + else if (-balance > bitrate_0) { + bitrate_0 = bitrate_0 * 2; + bitrate_1 = 0; + } + else { + bitrate_1 = bitrate_0 + balance; + bitrate_0 = bitrate_0 - balance; + } + } + + if (slow_log_0 - bitrate_0 > -0x100) + w->c [0].error_limit = exp2s (slow_log_0 - bitrate_0 + 0x100); + else + w->c [0].error_limit = 0; + + if (slow_log_1 - bitrate_1 > -0x100) + w->c [1].error_limit = exp2s (slow_log_1 - bitrate_1 + 0x100); + else + w->c [1].error_limit = 0; + } + else { + w->c [0].error_limit = exp2s (bitrate_0); + w->c [1].error_limit = exp2s (bitrate_1); + } + } +} + +static uint32_t read_code (Bitstream *bs, uint32_t maxcode); + +// Read the next word from the bitstream "wvbits" and return the value. This +// function can be used for hybrid or lossless streams, but since an +// optimized version is available for lossless this function would normally +// be used for hybrid only. 
If a hybrid lossless stream is being read then +// the "correction" offset is written at the specified pointer. A return value +// of WORD_EOF indicates that the end of the bitstream was reached (all 1s) or +// some other error occurred. + +int32_t get_words (int32_t *buffer, int nsamples, uint32_t flags, + struct words_data *w, Bitstream *bs) +{ + register struct entropy_data *c = w->c; + int csamples; + + if (!(flags & MONO_DATA)) + nsamples *= 2; + + for (csamples = 0; csamples < nsamples; ++csamples) { + uint32_t ones_count, low, mid, high; + + if (!(flags & MONO_DATA)) + c = w->c + (csamples & 1); + + if (!(w->c [0].median [0] & ~1) && !w->holding_zero && !w->holding_one && !(w->c [1].median [0] & ~1)) { + uint32_t mask; + int cbits; + + if (w->zeros_acc) { + if (--w->zeros_acc) { + c->slow_level -= (c->slow_level + SLO) >> SLS; + *buffer++ = 0; + continue; + } + } + else { + for (cbits = 0; cbits < 33 && getbit (bs); ++cbits); + + if (cbits == 33) + break; + + if (cbits < 2) + w->zeros_acc = cbits; + else { + for (mask = 1, w->zeros_acc = 0; --cbits; mask <<= 1) + if (getbit (bs)) + w->zeros_acc |= mask; + + w->zeros_acc |= mask; + } + + if (w->zeros_acc) { + c->slow_level -= (c->slow_level + SLO) >> SLS; + CLEAR (w->c [0].median); + CLEAR (w->c [1].median); + *buffer++ = 0; + continue; + } + } + } + + if (w->holding_zero) + ones_count = w->holding_zero = 0; + else { + int next8; + + if (bs->bc < 8) { + if (++(bs->ptr) == bs->end) + bs->wrap (bs); + + next8 = (bs->sr |= *(bs->ptr) << bs->bc) & 0xff; + bs->bc += 8; + } + else + next8 = bs->sr & 0xff; + + if (next8 == 0xff) { + bs->bc -= 8; + bs->sr >>= 8; + + for (ones_count = 8; ones_count < (LIMIT_ONES + 1) && getbit (bs); ++ones_count); + + if (ones_count == (LIMIT_ONES + 1)) + break; + + if (ones_count == LIMIT_ONES) { + uint32_t mask; + int cbits; + + for (cbits = 0; cbits < 33 && getbit (bs); ++cbits); + + if (cbits == 33) + break; + + if (cbits < 2) + ones_count = cbits; + else { + for (mask = 1, 
ones_count = 0; --cbits; mask <<= 1) + if (getbit (bs)) + ones_count |= mask; + + ones_count |= mask; + } + + ones_count += LIMIT_ONES; + } + } + else { + bs->bc -= (ones_count = ones_count_table [next8]) + 1; + bs->sr >>= ones_count + 1; + } + + if (w->holding_one) { + w->holding_one = ones_count & 1; + ones_count = (ones_count >> 1) + 1; + } + else { + w->holding_one = ones_count & 1; + ones_count >>= 1; + } + + w->holding_zero = ~w->holding_one & 1; + } + + if ((flags & HYBRID_FLAG) && ((flags & MONO_DATA) || !(csamples & 1))) + update_error_limit (w, flags); + + if (ones_count == 0) { + low = 0; + high = GET_MED (0) - 1; + DEC_MED0 (); + } + else { + low = GET_MED (0); + INC_MED0 (); + + if (ones_count == 1) { + high = low + GET_MED (1) - 1; + DEC_MED1 (); + } + else { + low += GET_MED (1); + INC_MED1 (); + + if (ones_count == 2) { + high = low + GET_MED (2) - 1; + DEC_MED2 (); + } + else { + low += (ones_count - 2) * GET_MED (2); + high = low + GET_MED (2) - 1; + INC_MED2 (); + } + } + } + + mid = (high + low + 1) >> 1; + + if (!c->error_limit) + mid = read_code (bs, high - low) + low; + else while (high - low > c->error_limit) { + if (getbit (bs)) + mid = (high + (low = mid) + 1) >> 1; + else + mid = ((high = mid - 1) + low + 1) >> 1; + } + + *buffer++ = getbit (bs) ? ~mid : mid; + + if (flags & HYBRID_BITRATE) + c->slow_level = c->slow_level - ((c->slow_level + SLO) >> SLS) + mylog2 (mid); + } + + return (flags & MONO_DATA) ? csamples : (csamples / 2); +} + +// Read a single unsigned value from the specified bitstream with a value +// from 0 to maxcode. If there are exactly a power of two number of possible +// codes then this will read a fixed number of bits; otherwise it reads the +// minimum number of bits and then determines whether another bit is needed +// to define the code. 
+ +static uint32_t read_code (Bitstream *bs, uint32_t maxcode) +{ + int bitcount = count_bits (maxcode); + uint32_t extras = (1L << bitcount) - maxcode - 1, code; + + if (!bitcount) + return 0; + + getbits (&code, bitcount - 1, bs); + code &= (1L << (bitcount - 1)) - 1; + + if (code >= extras) { + code = (code << 1) - extras; + + if (getbit (bs)) + ++code; + } + + return code; +} + +// The concept of a base 2 logarithm is used in many parts of WavPack. It is +// a way of sufficiently accurately representing 32-bit signed and unsigned +// values storing only 16 bits (actually fewer). It is also used in the hybrid +// mode for quickly comparing the relative magnitude of large values (i.e. +// division) and providing smooth exponentials using only addition. + +// These are not strict logarithms in that they become linear around zero and +// can therefore represent both zero and negative values. They have 8 bits +// of precision and in "roundtrip" conversions the total error never exceeds 1 +// part in 225 except for the cases of +/-115 and +/-195 (which error by 1). + + +// This function returns the log2 for the specified 32-bit unsigned value. +// The maximum value allowed is about 0xff800000 and returns 8447. + +static int mylog2 (uint32_t avalue) +{ + int dbits; + + if ((avalue += avalue >> 9) < (1 << 8)) { + dbits = nbits_table [avalue]; + return (dbits << 8) + log2_table [(avalue << (9 - dbits)) & 0xff]; + } + else { + if (avalue < (1L << 16)) + dbits = nbits_table [avalue >> 8] + 8; + else if (avalue < (1L << 24)) + dbits = nbits_table [avalue >> 16] + 16; + else + dbits = nbits_table [avalue >> 24] + 24; + + return (dbits << 8) + log2_table [(avalue >> (dbits - 9)) & 0xff]; + } +} + +// This function returns the log2 for the specified 32-bit signed value. +// All input values are valid and the return values are in the range of +// +/- 8192. + +int log2s (int32_t value) +{ + return (value < 0) ? 
-mylog2 (-value) : mylog2 (value); +} + +// This function returns the original integer represented by the supplied +// logarithm (at least within the provided accuracy). The log is signed, +// but since a full 32-bit value is returned this can be used for unsigned +// conversions as well (i.e. the input range is -8192 to +8447). + +int32_t exp2s (int log) +{ + uint32_t value; + + if (log < 0) + return -exp2s (-log); + + value = exp2_table [log & 0xff] | 0x100; + + if ((log >>= 8) <= 9) + return value >> (9 - log); + else + return value << (log - 9); +} + +// These two functions convert internal weights (which are normally +/-1024) +// to and from an 8-bit signed character version for storage in metadata. The +// weights are clipped here in the case that they are outside that range. + +int restore_weight (signed char weight) +{ + int result; + + if ((result = (int) weight << 3) > 0) + result += (result + 64) >> 7; + + return result; +} diff --git a/src/engine/external/wavpack/wputils.c b/src/engine/external/wavpack/wputils.c index 0d71af53..1bc65610 100644 --- a/src/engine/external/wavpack/wputils.c +++ b/src/engine/external/wavpack/wputils.c @@ -1,351 +1,351 @@ -//////////////////////////////////////////////////////////////////////////// -// **** WAVPACK **** // -// Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // -// All Rights Reserved. // -// Distributed under the BSD Software License (see license.txt) // -//////////////////////////////////////////////////////////////////////////// - -// wputils.c - -// This module provides a high-level interface for decoding WavPack 4.0 audio -// streams and files. WavPack data is read with a stream reading callback. No -// direct seeking is provided for, but it is possible to start decoding -// anywhere in a WavPack stream. In this case, WavPack will be able to provide -// the sample-accurate position when it synchs with the data and begins -// decoding. 
- -#include "wavpack.h" - -#include <string.h> - -///////////////////////////// local table storage //////////////////////////// - -const uint32_t sample_rates [] = { 6000, 8000, 9600, 11025, 12000, 16000, 22050, - 24000, 32000, 44100, 48000, 64000, 88200, 96000, 192000 }; - -///////////////////////////// executable code //////////////////////////////// - -static uint32_t read_next_header (read_stream infile, WavpackHeader *wphdr); - -// This function reads data from the specified stream in search of a valid -// WavPack 4.0 audio block. If this fails in 1 megabyte (or an invalid or -// unsupported WavPack block is encountered) then an appropriate message is -// copied to "error" and NULL is returned, otherwise a pointer to a -// WavpackContext structure is returned (which is used to call all other -// functions in this module). This can be initiated at the beginning of a -// WavPack file, or anywhere inside a WavPack file. To determine the exact -// position within the file use WavpackGetSampleIndex(). For demonstration -// purposes this uses a single static copy of the WavpackContext structure, -// so obviously it cannot be used for more than one file at a time. Also, -// this function will not handle "correction" files, plays only the first -// two channels of multi-channel files, and is limited in resolution in some -// large integer or floating point files (but always provides at least 24 bits -// of resolution). 
- -static WavpackContext wpc; - -WavpackContext *WavpackOpenFileInput (read_stream infile, char *error) -{ - WavpackStream *wps = &wpc.stream; - uint32_t bcount; - - CLEAR (wpc); - wpc.infile = infile; - wpc.total_samples = (uint32_t) -1; - wpc.norm_offset = 0; - wpc.open_flags = 0; - - // open the source file for reading and store the size - - while (!wps->wphdr.block_samples) { - - bcount = read_next_header (wpc.infile, &wps->wphdr); - - if (bcount == (uint32_t) -1) { - strcpy (error, "not compatible with this version of WavPack file!"); - return NULL; - } - - if (wps->wphdr.block_samples && wps->wphdr.total_samples != (uint32_t) -1) - wpc.total_samples = wps->wphdr.total_samples; - - if (!unpack_init (&wpc)) { - strcpy (error, wpc.error_message [0] ? wpc.error_message : - "not compatible with this version of WavPack file!"); - - return NULL; - } - } - - wpc.config.flags &= ~0xff; - wpc.config.flags |= wps->wphdr.flags & 0xff; - wpc.config.bytes_per_sample = (wps->wphdr.flags & BYTES_STORED) + 1; - wpc.config.float_norm_exp = wps->float_norm_exp; - - wpc.config.bits_per_sample = (wpc.config.bytes_per_sample * 8) - - ((wps->wphdr.flags & SHIFT_MASK) >> SHIFT_LSB); - - if (wpc.config.flags & FLOAT_DATA) { - wpc.config.bytes_per_sample = 3; - wpc.config.bits_per_sample = 24; - } - - if (!wpc.config.sample_rate) { - if (!wps || !wps->wphdr.block_samples || (wps->wphdr.flags & SRATE_MASK) == SRATE_MASK) - wpc.config.sample_rate = 44100; - else - wpc.config.sample_rate = sample_rates [(wps->wphdr.flags & SRATE_MASK) >> SRATE_LSB]; - } - - if (!wpc.config.num_channels) { - wpc.config.num_channels = (wps->wphdr.flags & MONO_FLAG) ? 1 : 2; - wpc.config.channel_mask = 0x5 - wpc.config.num_channels; - } - - if (!(wps->wphdr.flags & FINAL_BLOCK)) - wpc.reduced_channels = (wps->wphdr.flags & MONO_FLAG) ? 
1 : 2; - - return &wpc; -} - -// This function obtains general information about an open file and returns -// a mask with the following bit values: - -// MODE_LOSSLESS: file is lossless (pure lossless only) -// MODE_HYBRID: file is hybrid mode (lossy part only) -// MODE_FLOAT: audio data is 32-bit ieee floating point (but will provided -// in 24-bit integers for convenience) -// MODE_HIGH: file was created in "high" mode (information only) -// MODE_FAST: file was created in "fast" mode (information only) - -int WavpackGetMode (WavpackContext *wpc) -{ - int mode = 0; - - if (wpc) { - if (wpc->config.flags & CONFIG_HYBRID_FLAG) - mode |= MODE_HYBRID; - else if (!(wpc->config.flags & CONFIG_LOSSY_MODE)) - mode |= MODE_LOSSLESS; - - if (wpc->lossy_blocks) - mode &= ~MODE_LOSSLESS; - - if (wpc->config.flags & CONFIG_FLOAT_DATA) - mode |= MODE_FLOAT; - - if (wpc->config.flags & CONFIG_HIGH_FLAG) - mode |= MODE_HIGH; - - if (wpc->config.flags & CONFIG_FAST_FLAG) - mode |= MODE_FAST; - } - - return mode; -} - -// Unpack the specified number of samples from the current file position. -// Note that "samples" here refers to "complete" samples, which would be -// 2 longs for stereo files. The audio data is returned right-justified in -// 32-bit longs in the endian mode native to the executing processor. So, -// if the original data was 16-bit, then the values returned would be -// +/-32k. Floating point data will be returned as 24-bit integers (and may -// also be clipped). The actual number of samples unpacked is returned, -// which should be equal to the number requested unless the end of fle is -// encountered or an error occurs. 
- -uint32_t WavpackUnpackSamples (WavpackContext *wpc, int32_t *buffer, uint32_t samples) -{ - WavpackStream *wps = &wpc->stream; - uint32_t bcount, samples_unpacked = 0, samples_to_unpack; - int num_channels = wpc->config.num_channels; - - while (samples) { - if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) || - wps->sample_index >= wps->wphdr.block_index + wps->wphdr.block_samples) { - bcount = read_next_header (wpc->infile, &wps->wphdr); - - if (bcount == (uint32_t) -1) - break; - - if (!wps->wphdr.block_samples || wps->sample_index == wps->wphdr.block_index) - if (!unpack_init (wpc)) - break; - } - - if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) || - wps->sample_index >= wps->wphdr.block_index + wps->wphdr.block_samples) - continue; - - if (wps->sample_index < wps->wphdr.block_index) { - samples_to_unpack = wps->wphdr.block_index - wps->sample_index; - - if (samples_to_unpack > samples) - samples_to_unpack = samples; - - wps->sample_index += samples_to_unpack; - samples_unpacked += samples_to_unpack; - samples -= samples_to_unpack; - - if (wpc->reduced_channels) - samples_to_unpack *= wpc->reduced_channels; - else - samples_to_unpack *= num_channels; - - while (samples_to_unpack--) - *buffer++ = 0; - - continue; - } - - samples_to_unpack = wps->wphdr.block_index + wps->wphdr.block_samples - wps->sample_index; - - if (samples_to_unpack > samples) - samples_to_unpack = samples; - - unpack_samples (wpc, buffer, samples_to_unpack); - - if (wpc->reduced_channels) - buffer += samples_to_unpack * wpc->reduced_channels; - else - buffer += samples_to_unpack * num_channels; - - samples_unpacked += samples_to_unpack; - samples -= samples_to_unpack; - - if (wps->sample_index == wps->wphdr.block_index + wps->wphdr.block_samples) { - if (check_crc_error (wpc)) - wpc->crc_errors++; - } - - if (wps->sample_index == wpc->total_samples) - break; - } - - return samples_unpacked; -} - -// Get total number of samples contained in the 
WavPack file, or -1 if unknown - -uint32_t WavpackGetNumSamples (WavpackContext *wpc) -{ - return wpc ? wpc->total_samples : (uint32_t) -1; -} - -// Get the current sample index position, or -1 if unknown - -uint32_t WavpackGetSampleIndex (WavpackContext *wpc) -{ - if (wpc) - return wpc->stream.sample_index; - - return (uint32_t) -1; -} - -// Get the number of errors encountered so far - -int WavpackGetNumErrors (WavpackContext *wpc) -{ - return wpc ? wpc->crc_errors : 0; -} - -// return TRUE if any uncorrected lossy blocks were actually written or read - -int WavpackLossyBlocks (WavpackContext *wpc) -{ - return wpc ? wpc->lossy_blocks : 0; -} - -// Returns the sample rate of the specified WavPack file - -uint32_t WavpackGetSampleRate (WavpackContext *wpc) -{ - return wpc ? wpc->config.sample_rate : 44100; -} - -// Returns the number of channels of the specified WavPack file. Note that -// this is the actual number of channels contained in the file, but this -// version can only decode the first two. - -int WavpackGetNumChannels (WavpackContext *wpc) -{ - return wpc ? wpc->config.num_channels : 2; -} - -// Returns the actual number of valid bits per sample contained in the -// original file, which may or may not be a multiple of 8. Floating data -// always has 32 bits, integers may be from 1 to 32 bits each. When this -// value is not a multiple of 8, then the "extra" bits are located in the -// LSBs of the results. That is, values are right justified when unpacked -// into longs, but are left justified in the number of bytes used by the -// original data. - -int WavpackGetBitsPerSample (WavpackContext *wpc) -{ - return wpc ? wpc->config.bits_per_sample : 16; -} - -// Returns the number of bytes used for each sample (1 to 4) in the original -// file. This is required information for the user of this module because the -// audio data is returned in the LOWER bytes of the long buffer and must be -// left-shifted 8, 16, or 24 bits if normalized longs are required. 
- -int WavpackGetBytesPerSample (WavpackContext *wpc) -{ - return wpc ? wpc->config.bytes_per_sample : 2; -} - -// This function will return the actual number of channels decoded from the -// file (which may or may not be less than the actual number of channels, but -// will always be 1 or 2). Normally, this will be the front left and right -// channels of a multi-channel file. - -int WavpackGetReducedChannels (WavpackContext *wpc) -{ - if (wpc) - return wpc->reduced_channels ? wpc->reduced_channels : wpc->config.num_channels; - else - return 2; -} - -// Read from current file position until a valid 32-byte WavPack 4.0 header is -// found and read into the specified pointer. The number of bytes skipped is -// returned. If no WavPack header is found within 1 meg, then a -1 is returned -// to indicate the error. No additional bytes are read past the header and it -// is returned in the processor's native endian mode. Seeking is not required. - -static uint32_t read_next_header (read_stream infile, WavpackHeader *wphdr) -{ - char buffer [sizeof (*wphdr)], *sp = buffer + sizeof (*wphdr), *ep = sp; - uint32_t bytes_skipped = 0; - int bleft; - - while (1) { - if (sp < ep) { - bleft = ep - sp; - memcpy (buffer, sp, bleft); - } - else - bleft = 0; - - if (infile (buffer + bleft, sizeof (*wphdr) - bleft) != (int32_t) sizeof (*wphdr) - bleft) - return -1; - - sp = buffer; - - if (*sp++ == 'w' && *sp == 'v' && *++sp == 'p' && *++sp == 'k' && - !(*++sp & 1) && sp [2] < 16 && !sp [3] && sp [5] == 4 && - sp [4] >= (MIN_STREAM_VERS & 0xff) && sp [4] <= (MAX_STREAM_VERS & 0xff)) { - memcpy (wphdr, buffer, sizeof (*wphdr)); - little_endian_to_native (wphdr, WavpackHeaderFormat); - return bytes_skipped; - } - - while (sp < ep && *sp != 'w') - sp++; - - if ((bytes_skipped += sp - buffer) > 1048576L) - return -1; - } -} +//////////////////////////////////////////////////////////////////////////// +// **** WAVPACK **** // +// Hybrid Lossless Wavefile Compressor // +// Copyright (c) 
1998 - 2006 Conifer Software. // +// All Rights Reserved. // +// Distributed under the BSD Software License (see license.txt) // +//////////////////////////////////////////////////////////////////////////// + +// wputils.c + +// This module provides a high-level interface for decoding WavPack 4.0 audio +// streams and files. WavPack data is read with a stream reading callback. No +// direct seeking is provided for, but it is possible to start decoding +// anywhere in a WavPack stream. In this case, WavPack will be able to provide +// the sample-accurate position when it synchs with the data and begins +// decoding. + +#include "wavpack.h" + +#include <string.h> + +///////////////////////////// local table storage //////////////////////////// + +const uint32_t sample_rates [] = { 6000, 8000, 9600, 11025, 12000, 16000, 22050, + 24000, 32000, 44100, 48000, 64000, 88200, 96000, 192000 }; + +///////////////////////////// executable code //////////////////////////////// + +static uint32_t read_next_header (read_stream infile, WavpackHeader *wphdr); + +// This function reads data from the specified stream in search of a valid +// WavPack 4.0 audio block. If this fails in 1 megabyte (or an invalid or +// unsupported WavPack block is encountered) then an appropriate message is +// copied to "error" and NULL is returned, otherwise a pointer to a +// WavpackContext structure is returned (which is used to call all other +// functions in this module). This can be initiated at the beginning of a +// WavPack file, or anywhere inside a WavPack file. To determine the exact +// position within the file use WavpackGetSampleIndex(). For demonstration +// purposes this uses a single static copy of the WavpackContext structure, +// so obviously it cannot be used for more than one file at a time. 
Also, +// this function will not handle "correction" files, plays only the first +// two channels of multi-channel files, and is limited in resolution in some +// large integer or floating point files (but always provides at least 24 bits +// of resolution). + +static WavpackContext wpc; + +WavpackContext *WavpackOpenFileInput (read_stream infile, char *error) +{ + WavpackStream *wps = &wpc.stream; + uint32_t bcount; + + CLEAR (wpc); + wpc.infile = infile; + wpc.total_samples = (uint32_t) -1; + wpc.norm_offset = 0; + wpc.open_flags = 0; + + // open the source file for reading and store the size + + while (!wps->wphdr.block_samples) { + + bcount = read_next_header (wpc.infile, &wps->wphdr); + + if (bcount == (uint32_t) -1) { + strcpy (error, "not compatible with this version of WavPack file!"); + return NULL; + } + + if (wps->wphdr.block_samples && wps->wphdr.total_samples != (uint32_t) -1) + wpc.total_samples = wps->wphdr.total_samples; + + if (!unpack_init (&wpc)) { + strcpy (error, wpc.error_message [0] ? wpc.error_message : + "not compatible with this version of WavPack file!"); + + return NULL; + } + } + + wpc.config.flags &= ~0xff; + wpc.config.flags |= wps->wphdr.flags & 0xff; + wpc.config.bytes_per_sample = (wps->wphdr.flags & BYTES_STORED) + 1; + wpc.config.float_norm_exp = wps->float_norm_exp; + + wpc.config.bits_per_sample = (wpc.config.bytes_per_sample * 8) - + ((wps->wphdr.flags & SHIFT_MASK) >> SHIFT_LSB); + + if (wpc.config.flags & FLOAT_DATA) { + wpc.config.bytes_per_sample = 3; + wpc.config.bits_per_sample = 24; + } + + if (!wpc.config.sample_rate) { + if (!wps || !wps->wphdr.block_samples || (wps->wphdr.flags & SRATE_MASK) == SRATE_MASK) + wpc.config.sample_rate = 44100; + else + wpc.config.sample_rate = sample_rates [(wps->wphdr.flags & SRATE_MASK) >> SRATE_LSB]; + } + + if (!wpc.config.num_channels) { + wpc.config.num_channels = (wps->wphdr.flags & MONO_FLAG) ? 
1 : 2; + wpc.config.channel_mask = 0x5 - wpc.config.num_channels; + } + + if (!(wps->wphdr.flags & FINAL_BLOCK)) + wpc.reduced_channels = (wps->wphdr.flags & MONO_FLAG) ? 1 : 2; + + return &wpc; +} + +// This function obtains general information about an open file and returns +// a mask with the following bit values: + +// MODE_LOSSLESS: file is lossless (pure lossless only) +// MODE_HYBRID: file is hybrid mode (lossy part only) +// MODE_FLOAT: audio data is 32-bit ieee floating point (but will provided +// in 24-bit integers for convenience) +// MODE_HIGH: file was created in "high" mode (information only) +// MODE_FAST: file was created in "fast" mode (information only) + +int WavpackGetMode (WavpackContext *wpc) +{ + int mode = 0; + + if (wpc) { + if (wpc->config.flags & CONFIG_HYBRID_FLAG) + mode |= MODE_HYBRID; + else if (!(wpc->config.flags & CONFIG_LOSSY_MODE)) + mode |= MODE_LOSSLESS; + + if (wpc->lossy_blocks) + mode &= ~MODE_LOSSLESS; + + if (wpc->config.flags & CONFIG_FLOAT_DATA) + mode |= MODE_FLOAT; + + if (wpc->config.flags & CONFIG_HIGH_FLAG) + mode |= MODE_HIGH; + + if (wpc->config.flags & CONFIG_FAST_FLAG) + mode |= MODE_FAST; + } + + return mode; +} + +// Unpack the specified number of samples from the current file position. +// Note that "samples" here refers to "complete" samples, which would be +// 2 longs for stereo files. The audio data is returned right-justified in +// 32-bit longs in the endian mode native to the executing processor. So, +// if the original data was 16-bit, then the values returned would be +// +/-32k. Floating point data will be returned as 24-bit integers (and may +// also be clipped). The actual number of samples unpacked is returned, +// which should be equal to the number requested unless the end of fle is +// encountered or an error occurs. 
+ +uint32_t WavpackUnpackSamples (WavpackContext *wpc, int32_t *buffer, uint32_t samples) +{ + WavpackStream *wps = &wpc->stream; + uint32_t bcount, samples_unpacked = 0, samples_to_unpack; + int num_channels = wpc->config.num_channels; + + while (samples) { + if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) || + wps->sample_index >= wps->wphdr.block_index + wps->wphdr.block_samples) { + bcount = read_next_header (wpc->infile, &wps->wphdr); + + if (bcount == (uint32_t) -1) + break; + + if (!wps->wphdr.block_samples || wps->sample_index == wps->wphdr.block_index) + if (!unpack_init (wpc)) + break; + } + + if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) || + wps->sample_index >= wps->wphdr.block_index + wps->wphdr.block_samples) + continue; + + if (wps->sample_index < wps->wphdr.block_index) { + samples_to_unpack = wps->wphdr.block_index - wps->sample_index; + + if (samples_to_unpack > samples) + samples_to_unpack = samples; + + wps->sample_index += samples_to_unpack; + samples_unpacked += samples_to_unpack; + samples -= samples_to_unpack; + + if (wpc->reduced_channels) + samples_to_unpack *= wpc->reduced_channels; + else + samples_to_unpack *= num_channels; + + while (samples_to_unpack--) + *buffer++ = 0; + + continue; + } + + samples_to_unpack = wps->wphdr.block_index + wps->wphdr.block_samples - wps->sample_index; + + if (samples_to_unpack > samples) + samples_to_unpack = samples; + + unpack_samples (wpc, buffer, samples_to_unpack); + + if (wpc->reduced_channels) + buffer += samples_to_unpack * wpc->reduced_channels; + else + buffer += samples_to_unpack * num_channels; + + samples_unpacked += samples_to_unpack; + samples -= samples_to_unpack; + + if (wps->sample_index == wps->wphdr.block_index + wps->wphdr.block_samples) { + if (check_crc_error (wpc)) + wpc->crc_errors++; + } + + if (wps->sample_index == wpc->total_samples) + break; + } + + return samples_unpacked; +} + +// Get total number of samples contained in the 
WavPack file, or -1 if unknown + +uint32_t WavpackGetNumSamples (WavpackContext *wpc) +{ + return wpc ? wpc->total_samples : (uint32_t) -1; +} + +// Get the current sample index position, or -1 if unknown + +uint32_t WavpackGetSampleIndex (WavpackContext *wpc) +{ + if (wpc) + return wpc->stream.sample_index; + + return (uint32_t) -1; +} + +// Get the number of errors encountered so far + +int WavpackGetNumErrors (WavpackContext *wpc) +{ + return wpc ? wpc->crc_errors : 0; +} + +// return TRUE if any uncorrected lossy blocks were actually written or read + +int WavpackLossyBlocks (WavpackContext *wpc) +{ + return wpc ? wpc->lossy_blocks : 0; +} + +// Returns the sample rate of the specified WavPack file + +uint32_t WavpackGetSampleRate (WavpackContext *wpc) +{ + return wpc ? wpc->config.sample_rate : 44100; +} + +// Returns the number of channels of the specified WavPack file. Note that +// this is the actual number of channels contained in the file, but this +// version can only decode the first two. + +int WavpackGetNumChannels (WavpackContext *wpc) +{ + return wpc ? wpc->config.num_channels : 2; +} + +// Returns the actual number of valid bits per sample contained in the +// original file, which may or may not be a multiple of 8. Floating data +// always has 32 bits, integers may be from 1 to 32 bits each. When this +// value is not a multiple of 8, then the "extra" bits are located in the +// LSBs of the results. That is, values are right justified when unpacked +// into longs, but are left justified in the number of bytes used by the +// original data. + +int WavpackGetBitsPerSample (WavpackContext *wpc) +{ + return wpc ? wpc->config.bits_per_sample : 16; +} + +// Returns the number of bytes used for each sample (1 to 4) in the original +// file. This is required information for the user of this module because the +// audio data is returned in the LOWER bytes of the long buffer and must be +// left-shifted 8, 16, or 24 bits if normalized longs are required. 
+ +int WavpackGetBytesPerSample (WavpackContext *wpc) +{ + return wpc ? wpc->config.bytes_per_sample : 2; +} + +// This function will return the actual number of channels decoded from the +// file (which may or may not be less than the actual number of channels, but +// will always be 1 or 2). Normally, this will be the front left and right +// channels of a multi-channel file. + +int WavpackGetReducedChannels (WavpackContext *wpc) +{ + if (wpc) + return wpc->reduced_channels ? wpc->reduced_channels : wpc->config.num_channels; + else + return 2; +} + +// Read from current file position until a valid 32-byte WavPack 4.0 header is +// found and read into the specified pointer. The number of bytes skipped is +// returned. If no WavPack header is found within 1 meg, then a -1 is returned +// to indicate the error. No additional bytes are read past the header and it +// is returned in the processor's native endian mode. Seeking is not required. + +static uint32_t read_next_header (read_stream infile, WavpackHeader *wphdr) +{ + char buffer [sizeof (*wphdr)], *sp = buffer + sizeof (*wphdr), *ep = sp; + uint32_t bytes_skipped = 0; + int bleft; + + while (1) { + if (sp < ep) { + bleft = ep - sp; + memcpy (buffer, sp, bleft); + } + else + bleft = 0; + + if (infile (buffer + bleft, sizeof (*wphdr) - bleft) != (int32_t) sizeof (*wphdr) - bleft) + return -1; + + sp = buffer; + + if (*sp++ == 'w' && *sp == 'v' && *++sp == 'p' && *++sp == 'k' && + !(*++sp & 1) && sp [2] < 16 && !sp [3] && sp [5] == 4 && + sp [4] >= (MIN_STREAM_VERS & 0xff) && sp [4] <= (MAX_STREAM_VERS & 0xff)) { + memcpy (wphdr, buffer, sizeof (*wphdr)); + little_endian_to_native (wphdr, WavpackHeaderFormat); + return bytes_skipped; + } + + while (sp < ep && *sp != 'w') + sp++; + + if ((bytes_skipped += sp - buffer) > 1048576L) + return -1; + } +} |