about summary refs log tree commit diff
path: root/src/engine/external
diff options
context:
space:
mode:
author Magnus Auvinen <magnus.auvinen@gmail.com> 2008-11-29 06:39:22 +0000
committer Magnus Auvinen <magnus.auvinen@gmail.com> 2008-11-29 06:39:22 +0000
commit 4004d13b48fd303c982dbcf7c432add64d128637 (patch)
tree fb67a18724bb8e1435b0938abe56c24d68fa2dce /src/engine/external
parent 61d18702f63cc4731c923e9ca8cd8d98cdaca4ad (diff)
downloadzcatch-4004d13b48fd303c982dbcf7c432add64d128637.tar.gz
zcatch-4004d13b48fd303c982dbcf7c432add64d128637.zip
applied patch from Richie Nyhus to convert CRLF to LF, as it should be
Diffstat (limited to 'src/engine/external')
-rw-r--r--src/engine/external/wavpack/arm.S922
-rw-r--r--src/engine/external/wavpack/arml.S982
-rw-r--r--src/engine/external/wavpack/bits.c280
-rw-r--r--src/engine/external/wavpack/coldfire.S1050
-rw-r--r--src/engine/external/wavpack/float.c100
-rw-r--r--src/engine/external/wavpack/license.txt50
-rw-r--r--src/engine/external/wavpack/metadata.c210
-rw-r--r--src/engine/external/wavpack/readme.txt136
-rw-r--r--src/engine/external/wavpack/unpack.c1570
-rw-r--r--src/engine/external/wavpack/wavpack.h576
-rw-r--r--src/engine/external/wavpack/words.c1120
-rw-r--r--src/engine/external/wavpack/wputils.c702
12 files changed, 3849 insertions, 3849 deletions
diff --git a/src/engine/external/wavpack/arm.S b/src/engine/external/wavpack/arm.S
index ab882181..4f299892 100644
--- a/src/engine/external/wavpack/arm.S
+++ b/src/engine/external/wavpack/arm.S
@@ -1,461 +1,461 @@
-////////////////////////////////////////////////////////////////////////////

-//                           **** WAVPACK ****                            //

-//                  Hybrid Lossless Wavefile Compressor                   //

-//              Copyright (c) 1998 - 2006 Conifer Software.               //

-//                          All Rights Reserved.                          //

-//      Distributed under the BSD Software License (see license.txt)      //

-////////////////////////////////////////////////////////////////////////////

-

-/* This is an assembly optimized version of the following WavPack function:

- *

- * void decorr_stereo_pass_cont (struct decorr_pass *dpp,

- *                               long *buffer, long sample_count);

- *

- * It performs a single pass of stereo decorrelation on the provided buffer.

- * Note that this version of the function requires that the 8 previous stereo

- * samples are visible and correct. In other words, it ignores the "samples_*"

- * fields in the decorr_pass structure and gets the history data directly

- * from the buffer. It does, however, return the appropriate history samples

- * to the decorr_pass structure before returning.

- *

- * This is written to work on a ARM7TDMI processor. This version only uses the

- * 32-bit multiply-accumulate instruction and so will overflow with 24-bit

- * WavPack files.

- */

-        .text

-        .align

-        .global         decorr_stereo_pass_cont_arm

-

-/*

- * on entry:

- *

- * r0 = struct decorr_pass *dpp

- * r1 = long *buffer

- * r2 = long sample_count

- */

-

-decorr_stereo_pass_cont_arm:

-

-        stmfd   sp!, {r4 - r8, r10, r11, lr}

-        mov     r5, r0                  @ r5 = dpp

-        mov     r11, #512               @ r11 = 512 for rounding

-        ldrsh   r6, [r0, #2]            @ r6 = dpp->delta

-        ldrsh   r4, [r0, #4]            @ r4 = dpp->weight_A

-        ldrsh   r0, [r0, #6]            @ r0 = dpp->weight_B

-        cmp     r2, #0                  @ exit if no samples to process

-        beq     common_exit

-

-        add     r7, r1, r2, asl #3      @ r7 = buffer ending position

-        ldrsh   r2, [r5, #0]            @ r2 = dpp->term

-        cmp     r2, #0

-        bmi     minus_term

-

-        ldr     lr, [r1, #-16]          @ load 2 sample history from buffer

-        ldr     r10, [r1, #-12]         @  for terms 2, 17, and 18

-        ldr     r8, [r1, #-8]

-        ldr     r3, [r1, #-4]

-        cmp     r2, #17

-        beq     term_17_loop

-        cmp     r2, #18

-        beq     term_18_loop

-        cmp     r2, #2

-        beq     term_2_loop

-        b       term_default_loop       @ else handle default (1-8, except 2)

-

-minus_term:

-        mov     r10, #1024              @ r10 = -1024 for weight clipping

-        rsb     r10, r10, #0            @  (only used for negative terms)

-        cmn     r2, #1

-        beq     term_minus_1

-        cmn     r2, #2

-        beq     term_minus_2

-        cmn     r2, #3

-        beq     term_minus_3

-        b       common_exit

-

-/*

- ******************************************************************************

- * Loop to handle term = 17 condition

- *

- * r0 = dpp->weight_B           r8 = previous left sample

- * r1 = bptr                    r9 = 

- * r2 = current sample          r10 = second previous left sample

- * r3 = previous right sample   r11 = 512 (for rounding)

- * r4 = dpp->weight_A           ip = current decorrelation value

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr = second previous right sample

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_17_loop:

-        rsbs    ip, lr, r8, asl #1      @ decorr value = (2 * prev) - 2nd prev

-        mov     lr, r8                  @ previous becomes 2nd previous

-        ldr     r2, [r1], #4            @ get sample & update pointer

-        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,

-        add     r8, r2, r8, asr #10     @  shift, and add to new sample

-        strne   r8, [r1, #-4]           @ if change possible, store sample back

-        cmpne   r2, #0

-        beq     .L325

-        teq     ip, r2                  @ update weight based on signs

-        submi   r4, r4, r6

-        addpl   r4, r4, r6

-

-.L325:  rsbs    ip, r10, r3, asl #1     @ do same thing for right channel

-        mov     r10, r3

-        ldr     r2, [r1], #4

-        mla     r3, ip, r0, r11

-        add     r3, r2, r3, asr #10

-        strne   r3, [r1, #-4]

-        cmpne   r2, #0

-        beq     .L329

-        teq     ip, r2

-        submi   r0, r0, r6

-        addpl   r0, r0, r6

-

-.L329:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_17_loop

-        b       store_1718              @ common exit for terms 17 & 18

-

-/*

- ******************************************************************************

- * Loop to handle term = 18 condition

- *

- * r0 = dpp->weight_B           r8 = previous left sample

- * r1 = bptr                    r9 = 

- * r2 = current sample          r10 = second previous left sample

- * r3 = previous right sample   r11 = 512 (for rounding)

- * r4 = dpp->weight_A           ip = decorrelation value

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr = second previous right sample

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_18_loop:

-        sub     ip, r8, lr              @ decorr value =

-        mov     lr, r8                  @  ((3 * prev) - 2nd prev) >> 1

-        adds    ip, r8, ip, asr #1

-        ldr     r2, [r1], #4            @ get sample & update pointer

-        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,

-        add     r8, r2, r8, asr #10     @  shift, and add to new sample

-        strne   r8, [r1, #-4]           @ if change possible, store sample back

-        cmpne   r2, #0

-        beq     .L337

-        teq     ip, r2                  @ update weight based on signs

-        submi   r4, r4, r6

-        addpl   r4, r4, r6

-

-.L337:  sub     ip, r3, r10             @ do same thing for right channel

-        mov     r10, r3

-        adds    ip, r3, ip, asr #1

-        ldr     r2, [r1], #4

-        mla     r3, ip, r0, r11

-        add     r3, r2, r3, asr #10

-        strne   r3, [r1, #-4]

-        cmpne   r2, #0

-        beq     .L341

-        teq     ip, r2

-        submi   r0, r0, r6

-        addpl   r0, r0, r6

-

-.L341:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_18_loop

-

-/* common exit for terms 17 & 18 */

-

-store_1718:

-        str     r3, [r5, #40]           @ store sample history into struct

-        str     r8, [r5, #8]

-        str     r10, [r5, #44]

-        str     lr, [r5, #12]

-        b       common_exit             @ and return

-

-/*

- ******************************************************************************

- * Loop to handle term = 2 condition

- * (note that this case can be handled by the default term handler (1-8), but

- * this special case is faster because it doesn't have to read memory twice)

- *

- * r0 = dpp->weight_B           r8 = previous left sample

- * r1 = bptr                    r9 = 

- * r2 = current sample          r10 = second previous left sample

- * r3 = previous right sample   r11 = 512 (for rounding)

- * r4 = dpp->weight_A           ip = decorrelation value

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr = second previous right sample

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_2_loop:

-        movs    ip, lr                  @ get decorrelation value & test

-        mov     lr, r8                  @ previous becomes 2nd previous

-        ldr     r2, [r1], #4            @ get sample & update pointer

-        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,

-        add     r8, r2, r8, asr #10     @  shift, and add to new sample

-        strne   r8, [r1, #-4]           @ if change possible, store sample back

-        cmpne   r2, #0

-        beq     .L225

-        teq     ip, r2                  @ update weight based on signs

-        submi   r4, r4, r6

-        addpl   r4, r4, r6

-

-.L225:  movs    ip, r10                 @ do same thing for right channel

-        mov     r10, r3

-        ldr     r2, [r1], #4

-        mla     r3, ip, r0, r11

-        add     r3, r2, r3, asr #10

-        strne   r3, [r1, #-4]

-        cmpne   r2, #0

-        beq     .L229

-        teq     ip, r2

-        submi   r0, r0, r6

-        addpl   r0, r0, r6

-

-.L229:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_2_loop

-        b       default_term_exit       @ this exit updates all dpp->samples

-

-/*

- ******************************************************************************

- * Loop to handle default term condition

- *

- * r0 = dpp->weight_B           r8 = result accumulator

- * r1 = bptr                    r9 = 

- * r2 = dpp->term               r10 =

- * r3 = decorrelation value     r11 = 512 (for rounding)

- * r4 = dpp->weight_A           ip = current sample

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr =

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_default_loop:

-        ldr     ip, [r1]                @ get original sample

-        ldr     r3, [r1, -r2, asl #3]   @ get decorrelation value based on term

-        mla     r8, r3, r4, r11         @ mult decorr value by weight, round,

-        add     r8, ip, r8, asr #10     @  shift and add to new sample

-        str     r8, [r1], #4            @ store update sample

-        cmp     r3, #0

-        cmpne   ip, #0

-        beq     .L350

-        teq     ip, r3                  @ update weight based on signs

-        submi   r4, r4, r6

-        addpl   r4, r4, r6

-

-.L350:  ldr     ip, [r1]                @ do the same thing for right channel

-        ldr     r3, [r1, -r2, asl #3]

-        mla     r8, r3, r0, r11

-        add     r8, ip, r8, asr #10

-        str     r8, [r1], #4

-        cmp     r3, #0

-        cmpne   ip, #0

-        beq     .L354

-        teq     ip, r3

-        submi   r0, r0, r6

-        addpl   r0, r0, r6

-

-.L354:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_default_loop

-

-/*

- * This exit is used by terms 1-8 to store the previous 8 samples into the decorr

- * structure (even if they are not all used for the given term)

- */

-

-default_term_exit:

-        ldrsh   r3, [r5, #0]

-        sub     ip, r3, #1

-        mov     lr, #7

-

-.L358:  and     r3, ip, #7

-        add     r3, r5, r3, asl #2

-        ldr     r2, [r1, #-4]

-        str     r2, [r3, #40]

-        ldr     r2, [r1, #-8]!

-        str     r2, [r3, #8]

-        sub     ip, ip, #1

-        sub     lr, lr, #1

-        cmn     lr, #1

-        bne     .L358

-        b       common_exit

-

-/*

- ******************************************************************************

- * Loop to handle term = -1 condition

- *

- * r0 = dpp->weight_B           r8 =

- * r1 = bptr                    r9 = 

- * r2 = intermediate result     r10 = -1024 (for clipping)

- * r3 = previous right sample   r11 = 512 (for rounding)

- * r4 = dpp->weight_A           ip = current sample

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr = updated left sample

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_minus_1:

-        ldr     r3, [r1, #-4]

-

-term_minus_1_loop:

-        ldr     ip, [r1]                @ for left channel the decorrelation value

-        mla     r2, r3, r4, r11         @  is the previous right sample (in r3)

-        add     lr, ip, r2, asr #10

-        str     lr, [r1], #8

-        cmp     r3, #0

-        cmpne   ip, #0

-        beq     .L361

-        teq     ip, r3                  @ update weight based on signs

-        submi   r4, r4, r6

-        addpl   r4, r4, r6

-        cmp     r4, #1024

-        movgt   r4, #1024

-        cmp     r4, r10

-        movlt   r4, r10

-

-.L361:  ldr     r2, [r1, #-4]           @ for right channel the decorrelation value

-        mla     r3, lr, r0, r11         @  is the just updated right sample (in lr)

-        add     r3, r2, r3, asr #10

-        str     r3, [r1, #-4]

-        cmp     lr, #0

-        cmpne   r2, #0

-        beq     .L369

-        teq     r2, lr

-        submi   r0, r0, r6

-        addpl   r0, r0, r6

-        cmp     r0, #1024               @ then clip weight to +/-1024

-        movgt   r0, #1024

-        cmp     r0, r10

-        movlt   r0, r10

-

-.L369:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_minus_1_loop

-

-        str     r3, [r5, #8]            @ else store right sample and exit

-        b       common_exit

-

-/*

- ******************************************************************************

- * Loop to handle term = -2 condition

- * (note that the channels are processed in the reverse order here)

- *

- * r0 = dpp->weight_B           r8 =

- * r1 = bptr                    r9 = 

- * r2 = intermediate result     r10 = -1024 (for clipping)

- * r3 = previous left sample    r11 = 512 (for rounding)

- * r4 = dpp->weight_A           ip = current sample

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr = updated right sample

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_minus_2:

-        ldr     r3, [r1, #-8]

-

-term_minus_2_loop:

-        ldr     ip, [r1, #4]            @ for right channel the decorrelation value

-        mla     r2, r3, r0, r11         @  is the previous left sample (in r3)

-        add     lr, ip, r2, asr #10

-        str     lr, [r1, #4]

-        cmp     r3, #0

-        cmpne   ip, #0

-        beq     .L380

-        teq     ip, r3                  @ update weight based on signs

-        submi   r0, r0, r6

-        addpl   r0, r0, r6

-        cmp     r0, #1024               @ then clip weight to +/-1024

-        movgt   r0, #1024

-        cmp     r0, r10

-        movlt   r0, r10

-

-.L380:  ldr     r2, [r1, #0]            @ for left channel the decorrelation value

-        mla     r3, lr, r4, r11         @  is the just updated left sample (in lr)

-        add     r3, r2, r3, asr #10

-        str     r3, [r1], #8

-        cmp     lr, #0

-        cmpne   r2, #0

-        beq     .L388

-        teq     r2, lr

-        submi   r4, r4, r6

-        addpl   r4, r4, r6

-        cmp     r4, #1024

-        movgt   r4, #1024

-        cmp     r4, r10

-        movlt   r4, r10

-

-.L388:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_minus_2_loop

-

-        str     r3, [r5, #40]           @ else store left channel and exit

-        b       common_exit

-

-/*

- ******************************************************************************

- * Loop to handle term = -3 condition

- *

- * r0 = dpp->weight_B           r8 = previous left sample

- * r1 = bptr                    r9 = 

- * r2 = current left sample     r10 = -1024 (for clipping)

- * r3 = previous right sample   r11 = 512 (for rounding)

- * r4 = dpp->weight_A           ip = intermediate result

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr =

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_minus_3:

-        ldr     r3, [r1, #-4]           @ load previous samples

-        ldr     r8, [r1, #-8]

-

-term_minus_3_loop:

-        ldr     ip, [r1]

-        mla     r2, r3, r4, r11

-        add     r2, ip, r2, asr #10

-        str     r2, [r1], #4

-        cmp     r3, #0

-        cmpne   ip, #0

-        beq     .L399

-        teq     ip, r3                  @ update weight based on signs

-        submi   r4, r4, r6

-        addpl   r4, r4, r6

-        cmp     r4, #1024               @ then clip weight to +/-1024

-        movgt   r4, #1024

-        cmp     r4, r10

-        movlt   r4, r10

-

-.L399:  movs    ip, r8                  @ ip = previous left we use now

-        mov     r8, r2                  @ r8 = current left we use next time

-        ldr     r2, [r1], #4

-        mla     r3, ip, r0, r11

-        add     r3, r2, r3, asr #10

-        strne   r3, [r1, #-4]

-        cmpne   r2, #0

-        beq     .L407

-        teq     ip, r2

-        submi   r0, r0, r6

-        addpl   r0, r0, r6

-        cmp     r0, #1024

-        movgt   r0, #1024

-        cmp     r0, r10

-        movlt   r0, r10

-

-.L407:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_minus_3_loop

-

-        str     r3, [r5, #8]            @ else store previous samples & exit

-        str     r8, [r5, #40]

-

-/*

- * Before finally exiting we must store weights back for next time

- */

-

-common_exit:

-        strh    r4, [r5, #4]

-        strh    r0, [r5, #6]

-        ldmfd   sp!, {r4 - r8, r10, r11, pc}

-

+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2006 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+/* This is an assembly optimized version of the following WavPack function:
+ *
+ * void decorr_stereo_pass_cont (struct decorr_pass *dpp,
+ *                               long *buffer, long sample_count);
+ *
+ * It performs a single pass of stereo decorrelation on the provided buffer.
+ * Note that this version of the function requires that the 8 previous stereo
+ * samples are visible and correct. In other words, it ignores the "samples_*"
+ * fields in the decorr_pass structure and gets the history data directly
+ * from the buffer. It does, however, return the appropriate history samples
+ * to the decorr_pass structure before returning.
+ *
+ * This is written to work on an ARM7TDMI processor. This version only uses the
+ * 32-bit multiply-accumulate instruction and so will overflow with 24-bit
+ * WavPack files.
+ */
+        .text
+        .align
+        .global         decorr_stereo_pass_cont_arm
+
+/*
+ * on entry:
+ *
+ * r0 = struct decorr_pass *dpp
+ * r1 = long *buffer              (interleaved left/right 32-bit samples)
+ * r2 = long sample_count         (number of stereo pairs, 8 bytes each)
+ */
+
+decorr_stereo_pass_cont_arm:
+
+        stmfd   sp!, {r4 - r8, r10, r11, lr}    @ preserve caller's registers and return address
+        mov     r5, r0                  @ r5 = dpp (r0 is reused for weight_B below)
+        mov     r11, #512               @ r11 = 512 for rounding
+        ldrsh   r6, [r0, #2]            @ r6 = dpp->delta
+        ldrsh   r4, [r0, #4]            @ r4 = dpp->weight_A
+        ldrsh   r0, [r0, #6]            @ r0 = dpp->weight_B
+        cmp     r2, #0                  @ exit if no samples to process
+        beq     common_exit
+
+        add     r7, r1, r2, asl #3      @ r7 = buffer ending position (r1 + count * 8)
+        ldrsh   r2, [r5, #0]            @ r2 = dpp->term
+        cmp     r2, #0                  @ negative terms are dispatched separately
+        bmi     minus_term
+
+        ldr     lr, [r1, #-16]          @ history for terms 2, 17, 18: lr = 2nd previous left
+        ldr     r10, [r1, #-12]         @  r10 = 2nd previous right
+        ldr     r8, [r1, #-8]           @  r8 = previous left
+        ldr     r3, [r1, #-4]           @  r3 = previous right
+        cmp     r2, #17
+        beq     term_17_loop
+        cmp     r2, #18
+        beq     term_18_loop
+        cmp     r2, #2
+        beq     term_2_loop
+        b       term_default_loop       @ else handle default (1-8, except 2)
+
+minus_term:
+        mov     r10, #1024              @ r10 = -1024 for weight clipping
+        rsb     r10, r10, #0            @  (only used for negative terms)
+        cmn     r2, #1                  @ cmn rX, #n sets Z when rX == -n
+        beq     term_minus_1
+        cmn     r2, #2
+        beq     term_minus_2
+        cmn     r2, #3
+        beq     term_minus_3
+        b       common_exit             @ any other negative term: leave buffer untouched
+
+/*
+ ******************************************************************************
+ * Loop to handle term = 17 condition
+ *
+ * r0 = dpp->weight_B           r8 = previous left sample
+ * r1 = bptr                    r9 = (not used)
+ * r2 = current sample          r10 = second previous right sample
+ * r3 = previous right sample   r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = current decorrelation value
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = second previous left sample
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_17_loop:
+        rsbs    ip, lr, r8, asl #1      @ decorr value = (2 * prev) - 2nd prev; Z set if it is 0
+        mov     lr, r8                  @ previous becomes 2nd previous
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
+        add     r8, r2, r8, asr #10     @  shift, and add to new sample
+        strne   r8, [r1, #-4]           @ if change possible, store sample back
+        cmpne   r2, #0                  @ skip the weight update when the decorr value
+        beq     .L325                   @  or the original sample is zero
+        teq     ip, r2                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs match:  weight_A += delta
+
+.L325:  rsbs    ip, r10, r3, asl #1     @ do same thing for right channel
+        mov     r10, r3                 @ previous right becomes 2nd previous right
+        ldr     r2, [r1], #4            @ get right sample & update pointer
+        mla     r3, ip, r0, r11         @ decorr value * weight_B + 512
+        add     r3, r2, r3, asr #10     @ r3 = new right sample
+        strne   r3, [r1, #-4]           @ store back only if decorr value was non-zero
+        cmpne   r2, #0
+        beq     .L329
+        teq     ip, r2
+        submi   r0, r0, r6              @ signs differ: weight_B -= delta
+        addpl   r0, r0, r6              @ signs match:  weight_B += delta
+
+.L329:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_17_loop
+        b       store_1718              @ common exit for terms 17 & 18
+
+/*
+ ******************************************************************************
+ * Loop to handle term = 18 condition
+ *
+ * r0 = dpp->weight_B           r8 = previous left sample
+ * r1 = bptr                    r9 = (not used)
+ * r2 = current sample          r10 = second previous right sample
+ * r3 = previous right sample   r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = decorrelation value
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = second previous left sample
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_18_loop:
+        sub     ip, r8, lr              @ decorr value =
+        mov     lr, r8                  @  ((3 * prev) - 2nd prev) >> 1
+        adds    ip, r8, ip, asr #1      @  computed as prev + ((prev - 2nd prev) >> 1)
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
+        add     r8, r2, r8, asr #10     @  shift, and add to new sample
+        strne   r8, [r1, #-4]           @ if change possible, store sample back
+        cmpne   r2, #0                  @ skip the weight update when the decorr value
+        beq     .L337                   @  or the original sample is zero
+        teq     ip, r2                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs match:  weight_A += delta
+
+.L337:  sub     ip, r3, r10             @ do same thing for right channel
+        mov     r10, r3                 @ previous right becomes 2nd previous right
+        adds    ip, r3, ip, asr #1      @ Z set if the decorr value is 0
+        ldr     r2, [r1], #4            @ get right sample & update pointer
+        mla     r3, ip, r0, r11         @ decorr value * weight_B + 512
+        add     r3, r2, r3, asr #10     @ r3 = new right sample
+        strne   r3, [r1, #-4]           @ store back only if decorr value was non-zero
+        cmpne   r2, #0
+        beq     .L341
+        teq     ip, r2
+        submi   r0, r0, r6              @ signs differ: weight_B -= delta
+        addpl   r0, r0, r6              @ signs match:  weight_B += delta
+
+.L341:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_18_loop
+
+/* common exit for terms 17 & 18 (term 18 falls through, term 17 branches here) */
+
+store_1718:
+        str     r3, [r5, #40]           @ store sample history into struct
+        str     r8, [r5, #8]            @  previous left   -> dpp + 8,  previous right   -> dpp + 40
+        str     r10, [r5, #44]          @  2nd prev right  -> dpp + 44
+        str     lr, [r5, #12]           @  2nd prev left   -> dpp + 12
+        b       common_exit             @ and return
+
+/*
+ ******************************************************************************
+ * Loop to handle term = 2 condition
+ * (note that this case can be handled by the default term handler (1-8), but
+ * this special case is faster because it doesn't have to read memory twice)
+ *
+ * r0 = dpp->weight_B           r8 = previous left sample
+ * r1 = bptr                    r9 = (not used)
+ * r2 = current sample          r10 = second previous right sample
+ * r3 = previous right sample   r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = decorrelation value
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = second previous left sample
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_2_loop:
+        movs    ip, lr                  @ get decorrelation value (2nd prev left) & test
+        mov     lr, r8                  @ previous becomes 2nd previous
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mla     r8, ip, r4, r11         @ mult decorr value by weight, round,
+        add     r8, r2, r8, asr #10     @  shift, and add to new sample
+        strne   r8, [r1, #-4]           @ if change possible, store sample back
+        cmpne   r2, #0                  @ skip the weight update when the decorr value
+        beq     .L225                   @  or the original sample is zero
+        teq     ip, r2                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs match:  weight_A += delta
+
+.L225:  movs    ip, r10                 @ do same thing for right channel
+        mov     r10, r3                 @ previous right becomes 2nd previous right
+        ldr     r2, [r1], #4            @ get right sample & update pointer
+        mla     r3, ip, r0, r11         @ decorr value * weight_B + 512
+        add     r3, r2, r3, asr #10     @ r3 = new right sample
+        strne   r3, [r1, #-4]           @ store back only if decorr value was non-zero
+        cmpne   r2, #0
+        beq     .L229
+        teq     ip, r2
+        submi   r0, r0, r6              @ signs differ: weight_B -= delta
+        addpl   r0, r0, r6              @ signs match:  weight_B += delta
+
+.L229:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_2_loop
+        b       default_term_exit       @ this exit updates all dpp->samples
+
+/*
+ ******************************************************************************
+ * Loop to handle default term condition
+ *
+ * r0 = dpp->weight_B           r8 = result accumulator
+ * r1 = bptr                    r9 = (not used)
+ * r2 = dpp->term               r10 =
+ * r3 = decorrelation value     r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = current sample
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr =
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_default_loop:
+        ldr     ip, [r1]                @ get original sample
+        ldr     r3, [r1, -r2, asl #3]   @ get decorrelation value based on term (term pairs back)
+        mla     r8, r3, r4, r11         @ mult decorr value by weight, round,
+        add     r8, ip, r8, asr #10     @  shift and add to new sample
+        str     r8, [r1], #4            @ store updated sample (unconditionally) & advance
+        cmp     r3, #0                  @ skip the weight update when the decorr value
+        cmpne   ip, #0                  @  or the original sample is zero
+        beq     .L350
+        teq     ip, r3                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs match:  weight_A += delta
+
+.L350:  ldr     ip, [r1]                @ do the same thing for right channel
+        ldr     r3, [r1, -r2, asl #3]   @ right sample from term pairs back
+        mla     r8, r3, r0, r11         @ decorr value * weight_B + 512
+        add     r8, ip, r8, asr #10     @ r8 = new right sample
+        str     r8, [r1], #4            @ store it & advance to the next pair
+        cmp     r3, #0
+        cmpne   ip, #0
+        beq     .L354
+        teq     ip, r3
+        submi   r0, r0, r6              @ signs differ: weight_B -= delta
+        addpl   r0, r0, r6              @ signs match:  weight_B += delta
+
+.L354:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_default_loop
+
+/*
+ * This exit is used by terms 1-8 to store the previous 8 samples into the decorr
+ * structure (even if they are not all used for the given term)
+ */
+
+default_term_exit:
+        ldrsh   r3, [r5, #0]            @ r3 = dpp->term (reloaded; r2 may hold a sample here)
+        sub     ip, r3, #1              @ ip = term - 1 = slot that receives the newest pair
+        mov     lr, #7                  @ lr counts 7 down to 0, i.e. 8 iterations
+
+.L358:  and     r3, ip, #7              @ wrap slot index into 0..7
+        add     r3, r5, r3, asl #2      @ r3 = dpp + 4 * slot
+        ldr     r2, [r1, #-4]           @ right sample of the pair just before bptr
+        str     r2, [r3, #40]           @  -> word at dpp + 40 + 4 * slot
+        ldr     r2, [r1, #-8]!          @ step bptr back one pair and read its left sample
+        str     r2, [r3, #8]            @  -> word at dpp + 8 + 4 * slot
+        sub     ip, ip, #1              @ next older pair goes into the preceding slot
+        sub     lr, lr, #1
+        cmn     lr, #1                  @ done once lr reaches -1
+        bne     .L358
+        b       common_exit
+
+/*
+ ******************************************************************************
+ * Loop to handle term = -1 condition
+ *
+ * r0 = dpp->weight_B           r8 =
+ * r1 = bptr                    r9 = (not used)
+ * r2 = intermediate result     r10 = -1024 (for clipping)
+ * r3 = previous right sample   r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = current sample
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = updated left sample
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_minus_1:
+        ldr     r3, [r1, #-4]           @ r3 = previous right sample
+
+term_minus_1_loop:
+        ldr     ip, [r1]                @ for left channel the decorrelation value
+        mla     r2, r3, r4, r11         @  is the previous right sample (in r3)
+        add     lr, ip, r2, asr #10     @ lr = updated left sample
+        str     lr, [r1], #8            @ store it; bptr now points at the next pair
+        cmp     r3, #0                  @ skip the weight update when the decorr value
+        cmpne   ip, #0                  @  or the original sample is zero
+        beq     .L361
+        teq     ip, r3                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs match:  weight_A += delta
+        cmp     r4, #1024               @ then clip weight to +/-1024
+        movgt   r4, #1024
+        cmp     r4, r10
+        movlt   r4, r10
+
+.L361:  ldr     r2, [r1, #-4]           @ for right channel the decorrelation value
+        mla     r3, lr, r0, r11         @  is the just updated left sample (in lr)
+        add     r3, r2, r3, asr #10     @ r3 = updated right sample (decorr value next pass)
+        str     r3, [r1, #-4]           @ store it back into the current pair
+        cmp     lr, #0                  @ skip the weight update when the decorr value
+        cmpne   r2, #0                  @  or the original sample is zero
+        beq     .L369
+        teq     r2, lr
+        submi   r0, r0, r6              @ signs differ: weight_B -= delta
+        addpl   r0, r0, r6              @ signs match:  weight_B += delta
+        cmp     r0, #1024               @ then clip weight to +/-1024
+        movgt   r0, #1024
+        cmp     r0, r10
+        movlt   r0, r10
+
+.L369:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_minus_1_loop
+
+        str     r3, [r5, #8]            @ else store right sample (at dpp + 8) and exit
+        b       common_exit
+
+/*
+ ******************************************************************************
+ * Loop to handle term = -2 condition
+ * (note that the channels are processed in the reverse order here)
+ *
+ * r0 = dpp->weight_B           r8 =
+ * r1 = bptr                    r9 = 
+ * r2 = intermediate result     r10 = -1024 (for clipping)
+ * r3 = previous left sample    r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = current sample
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = updated right sample
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_minus_2:
+        ldr     r3, [r1, #-8]           @ r3 = word two below bptr (previous left sample)
+
+term_minus_2_loop:
+        ldr     ip, [r1, #4]            @ for right channel the decorrelation value
+        mla     r2, r3, r0, r11         @  is the previous left sample (in r3)
+        add     lr, ip, r2, asr #10     @ lr = updated right sample
+        str     lr, [r1, #4]
+        cmp     r3, #0                  @ no weight update when the decorrelation
+        cmpne   ip, #0                  @  value or the original sample is zero
+        beq     .L380
+        teq     ip, r3                  @ update weight based on signs
+        submi   r0, r0, r6              @ signs differ: weight_B -= delta
+        addpl   r0, r0, r6              @ signs agree:  weight_B += delta
+        cmp     r0, #1024               @ then clip weight to +/-1024
+        movgt   r0, #1024
+        cmp     r0, r10
+        movlt   r0, r10
+
+.L380:  ldr     r2, [r1, #0]            @ for left channel the decorrelation value
+        mla     r3, lr, r4, r11         @  is the just updated right sample (in lr)
+        add     r3, r2, r3, asr #10     @ r3 = updated left sample, reused on the next pass
+        str     r3, [r1], #8            @ store it; bptr now points at the next pair
+        cmp     lr, #0                  @ same zero test as for the right channel
+        cmpne   r2, #0
+        beq     .L388
+        teq     r2, lr                  @ update weight_A based on signs
+        submi   r4, r4, r6
+        addpl   r4, r4, r6
+        cmp     r4, #1024               @ then clip weight to +/-1024
+        movgt   r4, #1024
+        cmp     r4, r10
+        movlt   r4, r10
+
+.L388:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_minus_2_loop
+
+        str     r3, [r5, #40]           @ else store left channel and exit
+        b       common_exit
+
+/*
+ ******************************************************************************
+ * Loop to handle term = -3 condition
+ *
+ * r0 = dpp->weight_B           r8 = previous left sample
+ * r1 = bptr                    r9 = 
+ * r2 = current left sample     r10 = -1024 (for clipping)
+ * r3 = previous right sample   r11 = 512 (for rounding)
+ * r4 = dpp->weight_A           ip = intermediate result
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr =
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_minus_3:
+        ldr     r3, [r1, #-4]           @ load previous samples
+        ldr     r8, [r1, #-8]           @  (r3 = previous right, r8 = previous left)
+
+term_minus_3_loop:
+        ldr     ip, [r1]                @ ip = original left sample
+        mla     r2, r3, r4, r11         @ its decorrelation value is the previous right (r3)
+        add     r2, ip, r2, asr #10     @ r2 = updated left sample
+        str     r2, [r1], #4            @ store it and step bptr to the right sample
+        cmp     r3, #0                  @ no weight update when the decorrelation
+        cmpne   ip, #0                  @  value or the original sample is zero
+        beq     .L399
+        teq     ip, r3                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs agree:  weight_A += delta
+        cmp     r4, #1024               @ then clip weight to +/-1024
+        movgt   r4, #1024
+        cmp     r4, r10
+        movlt   r4, r10
+
+.L399:  movs    ip, r8                  @ ip = previous left we use now
+        mov     r8, r2                  @ r8 = current left we use next time
+        ldr     r2, [r1], #4            @ r2 = original right sample; bptr -> next pair
+        mla     r3, ip, r0, r11         @ its decorrelation value is the previous left (ip)
+        add     r3, r2, r3, asr #10     @ r3 = updated right sample
+        strne   r3, [r1, #-4]           @ Z still comes from movs: no store when ip == 0
+        cmpne   r2, #0                  @  and no weight update if either value is zero
+        beq     .L407
+        teq     ip, r2                  @ update weight_B based on signs
+        submi   r0, r0, r6
+        addpl   r0, r0, r6
+        cmp     r0, #1024               @ then clip weight to +/-1024
+        movgt   r0, #1024
+        cmp     r0, r10
+        movlt   r0, r10
+
+.L407:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_minus_3_loop
+
+        str     r3, [r5, #8]            @ else store previous samples & exit
+        str     r8, [r5, #40]           @  (execution falls through into common_exit)
+
+/*
+ * Before finally exiting we must store weights back for next time
+ */
+
+common_exit:
+        strh    r4, [r5, #4]            @ halfword at dpp + 4 (weight_A) = r4
+        strh    r0, [r5, #6]            @ halfword at dpp + 6 (weight_B) = r0
+        ldmfd   sp!, {r4 - r8, r10, r11, pc}    @ pop saved registers; loading pc returns
+
diff --git a/src/engine/external/wavpack/arml.S b/src/engine/external/wavpack/arml.S
index 39de5383..7da00513 100644
--- a/src/engine/external/wavpack/arml.S
+++ b/src/engine/external/wavpack/arml.S
@@ -1,491 +1,491 @@
-////////////////////////////////////////////////////////////////////////////

-//                           **** WAVPACK ****                            //

-//                  Hybrid Lossless Wavefile Compressor                   //

-//              Copyright (c) 1998 - 2006 Conifer Software.               //

-//                          All Rights Reserved.                          //

-//      Distributed under the BSD Software License (see license.txt)      //

-////////////////////////////////////////////////////////////////////////////

-

-/* This is an assembly optimized version of the following WavPack function:

- *

- * void decorr_stereo_pass_cont (struct decorr_pass *dpp,

- *                               long *buffer, long sample_count);

- *

- * It performs a single pass of stereo decorrelation on the provided buffer.

- * Note that this version of the function requires that the 8 previous stereo

- * samples are visible and correct. In other words, it ignores the "samples_*"

- * fields in the decorr_pass structure and gets the history data directly

- * from the buffer. It does, however, return the appropriate history samples

- * to the decorr_pass structure before returning.

- *

- * This is written to work on a ARM7TDMI processor. This version uses the

- * 64-bit multiply-accumulate instruction and so can be used with all

- * WavPack files. However, for optimum performance with 16-bit WavPack

- * files, there is a faster version that only uses the 32-bit MLA

- * instruction.

- */

-

-        .text

-        .align

-        .global         decorr_stereo_pass_cont_arml

-

-/*

- * on entry:

- *

- * r0 = struct decorr_pass *dpp

- * r1 = long *buffer

- * r2 = long sample_count

- */

-

-decorr_stereo_pass_cont_arml:

-

-        stmfd   sp!, {r4 - r8, r10, r11, lr}

-        mov     r5, r0                  @ r5 = dpp

-        mov     r11, #512               @ r11 = 512 for rounding

-        ldrsh   r6, [r0, #2]            @ r6 = dpp->delta

-        ldrsh   r4, [r0, #4]            @ r4 = dpp->weight_A

-        ldrsh   r0, [r0, #6]            @ r0 = dpp->weight_B

-        cmp     r2, #0                  @ exit if no samples to process

-        beq     common_exit

-

-        mov     r0, r0, asl #18         @ for 64-bit math we use weights << 18

-        mov     r4, r4, asl #18

-        mov     r6, r6, asl #18

-        add     r7, r1, r2, asl #3      @ r7 = buffer ending position

-        ldrsh   r2, [r5, #0]            @ r2 = dpp->term

-        cmp     r2, #0

-        blt     minus_term

-

-        ldr     lr, [r1, #-16]          @ load 2 sample history from buffer

-        ldr     r10, [r1, #-12]         @  for terms 2, 17, and 18

-        ldr     r8, [r1, #-8]

-        ldr     r3, [r1, #-4]

-

-        cmp     r2, #18

-        beq     term_18_loop

-        mov     lr, lr, asl #4

-        mov     r10, r10, asl #4

-        cmp     r2, #2

-        beq     term_2_loop

-        cmp     r2, #17

-        beq     term_17_loop

-        b       term_default_loop

-

-minus_term:

-        mov     r10, #(1024 << 18)      @ r10 = -1024 << 18 for weight clipping

-        rsb     r10, r10, #0            @  (only used for negative terms)

-        cmn     r2, #1

-        beq     term_minus_1

-        cmn     r2, #2

-        beq     term_minus_2

-        cmn     r2, #3

-        beq     term_minus_3

-        b       common_exit

-

-/*

- ******************************************************************************

- * Loop to handle term = 17 condition

- *

- * r0 = dpp->weight_B           r8 = previous left sample

- * r1 = bptr                    r9 = 

- * r2 = current sample          r10 = second previous left sample << 4

- * r3 = previous right sample   r11 = lo accumulator (for rounding)

- * r4 = dpp->weight_A           ip = current decorrelation value

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr = second previous right sample << 4

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_17_loop:

-        rsbs    ip, lr, r8, asl #5      @ decorr value = (2 * prev) - 2nd prev

-        mov     lr, r8, asl #4          @ previous becomes 2nd previous

-        ldr     r2, [r1], #4            @ get sample & update pointer

-        mov     r11, #0x80000000

-        mov     r8, r2

-        smlalne r11, r8, r4, ip

-        strne   r8, [r1, #-4]           @ if change possible, store sample back

-        cmpne   r2, #0

-        beq     .L325

-        teq     ip, r2                  @ update weight based on signs

-        submi   r4, r4, r6

-        addpl   r4, r4, r6

-

-.L325:  rsbs    ip, r10, r3, asl #5     @ do same thing for right channel

-        mov     r10, r3, asl #4

-        ldr     r2, [r1], #4

-        mov     r11, #0x80000000

-        mov     r3, r2

-        smlalne r11, r3, r0, ip

-        strne   r3, [r1, #-4]

-        cmpne   r2, #0

-        beq     .L329

-        teq     ip, r2

-        submi   r0, r0, r6

-        addpl   r0, r0, r6

-

-.L329:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_17_loop

-        mov     lr, lr, asr #4

-        mov     r10, r10, asr #4

-        b       store_1718              @ common exit for terms 17 & 18

-

-/*

- ******************************************************************************

- * Loop to handle term = 18 condition

- *

- * r0 = dpp->weight_B           r8 = previous left sample

- * r1 = bptr                    r9 = 

- * r2 = current sample          r10 = second previous left sample

- * r3 = previous right sample   r11 = lo accumulator (for rounding)

- * r4 = dpp->weight_A           ip = decorrelation value

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr = second previous right sample

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_18_loop:

-        rsb     ip, lr, r8              @ decorr value =

-        mov     lr, r8                  @  ((3 * prev) - 2nd prev) >> 1

-        add     ip, lr, ip, asr #1

-        movs    ip, ip, asl #4

-        ldr     r2, [r1], #4            @ get sample & update pointer

-        mov     r11, #0x80000000

-        mov     r8, r2

-        smlalne r11, r8, r4, ip

-        strne   r8, [r1, #-4]           @ if change possible, store sample back

-        cmpne   r2, #0

-        beq     .L337

-        teq     ip, r2                  @ update weight based on signs

-        submi   r4, r4, r6

-        addpl   r4, r4, r6

-

-.L337:  rsb     ip, r10, r3             @ do same thing for right channel

-        mov     r10, r3

-        add     ip, r10, ip, asr #1

-        movs    ip, ip, asl #4

-        ldr     r2, [r1], #4

-        mov     r11, #0x80000000

-        mov     r3, r2

-        smlalne r11, r3, r0, ip

-        strne   r3, [r1, #-4]

-        cmpne   r2, #0

-        beq     .L341

-        teq     ip, r2

-        submi   r0, r0, r6

-        addpl   r0, r0, r6

-

-.L341:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_18_loop

-

-/* common exit for terms 17 & 18 */

-

-store_1718:

-        str     r3, [r5, #40]           @ store sample history into struct

-        str     r8, [r5, #8]

-        str     r10, [r5, #44]

-        str     lr, [r5, #12]

-        b       common_exit             @ and return

-

-/*

- ******************************************************************************

- * Loop to handle term = 2 condition

- * (note that this case can be handled by the default term handler (1-8), but

- * this special case is faster because it doesn't have to read memory twice)

- *

- * r0 = dpp->weight_B           r8 = previous left sample

- * r1 = bptr                    r9 = 

- * r2 = current sample          r10 = second previous left sample << 4

- * r3 = previous right sample   r11 = lo accumulator (for rounding)

- * r4 = dpp->weight_A           ip = decorrelation value

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr = second previous right sample << 4

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_2_loop:

-        movs    ip, lr                  @ get decorrelation value & test

-        ldr     r2, [r1], #4            @ get sample & update pointer

-        mov     lr, r8, asl #4          @ previous becomes 2nd previous

-        mov     r11, #0x80000000

-        mov     r8, r2

-        smlalne r11, r8, r4, ip

-        strne   r8, [r1, #-4]           @ if change possible, store sample back

-        cmpne   r2, #0

-        beq     .L225

-        teq     ip, r2                  @ update weight based on signs

-        submi   r4, r4, r6

-        addpl   r4, r4, r6

-

-.L225:  movs    ip, r10                 @ do same thing for right channel

-        ldr     r2, [r1], #4

-        mov     r10, r3, asl #4

-        mov     r11, #0x80000000

-        mov     r3, r2

-        smlalne r11, r3, r0, ip

-        strne   r3, [r1, #-4]

-        cmpne   r2, #0

-        beq     .L229

-        teq     ip, r2

-        submi   r0, r0, r6

-        addpl   r0, r0, r6

-

-.L229:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_2_loop

-

-        b       default_term_exit       @ this exit updates all dpp->samples

-

-/*

- ******************************************************************************

- * Loop to handle default term condition

- *

- * r0 = dpp->weight_B           r8 = result accumulator

- * r1 = bptr                    r9 = 

- * r2 = dpp->term               r10 =

- * r3 = decorrelation value     r11 = lo accumulator (for rounding)

- * r4 = dpp->weight_A           ip = current sample

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr =

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_default_loop:

-        ldr     r3, [r1, -r2, asl #3]   @ get decorrelation value based on term

-        ldr     ip, [r1], #4            @ get original sample and bump ptr

-        movs    r3, r3, asl #4

-        mov     r11, #0x80000000

-        mov     r8, ip

-        smlalne r11, r8, r4, r3

-        strne   r8, [r1, #-4]           @ if possibly changed, store updated sample

-        cmpne   ip, #0

-        beq     .L350

-        teq     ip, r3                  @ update weight based on signs

-        submi   r4, r4, r6

-        addpl   r4, r4, r6

-

-.L350:  ldr     r3, [r1, -r2, asl #3]   @ do the same thing for right channel

-        ldr     ip, [r1], #4

-        movs    r3, r3, asl #4

-        mov     r11, #0x80000000

-        mov     r8, ip

-        smlalne r11, r8, r0, r3

-        strne   r8, [r1, #-4]

-        cmpne   ip, #0

-        beq     .L354

-        teq     ip, r3

-        submi   r0, r0, r6

-        addpl   r0, r0, r6

-

-.L354:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_default_loop

-

-/*

- * This exit is used by terms 1-8 to store the previous 8 samples into the decorr

- * structure (even if they are not all used for the given term)

- */

-

-default_term_exit:

-        ldrsh   r3, [r5, #0]

-        sub     ip, r3, #1

-        mov     lr, #7

-

-.L358:  and     r3, ip, #7

-        add     r3, r5, r3, asl #2

-        ldr     r2, [r1, #-4]

-        str     r2, [r3, #40]

-        ldr     r2, [r1, #-8]!

-        str     r2, [r3, #8]

-        sub     ip, ip, #1

-        sub     lr, lr, #1

-        cmn     lr, #1

-        bne     .L358

-        b       common_exit

-

-/*

- ******************************************************************************

- * Loop to handle term = -1 condition

- *

- * r0 = dpp->weight_B           r8 =

- * r1 = bptr                    r9 = 

- * r2 = intermediate result     r10 = -1024 (for clipping)

- * r3 = previous right sample   r11 = lo accumulator (for rounding)

- * r4 = dpp->weight_A           ip = current sample

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr = updated left sample

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_minus_1:

-        ldr     r3, [r1, #-4]

-

-term_minus_1_loop:

-        ldr     ip, [r1], #8            @ for left channel the decorrelation value

-        movs    r3, r3, asl #4          @  is the previous right sample (in r3)

-        mov     r11, #0x80000000

-        mov     lr, ip

-        smlalne r11, lr, r4, r3

-        strne   lr, [r1, #-8]

-        cmpne   ip, #0

-        beq     .L361

-        teq     ip, r3                  @ update weight based on signs

-        submi   r4, r4, r6

-        addpl   r4, r4, r6

-        cmp     r4, #(1024 << 18)

-        movgt   r4, #(1024 << 18)

-        cmp     r4, r10

-        movlt   r4, r10

-

-.L361:  ldr     r2, [r1, #-4]           @ for right channel the decorrelation value

-        movs    lr, lr, asl #4

-        mov     r11, #0x80000000

-        mov     r3, r2

-        smlalne r11, r3, r0, lr

-        strne   r3, [r1, #-4]

-        cmpne   r2, #0

-        beq     .L369

-        teq     r2, lr

-        submi   r0, r0, r6

-        addpl   r0, r0, r6

-        cmp     r0, #(1024 << 18)               @ then clip weight to +/-1024

-        movgt   r0, #(1024 << 18)

-        cmp     r0, r10

-        movlt   r0, r10

-

-.L369:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_minus_1_loop

-

-        str     r3, [r5, #8]            @ else store right sample and exit

-        b       common_exit

-

-/*

- ******************************************************************************

- * Loop to handle term = -2 condition

- * (note that the channels are processed in the reverse order here)

- *

- * r0 = dpp->weight_B           r8 =

- * r1 = bptr                    r9 = 

- * r2 = intermediate result     r10 = -1024 (for clipping)

- * r3 = previous left sample    r11 = lo accumulator (for rounding)

- * r4 = dpp->weight_A           ip = current sample

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr = updated right sample

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_minus_2:

-        ldr     r3, [r1, #-8]

-

-term_minus_2_loop:

-        ldr     ip, [r1, #4]            @ for right channel the decorrelation value

-        movs    r3, r3, asl #4          @  is the previous left sample (in r3)

-        mov     r11, #0x80000000

-        mov     lr, ip

-        smlalne r11, lr, r0, r3

-        strne   lr, [r1, #4]

-        cmpne   ip, #0

-        beq     .L380

-        teq     ip, r3                  @ update weight based on signs

-        submi   r0, r0, r6

-        addpl   r0, r0, r6

-        cmp     r0, #(1024 << 18)               @ then clip weight to +/-1024

-        movgt   r0, #(1024 << 18)

-        cmp     r0, r10

-        movlt   r0, r10

-

-.L380:  ldr     r2, [r1], #8            @ for left channel the decorrelation value

-        movs    lr, lr, asl #4

-        mov     r11, #0x80000000

-        mov     r3, r2

-        smlalne r11, r3, r4, lr

-        strne   r3, [r1, #-8]

-        cmpne   r2, #0

-        beq     .L388

-        teq     r2, lr

-        submi   r4, r4, r6

-        addpl   r4, r4, r6

-        cmp     r4, #(1024 << 18)

-        movgt   r4, #(1024 << 18)

-        cmp     r4, r10

-        movlt   r4, r10

-

-.L388:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_minus_2_loop

-

-        str     r3, [r5, #40]           @ else store left channel and exit

-        b       common_exit

-

-/*

- ******************************************************************************

- * Loop to handle term = -3 condition

- *

- * r0 = dpp->weight_B           r8 = previous left sample

- * r1 = bptr                    r9 = 

- * r2 = current left sample     r10 = -1024 (for clipping)

- * r3 = previous right sample   r11 = lo accumulator (for rounding)

- * r4 = dpp->weight_A           ip = intermediate result

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr =

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_minus_3:

-        ldr     r3, [r1, #-4]           @ load previous samples

-        ldr     r8, [r1, #-8]

-

-term_minus_3_loop:

-        ldr     ip, [r1], #4

-        movs    r3, r3, asl #4

-        mov     r11, #0x80000000

-        mov     r2, ip

-        smlalne r11, r2, r4, r3

-        strne   r2, [r1, #-4]

-        cmpne   ip, #0

-        beq     .L399

-        teq     ip, r3                  @ update weight based on signs

-        submi   r4, r4, r6

-        addpl   r4, r4, r6

-        cmp     r4, #(1024 << 18)       @ then clip weight to +/-1024

-        movgt   r4, #(1024 << 18)

-        cmp     r4, r10

-        movlt   r4, r10

-

-.L399:  movs    ip, r8, asl #4          @ ip = previous left we use now

-        mov     r8, r2                  @ r8 = current left we use next time

-        ldr     r2, [r1], #4

-        mov     r11, #0x80000000

-        mov     r3, r2

-        smlalne r11, r3, r0, ip

-        strne   r3, [r1, #-4]

-        cmpne   r2, #0

-        beq     .L407

-        teq     ip, r2

-        submi   r0, r0, r6

-        addpl   r0, r0, r6

-        cmp     r0, #(1024 << 18)

-        movgt   r0, #(1024 << 18)

-        cmp     r0, r10

-        movlt   r0, r10

-

-.L407:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_minus_3_loop

-

-        str     r3, [r5, #8]            @ else store previous samples & exit

-        str     r8, [r5, #40]

-

-/*

- * Before finally exiting we must store weights back for next time

- */

-

-common_exit:

-        mov     r0, r0, asr #18         @ restore weights to real magnitude

-        mov     r4, r4, asr #18

-        strh    r4, [r5, #4]

-        strh    r0, [r5, #6]

-        ldmfd   sp!, {r4 - r8, r10, r11, pc}

-

+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2006 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+/* This is an assembly optimized version of the following WavPack function:
+ *
+ * void decorr_stereo_pass_cont (struct decorr_pass *dpp,
+ *                               long *buffer, long sample_count);
+ *
+ * It performs a single pass of stereo decorrelation on the provided buffer.
+ * Note that this version of the function requires that the 8 previous stereo
+ * samples are visible and correct. In other words, it ignores the "samples_*"
+ * fields in the decorr_pass structure and gets the history data directly
+ * from the buffer. It does, however, return the appropriate history samples
+ * to the decorr_pass structure before returning.
+ *
+ * This is written to work on a ARM7TDMI processor. This version uses the
+ * 64-bit multiply-accumulate instruction and so can be used with all
+ * WavPack files. However, for optimum performance with 16-bit WavPack
+ * files, there is a faster version that only uses the 32-bit MLA
+ * instruction.
+ */
+
+        .text
+        .align
+        .global         decorr_stereo_pass_cont_arml
+
+/*
+ * on entry:
+ *
+ * r0 = struct decorr_pass *dpp
+ * r1 = long *buffer
+ * r2 = long sample_count
+ */
+
+decorr_stereo_pass_cont_arml:
+
+        stmfd   sp!, {r4 - r8, r10, r11, lr}    @ save working registers and return address
+        mov     r5, r0                  @ r5 = dpp
+        mov     r11, #512               @ (not used: every loop reloads r11 = 0x80000000)
+        ldrsh   r6, [r0, #2]            @ r6 = dpp->delta
+        ldrsh   r4, [r0, #4]            @ r4 = dpp->weight_A
+        ldrsh   r0, [r0, #6]            @ r0 = dpp->weight_B
+        cmp     r2, #0                  @ no samples: return at once and leave dpp as is;
+        ldmeqfd sp!, {r4 - r8, r10, r11, pc}    @  common_exit would >> 18 the unscaled weights
+
+        mov     r0, r0, asl #18         @ for 64-bit math we use weights << 18
+        mov     r4, r4, asl #18         @  and decorrelation values << 4, so the hi word of
+        mov     r6, r6, asl #18         @  the product is (weight * value) >> 10
+        add     r7, r1, r2, asl #3      @ r7 = buffer ending position
+        ldrsh   r2, [r5, #0]            @ r2 = dpp->term
+        cmp     r2, #0
+        blt     minus_term              @ negative terms are dispatched separately
+
+        ldr     lr, [r1, #-16]          @ load 2 sample history from buffer
+        ldr     r10, [r1, #-12]         @  for terms 2, 17, and 18
+        ldr     r8, [r1, #-8]           @ lr / r8  = 2nd previous / previous left
+        ldr     r3, [r1, #-4]           @ r10 / r3 = 2nd previous / previous right
+
+        cmp     r2, #18
+        beq     term_18_loop            @ term 18 takes the history unshifted
+        mov     lr, lr, asl #4          @ terms 2 and 17 keep the 2nd previous
+        mov     r10, r10, asl #4        @  samples pre-scaled by 16
+        cmp     r2, #2
+        beq     term_2_loop
+        cmp     r2, #17
+        beq     term_17_loop
+        b       term_default_loop       @ every other non-negative term
+
+minus_term:
+        mov     r10, #(1024 << 18)      @ r10 = -1024 << 18 for weight clipping
+        rsb     r10, r10, #0            @  (only used for negative terms)
+        cmn     r2, #1                  @ r2 = dpp->term; Z is set when term == -1
+        beq     term_minus_1
+        cmn     r2, #2                  @ term == -2 ?
+        beq     term_minus_2
+        cmn     r2, #3                  @ term == -3 ?
+        beq     term_minus_3
+        b       common_exit             @ any other negative term: process nothing
+
+/*
+ ******************************************************************************
+ * Loop to handle term = 17 condition
+ *
+ * r0 = dpp->weight_B           r8 = previous left sample
+ * r1 = bptr                    r9 = 
+ * r2 = current sample          r10 = second previous right sample << 4
+ * r3 = previous right sample   r11 = lo accumulator (for rounding)
+ * r4 = dpp->weight_A           ip = current decorrelation value
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = second previous left sample << 4
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_17_loop:
+        rsbs    ip, lr, r8, asl #5      @ decorr value = (2 * prev) - 2nd prev
+        mov     lr, r8, asl #4          @ previous becomes 2nd previous
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mov     r11, #0x80000000        @ lo word preset to 1 << 31 rounds the hi word
+        mov     r8, r2                  @ hi word of the accumulator starts as the sample
+        smlalne r11, r8, r4, ip         @ r8:r11 += weight_A * decorr value (64-bit)
+        strne   r8, [r1, #-4]           @ if change possible, store sample back
+        cmpne   r2, #0                  @ Z still from rsbs: no update if either is zero
+        beq     .L325
+        teq     ip, r2                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs agree:  weight_A += delta
+
+.L325:  rsbs    ip, r10, r3, asl #5     @ do same thing for right channel
+        mov     r10, r3, asl #4         @ previous right becomes 2nd previous
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mov     r11, #0x80000000
+        mov     r3, r2                  @ hi word of the accumulator starts as the sample
+        smlalne r11, r3, r0, ip         @ r3:r11 += weight_B * decorr value (64-bit)
+        strne   r3, [r1, #-4]
+        cmpne   r2, #0
+        beq     .L329
+        teq     ip, r2                  @ update weight_B based on signs
+        submi   r0, r0, r6
+        addpl   r0, r0, r6
+
+.L329:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_17_loop
+        mov     lr, lr, asr #4          @ undo the << 4 before the history is stored
+        mov     r10, r10, asr #4
+        b       store_1718              @ common exit for terms 17 & 18
+
+/*
+ ******************************************************************************
+ * Loop to handle term = 18 condition
+ *
+ * r0 = dpp->weight_B           r8 = previous left sample
+ * r1 = bptr                    r9 = 
+ * r2 = current sample          r10 = second previous right sample
+ * r3 = previous right sample   r11 = lo accumulator (for rounding)
+ * r4 = dpp->weight_A           ip = decorrelation value
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = second previous left sample
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_18_loop:
+        rsb     ip, lr, r8              @ decorr value =
+        mov     lr, r8                  @  ((3 * prev) - 2nd prev) >> 1
+        add     ip, lr, ip, asr #1      @ computed as prev + ((prev - 2nd prev) >> 1)
+        movs    ip, ip, asl #4          @ scale by 16 for the 64-bit multiply; Z if zero
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mov     r11, #0x80000000        @ lo word preset to 1 << 31 rounds the hi word
+        mov     r8, r2                  @ hi word of the accumulator starts as the sample
+        smlalne r11, r8, r4, ip         @ r8:r11 += weight_A * decorr value (64-bit)
+        strne   r8, [r1, #-4]           @ if change possible, store sample back
+        cmpne   r2, #0                  @ Z still from movs: no update if either is zero
+        beq     .L337
+        teq     ip, r2                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs agree:  weight_A += delta
+
+.L337:  rsb     ip, r10, r3             @ do same thing for right channel
+        mov     r10, r3                 @ previous right becomes 2nd previous
+        add     ip, r10, ip, asr #1
+        movs    ip, ip, asl #4
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mov     r11, #0x80000000
+        mov     r3, r2                  @ hi word of the accumulator starts as the sample
+        smlalne r11, r3, r0, ip         @ r3:r11 += weight_B * decorr value (64-bit)
+        strne   r3, [r1, #-4]
+        cmpne   r2, #0
+        beq     .L341
+        teq     ip, r2                  @ update weight_B based on signs
+        submi   r0, r0, r6
+        addpl   r0, r0, r6
+
+.L341:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_18_loop
+
+/* common exit for terms 17 & 18 */
+
+store_1718:
+        str     r3, [r5, #40]           @ store sample history into struct
+        str     r8, [r5, #8]            @ (r3 / r8 = previous right / previous left)
+        str     r10, [r5, #44]          @ r10 = 2nd previous right
+        str     lr, [r5, #12]           @ lr = 2nd previous left
+        b       common_exit             @ and return
+
+/*
+ ******************************************************************************
+ * Loop to handle term = 2 condition
+ * (note that this case can be handled by the default term handler (1-8), but
+ * this special case is faster because it doesn't have to read memory twice)
+ *
+ * r0 = dpp->weight_B           r8 = previous left sample
+ * r1 = bptr                    r9 = 
+ * r2 = current sample          r10 = second previous right sample << 4
+ * r3 = previous right sample   r11 = lo accumulator (for rounding)
+ * r4 = dpp->weight_A           ip = decorrelation value
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = second previous left sample << 4
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_2_loop:
+        movs    ip, lr                  @ get decorrelation value & test
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mov     lr, r8, asl #4          @ previous becomes 2nd previous
+        mov     r11, #0x80000000        @ lo word preset to 1 << 31 rounds the hi word
+        mov     r8, r2                  @ hi word of the accumulator starts as the sample
+        smlalne r11, r8, r4, ip         @ r8:r11 += weight_A * decorr value (64-bit)
+        strne   r8, [r1, #-4]           @ if change possible, store sample back
+        cmpne   r2, #0                  @ Z still from movs: no update if either is zero
+        beq     .L225
+        teq     ip, r2                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs agree:  weight_A += delta
+
+.L225:  movs    ip, r10                 @ do same thing for right channel
+        ldr     r2, [r1], #4            @ get sample & update pointer
+        mov     r10, r3, asl #4         @ previous right becomes 2nd previous
+        mov     r11, #0x80000000
+        mov     r3, r2                  @ hi word of the accumulator starts as the sample
+        smlalne r11, r3, r0, ip         @ r3:r11 += weight_B * decorr value (64-bit)
+        strne   r3, [r1, #-4]
+        cmpne   r2, #0
+        beq     .L229
+        teq     ip, r2                  @ update weight_B based on signs
+        submi   r0, r0, r6
+        addpl   r0, r0, r6
+
+.L229:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_2_loop
+
+        b       default_term_exit       @ this exit updates all dpp->samples
+
+/*
+ ******************************************************************************
+ * Loop to handle default term condition
+ *
+ * r0 = dpp->weight_B           r8 = result accumulator
+ * r1 = bptr                    r9 = 
+ * r2 = dpp->term               r10 =
+ * r3 = decorrelation value     r11 = lo accumulator (for rounding)
+ * r4 = dpp->weight_A           ip = current sample
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr =
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_default_loop:
+        ldr     r3, [r1, -r2, asl #3]   @ get decorrelation value based on term
+        ldr     ip, [r1], #4            @ get original sample and bump ptr
+        movs    r3, r3, asl #4          @ scale decorr value, Z set if it is zero
+        mov     r11, #0x80000000        @ preload lo accumulator for rounding
+        mov     r8, ip                  @ hi accumulator starts as the sample
+        smlalne r11, r8, r4, r3         @ r11:r8 += weight_A * decorr value
+        strne   r8, [r1, #-4]           @ if possibly changed, store updated sample
+        cmpne   ip, #0                  @ Z stays set if decorr value was zero
+        beq     .L350                   @ either one zero: leave weight alone
+        teq     ip, r3                  @ update weight based on signs
+        submi   r4, r4, r6              @ signs differ: weight_A -= delta
+        addpl   r4, r4, r6              @ signs match:  weight_A += delta
+
+.L350:  ldr     r3, [r1, -r2, asl #3]   @ do the same thing for right channel
+        ldr     ip, [r1], #4
+        movs    r3, r3, asl #4
+        mov     r11, #0x80000000
+        mov     r8, ip
+        smlalne r11, r8, r0, r3         @ right channel uses weight_B (r0)
+        strne   r8, [r1, #-4]
+        cmpne   ip, #0
+        beq     .L354
+        teq     ip, r3
+        submi   r0, r0, r6
+        addpl   r0, r0, r6
+
+.L354:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_default_loop       @ (else fall into default_term_exit)
+
+/*
+ * This exit is used by terms 1-8 to store the previous 8 samples into the decorr
+ * structure (even if they are not all used for the given term)
+ */
+
+default_term_exit:
+        ldrsh   r3, [r5, #0]            @ halfword at dpp + 0 (presumably term)
+        sub     ip, r3, #1              @ ip = that value - 1, first slot to fill
+        mov     lr, #7                  @ lr counts 7 down to 0: 8 stereo samples
+
+.L358:  and     r3, ip, #7              @ wrap slot index into 0-7
+        add     r3, r5, r3, asl #2      @ r3 = dpp + index * 4
+        ldr     r2, [r1, #-4]           @ second (right) word of the pair below bptr
+        str     r2, [r3, #40]           @  goes into the array at dpp + 40
+        ldr     r2, [r1, #-8]!          @ first (left) word; also backs bptr up 8
+        str     r2, [r3, #8]            @  goes into the array at dpp + 8
+        sub     ip, ip, #1              @ next pair goes into the previous slot
+        sub     lr, lr, #1
+        cmn     lr, #1                  @ finished once the counter reaches -1
+        bne     .L358
+        b       common_exit
+
+/*
+ ******************************************************************************
+ * Loop to handle term = -1 condition
+ *
+ * r0 = dpp->weight_B           r8 =
+ * r1 = bptr                    r9 = 
+ * r2 = intermediate result     r10 = -1024 (for clipping)
+ * r3 = previous right sample   r11 = lo accumulator (for rounding)
+ * r4 = dpp->weight_A           ip = current sample
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = updated left sample
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_minus_1:
+        ldr     r3, [r1, #-4]           @ r3 = bptr [-1], the previous right sample
+
+term_minus_1_loop:
+        ldr     ip, [r1], #8            @ for left channel the decorrelation value
+        movs    r3, r3, asl #4          @  is the previous right sample (in r3)
+        mov     r11, #0x80000000        @ preload lo accumulator for rounding
+        mov     lr, ip                  @ hi accumulator starts as the sample
+        smlalne r11, lr, r4, r3         @ r11:lr += weight_A * decorr value
+        strne   lr, [r1, #-8]           @ bptr was already bumped a whole pair
+        cmpne   ip, #0                  @ Z stays set if decorr value was zero
+        beq     .L361                   @ either one zero: leave weight alone
+        teq     ip, r3                  @ update weight based on signs
+        submi   r4, r4, r6
+        addpl   r4, r4, r6
+        cmp     r4, #(1024 << 18)       @ then clip weight to the upper limit
+        movgt   r4, #(1024 << 18)
+        cmp     r4, r10                 @  and to the lower limit held in r10
+        movlt   r4, r10
+
+.L361:  ldr     r2, [r1, #-4]           @ for right channel the decorrelation value
+        movs    lr, lr, asl #4          @  is the just updated left sample (in lr)
+        mov     r11, #0x80000000
+        mov     r3, r2
+        smlalne r11, r3, r0, lr         @ r3 ends up as the updated right sample
+        strne   r3, [r1, #-4]
+        cmpne   r2, #0
+        beq     .L369
+        teq     r2, lr
+        submi   r0, r0, r6
+        addpl   r0, r0, r6
+        cmp     r0, #(1024 << 18)               @ then clip weight to +/-1024
+        movgt   r0, #(1024 << 18)
+        cmp     r0, r10
+        movlt   r0, r10
+
+.L369:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_minus_1_loop
+
+        str     r3, [r5, #8]            @ else store right sample and exit
+        b       common_exit
+
+/*
+ ******************************************************************************
+ * Loop to handle term = -2 condition
+ * (note that the channels are processed in the reverse order here)
+ *
+ * r0 = dpp->weight_B           r8 =
+ * r1 = bptr                    r9 = 
+ * r2 = intermediate result     r10 = -1024 (for clipping)
+ * r3 = previous left sample    r11 = lo accumulator (for rounding)
+ * r4 = dpp->weight_A           ip = current sample
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr = updated right sample
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_minus_2:
+        ldr     r3, [r1, #-8]           @ r3 = bptr [-2], the previous left sample
+
+term_minus_2_loop:
+        ldr     ip, [r1, #4]            @ for right channel the decorrelation value
+        movs    r3, r3, asl #4          @  is the previous left sample (in r3)
+        mov     r11, #0x80000000        @ preload lo accumulator for rounding
+        mov     lr, ip                  @ hi accumulator starts as the sample
+        smlalne r11, lr, r0, r3         @ r11:lr += weight_B * decorr value
+        strne   lr, [r1, #4]            @ bptr is not advanced until .L380
+        cmpne   ip, #0                  @ Z stays set if decorr value was zero
+        beq     .L380                   @ either one zero: leave weight alone
+        teq     ip, r3                  @ update weight based on signs
+        submi   r0, r0, r6
+        addpl   r0, r0, r6
+        cmp     r0, #(1024 << 18)               @ then clip weight to +/-1024
+        movgt   r0, #(1024 << 18)
+        cmp     r0, r10
+        movlt   r0, r10
+
+.L380:  ldr     r2, [r1], #8            @ for left channel the decorrelation value
+        movs    lr, lr, asl #4          @  is the just updated right sample (in lr)
+        mov     r11, #0x80000000
+        mov     r3, r2
+        smlalne r11, r3, r4, lr         @ r3 ends up as the updated left sample
+        strne   r3, [r1, #-8]
+        cmpne   r2, #0
+        beq     .L388
+        teq     r2, lr
+        submi   r4, r4, r6
+        addpl   r4, r4, r6
+        cmp     r4, #(1024 << 18)       @ then clip weight to the upper limit
+        movgt   r4, #(1024 << 18)
+        cmp     r4, r10                 @  and to the lower limit held in r10
+        movlt   r4, r10
+
+.L388:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_minus_2_loop
+
+        str     r3, [r5, #40]           @ else store left channel and exit
+        b       common_exit
+
+/*
+ ******************************************************************************
+ * Loop to handle term = -3 condition
+ *
+ * r0 = dpp->weight_B           r8 = previous left sample
+ * r1 = bptr                    r9 = 
+ * r2 = current left sample     r10 = -1024 (for clipping)
+ * r3 = previous right sample   r11 = lo accumulator (for rounding)
+ * r4 = dpp->weight_A           ip = intermediate result
+ * r5 = dpp                     sp =
+ * r6 = dpp->delta              lr =
+ * r7 = eptr                    pc =
+ *******************************************************************************
+ */
+
+term_minus_3:
+        ldr     r3, [r1, #-4]           @ load previous samples
+        ldr     r8, [r1, #-8]           @ (r3 = bptr [-1] right, r8 = bptr [-2] left)
+
+term_minus_3_loop:
+        ldr     ip, [r1], #4            @ left sample; its decorrelation value
+        movs    r3, r3, asl #4          @  is the previous right sample (in r3)
+        mov     r11, #0x80000000        @ preload lo accumulator for rounding
+        mov     r2, ip                  @ hi accumulator starts as the sample
+        smlalne r11, r2, r4, r3         @ r11:r2 += weight_A * decorr value
+        strne   r2, [r1, #-4]
+        cmpne   ip, #0                  @ Z stays set if decorr value was zero
+        beq     .L399                   @ either one zero: leave weight alone
+        teq     ip, r3                  @ update weight based on signs
+        submi   r4, r4, r6
+        addpl   r4, r4, r6
+        cmp     r4, #(1024 << 18)       @ then clip weight to +/-1024
+        movgt   r4, #(1024 << 18)
+        cmp     r4, r10
+        movlt   r4, r10
+
+.L399:  movs    ip, r8, asl #4          @ ip = previous left we use now
+        mov     r8, r2                  @ r8 = current left we use next time
+        ldr     r2, [r1], #4            @ right sample, decorrelated against ip
+        mov     r11, #0x80000000
+        mov     r3, r2
+        smlalne r11, r3, r0, ip         @ r3 ends up as the updated right sample
+        strne   r3, [r1, #-4]
+        cmpne   r2, #0
+        beq     .L407
+        teq     ip, r2
+        submi   r0, r0, r6
+        addpl   r0, r0, r6
+        cmp     r0, #(1024 << 18)       @ then clip weight to the upper limit
+        movgt   r0, #(1024 << 18)
+        cmp     r0, r10                 @  and to the lower limit held in r10
+        movlt   r0, r10
+
+.L407:  cmp     r7, r1                  @ loop back if more samples to do
+        bhi     term_minus_3_loop
+
+        str     r3, [r5, #8]            @ else store previous samples & exit
+        str     r8, [r5, #40]           @ (then fall through into common_exit)
+
+/*
+ * Before finally exiting we must store weights back for next time
+ */
+
+common_exit:
+        mov     r0, r0, asr #18         @ restore weights to real magnitude
+        mov     r4, r4, asr #18
+        strh    r4, [r5, #4]            @ weight_A goes back as a halfword
+        strh    r0, [r5, #6]            @ weight_B goes back as a halfword
+        ldmfd   sp!, {r4 - r8, r10, r11, pc}    @ pop registers; loading pc returns
+
diff --git a/src/engine/external/wavpack/bits.c b/src/engine/external/wavpack/bits.c
index dbfa0cae..b4f03385 100644
--- a/src/engine/external/wavpack/bits.c
+++ b/src/engine/external/wavpack/bits.c
@@ -1,140 +1,140 @@
-////////////////////////////////////////////////////////////////////////////

-//                           **** WAVPACK ****                            //

-//                  Hybrid Lossless Wavefile Compressor                   //

-//              Copyright (c) 1998 - 2006 Conifer Software.               //

-//                          All Rights Reserved.                          //

-//      Distributed under the BSD Software License (see license.txt)      //

-////////////////////////////////////////////////////////////////////////////

-

-// bits.c

-

-// This module provides utilities to support the BitStream structure which is

-// used to read and write all WavPack audio data streams. It also contains a

-// wrapper for the stream I/O functions and a set of functions dealing with

-// endian-ness, both for enhancing portability. Finally, a debug wrapper for

-// the malloc() system is provided.

-

-#include "wavpack.h"

-

-#include <string.h>

-#include <ctype.h>

-

-////////////////////////// Bitstream functions ////////////////////////////////

-

-// Open the specified BitStream and associate with the specified buffer.

-

-static void bs_read (Bitstream *bs);

-

-void bs_open_read (Bitstream *bs, uchar *buffer_start, uchar *buffer_end, read_stream file, uint32_t file_bytes)

-{

-    CLEAR (*bs);

-    bs->buf = buffer_start;

-    bs->end = buffer_end;

-

-    if (file) {

-        bs->ptr = bs->end - 1;

-        bs->file_bytes = file_bytes;

-        bs->file = file;

-    }

-    else

-        bs->ptr = bs->buf - 1;

-

-    bs->wrap = bs_read;

-}

-

-// This function is only called from the getbit() and getbits() macros when

-// the BitStream has been exhausted and more data is required. Sinve these

-// bistreams no longer access files, this function simple sets an error and

-// resets the buffer.

-

-static void bs_read (Bitstream *bs)

-{

-    if (bs->file && bs->file_bytes) {

-        uint32_t bytes_read, bytes_to_read = bs->end - bs->buf;

-

-        if (bytes_to_read > bs->file_bytes)

-            bytes_to_read = bs->file_bytes;

-

-        bytes_read = bs->file (bs->buf, bytes_to_read);

-

-        if (bytes_read) {

-            bs->end = bs->buf + bytes_read;

-            bs->file_bytes -= bytes_read;

-        }

-        else {

-            memset (bs->buf, -1, bs->end - bs->buf);

-            bs->error = 1;

-        }

-    }

-    else

-        bs->error = 1;

-

-    if (bs->error)

-        memset (bs->buf, -1, bs->end - bs->buf);

-

-    bs->ptr = bs->buf;

-}

-

-/////////////////////// Endian Correction Routines ////////////////////////////

-

-void little_endian_to_native (void *data, char *format)

-{

-    uchar *cp = (uchar *) data;

-    int32_t temp;

-

-    while (*format) {

-        switch (*format) {

-            case 'L':

-                temp = cp [0] + ((int32_t) cp [1] << 8) + ((int32_t) cp [2] << 16) + ((int32_t) cp [3] << 24);

-                * (int32_t *) cp = temp;

-                cp += 4;

-                break;

-

-            case 'S':

-                temp = cp [0] + (cp [1] << 8);

-                * (short *) cp = (short) temp;

-                cp += 2;

-                break;

-

-            default:

-                if (isdigit (*format))

-                    cp += *format - '0';

-

-                break;

-        }

-

-        format++;

-    }

-}

-

-void native_to_little_endian (void *data, char *format)

-{

-    uchar *cp = (uchar *) data;

-    int32_t temp;

-

-    while (*format) {

-        switch (*format) {

-            case 'L':

-                temp = * (int32_t *) cp;

-                *cp++ = (uchar) temp;

-                *cp++ = (uchar) (temp >> 8);

-                *cp++ = (uchar) (temp >> 16);

-                *cp++ = (uchar) (temp >> 24);

-                break;

-

-            case 'S':

-                temp = * (short *) cp;

-                *cp++ = (uchar) temp;

-                *cp++ = (uchar) (temp >> 8);

-                break;

-

-            default:

-                if (isdigit (*format))

-                    cp += *format - '0';

-

-                break;

-        }

-

-        format++;

-    }

-}

+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2006 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// bits.c
+
+// This module provides utilities to support the BitStream structure which is
+// used to read and write all WavPack audio data streams. It also contains a
+// wrapper for the stream I/O functions and a set of functions dealing with
+// endian-ness, both for enhancing portability. Finally, a debug wrapper for
+// the malloc() system is provided.
+
+#include "wavpack.h"
+
+#include <string.h>
+#include <ctype.h>
+
+////////////////////////// Bitstream functions ////////////////////////////////
+
+// Open the specified BitStream and associate with the specified buffer. If a
+// "file" read callback is given, bs_read() refills the buffer through it.
+static void bs_read (Bitstream *bs);
+
+void bs_open_read (Bitstream *bs, uchar *buffer_start, uchar *buffer_end, read_stream file, uint32_t file_bytes)
+{
+    CLEAR (*bs);
+    bs->buf = buffer_start;
+    bs->end = buffer_end;
+
+    if (file) {
+        bs->ptr = bs->end - 1;          // NOTE(review): presumably makes the first fetch wrap into bs_read() -- confirm against the getbit() macros
+        bs->file_bytes = file_bytes;    // byte budget that bs_read() counts down
+        bs->file = file;
+    }
+    else
+        bs->ptr = bs->buf - 1;          // no callback: start just before the caller-supplied data
+
+    bs->wrap = bs_read;                 // handler for when the buffer runs out
+}
+
+// This function is only called from the getbit() and getbits() macros when
+// the BitStream has been exhausted and more data is required. It refills the
+// buffer through the read callback while bytes remain; otherwise it sets the
+// error flag, fills the buffer with 0xFF bytes, and resets the pointer.
+
+static void bs_read (Bitstream *bs)
+{
+    if (bs->file && bs->file_bytes) {
+        uint32_t bytes_read, bytes_to_read = bs->end - bs->buf;
+
+        if (bytes_to_read > bs->file_bytes)     // never ask for more than the remaining budget
+            bytes_to_read = bs->file_bytes;
+
+        bytes_read = bs->file (bs->buf, bytes_to_read);
+
+        if (bytes_read) {
+            bs->end = bs->buf + bytes_read;     // a short read shrinks the valid window
+            bs->file_bytes -= bytes_read;
+        }
+        else {
+            memset (bs->buf, -1, bs->end - bs->buf);    // NOTE(review): repeated by the bs->error check below
+            bs->error = 1;
+        }
+    }
+    else
+        bs->error = 1;                  // no callback, or its byte budget is used up
+
+    if (bs->error)                      // also true if error was set on an earlier call
+        memset (bs->buf, -1, bs->end - bs->buf);
+
+    bs->ptr = bs->buf;                  // reading restarts at the beginning of the buffer
+}
+
+/////////////////////// Endian Correction Routines ////////////////////////////
+
+void little_endian_to_native (void *data, char *format)
+{
+    uchar *cp = (uchar *) data;
+    int32_t temp;
+    // convert the fields at data in place; format holds one character per field
+    while (*format) {
+        switch (*format) {
+            case 'L':                   // 4-byte field stored little-endian
+                temp = cp [0] + ((int32_t) cp [1] << 8) + ((int32_t) cp [2] << 16) + ((int32_t) cp [3] << 24);
+                * (int32_t *) cp = temp;        // NOTE(review): assumes cp is suitably aligned for int32_t
+                cp += 4;
+                break;
+
+            case 'S':                   // 2-byte field stored little-endian
+                temp = cp [0] + (cp [1] << 8);
+                * (short *) cp = (short) temp;
+                cp += 2;
+                break;
+
+            default:                    // a digit skips that many bytes untouched;
+                if (isdigit (*format))  //  any other character is ignored
+                    cp += *format - '0';
+
+                break;
+        }
+
+        format++;
+    }
+}
+
+void native_to_little_endian (void *data, char *format)
+{
+    uchar *cp = (uchar *) data;
+    int32_t temp;
+    // convert the fields at data in place; format holds one character per field
+    while (*format) {
+        switch (*format) {
+            case 'L':                   // 4-byte native field
+                temp = * (int32_t *) cp;        // NOTE(review): assumes cp is suitably aligned for int32_t
+                *cp++ = (uchar) temp;           // written back least significant byte first
+                *cp++ = (uchar) (temp >> 8);
+                *cp++ = (uchar) (temp >> 16);
+                *cp++ = (uchar) (temp >> 24);
+                break;
+
+            case 'S':                   // 2-byte native field
+                temp = * (short *) cp;
+                *cp++ = (uchar) temp;           // low byte first, then high byte
+                *cp++ = (uchar) (temp >> 8);
+                break;
+
+            default:                    // a digit skips that many bytes untouched;
+                if (isdigit (*format))  //  any other character is ignored
+                    cp += *format - '0';
+
+                break;
+        }
+
+        format++;
+    }
+}
diff --git a/src/engine/external/wavpack/coldfire.S b/src/engine/external/wavpack/coldfire.S
index 93df9d82..83530900 100644
--- a/src/engine/external/wavpack/coldfire.S
+++ b/src/engine/external/wavpack/coldfire.S
@@ -1,525 +1,525 @@
-////////////////////////////////////////////////////////////////////////////

-//                           **** WAVPACK ****                            //

-//                  Hybrid Lossless Wavefile Compressor                   //

-//              Copyright (c) 1998 - 2006 Conifer Software.               //

-//                          All Rights Reserved.                          //

-//      Distributed under the BSD Software License (see license.txt)      //

-////////////////////////////////////////////////////////////////////////////

-

-/* This is an assembly optimized version of the following WavPack function:

- *

- * void decorr_stereo_pass_cont (struct decorr_pass *dpp,

- *                               long *buffer, long sample_count);

- *

- * It performs a single pass of stereo decorrelation on the provided buffer.

- * Note that this version of the function requires that the 8 previous stereo

- * samples are visible and correct. In other words, it ignores the "samples_*"

- * fields in the decorr_pass structure and gets the history data directly

- * from the buffer. It does, however, return the appropriate history samples

- * to the decorr_pass structure before returning.

- *

- * This is written to work on a MCF5249 processor, or any processor based on

- * the ColdFire V2 core with an EMAC unit. The EMAC is perfectly suited for

- * the "apply_weight" function of WavPack decorrelation because it provides

- * the requires 40-bit product. The fractional rounding mode of the EMAC is not

- * configurable and uses "round to even" while WavPack uses "round to larger",

- * so the rounding has to be done manually.

- */

-

-        .text

-        .align  2

-        .global decorr_stereo_pass_cont_mcf5249

-

-decorr_stereo_pass_cont_mcf5249:

-

-        lea     (-44, %sp), %sp

-        movem.l %d2-%d7/%a2-%a6, (%sp)

-        move.l  44+4(%sp), %a2          | a2 = dpp->

-        move.l  44+8(%sp), %a1          | a1 = bptr

-        move.w  2(%a2), %a3             | a3 = dpp->delta

-        move.w  4(%a2), %d3             | d3 = dpp->weight_A (sign extended)

-        ext.l   %d3

-        move.w  6(%a2), %d4             | d4 = dpp->weight_B (sign extended)

-        ext.l   %d4

-        move.l 44+12(%sp), %d0          | d0 = sample_count

-        jbeq    return_only             | if zero, nothing to do

-

-        lsl.l   #3, %d0                 | d5 = bptr + (sample_count * 8)

-        move.l  %d0, %d5

-        add.l   %a1, %d5

-

-        moveq.l #17, %d0                | left shift weights & delta 17 places

-        asl.l   %d0, %d3

-        asl.l   %d0, %d4

-        move.l  %a3, %d1

-        asl.l   %d0, %d1

-        move.l  %d1, %a3

-

-        moveq.l #0x20, %d6

-        move.l  %d6, %macsr             | set fractional mode for MAC

-        move.l  #0, %acc1               | acc1 = 0x00 0000 80 (for rounding)

-        move.l  #0x800000, %accext01

-        

-        move.l  #1024<<17, %d6          | d6 & d7 are weight clipping limits

-        move.l  #-1024<<17, %d7         | (only used by negative terms)

-

-        move.w  (%a2), %d0              | d0 = term

-        ext.l   %d0

-        cmp.l   #17, %d0

-        jbeq    term_17                 | term = 17

-        cmp.l   #18, %d0

-        jbeq    term_18                 | term = 18

-        addq.l  #1, %d0

-        jbeq    term_minus_1            | term = -1

-        addq.l  #1, %d0

-        jbeq    term_minus_2            | term = -2

-        addq.l  #1, %d0

-        jbeq    term_minus_3            | term = -3

-        jbra    term_default            | default term = 1 - 8

-

-|------------------------------------------------------------------------------

-| Loop to handle term = 17 condition

-|

-| a0 =                          d0 = (2 * bptr [-1]) - bptr [-2]

-| a1 = bptr                     d1 = initial bptr [0]

-| a2 = dpp->                    d2 = updated bptr [0]

-| a3 = dpp->delta << 17         d3 = dpp->weight_A << 17

-| a4 =                          d4 = dpp->weight_B << 17

-| a5 =                          d5 = eptr

-| macsr = 0x20                  acc1 = 0x00 0000 80

-|------------------------------------------------------------------------------

-

-term_17:

-        move.l  -8(%a1), %d0            | d0 = 2 * bptr [-1] - bptr [-2]

-        add.l   %d0, %d0

-        sub.l   -16(%a1), %d0

-        beq     .L251                   | if zero, skip calculation

-        move.l  %acc1, %acc0

-        asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_A

-        mac.l   %d0, %d3, %acc0

-        move.l  (%a1), %d1

-        beq     .L255

-        eor.l   %d1, %d0                | else compare signs

-        bge     .L256                   | if same, add delta to weight

-        sub.l   %a3, %d3                | else subtract delta from weight

-        sub.l   %a3, %d3                | subtract again instead of branch

-.L256:  add.l   %a3, %d3                | add delta to weight

-

-.L255:  move.l  %acc0, %d2              | d2 = rounded product

-        add.l   %d1, %d2                | update bptr [0] and store

-        move.l  %d2, (%a1)+

-

-.L253:  move.l  -8(%a1), %d0            | d0 = 2 * bptr [-1] - bptr [-2]

-        add.l   %d0, %d0

-        sub.l   -16(%a1), %d0

-        beq     .L257                   | if zero, skip calculations

-        move.l  %acc1, %acc0

-        asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_B

-        mac.l   %d0, %d4, %acc0

-        move.l  (%a1), %d1

-        beq     .L254

-        eor.l   %d1, %d0                | else compare signs

-        bge     .L259                   | if same, add delta to weight

-        sub.l   %a3, %d4                | else subtract delta from weight

-        sub.l   %a3, %d4                | subtract again instead of branch

-.L259:  add.l   %a3, %d4                | add delta to weight

-

-.L254:  move.l  %acc0, %d2              | d2 = rounded product

-        add.l   %d1, %d2                | update bptr [0] and store

-        move.l  %d2, (%a1)+

-

-.L252:  cmp.l   %a1, %d5                | loop if bptr < eptr

-        jbhi    term_17

-        bra     term_17_18_finish       | exit through common path

-

-.L251:  addq.l  #4, %a1                 | update point and jump back into loop

-        bra     .L253

-

-.L257:  addq.l  #4, %a1                 | update point and jump back into loop

-        bra     .L252

-

-|------------------------------------------------------------------------------

-| Loop to handle term = 18 condition

-|

-| a0 =                          d0 = ((3 * bptr [-1]) - bptr [-2]) >> 1

-| a1 = bptr                     d1 = initial bptr [0]

-| a2 = dpp->                    d2 = updated bptr [0]

-| a3 = dpp->delta << 17         d3 = dpp->weight_A << 17

-| a4 =                          d4 = dpp->weight_B << 17

-| a5 =                          d5 = eptr

-| macsr = 0x20                  acc1 = 0x00 0000 80

-|------------------------------------------------------------------------------

-

-term_18:

-        move.l  -8(%a1), %a0            | d0 = (3 * bptr [-1] - bptr [-2]) >> 1

-        lea     (%a0,%a0.l*2), %a0

-        move.l  %a0, %d0

-        sub.l   -16(%a1), %d0

-        asr.l   #1, %d0

-        beq     .L260

-        move.l  %acc1, %acc0

-        asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_A

-        mac.l   %d0, %d3, %acc0

-        move.l  (%a1), %d1

-        beq     .L266

-        eor.l   %d1, %d0                | else compare signs

-        bge     .L267                   | if same, add delta to weight

-        sub.l   %a3, %d3                | else subtract delta from weight

-        sub.l   %a3, %d3                | subtract again instead of branch

-.L267:  add.l   %a3, %d3                | add delta to weight

-

-.L266:  move.l  %acc0, %d2              | d2 = rounded product

-        add.l   %d1, %d2                | add applied weight to bptr [0], store

-        move.l  %d2, (%a1)+

-

-.L268:  move.l  -8(%a1), %a0            | d0 = (3 * bptr [-1] - bptr [-2]) >> 1

-        lea     (%a0,%a0.l*2), %a0

-        move.l  %a0, %d0

-        sub.l   -16(%a1), %d0

-        asr.l   #1, %d0

-        beq     .L261

-        move.l  %acc1, %acc0

-        asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_B

-        mac.l   %d0, %d4, %acc0

-        move.l  (%a1), %d1

-        beq     .L265

-        eor.l   %d1, %d0                | else compare signs

-        bge     .L270                   | if same, add delta to weight

-        sub.l   %a3, %d4                | else subtract delta from weight

-        sub.l   %a3, %d4                | subtract again instead of branch

-.L270:  add.l   %a3, %d4                | add delta to weight

-

-.L265:  move.l  %acc0, %d2              | d2 = rounded product

-        add.l   %d1, %d2                | add applied weight to bptr [0], store

-        move.l  %d2, (%a1)+

-

-.L269:  cmp.l   %a1, %d5                | loop if bptr < eptr

-        jbhi    term_18

-        bra     term_17_18_finish       | exit through common path

-

-.L260:  addq.l  #4, %a1                 | bump pointer and jump back into loop

-        bra     .L268

-

-.L261:  addq.l  #4, %a1                 | bump pointer and jump back into loop

-        bra     .L269

-

-term_17_18_finish:

-        move.l  -4(%a1), 40(%a2)        | restore dpp->samples_A [0-1], B [0-1]

-        move.l  -8(%a1), 8(%a2)

-        move.l  -12(%a1), 44(%a2)

-        move.l  -16(%a1), 12(%a2)

-        jbra    finish_up

-

-|------------------------------------------------------------------------------

-| Loop to handle default terms (i.e. 1 - 8)

-|

-| a0 = tptr                     d0 = tptr [0]

-| a1 = bptr                     d1 = initial bptr [0]

-| a2 = dpp->                    d2 = updated bptr [0]

-| a3 = dpp->delta << 17         d3 = dpp->weight_A << 17

-| a4 =                          d4 = dpp->weight_B << 17

-| a5 =                          d5 = eptr

-| macsr = 0x20                  acc1 = 0x00 0000 80

-|------------------------------------------------------------------------------

-

-term_default:

-        move.w  (%a2), %d0              | a0 = a1 - (dpp->term * 8)

-        ext.l   %d0

-        lsl.l   #3, %d0

-        move.l  %a1, %a0

-        sub.l   %d0, %a0

-

-term_default_loop:

-        move.l  (%a0)+, %d0             | d0 = tptr [0], skip ahead if zero

-        beq     .L271

-        move.l  %acc1, %acc0

-        asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_A

-        mac.l   %d0, %d3, %acc0

-        move.l  (%a1), %d1

-        beq     .L277

-        eor.l   %d1, %d0                | else compare signs

-        bge     .L278                   | if same, add delta to weight

-        sub.l   %a3, %d3                | else subtract delta from weight

-        sub.l   %a3, %d3                | subtract again instead of branch

-.L278:  add.l   %a3, %d3                | add delta to weight

-

-.L277:  move.l  %acc0, %d2              | d2 = rounded product

-        add.l   %d1, %d2                | add applied weight to bptr [0], store

-        move.l  %d2, (%a1)+

-

-.L275:  move.l  (%a0)+, %d0             | d0 = tptr [0], skip ahead if zero

-        beq     .L272

-        move.l  %acc1, %acc0

-        asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_B

-        mac.l   %d0, %d4, %acc0

-        move.l  (%a1), %d1

-        beq     .L276

-        eor.l   %d1, %d0                | else compare signs

-        bge     .L281                   | if same, add delta to weight

-        sub.l   %a3, %d4                | else subtract delta from weight

-        sub.l   %a3, %d4                | subtract again instead of branch

-.L281:  add.l   %a3, %d4                | add delta to weight

-

-.L276:  move.l  %acc0, %d2              | d2 = rounded product

-        add.l   %d1, %d2                | add applied weight to bptr [0], store

-        move.l  %d2, (%a1)+

-

-.L274:  cmp.l   %a1, %d5                | loop back if bptr < eptr

-        jbhi    term_default_loop

-        move.w  (%a2), %d0              | d0 = term - 1

-        moveq.l #8, %d1                 | d1 = loop counter

-

-.L323:  subq.l  #1, %d0                 | back up & mask index

-        and.l   #7, %d0

-        move.l  -(%a1), 40(%a2,%d0.l*4) | store dpp->samples_B [d0]

-        move.l  -(%a1), 8(%a2,%d0.l*4)  | store dpp->samples_A [d0]

-        subq.l  #1, %d1                 | loop on count

-        jbne    .L323

-        jbra    finish_up

-

-.L271:  addq.l  #4, %a1                 | bump pointer and jump back into loop

-        bra     .L275

-

-.L272:  addq.l  #4, %a1                 | bump pointer and jump back into loop

-        bra     .L274

-

-

-|------------------------------------------------------------------------------

-| Loop to handle term = -1 condition

-|

-| a0 =                          d0 = decorrelation sample

-| a1 = bptr                     d1 = initial bptr [0]

-| a2 = dpp->                    d2 = updated bptr [0]

-| a3 = dpp->delta << 17         d3 = dpp->weight_A << 17

-| a4 =                          d4 = dpp->weight_B << 17

-| a5 =                          d5 = eptr

-| a6 =                          d6 = 1024 << 17

-| a7 =                          d7 = -1024 << 17

-| macsr = 0x20                  acc1 = 0x00 0000 80

-|------------------------------------------------------------------------------

-

-term_minus_1:

-        move.l  -4(%a1), %d0            | d0 = bptr [-1]

-        beq     .L402

-        move.l  %acc1, %acc0

-        asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_A)

-        mac.l   %d0, %d3, %acc0

-        move.l  (%a1), %d1

-        beq     .L405

-        eor.l   %d1, %d0                | else compare signs

-        bge     .L404                   | if same, add delta to weight

-        sub.l   %a3, %d3                | else subtract delta from weight

-        cmp.l   %d7, %d3                | check for negative clip limit

-        bge     .L405

-        move.l  %d7, %d3

-        bra     .L405

-

-.L404:  add.l   %a3, %d3                | add delta to weight

-        cmp.l   %d6, %d3                | check for positive clip limit

-        ble     .L405

-        move.l  %d6, %d3

-

-.L405:  move.l  %acc0, %d0              | d2 = rounded product

-        add.l   %d1, %d0                | add applied weight to bptr [0], store

-        move.l  %d0, (%a1)+

-        beq     .L401

-

-.L410:  move.l  %acc1, %acc0

-        asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_B)

-        mac.l   %d0, %d4, %acc0

-        move.l  (%a1), %d1

-        beq     .L403

-        eor.l   %d1, %d0                | else compare signs

-        bge     .L407                   | if same, add delta to weight

-        sub.l   %a3, %d4                | else subtract delta from weight

-        cmp.l   %d7, %d4                | check for negative clip limit

-        bge     .L403

-        move.l  %d7, %d4

-        bra     .L403

-

-.L407:  add.l   %a3, %d4                | add delta to weight

-        cmp.l   %d6, %d4                | check for positive clip limit

-        ble     .L403

-        move.l  %d6, %d4

-

-.L403:  move.l  %acc0, %d2              | d2 = rounded product

-        add.l   %d1, %d2                | add applied weight to bptr [1], store

-        move.l  %d2, (%a1)+

-

-.L411:  cmp.l   %a1, %d5                | loop back if bptr < eptr

-        jbhi    term_minus_1

-        move.l  -4(%a1), 8(%a2)         | dpp->samples_A [0] = bptr [-1]

-        jbra    finish_up

-

-.L402:  move.l  (%a1)+, %d0

-        bne     .L410

-

-.L401:  addq.l  #4, %a1

-        bra     .L411

-

-

-|------------------------------------------------------------------------------

-| Loop to handle term = -2 condition

-|

-| a0 =                          d0 = decorrelation sample

-| a1 = bptr                     d1 = initial bptr [0]

-| a2 = dpp->                    d2 = updated bptr [0]

-| a3 = dpp->delta << 17         d3 = dpp->weight_A << 17

-| a4 =                          d4 = dpp->weight_B << 17

-| a5 =                          d5 = eptr

-| a6 =                          d6 = 1024 << 17

-| a7 =                          d7 = -1024 << 17

-| macsr = 0x20                  acc1 = 0x00 0000 80

-|------------------------------------------------------------------------------

-

-term_minus_2:

-        move.l  -8(%a1), %d0            | d0 = bptr [-2]

-        beq     .L511

-        move.l  %acc1, %acc0

-        asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_B)

-        mac.l   %d0, %d4, %acc0

-        move.l  4(%a1), %d1

-        beq     .L505

-        eor.l   %d1, %d0                | else compare signs

-        bge     .L504                   | if same, add delta to weight

-        sub.l   %a3, %d4                | else subtract delta from weight

-        cmp.l   %d7, %d4                | ckeck for negative clip limit

-        bge     .L505

-        move.l  %d7, %d4

-        bra     .L505

-

-.L504:  add.l   %a3, %d4                | add delta to weight

-        cmp.l   %d6, %d4                | check for positive clip limit

-        ble     .L505

-        move.l  %d6, %d4

-

-.L505:  move.l  %acc0, %d0              | d2 = rounded product

-        add.l   %d1, %d0                | add applied weight to bptr [0], store

-        move.l  %d0, 4(%a1)

-        beq     .L512

-

-.L510:  move.l  %acc1, %acc0

-        asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_A)

-        mac.l   %d0, %d3, %acc0

-        move.l  (%a1), %d1

-        beq     .L503

-        eor.l   %d1, %d0                | else compare signs

-        bge     .L507                   | if same, add delta to weight

-        sub.l   %a3, %d3                | else subtract delta from weight

-        cmp.l   %d7, %d3                | check for negative clip limit

-        bge     .L503

-        move.l  %d7, %d3

-        bra     .L503

-

-.L507:  add.l   %a3, %d3                | add delta to weight

-        cmp.l   %d6, %d3                | check for negative clip limit

-        ble     .L503

-        move.l  %d6, %d3

-

-.L503:  move.l  %acc0, %d2              | d2 = rounded product

-        add.l   %d1, %d2                | add applied weight to bptr [1], store

-        move.l  %d2, (%a1)

-

-.L512:  addq.l  #8, %a1

-        cmp.l   %a1, %d5                | loop if bptr < eptr

-        jbhi    term_minus_2

-        move.l  -8(%a1), 40(%a2)        | dpp->samples_B [0] = bptr [-4]

-        jbra    finish_up

-

-.L511:  move.l  4(%a1), %d0

-        beq     .L512

-        bra     .L510

-

-

-|------------------------------------------------------------------------------

-| Loop to handle term = -3 condition

-|

-| a0 =                          d0 = decorrelation sample

-| a1 = bptr                     d1 = initial bptr [0]

-| a2 = dpp->                    d2 = updated bptr [0]

-| a3 = dpp->delta << 17         d3 = dpp->weight_A << 17

-| a4 =                          d4 = dpp->weight_B << 17

-| a5 =                          d5 = eptr

-| a6 =                          d6 = 1024 << 17

-| a7 =                          d7 = -1024 << 17

-| macsr = 0x20                  acc1 = 0x00 0000 80

-|------------------------------------------------------------------------------

-

-term_minus_3:

-        move.l  -4(%a1), %d0            | d0 = bptr [-1]

-        beq     .L301

-        move.l  %acc1, %acc0

-        asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_A)

-        mac.l   %d0, %d3, %acc0

-        move.l  (%a1), %d1

-        beq     .L320

-        eor.l   %d1, %d0                | else compare signs

-        bge     .L319                   | if same, add delta to weight

-        sub.l   %a3, %d3                | else subtract delta from weight

-        cmp.l   %d7, %d3                | check for negative clip limit

-        bge     .L320

-        move.l  %d7, %d3

-        bra     .L320

-

-.L319:  add.l   %a3, %d3                | add delta to weight

-        cmp.l   %d6, %d3                | check for positive clip limit

-        ble     .L320

-        move.l  %d6, %d3

-

-.L320:  move.l  %acc0, %d2              | d2 = rounded product

-        add.l   %d1, %d2                | add applied weight to bptr [0], store

-        move.l  %d2, (%a1)+

-

-.L330:  move.l  -12(%a1), %d0           | d0 = bptr [-2]

-        beq     .L302

-        move.l  %acc1, %acc0

-        asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_B)

-        mac.l   %d0, %d4, %acc0

-        move.l  (%a1), %d1

-        beq     .L318

-        eor.l   %d1, %d0                | else compare signs

-        bge     .L322                   | if same, add delta to weight

-        sub.l   %a3, %d4                | else subtract delta from weight

-        cmp.l   %d7, %d4                | check for negative clip limit

-        bge     .L318

-        move.l  %d7, %d4

-        bra     .L318

-

-.L322:  add.l   %a3, %d4                | add delta to weight

-        cmp.l   %d6, %d4                | check for positive clip limit

-        ble     .L318

-        move.l  %d6, %d4

-

-.L318:  move.l  %acc0, %d2              | d2 = rounded product

-        add.l   %d1, %d2                | add applied weight to bptr [1], store

-        move.l  %d2, (%a1)+

-

-.L331:  cmp.l   %a1, %d5                | bptr, eptr

-        jbhi    term_minus_3

-        move.l  -4(%a1), 8(%a2)         | dpp->samples_A [0] = bptr [-1]

-        move.l  -8(%a1), 40(%a2)        | dpp->samples_B [0] = bptr [-2]

-        jbra    finish_up

-

-.L301:  addq.l  #4, %a1

-        bra     .L330

-

-.L302:  addq.l  #4, %a1

-        bra     .L331

-

-| finish and return

-

-finish_up:

-        moveq.l #17, %d0

-        asr.l   %d0, %d3

-        asr.l   %d0, %d4

-        move.w  %d3, 4(%a2)     | weight_A, dpp->weight_A

-        move.w  %d4, 6(%a2)     | weight_B, dpp->weight_B

-

-        clr.l   %d0             | clear up EMAC

-        move.l  %d0, %acc0

-        move.l  %d0, %acc1

-

-return_only:

-        movem.l (%sp), %d2-%d7/%a2-%a6

-        lea     (44,%sp), %sp

-        rts

+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2006 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+/* This is an assembly optimized version of the following WavPack function:
+ *
+ * void decorr_stereo_pass_cont (struct decorr_pass *dpp,
+ *                               long *buffer, long sample_count);
+ *
+ * It performs a single pass of stereo decorrelation on the provided buffer.
+ * Note that this version of the function requires that the 8 previous stereo
+ * samples are visible and correct. In other words, it ignores the "samples_*"
+ * fields in the decorr_pass structure and gets the history data directly
+ * from the buffer. It does, however, return the appropriate history samples
+ * to the decorr_pass structure before returning.
+ *
+ * This is written to work on a MCF5249 processor, or any processor based on
+ * the ColdFire V2 core with an EMAC unit. The EMAC is perfectly suited for
+ * the "apply_weight" function of WavPack decorrelation because it provides
+ * the required 40-bit product. The fractional rounding mode of the EMAC is not
+ * configurable and uses "round to even" while WavPack uses "round to larger",
+ * so the rounding has to be done manually.
+ */
+
+        .text
+        .align  2
+        .global decorr_stereo_pass_cont_mcf5249
+
+decorr_stereo_pass_cont_mcf5249:
+
+        lea     (-44, %sp), %sp
+        movem.l %d2-%d7/%a2-%a6, (%sp)
+        move.l  44+4(%sp), %a2          | a2 = dpp->
+        move.l  44+8(%sp), %a1          | a1 = bptr
+        move.w  2(%a2), %a3             | a3 = dpp->delta
+        move.w  4(%a2), %d3             | d3 = dpp->weight_A (sign extended)
+        ext.l   %d3
+        move.w  6(%a2), %d4             | d4 = dpp->weight_B (sign extended)
+        ext.l   %d4
+        move.l 44+12(%sp), %d0          | d0 = sample_count
+        jbeq    return_only             | if zero, nothing to do
+
+        lsl.l   #3, %d0                 | d5 = bptr + (sample_count * 8)
+        move.l  %d0, %d5
+        add.l   %a1, %d5
+
+        moveq.l #17, %d0                | left shift weights & delta 17 places
+        asl.l   %d0, %d3
+        asl.l   %d0, %d4
+        move.l  %a3, %d1
+        asl.l   %d0, %d1
+        move.l  %d1, %a3
+
+        moveq.l #0x20, %d6
+        move.l  %d6, %macsr             | set fractional mode for MAC
+        move.l  #0, %acc1               | acc1 = 0x00 0000 80 (for rounding)
+        move.l  #0x800000, %accext01
+        
+        move.l  #1024<<17, %d6          | d6 & d7 are weight clipping limits
+        move.l  #-1024<<17, %d7         | (only used by negative terms)
+
+        move.w  (%a2), %d0              | d0 = term
+        ext.l   %d0
+        cmp.l   #17, %d0
+        jbeq    term_17                 | term = 17
+        cmp.l   #18, %d0
+        jbeq    term_18                 | term = 18
+        addq.l  #1, %d0
+        jbeq    term_minus_1            | term = -1
+        addq.l  #1, %d0
+        jbeq    term_minus_2            | term = -2
+        addq.l  #1, %d0
+        jbeq    term_minus_3            | term = -3
+        jbra    term_default            | default term = 1 - 8
+
+|------------------------------------------------------------------------------
+| Loop to handle term = 17 condition
+|
+| a0 =                          d0 = (2 * bptr [-1]) - bptr [-2]
+| a1 = bptr                     d1 = initial bptr [0]
+| a2 = dpp->                    d2 = updated bptr [0]
+| a3 = dpp->delta << 17         d3 = dpp->weight_A << 17
+| a4 =                          d4 = dpp->weight_B << 17
+| a5 =                          d5 = eptr
+| macsr = 0x20                  acc1 = 0x00 0000 80
+|------------------------------------------------------------------------------
+
+term_17:
+        move.l  -8(%a1), %d0            | d0 = 2 * bptr [-1] - bptr [-2]
+        add.l   %d0, %d0
+        sub.l   -16(%a1), %d0
+        beq     .L251                   | if zero, skip calculation
+        move.l  %acc1, %acc0
+        asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_A
+        mac.l   %d0, %d3, %acc0
+        move.l  (%a1), %d1
+        beq     .L255
+        eor.l   %d1, %d0                | else compare signs
+        bge     .L256                   | if same, add delta to weight
+        sub.l   %a3, %d3                | else subtract delta from weight
+        sub.l   %a3, %d3                | subtract again instead of branch
+.L256:  add.l   %a3, %d3                | add delta to weight
+
+.L255:  move.l  %acc0, %d2              | d2 = rounded product
+        add.l   %d1, %d2                | update bptr [0] and store
+        move.l  %d2, (%a1)+
+
+.L253:  move.l  -8(%a1), %d0            | d0 = 2 * bptr [-1] - bptr [-2]
+        add.l   %d0, %d0
+        sub.l   -16(%a1), %d0
+        beq     .L257                   | if zero, skip calculations
+        move.l  %acc1, %acc0
+        asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_B
+        mac.l   %d0, %d4, %acc0
+        move.l  (%a1), %d1
+        beq     .L254
+        eor.l   %d1, %d0                | else compare signs
+        bge     .L259                   | if same, add delta to weight
+        sub.l   %a3, %d4                | else subtract delta from weight
+        sub.l   %a3, %d4                | subtract again instead of branch
+.L259:  add.l   %a3, %d4                | add delta to weight
+
+.L254:  move.l  %acc0, %d2              | d2 = rounded product
+        add.l   %d1, %d2                | update bptr [0] and store
+        move.l  %d2, (%a1)+
+
+.L252:  cmp.l   %a1, %d5                | loop if bptr < eptr
+        jbhi    term_17
+        bra     term_17_18_finish       | exit through common path
+
+.L251:  addq.l  #4, %a1                 | update pointer and jump back into loop
+        bra     .L253
+
+.L257:  addq.l  #4, %a1                 | update pointer and jump back into loop
+        bra     .L252
+
+|------------------------------------------------------------------------------
+| Loop to handle term = 18 condition
+|
+| a0 =                          d0 = ((3 * bptr [-1]) - bptr [-2]) >> 1
+| a1 = bptr                     d1 = initial bptr [0]
+| a2 = dpp->                    d2 = updated bptr [0]
+| a3 = dpp->delta << 17         d3 = dpp->weight_A << 17
+| a4 =                          d4 = dpp->weight_B << 17
+| a5 =                          d5 = eptr
+| macsr = 0x20                  acc1 = 0x00 0000 80
+|------------------------------------------------------------------------------
+
+term_18:
+        move.l  -8(%a1), %a0            | d0 = (3 * bptr [-1] - bptr [-2]) >> 1
+        lea     (%a0,%a0.l*2), %a0
+        move.l  %a0, %d0
+        sub.l   -16(%a1), %d0
+        asr.l   #1, %d0
+        beq     .L260
+        move.l  %acc1, %acc0
+        asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_A
+        mac.l   %d0, %d3, %acc0
+        move.l  (%a1), %d1
+        beq     .L266
+        eor.l   %d1, %d0                | else compare signs
+        bge     .L267                   | if same, add delta to weight
+        sub.l   %a3, %d3                | else subtract delta from weight
+        sub.l   %a3, %d3                | subtract again instead of branch
+.L267:  add.l   %a3, %d3                | add delta to weight
+
+.L266:  move.l  %acc0, %d2              | d2 = rounded product
+        add.l   %d1, %d2                | add applied weight to bptr [0], store
+        move.l  %d2, (%a1)+
+
+.L268:  move.l  -8(%a1), %a0            | d0 = (3 * bptr [-1] - bptr [-2]) >> 1
+        lea     (%a0,%a0.l*2), %a0
+        move.l  %a0, %d0
+        sub.l   -16(%a1), %d0
+        asr.l   #1, %d0
+        beq     .L261
+        move.l  %acc1, %acc0
+        asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_B
+        mac.l   %d0, %d4, %acc0
+        move.l  (%a1), %d1
+        beq     .L265
+        eor.l   %d1, %d0                | else compare signs
+        bge     .L270                   | if same, add delta to weight
+        sub.l   %a3, %d4                | else subtract delta from weight
+        sub.l   %a3, %d4                | subtract again instead of branch
+.L270:  add.l   %a3, %d4                | add delta to weight
+
+.L265:  move.l  %acc0, %d2              | d2 = rounded product
+        add.l   %d1, %d2                | add applied weight to bptr [0], store
+        move.l  %d2, (%a1)+
+
+.L269:  cmp.l   %a1, %d5                | loop if bptr < eptr
+        jbhi    term_18
+        bra     term_17_18_finish       | exit through common path
+
+.L260:  addq.l  #4, %a1                 | bump pointer and jump back into loop
+        bra     .L268
+
+.L261:  addq.l  #4, %a1                 | bump pointer and jump back into loop
+        bra     .L269
+
+term_17_18_finish:
+        move.l  -4(%a1), 40(%a2)        | restore dpp->samples_A [0-1], B [0-1]
+        move.l  -8(%a1), 8(%a2)
+        move.l  -12(%a1), 44(%a2)
+        move.l  -16(%a1), 12(%a2)
+        jbra    finish_up
+
+|------------------------------------------------------------------------------
+| Loop to handle default terms (i.e. 1 - 8)
+|
+| a0 = tptr                     d0 = tptr [0]
+| a1 = bptr                     d1 = initial bptr [0]
+| a2 = dpp->                    d2 = updated bptr [0]
+| a3 = dpp->delta << 17         d3 = dpp->weight_A << 17
+| a4 =                          d4 = dpp->weight_B << 17
+| a5 =                          d5 = eptr
+| macsr = 0x20                  acc1 = 0x00 0000 80
+|------------------------------------------------------------------------------
+
+term_default:
+        move.w  (%a2), %d0              | a0 = a1 - (dpp->term * 8)
+        ext.l   %d0
+        lsl.l   #3, %d0
+        move.l  %a1, %a0
+        sub.l   %d0, %a0
+
+term_default_loop:
+        move.l  (%a0)+, %d0             | d0 = tptr [0], skip ahead if zero
+        beq     .L271
+        move.l  %acc1, %acc0
+        asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_A
+        mac.l   %d0, %d3, %acc0
+        move.l  (%a1), %d1
+        beq     .L277
+        eor.l   %d1, %d0                | else compare signs
+        bge     .L278                   | if same, add delta to weight
+        sub.l   %a3, %d3                | else subtract delta from weight
+        sub.l   %a3, %d3                | subtract again instead of branch
+.L278:  add.l   %a3, %d3                | add delta to weight
+
+.L277:  move.l  %acc0, %d2              | d2 = rounded product
+        add.l   %d1, %d2                | add applied weight to bptr [0], store
+        move.l  %d2, (%a1)+
+
+.L275:  move.l  (%a0)+, %d0             | d0 = tptr [0], skip ahead if zero
+        beq     .L272
+        move.l  %acc1, %acc0
+        asl.l   #4, %d0                 | acc0 = acc1 + (d0 << 4) * weight_B
+        mac.l   %d0, %d4, %acc0
+        move.l  (%a1), %d1
+        beq     .L276
+        eor.l   %d1, %d0                | else compare signs
+        bge     .L281                   | if same, add delta to weight
+        sub.l   %a3, %d4                | else subtract delta from weight
+        sub.l   %a3, %d4                | subtract again instead of branch
+.L281:  add.l   %a3, %d4                | add delta to weight
+
+.L276:  move.l  %acc0, %d2              | d2 = rounded product
+        add.l   %d1, %d2                | add applied weight to bptr [0], store
+        move.l  %d2, (%a1)+
+
+.L274:  cmp.l   %a1, %d5                | loop back if bptr < eptr
+        jbhi    term_default_loop
+        move.w  (%a2), %d0              | d0 = term (.L323 decrements and masks it)
+        moveq.l #8, %d1                 | d1 = loop counter
+
+.L323:  subq.l  #1, %d0                 | back up & mask index
+        and.l   #7, %d0
+        move.l  -(%a1), 40(%a2,%d0.l*4) | store dpp->samples_B [d0]
+        move.l  -(%a1), 8(%a2,%d0.l*4)  | store dpp->samples_A [d0]
+        subq.l  #1, %d1                 | loop on count
+        jbne    .L323
+        jbra    finish_up
+
+.L271:  addq.l  #4, %a1                 | bump pointer and jump back into loop
+        bra     .L275
+
+.L272:  addq.l  #4, %a1                 | bump pointer and jump back into loop
+        bra     .L274
+
+
+|------------------------------------------------------------------------------
+| Loop to handle term = -1 condition
+|
+| a0 =                          d0 = decorrelation sample
+| a1 = bptr                     d1 = initial bptr [0]
+| a2 = dpp->                    d2 = updated bptr [0]
+| a3 = dpp->delta << 17         d3 = dpp->weight_A << 17
+| a4 =                          d4 = dpp->weight_B << 17
+| a5 =                          d5 = eptr
+| a6 =                          d6 = 1024 << 17
+| a7 =                          d7 = -1024 << 17
+| macsr = 0x20                  acc1 = 0x00 0000 80
+|------------------------------------------------------------------------------
+
+term_minus_1:
+        move.l  -4(%a1), %d0            | d0 = bptr [-1]
+        beq     .L402
+        move.l  %acc1, %acc0
+        asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_A)
+        mac.l   %d0, %d3, %acc0
+        move.l  (%a1), %d1
+        beq     .L405
+        eor.l   %d1, %d0                | else compare signs
+        bge     .L404                   | if same, add delta to weight
+        sub.l   %a3, %d3                | else subtract delta from weight
+        cmp.l   %d7, %d3                | check for negative clip limit
+        bge     .L405
+        move.l  %d7, %d3
+        bra     .L405
+
+.L404:  add.l   %a3, %d3                | add delta to weight
+        cmp.l   %d6, %d3                | check for positive clip limit
+        ble     .L405
+        move.l  %d6, %d3
+
+.L405:  move.l  %acc0, %d0              | d0 = rounded product
+        add.l   %d1, %d0                | add applied weight to bptr [0], store
+        move.l  %d0, (%a1)+
+        beq     .L401
+
+.L410:  move.l  %acc1, %acc0
+        asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_B)
+        mac.l   %d0, %d4, %acc0
+        move.l  (%a1), %d1
+        beq     .L403
+        eor.l   %d1, %d0                | else compare signs
+        bge     .L407                   | if same, add delta to weight
+        sub.l   %a3, %d4                | else subtract delta from weight
+        cmp.l   %d7, %d4                | check for negative clip limit
+        bge     .L403
+        move.l  %d7, %d4
+        bra     .L403
+
+.L407:  add.l   %a3, %d4                | add delta to weight
+        cmp.l   %d6, %d4                | check for positive clip limit
+        ble     .L403
+        move.l  %d6, %d4
+
+.L403:  move.l  %acc0, %d2              | d2 = rounded product
+        add.l   %d1, %d2                | add applied weight to bptr [1], store
+        move.l  %d2, (%a1)+
+
+.L411:  cmp.l   %a1, %d5                | loop back if bptr < eptr
+        jbhi    term_minus_1
+        move.l  -4(%a1), 8(%a2)         | dpp->samples_A [0] = bptr [-1]
+        jbra    finish_up
+
+.L402:  move.l  (%a1)+, %d0
+        bne     .L410
+
+.L401:  addq.l  #4, %a1
+        bra     .L411
+
+
+|------------------------------------------------------------------------------
+| Loop to handle term = -2 condition
+|
+| a0 =                          d0 = decorrelation sample
+| a1 = bptr                     d1 = initial bptr [0]
+| a2 = dpp->                    d2 = updated bptr [0]
+| a3 = dpp->delta << 17         d3 = dpp->weight_A << 17
+| a4 =                          d4 = dpp->weight_B << 17
+| a5 =                          d5 = eptr
+| a6 =                          d6 = 1024 << 17
+| a7 =                          d7 = -1024 << 17
+| macsr = 0x20                  acc1 = 0x00 0000 80
+|------------------------------------------------------------------------------
+
+term_minus_2:
+        move.l  -8(%a1), %d0            | d0 = bptr [-2]
+        beq     .L511
+        move.l  %acc1, %acc0
+        asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_B)
+        mac.l   %d0, %d4, %acc0
+        move.l  4(%a1), %d1
+        beq     .L505
+        eor.l   %d1, %d0                | else compare signs
+        bge     .L504                   | if same, add delta to weight
+        sub.l   %a3, %d4                | else subtract delta from weight
+        cmp.l   %d7, %d4                | check for negative clip limit
+        bge     .L505
+        move.l  %d7, %d4
+        bra     .L505
+
+.L504:  add.l   %a3, %d4                | add delta to weight
+        cmp.l   %d6, %d4                | check for positive clip limit
+        ble     .L505
+        move.l  %d6, %d4
+
+.L505:  move.l  %acc0, %d0              | d0 = rounded product
+        add.l   %d1, %d0                | add applied weight to bptr [1], store
+        move.l  %d0, 4(%a1)
+        beq     .L512
+
+.L510:  move.l  %acc1, %acc0
+        asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_A)
+        mac.l   %d0, %d3, %acc0
+        move.l  (%a1), %d1
+        beq     .L503
+        eor.l   %d1, %d0                | else compare signs
+        bge     .L507                   | if same, add delta to weight
+        sub.l   %a3, %d3                | else subtract delta from weight
+        cmp.l   %d7, %d3                | check for negative clip limit
+        bge     .L503
+        move.l  %d7, %d3
+        bra     .L503
+
+.L507:  add.l   %a3, %d3                | add delta to weight
+        cmp.l   %d6, %d3                | check for positive clip limit
+        ble     .L503
+        move.l  %d6, %d3
+
+.L503:  move.l  %acc0, %d2              | d2 = rounded product
+        add.l   %d1, %d2                | add applied weight to bptr [0], store
+        move.l  %d2, (%a1)
+
+.L512:  addq.l  #8, %a1
+        cmp.l   %a1, %d5                | loop if bptr < eptr
+        jbhi    term_minus_2
+        move.l  -8(%a1), 40(%a2)        | dpp->samples_B [0] = bptr [-2]
+        jbra    finish_up
+
+.L511:  move.l  4(%a1), %d0
+        beq     .L512
+        bra     .L510
+
+
+|------------------------------------------------------------------------------
+| Loop to handle term = -3 condition
+|
+| a0 =                          d0 = decorrelation sample
+| a1 = bptr                     d1 = initial bptr [0]
+| a2 = dpp->                    d2 = updated bptr [0]
+| a3 = dpp->delta << 17         d3 = dpp->weight_A << 17
+| a4 =                          d4 = dpp->weight_B << 17
+| a5 =                          d5 = eptr
+| a6 =                          d6 = 1024 << 17
+| a7 =                          d7 = -1024 << 17
+| macsr = 0x20                  acc1 = 0x00 0000 80
+|------------------------------------------------------------------------------
+
+term_minus_3:
+        move.l  -4(%a1), %d0            | d0 = bptr [-1]
+        beq     .L301
+        move.l  %acc1, %acc0
+        asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_A)
+        mac.l   %d0, %d3, %acc0
+        move.l  (%a1), %d1
+        beq     .L320
+        eor.l   %d1, %d0                | else compare signs
+        bge     .L319                   | if same, add delta to weight
+        sub.l   %a3, %d3                | else subtract delta from weight
+        cmp.l   %d7, %d3                | check for negative clip limit
+        bge     .L320
+        move.l  %d7, %d3
+        bra     .L320
+
+.L319:  add.l   %a3, %d3                | add delta to weight
+        cmp.l   %d6, %d3                | check for positive clip limit
+        ble     .L320
+        move.l  %d6, %d3
+
+.L320:  move.l  %acc0, %d2              | d2 = rounded product
+        add.l   %d1, %d2                | add applied weight to bptr [0], store
+        move.l  %d2, (%a1)+
+
+.L330:  move.l  -12(%a1), %d0           | d0 = bptr [-2]
+        beq     .L302
+        move.l  %acc1, %acc0
+        asl.l   #4, %d0                 | acc0 = acc1 + ((d0 << 4) * weight_B)
+        mac.l   %d0, %d4, %acc0
+        move.l  (%a1), %d1
+        beq     .L318
+        eor.l   %d1, %d0                | else compare signs
+        bge     .L322                   | if same, add delta to weight
+        sub.l   %a3, %d4                | else subtract delta from weight
+        cmp.l   %d7, %d4                | check for negative clip limit
+        bge     .L318
+        move.l  %d7, %d4
+        bra     .L318
+
+.L322:  add.l   %a3, %d4                | add delta to weight
+        cmp.l   %d6, %d4                | check for positive clip limit
+        ble     .L318
+        move.l  %d6, %d4
+
+.L318:  move.l  %acc0, %d2              | d2 = rounded product
+        add.l   %d1, %d2                | add applied weight to bptr [1], store
+        move.l  %d2, (%a1)+
+
+.L331:  cmp.l   %a1, %d5                | loop if bptr < eptr
+        jbhi    term_minus_3
+        move.l  -4(%a1), 8(%a2)         | dpp->samples_A [0] = bptr [-1]
+        move.l  -8(%a1), 40(%a2)        | dpp->samples_B [0] = bptr [-2]
+        jbra    finish_up
+
+.L301:  addq.l  #4, %a1
+        bra     .L330
+
+.L302:  addq.l  #4, %a1
+        bra     .L331
+
+| finish and return
+
+finish_up:
+        moveq.l #17, %d0
+        asr.l   %d0, %d3
+        asr.l   %d0, %d4
+        move.w  %d3, 4(%a2)     | weight_A, dpp->weight_A
+        move.w  %d4, 6(%a2)     | weight_B, dpp->weight_B
+
+        clr.l   %d0             | clear up EMAC
+        move.l  %d0, %acc0
+        move.l  %d0, %acc1
+
+return_only:
+        movem.l (%sp), %d2-%d7/%a2-%a6
+        lea     (44,%sp), %sp
+        rts
diff --git a/src/engine/external/wavpack/float.c b/src/engine/external/wavpack/float.c
index 4b9b44ee..09f0e4c3 100644
--- a/src/engine/external/wavpack/float.c
+++ b/src/engine/external/wavpack/float.c
@@ -1,50 +1,50 @@
-////////////////////////////////////////////////////////////////////////////

-//                           **** WAVPACK ****                            //

-//                  Hybrid Lossless Wavefile Compressor                   //

-//              Copyright (c) 1998 - 2006 Conifer Software.               //

-//                          All Rights Reserved.                          //

-//      Distributed under the BSD Software License (see license.txt)      //

-////////////////////////////////////////////////////////////////////////////

-

-// float.c

-

-#include "wavpack.h"

-

-int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd)

-{

-    int bytecnt = wpmd->byte_length;

-    char *byteptr = wpmd->data;

-

-    if (bytecnt != 4)

-        return FALSE;

-

-    wps->float_flags = *byteptr++;

-    wps->float_shift = *byteptr++;

-    wps->float_max_exp = *byteptr++;

-    wps->float_norm_exp = *byteptr;

-    return TRUE;

-}

-

-void float_values (WavpackStream *wps, int32_t *values, int32_t num_values)

-{

-    int shift = wps->float_max_exp - wps->float_norm_exp + wps->float_shift;

-

-    if (shift > 32)

-        shift = 32;

-    else if (shift < -32)

-        shift = -32;

-

-    while (num_values--) {

-        if (shift > 0)

-            *values <<= shift;

-        else if (shift < 0)

-            *values >>= -shift;

-

-        if (*values > 8388607L)

-            *values = 8388607L;

-        else if (*values < -8388608L)

-            *values = -8388608L;

-

-        values++;

-    }

-}

+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2006 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// float.c
+
+#include "wavpack.h"
+// Load the float-info metadata block (flags, shift, max_exp, norm_exp) into the stream.
+int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    int bytecnt = wpmd->byte_length;
+    char *byteptr = wpmd->data;
+    // The block must be exactly four bytes, one per field stored below.
+    if (bytecnt != 4)
+        return FALSE;           // wrong length: wps is left untouched
+    // Fields are taken in stored order, one byte each.
+    wps->float_flags = *byteptr++;
+    wps->float_shift = *byteptr++;
+    wps->float_max_exp = *byteptr++;
+    wps->float_norm_exp = *byteptr;
+    return TRUE;
+}
+// Scale num_values samples in place by the stream's float exponent shift, then clip to 24 bits.
+void float_values (WavpackStream *wps, int32_t *values, int32_t num_values)
+{
+    int shift = wps->float_max_exp - wps->float_norm_exp + wps->float_shift;
+    // Limit the shift count to -32..32 (positive = left shift, negative = right shift).
+    if (shift > 32)
+        shift = 32;
+    else if (shift < -32)
+        shift = -32;
+    // NOTE(review): ISO C leaves a shift count of 32 on a 32-bit operand undefined - confirm on target.
+    while (num_values--) {
+        if (shift > 0)
+            *values <<= shift;
+        else if (shift < 0)
+            *values >>= -shift;
+        // Saturate to the signed 24-bit range -8388608..8388607.
+        if (*values > 8388607L)
+            *values = 8388607L;
+        else if (*values < -8388608L)
+            *values = -8388608L;
+
+        values++;
+    }
+}
diff --git a/src/engine/external/wavpack/license.txt b/src/engine/external/wavpack/license.txt
index 98f6e6b1..d2ee90fc 100644
--- a/src/engine/external/wavpack/license.txt
+++ b/src/engine/external/wavpack/license.txt
@@ -1,25 +1,25 @@
-               Copyright (c) 1998 - 2006 Conifer Software

-                          All rights reserved.

-

-Redistribution and use in source and binary forms, with or without

-modification, are permitted provided that the following conditions are met:

-

-    * Redistributions of source code must retain the above copyright notice,

-      this list of conditions and the following disclaimer.

-    * Redistributions in binary form must reproduce the above copyright notice,

-      this list of conditions and the following disclaimer in the

-      documentation and/or other materials provided with the distribution.

-    * Neither the name of Conifer Software nor the names of its contributors

-      may be used to endorse or promote products derived from this software

-      without specific prior written permission.

-

-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

-ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR

-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR

-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER

-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,

-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+               Copyright (c) 1998 - 2006 Conifer Software
+                          All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Conifer Software nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/src/engine/external/wavpack/metadata.c b/src/engine/external/wavpack/metadata.c
index 578b17f9..6a19ac9b 100644
--- a/src/engine/external/wavpack/metadata.c
+++ b/src/engine/external/wavpack/metadata.c
@@ -1,105 +1,105 @@
-////////////////////////////////////////////////////////////////////////////

-//                           **** WAVPACK ****                            //

-//                  Hybrid Lossless Wavefile Compressor                   //

-//              Copyright (c) 1998 - 2006 Conifer Software.               //

-//                          All Rights Reserved.                          //

-//      Distributed under the BSD Software License (see license.txt)      //

-////////////////////////////////////////////////////////////////////////////

-

-// metadata.c

-

-// This module handles the metadata structure introduced in WavPack 4.0

-

-#include "wavpack.h"

-

-int read_metadata_buff (WavpackContext *wpc, WavpackMetadata *wpmd)

-{

-    uchar tchar;

-

-    if (!wpc->infile (&wpmd->id, 1) || !wpc->infile (&tchar, 1))

-        return FALSE;

-

-    wpmd->byte_length = tchar << 1;

-

-    if (wpmd->id & ID_LARGE) {

-        wpmd->id &= ~ID_LARGE;

-

-        if (!wpc->infile (&tchar, 1))

-            return FALSE;

-

-        wpmd->byte_length += (int32_t) tchar << 9; 

-

-        if (!wpc->infile (&tchar, 1))

-            return FALSE;

-

-        wpmd->byte_length += (int32_t) tchar << 17;

-    }

-

-    if (wpmd->id & ID_ODD_SIZE) {

-        wpmd->id &= ~ID_ODD_SIZE;

-        wpmd->byte_length--;

-    }

-

-    if (wpmd->byte_length && wpmd->byte_length <= sizeof (wpc->read_buffer)) {

-        uint32_t bytes_to_read = wpmd->byte_length + (wpmd->byte_length & 1);

-

-        if (wpc->infile (wpc->read_buffer, bytes_to_read) != (int32_t) bytes_to_read) {

-            wpmd->data = NULL;

-            return FALSE;

-        }

-

-        wpmd->data = wpc->read_buffer;

-    }

-    else

-        wpmd->data = NULL;

-

-    return TRUE;

-}

-

-int process_metadata (WavpackContext *wpc, WavpackMetadata *wpmd)

-{

-    WavpackStream *wps = &wpc->stream;

-

-    switch (wpmd->id) {

-        case ID_DUMMY:

-            return TRUE;

-

-        case ID_DECORR_TERMS:

-            return read_decorr_terms (wps, wpmd);

-

-        case ID_DECORR_WEIGHTS:

-            return read_decorr_weights (wps, wpmd);

-

-        case ID_DECORR_SAMPLES:

-            return read_decorr_samples (wps, wpmd);

-

-        case ID_ENTROPY_VARS:

-            return read_entropy_vars (wps, wpmd);

-

-        case ID_HYBRID_PROFILE:

-            return read_hybrid_profile (wps, wpmd);

-

-        case ID_FLOAT_INFO:

-            return read_float_info (wps, wpmd);

-

-        case ID_INT32_INFO:

-            return read_int32_info (wps, wpmd);

-

-        case ID_CHANNEL_INFO:

-            return read_channel_info (wpc, wpmd);

-

-        case ID_CONFIG_BLOCK:

-            return read_config_info (wpc, wpmd);

-

-        case ID_WV_BITSTREAM:

-            return init_wv_bitstream (wpc, wpmd);

-

-        case ID_SHAPING_WEIGHTS:

-        case ID_WVC_BITSTREAM:

-        case ID_WVX_BITSTREAM:

-            return TRUE;

-

-        default:

-            return (wpmd->id & ID_OPTIONAL_DATA) ? TRUE : FALSE;

-    }

-}

+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2006 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// metadata.c
+
+// This module handles the metadata structure introduced in WavPack 4.0
+
+#include "wavpack.h"
+// Read the next metadata sub-block header, plus its payload when that fits in wpc->read_buffer.
+int read_metadata_buff (WavpackContext *wpc, WavpackMetadata *wpmd)
+{
+    uchar tchar;
+    // First byte is the id (with flag bits), second is the low byte of the length.
+    if (!wpc->infile (&wpmd->id, 1) || !wpc->infile (&tchar, 1))
+        return FALSE;
+    // The stored length counts 2-byte units, hence the shift by one.
+    wpmd->byte_length = tchar << 1;
+    // ID_LARGE: two more length bytes follow, extending the count to 24 bits.
+    if (wpmd->id & ID_LARGE) {
+        wpmd->id &= ~ID_LARGE;
+
+        if (!wpc->infile (&tchar, 1))
+            return FALSE;
+
+        wpmd->byte_length += (int32_t) tchar << 9; 
+
+        if (!wpc->infile (&tchar, 1))
+            return FALSE;
+
+        wpmd->byte_length += (int32_t) tchar << 17;
+    }
+    // ID_ODD_SIZE: the true length is one byte short of the even count.
+    if (wpmd->id & ID_ODD_SIZE) {
+        wpmd->id &= ~ID_ODD_SIZE;
+        wpmd->byte_length--;
+    }
+    // Read the payload only if it is non-empty and fits in the buffer.
+    if (wpmd->byte_length && wpmd->byte_length <= sizeof (wpc->read_buffer)) {
+        uint32_t bytes_to_read = wpmd->byte_length + (wpmd->byte_length & 1);   // round up to even
+
+        if (wpc->infile (wpc->read_buffer, bytes_to_read) != (int32_t) bytes_to_read) {
+            wpmd->data = NULL;
+            return FALSE;
+        }
+
+        wpmd->data = wpc->read_buffer;
+    }
+    else
+        wpmd->data = NULL;      // empty or oversized: payload not read here
+
+    return TRUE;
+}
+// Dispatch a parsed metadata block to its handler according to wpmd->id; returns the handler's result.
+int process_metadata (WavpackContext *wpc, WavpackMetadata *wpmd)
+{
+    WavpackStream *wps = &wpc->stream;
+    // Per-stream blocks get wps; channel, config and bitstream blocks get the whole context.
+    switch (wpmd->id) {
+        case ID_DUMMY:
+            return TRUE;        // nothing to process
+
+        case ID_DECORR_TERMS:
+            return read_decorr_terms (wps, wpmd);
+
+        case ID_DECORR_WEIGHTS:
+            return read_decorr_weights (wps, wpmd);
+
+        case ID_DECORR_SAMPLES:
+            return read_decorr_samples (wps, wpmd);
+
+        case ID_ENTROPY_VARS:
+            return read_entropy_vars (wps, wpmd);
+
+        case ID_HYBRID_PROFILE:
+            return read_hybrid_profile (wps, wpmd);
+
+        case ID_FLOAT_INFO:
+            return read_float_info (wps, wpmd);
+
+        case ID_INT32_INFO:
+            return read_int32_info (wps, wpmd);
+
+        case ID_CHANNEL_INFO:
+            return read_channel_info (wpc, wpmd);
+
+        case ID_CONFIG_BLOCK:
+            return read_config_info (wpc, wpmd);
+
+        case ID_WV_BITSTREAM:
+            return init_wv_bitstream (wpc, wpmd);
+        // Recognized but not processed here; accepted without action.
+        case ID_SHAPING_WEIGHTS:
+        case ID_WVC_BITSTREAM:
+        case ID_WVX_BITSTREAM:
+            return TRUE;
+        // Unknown ids are accepted only when flagged ID_OPTIONAL_DATA.
+        default:
+            return (wpmd->id & ID_OPTIONAL_DATA) ? TRUE : FALSE;
+    }
+}
diff --git a/src/engine/external/wavpack/readme.txt b/src/engine/external/wavpack/readme.txt
index 4ccbdf42..07b7a261 100644
--- a/src/engine/external/wavpack/readme.txt
+++ b/src/engine/external/wavpack/readme.txt
@@ -1,68 +1,68 @@
-////////////////////////////////////////////////////////////////////////////

-//                           **** WAVPACK ****                            //

-//                  Hybrid Lossless Wavefile Compressor                   //

-//              Copyright (c) 1998 - 2006 Conifer Software.               //

-//                          All Rights Reserved.                          //

-//      Distributed under the BSD Software License (see license.txt)      //

-////////////////////////////////////////////////////////////////////////////

-

-This package contains a tiny version of the WavPack 4.40 decoder that might

-be used in a "resource limited" CPU environment or form the basis for a

-hardware decoding implementation. It is packaged with a demo command-line

-program that accepts a WavPack audio file on stdin and outputs a RIFF wav

-file to stdout. The program is standard C, and a win32 executable is

-included which was compiled under MS Visual C++ 6.0 using this command:

-

-cl /O1 /DWIN32 wvfilter.c wputils.c unpack.c float.c metadata.c words.c bits.c

-

-WavPack data is read with a stream reading callback. No direct seeking is

-provided for, but it is possible to start decoding anywhere in a WavPack

-stream. In this case, WavPack will be able to provide the sample-accurate

-position when it synchs with the data and begins decoding. The WIN32 macro

-is used for Windows to force the stdin and stdout streams to be binary mode.

-

-Compared to the previous version, this library has been optimized somewhat

-for improved performance in exchange for slightly larger code size. The

-library also now includes hand-optimized assembly language versions of the

-decorrelation functions for both the ColdFire (w/EMAC) and ARM processors.

-

-For demonstration purposes this uses a single static copy of the

-WavpackContext structure, so obviously it cannot be used for more than one

-file at a time. Also, this decoder will not handle "correction" files, plays

-only the first two channels of multi-channel files, and is limited in

-resolution in some large integer or floating point files (but always

-provides at least 24 bits of resolution). It also will not accept WavPack

-files from before version 4.0.

-

-The previous version of this library would handle float files by returning

-32-bit floating-point data (even though no floating point math was used).

-Because this library would normally be used for simply playing WavPack

-files where lossless performance (beyond 24-bits) is not relevant, I have

-changed this behavior. Now, these files will generate clipped 24-bit data.

-The MODE_FLOAT flag will still be returned by WavpackGetMode(), but the

-BitsPerSample and BytesPerSample queries will be 24 and 3, respectfully.

-What this means is that an application that can handle 24-bit data will

-now be able to handle floating point data (assuming that the MODE_FLOAT

-flag is ignored).

-

-To make this code viable on the greatest number of hardware platforms, the

-following are true:

-

-   speed is about 5x realtime on an AMD K6 300 MHz

-      ("high" mode 16/44 stereo; normal mode is about twice that fast)

-

-   no floating-point math required; just 32b * 32b = 32b int multiply

-

-   large data areas are static and less than 4K total

-   executable code and tables are less than 40K

-   no malloc / free usage

-

-To maintain compatibility on various platforms, the following conventions

-are used:

-

-   a "char" must be exactly 8-bits

-   a "short" must be exactly 16-bits

-   an "int" must be at least 16-bits, but may be larger

-   the "long" type is not used to avoid problems with 64-bit compilers

-

-Questions or comments should be directed to david@wavpack.com

+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2006 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+This package contains a tiny version of the WavPack 4.40 decoder that might
+be used in a "resource limited" CPU environment or form the basis for a
+hardware decoding implementation. It is packaged with a demo command-line
+program that accepts a WavPack audio file on stdin and outputs a RIFF wav
+file to stdout. The program is standard C, and a win32 executable is
+included which was compiled under MS Visual C++ 6.0 using this command:
+
+cl /O1 /DWIN32 wvfilter.c wputils.c unpack.c float.c metadata.c words.c bits.c
+
+WavPack data is read with a stream reading callback. No direct seeking is
+provided for, but it is possible to start decoding anywhere in a WavPack
+stream. In this case, WavPack will be able to provide the sample-accurate
+position when it synchs with the data and begins decoding. The WIN32 macro
+is used for Windows to force the stdin and stdout streams to be binary mode.
+
+Compared to the previous version, this library has been optimized somewhat
+for improved performance in exchange for slightly larger code size. The
+library also now includes hand-optimized assembly language versions of the
+decorrelation functions for both the ColdFire (w/EMAC) and ARM processors.
+
+For demonstration purposes this uses a single static copy of the
+WavpackContext structure, so obviously it cannot be used for more than one
+file at a time. Also, this decoder will not handle "correction" files, plays
+only the first two channels of multi-channel files, and is limited in
+resolution in some large integer or floating point files (but always
+provides at least 24 bits of resolution). It also will not accept WavPack
+files from before version 4.0.
+
+The previous version of this library would handle float files by returning
+32-bit floating-point data (even though no floating point math was used).
+Because this library would normally be used for simply playing WavPack
+files where lossless performance (beyond 24-bits) is not relevant, I have
+changed this behavior. Now, these files will generate clipped 24-bit data.
+The MODE_FLOAT flag will still be returned by WavpackGetMode(), but the
+BitsPerSample and BytesPerSample queries will be 24 and 3, respectively.
+What this means is that an application that can handle 24-bit data will
+now be able to handle floating point data (assuming that the MODE_FLOAT
+flag is ignored).
+
+To make this code viable on the greatest number of hardware platforms, the
+following are true:
+
+   speed is about 5x realtime on an AMD K6 300 MHz
+      ("high" mode 16/44 stereo; normal mode is about twice that fast)
+
+   no floating-point math required; just 32b * 32b = 32b int multiply
+
+   large data areas are static and less than 4K total
+   executable code and tables are less than 40K
+   no malloc / free usage
+
+To maintain compatibility on various platforms, the following conventions
+are used:
+
+   a "char" must be exactly 8-bits
+   a "short" must be exactly 16-bits
+   an "int" must be at least 16-bits, but may be larger
+   the "long" type is not used to avoid problems with 64-bit compilers
+
+Questions or comments should be directed to david@wavpack.com
diff --git a/src/engine/external/wavpack/unpack.c b/src/engine/external/wavpack/unpack.c
index 2bed5a0c..317fe222 100644
--- a/src/engine/external/wavpack/unpack.c
+++ b/src/engine/external/wavpack/unpack.c
@@ -1,785 +1,785 @@
-////////////////////////////////////////////////////////////////////////////

-//                           **** WAVPACK ****                            //

-//                  Hybrid Lossless Wavefile Compressor                   //

-//              Copyright (c) 1998 - 2006 Conifer Software.               //

-//                          All Rights Reserved.                          //

-//      Distributed under the BSD Software License (see license.txt)      //

-////////////////////////////////////////////////////////////////////////////

-

-// unpack.c

-

-// This module actually handles the decompression of the audio data, except

-// for the entropy decoding which is handled by the words.c module. For

-// maximum efficiency, the conversion is isolated to tight loops that handle

-// an entire buffer.

-

-#include "wavpack.h"

-

-#include <stdlib.h>

-#include <string.h>

-

-#define LOSSY_MUTE

-

-///////////////////////////// executable code ////////////////////////////////

-

-// This function initializes everything required to unpack a WavPack block

-// and must be called before unpack_samples() is called to obtain audio data.

-// It is assumed that the WavpackHeader has been read into the wps->wphdr

-// (in the current WavpackStream). This is where all the metadata blocks are

-// scanned up to the one containing the audio bitstream.

-

-int unpack_init (WavpackContext *wpc)

-{

-    WavpackStream *wps = &wpc->stream;

-    WavpackMetadata wpmd;

-

-    if (wps->wphdr.block_samples && wps->wphdr.block_index != (uint32_t) -1)

-        wps->sample_index = wps->wphdr.block_index;

-

-    wps->mute_error = FALSE;

-    wps->crc = 0xffffffff;

-    CLEAR (wps->wvbits);

-    CLEAR (wps->decorr_passes);

-    CLEAR (wps->w);

-

-    while (read_metadata_buff (wpc, &wpmd)) {

-        if (!process_metadata (wpc, &wpmd)) {

-            strcpy (wpc->error_message, "invalid metadata!");

-            return FALSE;

-        }

-

-        if (wpmd.id == ID_WV_BITSTREAM)

-            break;

-    }

-

-    if (wps->wphdr.block_samples && !bs_is_open (&wps->wvbits)) {

-        strcpy (wpc->error_message, "invalid WavPack file!");

-        return FALSE;

-    }

-

-    if (wps->wphdr.block_samples) {

-        if ((wps->wphdr.flags & INT32_DATA) && wps->int32_sent_bits)

-            wpc->lossy_blocks = TRUE;

-

-        if ((wps->wphdr.flags & FLOAT_DATA) &&

-            wps->float_flags & (FLOAT_EXCEPTIONS | FLOAT_ZEROS_SENT | FLOAT_SHIFT_SENT | FLOAT_SHIFT_SAME))

-                wpc->lossy_blocks = TRUE;

-    }

-

-    return TRUE;

-}

-

-// This function initialzes the main bitstream for audio samples, which must

-// be in the "wv" file.

-

-int init_wv_bitstream (WavpackContext *wpc, WavpackMetadata *wpmd)

-{

-    WavpackStream *wps = &wpc->stream;

-

-    if (wpmd->data)

-        bs_open_read (&wps->wvbits, wpmd->data, (unsigned char *) wpmd->data + wpmd->byte_length, NULL, 0);

-    else if (wpmd->byte_length)

-        bs_open_read (&wps->wvbits, wpc->read_buffer, wpc->read_buffer + sizeof (wpc->read_buffer),

-            wpc->infile, wpmd->byte_length + (wpmd->byte_length & 1));

-

-    return TRUE;

-}

-

-// Read decorrelation terms from specified metadata block into the

-// decorr_passes array. The terms range from -3 to 8, plus 17 & 18;

-// other values are reserved and generate errors for now. The delta

-// ranges from 0 to 7 with all values valid. Note that the terms are

-// stored in the opposite order in the decorr_passes array compared

-// to packing.

-

-int read_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd)

-{

-    int termcnt = wpmd->byte_length;

-    uchar *byteptr = wpmd->data;

-    struct decorr_pass *dpp;

-

-    if (termcnt > MAX_NTERMS)

-        return FALSE;

-

-    wps->num_terms = termcnt;

-

-    for (dpp = wps->decorr_passes + termcnt - 1; termcnt--; dpp--) {

-        dpp->term = (int)(*byteptr & 0x1f) - 5;

-        dpp->delta = (*byteptr++ >> 5) & 0x7;

-

-        if (!dpp->term || dpp->term < -3 || (dpp->term > MAX_TERM && dpp->term < 17) || dpp->term > 18)

-            return FALSE;

-    }

-

-    return TRUE;

-}

-

-// Read decorrelation weights from specified metadata block into the

-// decorr_passes array. The weights range +/-1024, but are rounded and

-// truncated to fit in signed chars for metadata storage. Weights are

-// separate for the two channels and are specified from the "last" term

-// (first during encode). Unspecified weights are set to zero.

-

-int read_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd)

-{

-    int termcnt = wpmd->byte_length, tcount;

-    signed char *byteptr = wpmd->data;

-    struct decorr_pass *dpp;

-

-    if (!(wps->wphdr.flags & MONO_DATA))

-        termcnt /= 2;

-

-    if (termcnt > wps->num_terms)

-        return FALSE;

-

-    for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++)

-        dpp->weight_A = dpp->weight_B = 0;

-

-    while (--dpp >= wps->decorr_passes && termcnt--) {

-        dpp->weight_A = restore_weight (*byteptr++);

-

-        if (!(wps->wphdr.flags & MONO_DATA))

-            dpp->weight_B = restore_weight (*byteptr++);

-    }

-

-    return TRUE;

-}

-

-// Read decorrelation samples from specified metadata block into the

-// decorr_passes array. The samples are signed 32-bit values, but are

-// converted to signed log2 values for storage in metadata. Values are

-// stored for both channels and are specified from the "last" term

-// (first during encode) with unspecified samples set to zero. The

-// number of samples stored varies with the actual term value, so

-// those must obviously come first in the metadata.

-

-int read_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd)

-{

-    uchar *byteptr = wpmd->data;

-    uchar *endptr = byteptr + wpmd->byte_length;

-    struct decorr_pass *dpp;

-    int tcount;

-

-    for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) {

-        CLEAR (dpp->samples_A);

-        CLEAR (dpp->samples_B);

-    }

-

-    if (wps->wphdr.version == 0x402 && (wps->wphdr.flags & HYBRID_FLAG)) {

-        byteptr += 2;

-

-        if (!(wps->wphdr.flags & MONO_DATA))

-            byteptr += 2;

-    }

-

-    while (dpp-- > wps->decorr_passes && byteptr < endptr)

-        if (dpp->term > MAX_TERM) {

-            dpp->samples_A [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8)));

-            dpp->samples_A [1] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8)));

-            byteptr += 4;

-

-            if (!(wps->wphdr.flags & MONO_DATA)) {

-                dpp->samples_B [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8)));

-                dpp->samples_B [1] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8)));

-                byteptr += 4;

-            }

-        }

-        else if (dpp->term < 0) {

-            dpp->samples_A [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8)));

-            dpp->samples_B [0] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8)));

-            byteptr += 4;

-        }

-        else {

-            int m = 0, cnt = dpp->term;

-

-            while (cnt--) {

-                dpp->samples_A [m] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8)));

-                byteptr += 2;

-

-                if (!(wps->wphdr.flags & MONO_DATA)) {

-                    dpp->samples_B [m] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8)));

-                    byteptr += 2;

-                }

-

-                m++;

-            }

-        }

-

-    return byteptr == endptr;

-}

-

-// Read the int32 data from the specified metadata into the specified stream.

-// This data is used for integer data that has more than 24 bits of magnitude

-// or, in some cases, used to eliminate redundant bits from any audio stream.

-

-int read_int32_info (WavpackStream *wps, WavpackMetadata *wpmd)

-{

-    int bytecnt = wpmd->byte_length;

-    char *byteptr = wpmd->data;

-

-    if (bytecnt != 4)

-        return FALSE;

-

-    wps->int32_sent_bits = *byteptr++;

-    wps->int32_zeros = *byteptr++;

-    wps->int32_ones = *byteptr++;

-    wps->int32_dups = *byteptr;

-    return TRUE;

-}

-

-// Read multichannel information from metadata. The first byte is the total

-// number of channels and the following bytes represent the channel_mask

-// as described for Microsoft WAVEFORMATEX.

-

-int read_channel_info (WavpackContext *wpc, WavpackMetadata *wpmd)

-{

-    int bytecnt = wpmd->byte_length, shift = 0;

-    char *byteptr = wpmd->data;

-    uint32_t mask = 0;

-

-    if (!bytecnt || bytecnt > 5)

-        return FALSE;

-

-    wpc->config.num_channels = *byteptr++;

-

-    while (--bytecnt) {

-        mask |= (uint32_t) *byteptr++ << shift;

-        shift += 8;

-    }

-

-    wpc->config.channel_mask = mask;

-    return TRUE;

-}

-

-// Read configuration information from metadata.

-

-int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd)

-{

-    int bytecnt = wpmd->byte_length;

-    uchar *byteptr = wpmd->data;

-

-    if (bytecnt >= 3) {

-        wpc->config.flags &= 0xff;

-        wpc->config.flags |= (int32_t) *byteptr++ << 8;

-        wpc->config.flags |= (int32_t) *byteptr++ << 16;

-        wpc->config.flags |= (int32_t) *byteptr << 24;

-    }

-

-    return TRUE;

-}

-

-// This monster actually unpacks the WavPack bitstream(s) into the specified

-// buffer as 32-bit integers or floats (depending on orignal data). Lossy

-// samples will be clipped to their original limits (i.e. 8-bit samples are

-// clipped to -128/+127) but are still returned in int32_ts. It is up to the

-// caller to potentially reformat this for the final output including any

-// multichannel distribution, block alignment or endian compensation. The

-// function unpack_init() must have been called and the entire WavPack block

-// must still be visible (although wps->blockbuff will not be accessed again).

-// For maximum clarity, the function is broken up into segments that handle

-// various modes. This makes for a few extra infrequent flag checks, but

-// makes the code easier to follow because the nesting does not become so

-// deep. For maximum efficiency, the conversion is isolated to tight loops

-// that handle an entire buffer. The function returns the total number of

-// samples unpacked, which can be less than the number requested if an error

-// occurs or the end of the block is reached.

-

-#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)

-extern void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count);

-#elif defined(CPU_ARM) && !defined(SIMULATOR)

-extern void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count);

-extern void decorr_stereo_pass_cont_arml (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count);

-#else

-static void decorr_stereo_pass_cont (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count);

-#endif

-

-static void decorr_mono_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count);

-static void decorr_stereo_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count);

-static void fixup_samples (WavpackStream *wps, int32_t *buffer, uint32_t sample_count);

-

-int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count)

-{

-    WavpackStream *wps = &wpc->stream;

-    uint32_t flags = wps->wphdr.flags, crc = wps->crc, i;

-    int32_t mute_limit = (1L << ((flags & MAG_MASK) >> MAG_LSB)) + 2;

-    struct decorr_pass *dpp;

-    int32_t *bptr, *eptr;

-    int tcount;

-

-    if (wps->sample_index + sample_count > wps->wphdr.block_index + wps->wphdr.block_samples)

-        sample_count = wps->wphdr.block_index + wps->wphdr.block_samples - wps->sample_index;

-

-    if (wps->mute_error) {

-        memset (buffer, 0, sample_count * (flags & MONO_FLAG ? 4 : 8));

-        wps->sample_index += sample_count;

-        return sample_count;

-    }

-

-    if (flags & HYBRID_FLAG)

-        mute_limit *= 2;

-

-    ///////////////////// handle version 4 mono data /////////////////////////

-

-    if (flags & MONO_DATA) {

-        eptr = buffer + sample_count;

-        i = get_words (buffer, sample_count, flags, &wps->w, &wps->wvbits);

-

-        for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++)

-            decorr_mono_pass (dpp, buffer, sample_count);

-

-        for (bptr = buffer; bptr < eptr; ++bptr) {

-            if (labs (bptr [0]) > mute_limit) {

-                i = bptr - buffer;

-                break;

-            }

-

-            crc = crc * 3 + bptr [0];

-        }

-    }

-

-    //////////////////// handle version 4 stereo data ////////////////////////

-

-    else {

-        eptr = buffer + (sample_count * 2);

-        i = get_words (buffer, sample_count, flags, &wps->w, &wps->wvbits);

-

-        if (sample_count < 16)

-            for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++)

-                decorr_stereo_pass (dpp, buffer, sample_count);

-        else

-            for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) {

-                decorr_stereo_pass (dpp, buffer, 8);

-#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)

-                decorr_stereo_pass_cont_mcf5249 (dpp, buffer + 16, sample_count - 8);

-#elif defined(CPU_ARM) && !defined(SIMULATOR)

-                if (((flags & MAG_MASK) >> MAG_LSB) > 15)

-                    decorr_stereo_pass_cont_arml (dpp, buffer + 16, sample_count - 8);

-                else

-                    decorr_stereo_pass_cont_arm (dpp, buffer + 16, sample_count - 8);

-#else

-                decorr_stereo_pass_cont (dpp, buffer + 16, sample_count - 8);

-#endif

-            }

-

-        if (flags & JOINT_STEREO)

-            for (bptr = buffer; bptr < eptr; bptr += 2) {

-                bptr [0] += (bptr [1] -= (bptr [0] >> 1));

-

-                if (labs (bptr [0]) > mute_limit || labs (bptr [1]) > mute_limit) {

-                    i = (bptr - buffer) / 2;

-                    break;

-                }

-

-                crc = (crc * 3 + bptr [0]) * 3 + bptr [1];

-            }

-        else

-            for (bptr = buffer; bptr < eptr; bptr += 2) {

-                if (labs (bptr [0]) > mute_limit || labs (bptr [1]) > mute_limit) {

-                    i = (bptr - buffer) / 2;

-                    break;

-                }

-

-                crc = (crc * 3 + bptr [0]) * 3 + bptr [1];

-            }

-    }

-

-    if (i != sample_count) {

-        memset (buffer, 0, sample_count * (flags & MONO_FLAG ? 4 : 8));

-        wps->mute_error = TRUE;

-        i = sample_count;

-    }

-

-    fixup_samples (wps, buffer, i);

-

-    if (flags & FALSE_STEREO) {

-        int32_t *dptr = buffer + i * 2;

-        int32_t *sptr = buffer + i;

-        int32_t c = i;

-

-        while (c--) {

-            *--dptr = *--sptr;

-            *--dptr = *sptr;

-        }

-    }

-

-    wps->sample_index += i;

-    wps->crc = crc;

-

-    return i;

-}

-

-static void decorr_stereo_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count)

-{

-    int32_t delta = dpp->delta, weight_A = dpp->weight_A, weight_B = dpp->weight_B;

-    int32_t *bptr, *eptr = buffer + (sample_count * 2), sam_A, sam_B;

-    int m, k;

-

-    switch (dpp->term) {

-

-        case 17:

-            for (bptr = buffer; bptr < eptr; bptr += 2) {

-                sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1];

-                dpp->samples_A [1] = dpp->samples_A [0];

-                dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0];

-                update_weight (weight_A, delta, sam_A, bptr [0]);

-                bptr [0] = dpp->samples_A [0];

-

-                sam_A = 2 * dpp->samples_B [0] - dpp->samples_B [1];

-                dpp->samples_B [1] = dpp->samples_B [0];

-                dpp->samples_B [0] = apply_weight (weight_B, sam_A) + bptr [1];

-                update_weight (weight_B, delta, sam_A, bptr [1]);

-                bptr [1] = dpp->samples_B [0];

-            }

-

-            break;

-

-        case 18:

-            for (bptr = buffer; bptr < eptr; bptr += 2) {

-                sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1;

-                dpp->samples_A [1] = dpp->samples_A [0];

-                dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0];

-                update_weight (weight_A, delta, sam_A, bptr [0]);

-                bptr [0] = dpp->samples_A [0];

-

-                sam_A = (3 * dpp->samples_B [0] - dpp->samples_B [1]) >> 1;

-                dpp->samples_B [1] = dpp->samples_B [0];

-                dpp->samples_B [0] = apply_weight (weight_B, sam_A) + bptr [1];

-                update_weight (weight_B, delta, sam_A, bptr [1]);

-                bptr [1] = dpp->samples_B [0];

-            }

-

-            break;

-

-        default:

-            for (m = 0, k = dpp->term & (MAX_TERM - 1), bptr = buffer; bptr < eptr; bptr += 2) {

-                sam_A = dpp->samples_A [m];

-                dpp->samples_A [k] = apply_weight (weight_A, sam_A) + bptr [0];

-                update_weight (weight_A, delta, sam_A, bptr [0]);

-                bptr [0] = dpp->samples_A [k];

-

-                sam_A = dpp->samples_B [m];

-                dpp->samples_B [k] = apply_weight (weight_B, sam_A) + bptr [1];

-                update_weight (weight_B, delta, sam_A, bptr [1]);

-                bptr [1] = dpp->samples_B [k];

-

-                m = (m + 1) & (MAX_TERM - 1);

-                k = (k + 1) & (MAX_TERM - 1);

-            }

-

-            if (m) {

-                int32_t temp_samples [MAX_TERM];

-

-                memcpy (temp_samples, dpp->samples_A, sizeof (dpp->samples_A));

-

-                for (k = 0; k < MAX_TERM; k++, m++)

-                    dpp->samples_A [k] = temp_samples [m & (MAX_TERM - 1)];

-

-                memcpy (temp_samples, dpp->samples_B, sizeof (dpp->samples_B));

-

-                for (k = 0; k < MAX_TERM; k++, m++)

-                    dpp->samples_B [k] = temp_samples [m & (MAX_TERM - 1)];

-            }

-

-            break;

-

-        case -1:

-            for (bptr = buffer; bptr < eptr; bptr += 2) {

-                sam_A = bptr [0] + apply_weight (weight_A, dpp->samples_A [0]);

-                update_weight_clip (weight_A, delta, dpp->samples_A [0], bptr [0]);

-                bptr [0] = sam_A;

-                dpp->samples_A [0] = bptr [1] + apply_weight (weight_B, sam_A);

-                update_weight_clip (weight_B, delta, sam_A, bptr [1]);

-                bptr [1] = dpp->samples_A [0];

-            }

-

-            break;

-

-        case -2:

-            for (bptr = buffer; bptr < eptr; bptr += 2) {

-                sam_B = bptr [1] + apply_weight (weight_B, dpp->samples_B [0]);

-                update_weight_clip (weight_B, delta, dpp->samples_B [0], bptr [1]);

-                bptr [1] = sam_B;

-                dpp->samples_B [0] = bptr [0] + apply_weight (weight_A, sam_B);

-                update_weight_clip (weight_A, delta, sam_B, bptr [0]);

-                bptr [0] = dpp->samples_B [0];

-            }

-

-            break;

-

-        case -3:

-            for (bptr = buffer; bptr < eptr; bptr += 2) {

-                sam_A = bptr [0] + apply_weight (weight_A, dpp->samples_A [0]);

-                update_weight_clip (weight_A, delta, dpp->samples_A [0], bptr [0]);

-                sam_B = bptr [1] + apply_weight (weight_B, dpp->samples_B [0]);

-                update_weight_clip (weight_B, delta, dpp->samples_B [0], bptr [1]);

-                bptr [0] = dpp->samples_B [0] = sam_A;

-                bptr [1] = dpp->samples_A [0] = sam_B;

-            }

-

-            break;

-    }

-

-    dpp->weight_A = weight_A;

-    dpp->weight_B = weight_B;

-}

-

-#if (!defined(CPU_COLDFIRE) && !defined(CPU_ARM)) || defined(SIMULATOR)

-

-static void decorr_stereo_pass_cont (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count)

-{

-    int32_t delta = dpp->delta, weight_A = dpp->weight_A, weight_B = dpp->weight_B;

-    int32_t *bptr, *tptr, *eptr = buffer + (sample_count * 2), sam_A, sam_B;

-    int k, i;

-

-    switch (dpp->term) {

-

-        case 17:

-            for (bptr = buffer; bptr < eptr; bptr += 2) {

-                sam_A = 2 * bptr [-2] - bptr [-4];

-                bptr [0] = apply_weight (weight_A, sam_A) + (sam_B = bptr [0]);

-                update_weight (weight_A, delta, sam_A, sam_B);

-

-                sam_A = 2 * bptr [-1] - bptr [-3];

-                bptr [1] = apply_weight (weight_B, sam_A) + (sam_B = bptr [1]);

-                update_weight (weight_B, delta, sam_A, sam_B);

-            }

-

-            dpp->samples_B [0] = bptr [-1];

-            dpp->samples_A [0] = bptr [-2];

-            dpp->samples_B [1] = bptr [-3];

-            dpp->samples_A [1] = bptr [-4];

-            break;

-

-        case 18:

-            for (bptr = buffer; bptr < eptr; bptr += 2) {

-                sam_A = (3 * bptr [-2] - bptr [-4]) >> 1;

-                bptr [0] = apply_weight (weight_A, sam_A) + (sam_B = bptr [0]);

-                update_weight (weight_A, delta, sam_A, sam_B);

-

-                sam_A = (3 * bptr [-1] - bptr [-3]) >> 1;

-                bptr [1] = apply_weight (weight_B, sam_A) + (sam_B = bptr [1]);

-                update_weight (weight_B, delta, sam_A, sam_B);

-            }

-

-            dpp->samples_B [0] = bptr [-1];

-            dpp->samples_A [0] = bptr [-2];

-            dpp->samples_B [1] = bptr [-3];

-            dpp->samples_A [1] = bptr [-4];

-            break;

-

-        default:

-            for (bptr = buffer, tptr = buffer - (dpp->term * 2); bptr < eptr; bptr += 2, tptr += 2) {

-                bptr [0] = apply_weight (weight_A, tptr [0]) + (sam_A = bptr [0]);

-                update_weight (weight_A, delta, tptr [0], sam_A);

-

-                bptr [1] = apply_weight (weight_B, tptr [1]) + (sam_A = bptr [1]);

-                update_weight (weight_B, delta, tptr [1], sam_A);

-            }

-

-            for (k = dpp->term - 1, i = 8; i--; k--) {

-                dpp->samples_B [k & (MAX_TERM - 1)] = *--bptr;

-                dpp->samples_A [k & (MAX_TERM - 1)] = *--bptr;

-            }

-

-            break;

-

-        case -1:

-            for (bptr = buffer; bptr < eptr; bptr += 2) {

-                bptr [0] = apply_weight (weight_A, bptr [-1]) + (sam_A = bptr [0]);

-                update_weight_clip (weight_A, delta, bptr [-1], sam_A);

-                bptr [1] = apply_weight (weight_B, bptr [0]) + (sam_A = bptr [1]);

-                update_weight_clip (weight_B, delta, bptr [0], sam_A);

-            }

-

-            dpp->samples_A [0] = bptr [-1];

-            break;

-

-        case -2:

-            for (bptr = buffer; bptr < eptr; bptr += 2) {

-                bptr [1] = apply_weight (weight_B, bptr [-2]) + (sam_A = bptr [1]);

-                update_weight_clip (weight_B, delta, bptr [-2], sam_A);

-                bptr [0] = apply_weight (weight_A, bptr [1]) + (sam_A = bptr [0]);

-                update_weight_clip (weight_A, delta, bptr [1], sam_A);

-            }

-

-            dpp->samples_B [0] = bptr [-2];

-            break;

-

-        case -3:

-            for (bptr = buffer; bptr < eptr; bptr += 2) {

-                bptr [0] = apply_weight (weight_A, bptr [-1]) + (sam_A = bptr [0]);

-                update_weight_clip (weight_A, delta, bptr [-1], sam_A);

-                bptr [1] = apply_weight (weight_B, bptr [-2]) + (sam_A = bptr [1]);

-                update_weight_clip (weight_B, delta, bptr [-2], sam_A);

-            }

-

-            dpp->samples_A [0] = bptr [-1];

-            dpp->samples_B [0] = bptr [-2];

-            break;

-    }

-

-    dpp->weight_A = weight_A;

-    dpp->weight_B = weight_B;

-}

-

-#endif

-

-static void decorr_mono_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count)

-{

-    int32_t delta = dpp->delta, weight_A = dpp->weight_A;

-    int32_t *bptr, *eptr = buffer + sample_count, sam_A;

-    int m, k;

-

-    switch (dpp->term) {

-

-        case 17:

-            for (bptr = buffer; bptr < eptr; bptr++) {

-                sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1];

-                dpp->samples_A [1] = dpp->samples_A [0];

-                dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0];

-                update_weight (weight_A, delta, sam_A, bptr [0]);

-                bptr [0] = dpp->samples_A [0];

-            }

-

-            break;

-

-        case 18:

-            for (bptr = buffer; bptr < eptr; bptr++) {

-                sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1;

-                dpp->samples_A [1] = dpp->samples_A [0];

-                dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0];

-                update_weight (weight_A, delta, sam_A, bptr [0]);

-                bptr [0] = dpp->samples_A [0];

-            }

-

-            break;

-

-        default:

-            for (m = 0, k = dpp->term & (MAX_TERM - 1), bptr = buffer; bptr < eptr; bptr++) {

-                sam_A = dpp->samples_A [m];

-                dpp->samples_A [k] = apply_weight (weight_A, sam_A) + bptr [0];

-                update_weight (weight_A, delta, sam_A, bptr [0]);

-                bptr [0] = dpp->samples_A [k];

-                m = (m + 1) & (MAX_TERM - 1);

-                k = (k + 1) & (MAX_TERM - 1);

-            }

-

-            if (m) {

-                int32_t temp_samples [MAX_TERM];

-

-                memcpy (temp_samples, dpp->samples_A, sizeof (dpp->samples_A));

-

-                for (k = 0; k < MAX_TERM; k++, m++)

-                    dpp->samples_A [k] = temp_samples [m & (MAX_TERM - 1)];

-            }

-

-            break;

-    }

-

-    dpp->weight_A = weight_A;

-}

-

-

-// This is a helper function for unpack_samples() that applies several final

-// operations. First, if the data is 32-bit float data, then that conversion

-// is done in the float.c module (whether lossy or lossless) and we return.

-// Otherwise, if the extended integer data applies, then that operation is

-// executed first. If the unpacked data is lossy (and not corrected) then

-// it is clipped and shifted in a single operation. Otherwise, if it's

-// lossless then the last step is to apply the final shift (if any).

-

-static void fixup_samples (WavpackStream *wps, int32_t *buffer, uint32_t sample_count)

-{

-    uint32_t flags = wps->wphdr.flags;

-    int shift = (flags & SHIFT_MASK) >> SHIFT_LSB;

-

-    if (flags & FLOAT_DATA) {

-        float_values (wps, buffer, (flags & MONO_FLAG) ? sample_count : sample_count * 2);

-        return;

-    }

-

-    if (flags & INT32_DATA) {

-        uint32_t count = (flags & MONO_FLAG) ? sample_count : sample_count * 2;

-        int sent_bits = wps->int32_sent_bits, zeros = wps->int32_zeros;

-        int ones = wps->int32_ones, dups = wps->int32_dups;

-        int32_t *dptr = buffer;

-

-        if (!(flags & HYBRID_FLAG) && !sent_bits && (zeros + ones + dups))

-            while (count--) {

-                if (zeros)

-                    *dptr <<= zeros;

-                else if (ones)

-                    *dptr = ((*dptr + 1) << ones) - 1;

-                else if (dups)

-                    *dptr = ((*dptr + (*dptr & 1)) << dups) - (*dptr & 1);

-

-                dptr++;

-            }

-        else

-            shift += zeros + sent_bits + ones + dups;

-    }

-

-    if (flags & HYBRID_FLAG) {

-        int32_t min_value, max_value, min_shifted, max_shifted;

-

-        switch (flags & BYTES_STORED) {

-            case 0:

-                min_shifted = (min_value = -128 >> shift) << shift;

-                max_shifted = (max_value = 127 >> shift) << shift;

-                break;

-

-            case 1:

-                min_shifted = (min_value = -32768 >> shift) << shift;

-                max_shifted = (max_value = 32767 >> shift) << shift;

-                break;

-

-            case 2:

-                min_shifted = (min_value = -8388608 >> shift) << shift;

-                max_shifted = (max_value = 8388607 >> shift) << shift;

-                break;

-

-            case 3:

-            default:

-                min_shifted = (min_value = (int32_t) 0x80000000 >> shift) << shift;

-                max_shifted = (max_value = (int32_t) 0x7FFFFFFF >> shift) << shift;

-                break;

-        }

-

-        if (!(flags & MONO_FLAG))

-            sample_count *= 2;

-

-        while (sample_count--) {

-            if (*buffer < min_value)

-                *buffer++ = min_shifted;

-            else if (*buffer > max_value)

-                *buffer++ = max_shifted;

-            else

-                *buffer++ <<= shift;

-        }

-    }

-    else if (shift) {

-        if (!(flags & MONO_FLAG))

-            sample_count *= 2;

-

-        while (sample_count--)

-            *buffer++ <<= shift;

-    }

-}

-

-// This function checks the crc value(s) for an unpacked block, returning the

-// number of actual crc errors detected for the block. The block must be

-// completely unpacked before this test is valid. For losslessly unpacked

-// blocks of float or extended integer data the extended crc is also checked.

-// Note that WavPack's crc is not a CCITT approved polynomial algorithm, but

-// is a much simpler method that is virtually as robust for real world data.

-

-int check_crc_error (WavpackContext *wpc)

-{

-    WavpackStream *wps = &wpc->stream;

-    int result = 0;

-

-    if (wps->crc != wps->wphdr.crc)

-        ++result;

-

-    return result;

-}

+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2006 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// unpack.c
+
+// This module actually handles the decompression of the audio data, except
+// for the entropy decoding which is handled by the words.c module. For
+// maximum efficiency, the conversion is isolated to tight loops that handle
+// an entire buffer.
+
+#include "wavpack.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#define LOSSY_MUTE
+
+///////////////////////////// executable code ////////////////////////////////
+
+// Prepares the current WavpackStream for decoding one block and must be
+// called before unpack_samples() is used to obtain audio data. The block's
+// WavpackHeader is expected to already be in wps->wphdr. Per-block decoder
+// state is reset, then metadata blocks are read and processed up to and
+// including the one that carries the audio bitstream. Returns TRUE on
+// success; on failure a message is copied to wpc->error_message and FALSE
+// is returned.
+
+int unpack_init (WavpackContext *wpc)
+{
+    WavpackStream *wps = &wpc->stream;
+    WavpackMetadata wpmd;
+
+    // a block with samples and an index other than -1 sets the stream position
+    if (wps->wphdr.block_samples && wps->wphdr.block_index != (uint32_t) -1)
+        wps->sample_index = wps->wphdr.block_index;
+
+    CLEAR (wps->w);
+    CLEAR (wps->decorr_passes);
+    CLEAR (wps->wvbits);
+    wps->crc = 0xffffffff;
+    wps->mute_error = FALSE;
+
+    for (;;) {
+        if (!read_metadata_buff (wpc, &wpmd))
+            break;
+
+        if (!process_metadata (wpc, &wpmd)) {
+            strcpy (wpc->error_message, "invalid metadata!");
+            return FALSE;
+        }
+
+        if (wpmd.id == ID_WV_BITSTREAM)
+            break;
+    }
+
+    // the remaining checks apply only to blocks that contain samples
+    if (!wps->wphdr.block_samples)
+        return TRUE;
+
+    if (!bs_is_open (&wps->wvbits)) {
+        strcpy (wpc->error_message, "invalid WavPack file!");
+        return FALSE;
+    }
+
+    if (((wps->wphdr.flags & INT32_DATA) && wps->int32_sent_bits) ||
+        ((wps->wphdr.flags & FLOAT_DATA) &&
+        (wps->float_flags & (FLOAT_EXCEPTIONS | FLOAT_ZEROS_SENT | FLOAT_SHIFT_SENT | FLOAT_SHIFT_SAME))))
+            wpc->lossy_blocks = TRUE;
+
+    return TRUE;
+}
+
+// This function initializes the main bitstream for audio samples, which must
+// be in the "wv" file. If the metadata carries a data pointer, the bitstream
+// is opened over exactly those byte_length bytes. Otherwise, if byte_length
+// is nonzero, it is opened over wpc->read_buffer, passing wpc->infile and
+// the byte length rounded up to an even value. Always returns TRUE.
+
+int init_wv_bitstream (WavpackContext *wpc, WavpackMetadata *wpmd)
+{
+    WavpackStream *wps = &wpc->stream;
+
+    if (wpmd->data) {
+        bs_open_read (&wps->wvbits, wpmd->data, (unsigned char *) wpmd->data + wpmd->byte_length, NULL, 0);
+        return TRUE;
+    }
+
+    if (wpmd->byte_length)
+        bs_open_read (&wps->wvbits, wpc->read_buffer, wpc->read_buffer + sizeof (wpc->read_buffer),
+            wpc->infile, wpmd->byte_length + (wpmd->byte_length & 1));
+
+    return TRUE;
+}
+
+// Read decorrelation terms from specified metadata block into the
+// decorr_passes array, one byte per term: the low 5 bits hold the term
+// biased by 5 and the top 3 bits hold the delta (0 to 7, all valid).
+// Accepted terms are -3 to -1, 1 to MAX_TERM, 17 and 18; anything else
+// makes the function return FALSE, as does a count above MAX_NTERMS.
+// Note that the terms are stored in the opposite order in the
+// decorr_passes array compared to packing.
+
+int read_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    int remaining = wpmd->byte_length;
+    uchar *src = wpmd->data;
+    struct decorr_pass *dpp;
+
+    if (remaining > MAX_NTERMS)
+        return FALSE;
+
+    wps->num_terms = remaining;
+    dpp = wps->decorr_passes + remaining;
+
+    // fill the array from its last used entry back toward the first
+    while (remaining--) {
+        uchar code = *src++;
+        int term = (int)(code & 0x1f) - 5;
+
+        --dpp;
+        dpp->term = term;
+        dpp->delta = (code >> 5) & 0x7;
+
+        if (term == 0 || term < -3 || term > 18 || (term > MAX_TERM && term < 17))
+            return FALSE;
+    }
+
+    return TRUE;
+}
+
+// Read decorrelation weights from specified metadata block into the
+// decorr_passes array. Each weight is stored as one signed char and is
+// expanded with restore_weight(). For non-mono data the bytes come in
+// A/B pairs, one pair per term; for mono data only weight_A is stored.
+// Weights are given starting from the "last" term (first during encode)
+// and any term without a stored weight keeps a weight of zero. Returns
+// FALSE if more weights are supplied than there are terms.
+
+int read_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    int stereo = !(wps->wphdr.flags & MONO_DATA);
+    int termcnt = wpmd->byte_length, tcount;
+    signed char *byteptr = wpmd->data;
+    struct decorr_pass *dpp;
+
+    if (stereo)
+        termcnt /= 2;
+
+    if (termcnt > wps->num_terms)
+        return FALSE;
+
+    dpp = wps->decorr_passes;
+    tcount = wps->num_terms;
+
+    while (tcount--) {
+        dpp->weight_B = 0;
+        dpp->weight_A = 0;
+        dpp++;
+    }
+
+    // dpp is now one past the last term; walk back while weights remain
+    while (dpp > wps->decorr_passes && termcnt--) {
+        --dpp;
+        dpp->weight_A = restore_weight (*byteptr++);
+
+        if (stereo)
+            dpp->weight_B = restore_weight (*byteptr++);
+    }
+
+    return TRUE;
+}
+
+// Read decorrelation samples from specified metadata block into the
+// decorr_passes array. The samples are signed 32-bit values, but are
+// converted to signed log2 values for storage in metadata. Values are
+// stored for both channels and are specified from the "last" term
+// (first during encode) with unspecified samples set to zero. The
+// number of samples stored varies with the actual term value, so
+// those must obviously come first in the metadata.
+//
+// Every group of stored values is checked against the end of the
+// metadata block before it is read, so a truncated block makes the
+// function return FALSE instead of reading past the end of the data.
+// Returns TRUE only when the stored bytes are consumed exactly.
+
+int read_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    uchar *byteptr = wpmd->data;
+    uchar *endptr = byteptr + wpmd->byte_length;
+    int stereo = !(wps->wphdr.flags & MONO_DATA);
+    struct decorr_pass *dpp;
+    int tcount, need;
+
+    for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) {
+        CLEAR (dpp->samples_A);
+        CLEAR (dpp->samples_B);
+    }
+
+    // version 0x402 hybrid blocks: skip 2 leading bytes (4 when not mono data)
+    if (wps->wphdr.version == 0x402 && (wps->wphdr.flags & HYBRID_FLAG)) {
+        need = stereo ? 4 : 2;
+
+        if (endptr - byteptr < need)
+            return FALSE;
+
+        byteptr += need;
+    }
+
+    while (dpp-- > wps->decorr_passes && byteptr < endptr) {
+
+        // number of bytes this term is about to consume
+        if (dpp->term > MAX_TERM)
+            need = stereo ? 8 : 4;
+        else if (dpp->term < 0)
+            need = 4;
+        else
+            need = dpp->term * (stereo ? 4 : 2);
+
+        if (endptr - byteptr < need)
+            return FALSE;
+
+        if (dpp->term > MAX_TERM) {
+            dpp->samples_A [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8)));
+            dpp->samples_A [1] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8)));
+            byteptr += 4;
+
+            if (stereo) {
+                dpp->samples_B [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8)));
+                dpp->samples_B [1] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8)));
+                byteptr += 4;
+            }
+        }
+        else if (dpp->term < 0) {
+            dpp->samples_A [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8)));
+            dpp->samples_B [0] = exp2s ((short)(byteptr [2] + (byteptr [3] << 8)));
+            byteptr += 4;
+        }
+        else {
+            int m;
+
+            // one stored value per channel for each of the term's history slots
+            for (m = 0; m < dpp->term; m++) {
+                dpp->samples_A [m] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8)));
+                byteptr += 2;
+
+                if (stereo) {
+                    dpp->samples_B [m] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8)));
+                    byteptr += 2;
+                }
+            }
+        }
+    }
+
+    return byteptr == endptr;
+}
+
+// Read the int32 data from the specified metadata into the specified stream.
+// This data is used for integer data that has more than 24 bits of magnitude
+// or, in some cases, used to eliminate redundant bits from any audio stream.
+// The block must be exactly 4 bytes long: sent_bits, zeros, ones and dups,
+// in that order. Any other length makes the function return FALSE.
+
+int read_int32_info (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    char *fields = wpmd->data;
+
+    if (wpmd->byte_length != 4)
+        return FALSE;
+
+    wps->int32_sent_bits = fields [0];
+    wps->int32_zeros = fields [1];
+    wps->int32_ones = fields [2];
+    wps->int32_dups = fields [3];
+
+    return TRUE;
+}
+
+// Read multichannel information from metadata. The first byte is the total
+// number of channels and the following bytes represent the channel_mask
+// as described for Microsoft WAVEFORMATEX, least significant byte first.
+// The bytes are read as unsigned so that values of 0x80 and above are not
+// sign-extended into the mask (or turned into a negative channel count)
+// on platforms where plain char is signed. Returns FALSE unless the block
+// holds between 1 and 5 bytes.
+
+int read_channel_info (WavpackContext *wpc, WavpackMetadata *wpmd)
+{
+    int bytecnt = wpmd->byte_length, shift = 0;
+    uchar *byteptr = wpmd->data;
+    uint32_t mask = 0;
+
+    if (bytecnt < 1 || bytecnt > 5)
+        return FALSE;
+
+    wpc->config.num_channels = *byteptr++;
+
+    // remaining 0 to 4 bytes build the mask from the low byte upward
+    while (--bytecnt) {
+        mask |= (uint32_t) *byteptr++ << shift;
+        shift += 8;
+    }
+
+    wpc->config.channel_mask = mask;
+    return TRUE;
+}
+
+// Read configuration information from metadata. When at least 3 bytes are
+// present they replace bits 8 to 31 of wpc->config.flags (first byte in
+// bits 8-15, second in 16-23, third in 24-31); the low 8 bits are kept.
+// Shorter blocks leave the flags untouched. The bytes are widened to an
+// unsigned type before shifting so that a third byte of 0x80 or above is
+// not shifted into the sign bit of a signed value. Always returns TRUE.
+
+int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd)
+{
+    int bytecnt = wpmd->byte_length;
+    uchar *byteptr = wpmd->data;
+
+    if (bytecnt >= 3) {
+        wpc->config.flags &= 0xff;
+        wpc->config.flags |= (uint32_t) byteptr [0] << 8;
+        wpc->config.flags |= (uint32_t) byteptr [1] << 16;
+        wpc->config.flags |= (uint32_t) byteptr [2] << 24;
+    }
+
+    return TRUE;
+}
+
+// This monster actually unpacks the WavPack bitstream(s) into the specified
+// buffer as 32-bit integers or floats (depending on original data). Lossy
+// samples will be clipped to their original limits (i.e. 8-bit samples are
+// clipped to -128/+127) but are still returned in int32_ts. It is up to the
+// caller to potentially reformat this for the final output including any
+// multichannel distribution, block alignment or endian compensation. The
+// function unpack_init() must have been called and the entire WavPack block
+// must still be visible (although wps->blockbuff will not be accessed again).
+// For maximum clarity, the function is broken up into segments that handle
+// various modes. This makes for a few extra infrequent flag checks, but
+// makes the code easier to follow because the nesting does not become so
+// deep. For maximum efficiency, the conversion is isolated to tight loops
+// that handle an entire buffer. The function returns the total number of
+// samples unpacked, which can be less than the number requested if an error
+// occurs or the end of the block is reached.
+
+#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
+extern void decorr_stereo_pass_cont_mcf5249 (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count);
+#elif defined(CPU_ARM) && !defined(SIMULATOR)
+extern void decorr_stereo_pass_cont_arm (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count);
+extern void decorr_stereo_pass_cont_arml (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count);
+#else
+static void decorr_stereo_pass_cont (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count);
+#endif
+
+static void decorr_mono_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count);
+static void decorr_stereo_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count);
+static void fixup_samples (WavpackStream *wps, int32_t *buffer, uint32_t sample_count);
+
+int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count)
+{
+    WavpackStream *wps = &wpc->stream;
+    uint32_t flags = wps->wphdr.flags, crc = wps->crc, i; // i: count from get_words(), lowered on a mute_limit hit
+    int32_t mute_limit = (1L << ((flags & MAG_MASK) >> MAG_LSB)) + 2; // bound built from the header's MAG field
+    struct decorr_pass *dpp;
+    int32_t *bptr, *eptr;
+    int tcount;
+
+    if (wps->sample_index + sample_count > wps->wphdr.block_index + wps->wphdr.block_samples) // request runs past the block end
+        sample_count = wps->wphdr.block_index + wps->wphdr.block_samples - wps->sample_index; // clamp to what is left
+
+    if (wps->mute_error) { // an earlier call flagged an error: emit zeros only
+        memset (buffer, 0, sample_count * (flags & MONO_FLAG ? 4 : 8)); // 4 bytes per mono sample, 8 per stereo pair
+        wps->sample_index += sample_count;
+        return sample_count;
+    }
+
+    if (flags & HYBRID_FLAG)
+        mute_limit *= 2; // hybrid blocks are allowed twice the limit
+
+    ///////////////////// handle version 4 mono data /////////////////////////
+
+    if (flags & MONO_DATA) {
+        eptr = buffer + sample_count;
+        i = get_words (buffer, sample_count, flags, &wps->w, &wps->wvbits);
+
+        for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) // apply each pass in array order
+            decorr_mono_pass (dpp, buffer, sample_count);
+
+        for (bptr = buffer; bptr < eptr; ++bptr) {
+            if (labs (bptr [0]) > mute_limit) { // out-of-range sample
+                i = bptr - buffer; // remember how many samples preceded it
+                break;
+            }
+
+            crc = crc * 3 + bptr [0]; // running checksum over accepted samples
+        }
+    }
+
+    //////////////////// handle version 4 stereo data ////////////////////////
+
+    else {
+        eptr = buffer + (sample_count * 2); // two interleaved values per sample
+        i = get_words (buffer, sample_count, flags, &wps->w, &wps->wvbits);
+
+        if (sample_count < 16) // short requests use only the general pass routine
+            for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++)
+                decorr_stereo_pass (dpp, buffer, sample_count);
+        else
+            for (tcount = wps->num_terms, dpp = wps->decorr_passes; tcount--; dpp++) {
+                decorr_stereo_pass (dpp, buffer, 8); // first 8 pairs via the general routine
+#if defined(CPU_COLDFIRE) && !defined(SIMULATOR)
+                decorr_stereo_pass_cont_mcf5249 (dpp, buffer + 16, sample_count - 8);
+#elif defined(CPU_ARM) && !defined(SIMULATOR)
+                if (((flags & MAG_MASK) >> MAG_LSB) > 15) // MAG field above 15 selects the "arml" routine
+                    decorr_stereo_pass_cont_arml (dpp, buffer + 16, sample_count - 8);
+                else
+                    decorr_stereo_pass_cont_arm (dpp, buffer + 16, sample_count - 8);
+#else
+                decorr_stereo_pass_cont (dpp, buffer + 16, sample_count - 8); // remaining pairs, starting after the first 8
+#endif
+            }
+
+        if (flags & JOINT_STEREO)
+            for (bptr = buffer; bptr < eptr; bptr += 2) {
+                bptr [0] += (bptr [1] -= (bptr [0] >> 1)); // rebuild both channel values in place
+
+                if (labs (bptr [0]) > mute_limit || labs (bptr [1]) > mute_limit) {
+                    i = (bptr - buffer) / 2; // pairs completed before the bad one
+                    break;
+                }
+
+                crc = (crc * 3 + bptr [0]) * 3 + bptr [1];
+            }
+        else
+            for (bptr = buffer; bptr < eptr; bptr += 2) {
+                if (labs (bptr [0]) > mute_limit || labs (bptr [1]) > mute_limit) {
+                    i = (bptr - buffer) / 2; // pairs completed before the bad one
+                    break;
+                }
+
+                crc = (crc * 3 + bptr [0]) * 3 + bptr [1];
+            }
+    }
+
+    if (i != sample_count) { // shortfall from get_words() or a mute_limit hit
+        memset (buffer, 0, sample_count * (flags & MONO_FLAG ? 4 : 8)); // silence the whole request
+        wps->mute_error = TRUE; // later calls take the muted path above
+        i = sample_count;
+    }
+
+    fixup_samples (wps, buffer, i);
+
+    if (flags & FALSE_STEREO) { // expand i single values into i identical pairs
+        int32_t *dptr = buffer + i * 2;
+        int32_t *sptr = buffer + i;
+        int32_t c = i;
+
+        while (c--) { // back to front, so unread source values are not overwritten
+            *--dptr = *--sptr;
+            *--dptr = *sptr;
+        }
+    }
+
+    wps->sample_index += i;
+    wps->crc = crc;
+
+    return i;
+}
+/*
+ * decorr_stereo_pass() runs one decorrelation pass in place over
+ * `sample_count` interleaved pairs in `buffer` (bptr [0] is paired with
+ * weight_A, bptr [1] with weight_B), keeping all history in `dpp`.
+ * dpp->term selects how the predicted value is formed:
+ *
+ *    17        2 * s[0] - s[1] of the same channel's two most recent outputs
+ *    18        (3 * s[0] - s[1]) >> 1 of the same two outputs
+ *    -1,-2,-3  the prediction is taken from the other channel's output
+ *    others    the output produced `term` pairs earlier, kept in a circular
+ *              history of MAX_TERM entries per channel
+ *
+ * Each output is apply_weight (weight, prediction) plus the incoming value,
+ * after which the weight is adjusted by update_weight() or
+ * update_weight_clip(). The weights are cached in locals and written back to
+ * `dpp` on exit.
+ */
+static void decorr_stereo_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count)
+{
+    int32_t delta = dpp->delta, weight_A = dpp->weight_A, weight_B = dpp->weight_B;
+    int32_t *bptr, *eptr = buffer + (sample_count * 2), sam_A, sam_B;
+    int m, k;
+
+    switch (dpp->term) {
+
+        case 17:
+            for (bptr = buffer; bptr < eptr; bptr += 2) {
+                sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1];
+                dpp->samples_A [1] = dpp->samples_A [0];
+                dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0];
+                update_weight (weight_A, delta, sam_A, bptr [0]);
+                bptr [0] = dpp->samples_A [0];
+
+                sam_A = 2 * dpp->samples_B [0] - dpp->samples_B [1];    // sam_A is reused as the temporary for the second value
+                dpp->samples_B [1] = dpp->samples_B [0];
+                dpp->samples_B [0] = apply_weight (weight_B, sam_A) + bptr [1];
+                update_weight (weight_B, delta, sam_A, bptr [1]);
+                bptr [1] = dpp->samples_B [0];
+            }
+
+            break;
+
+        case 18:
+            for (bptr = buffer; bptr < eptr; bptr += 2) {
+                sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1;
+                dpp->samples_A [1] = dpp->samples_A [0];
+                dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0];
+                update_weight (weight_A, delta, sam_A, bptr [0]);
+                bptr [0] = dpp->samples_A [0];
+
+                sam_A = (3 * dpp->samples_B [0] - dpp->samples_B [1]) >> 1;
+                dpp->samples_B [1] = dpp->samples_B [0];
+                dpp->samples_B [0] = apply_weight (weight_B, sam_A) + bptr [1];
+                update_weight (weight_B, delta, sam_A, bptr [1]);
+                bptr [1] = dpp->samples_B [0];
+            }
+
+            break;
+
+        default:
+            for (m = 0, k = dpp->term & (MAX_TERM - 1), bptr = buffer; bptr < eptr; bptr += 2) {    // m = read slot, k = write slot
+                sam_A = dpp->samples_A [m];
+                dpp->samples_A [k] = apply_weight (weight_A, sam_A) + bptr [0];
+                update_weight (weight_A, delta, sam_A, bptr [0]);
+                bptr [0] = dpp->samples_A [k];
+
+                sam_A = dpp->samples_B [m];
+                dpp->samples_B [k] = apply_weight (weight_B, sam_A) + bptr [1];
+                update_weight (weight_B, delta, sam_A, bptr [1]);
+                bptr [1] = dpp->samples_B [k];
+
+                m = (m + 1) & (MAX_TERM - 1);
+                k = (k + 1) & (MAX_TERM - 1);
+            }
+
+            if (m) {    // rotate both histories so the next call can start reading at index 0 again
+                int32_t temp_samples [MAX_TERM];
+
+                memcpy (temp_samples, dpp->samples_A, sizeof (dpp->samples_A));
+
+                for (k = 0; k < MAX_TERM; k++, m++)
+                    dpp->samples_A [k] = temp_samples [m & (MAX_TERM - 1)];
+
+                memcpy (temp_samples, dpp->samples_B, sizeof (dpp->samples_B));
+
+                for (k = 0; k < MAX_TERM; k++, m++)     // m has advanced by MAX_TERM, so its masked value is unchanged
+                    dpp->samples_B [k] = temp_samples [m & (MAX_TERM - 1)];
+            }
+
+            break;
+
+        case -1:
+            for (bptr = buffer; bptr < eptr; bptr += 2) {
+                sam_A = bptr [0] + apply_weight (weight_A, dpp->samples_A [0]);
+                update_weight_clip (weight_A, delta, dpp->samples_A [0], bptr [0]);
+                bptr [0] = sam_A;
+                dpp->samples_A [0] = bptr [1] + apply_weight (weight_B, sam_A);
+                update_weight_clip (weight_B, delta, sam_A, bptr [1]);
+                bptr [1] = dpp->samples_A [0];      // samples_A [0] carries this bptr [1] output into the next pair
+            }
+
+            break;
+
+        case -2:
+            for (bptr = buffer; bptr < eptr; bptr += 2) {
+                sam_B = bptr [1] + apply_weight (weight_B, dpp->samples_B [0]);
+                update_weight_clip (weight_B, delta, dpp->samples_B [0], bptr [1]);
+                bptr [1] = sam_B;
+                dpp->samples_B [0] = bptr [0] + apply_weight (weight_A, sam_B);
+                update_weight_clip (weight_A, delta, sam_B, bptr [0]);
+                bptr [0] = dpp->samples_B [0];      // samples_B [0] carries this bptr [0] output into the next pair
+            }
+
+            break;
+
+        case -3:
+            for (bptr = buffer; bptr < eptr; bptr += 2) {
+                sam_A = bptr [0] + apply_weight (weight_A, dpp->samples_A [0]);
+                update_weight_clip (weight_A, delta, dpp->samples_A [0], bptr [0]);
+                sam_B = bptr [1] + apply_weight (weight_B, dpp->samples_B [0]);
+                update_weight_clip (weight_B, delta, dpp->samples_B [0], bptr [1]);
+                bptr [0] = dpp->samples_B [0] = sam_A;  // each history slot holds the opposite value's output
+                bptr [1] = dpp->samples_A [0] = sam_B;
+            }
+
+            break;
+    }
+
+    dpp->weight_A = weight_A;
+    dpp->weight_B = weight_B;
+}
+
+#if (!defined(CPU_COLDFIRE) && !defined(CPU_ARM)) || defined(SIMULATOR)
+/*
+ * decorr_stereo_pass_cont() continues a pass begun by decorr_stereo_pass().
+ * Instead of reading history from dpp->samples_A / samples_B it reads the
+ * already-decoded values that sit immediately before `buffer` (negative
+ * indices such as bptr [-2], or tptr = buffer - term * 2), so the caller must
+ * pass a pointer into the middle of a buffer whose preceding pairs have
+ * already been through this pass. unpack_samples() does this by handling the
+ * first 8 pairs separately and passing buffer + 16. When the loop finishes,
+ * the most recent outputs are copied back into `dpp` and the weights saved.
+ */
+static void decorr_stereo_pass_cont (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count)
+{
+    int32_t delta = dpp->delta, weight_A = dpp->weight_A, weight_B = dpp->weight_B;
+    int32_t *bptr, *tptr, *eptr = buffer + (sample_count * 2), sam_A, sam_B;
+    int k, i;
+
+    switch (dpp->term) {
+
+        case 17:
+            for (bptr = buffer; bptr < eptr; bptr += 2) {
+                sam_A = 2 * bptr [-2] - bptr [-4];
+                bptr [0] = apply_weight (weight_A, sam_A) + (sam_B = bptr [0]);     // sam_B keeps the incoming value for the weight update
+                update_weight (weight_A, delta, sam_A, sam_B);
+
+                sam_A = 2 * bptr [-1] - bptr [-3];
+                bptr [1] = apply_weight (weight_B, sam_A) + (sam_B = bptr [1]);
+                update_weight (weight_B, delta, sam_A, sam_B);
+            }
+
+            dpp->samples_B [0] = bptr [-1];     // save the two newest pairs as the history for the next call
+            dpp->samples_A [0] = bptr [-2];
+            dpp->samples_B [1] = bptr [-3];
+            dpp->samples_A [1] = bptr [-4];
+            break;
+
+        case 18:
+            for (bptr = buffer; bptr < eptr; bptr += 2) {
+                sam_A = (3 * bptr [-2] - bptr [-4]) >> 1;
+                bptr [0] = apply_weight (weight_A, sam_A) + (sam_B = bptr [0]);
+                update_weight (weight_A, delta, sam_A, sam_B);
+
+                sam_A = (3 * bptr [-1] - bptr [-3]) >> 1;
+                bptr [1] = apply_weight (weight_B, sam_A) + (sam_B = bptr [1]);
+                update_weight (weight_B, delta, sam_A, sam_B);
+            }
+
+            dpp->samples_B [0] = bptr [-1];
+            dpp->samples_A [0] = bptr [-2];
+            dpp->samples_B [1] = bptr [-3];
+            dpp->samples_A [1] = bptr [-4];
+            break;
+
+        default:
+            for (bptr = buffer, tptr = buffer - (dpp->term * 2); bptr < eptr; bptr += 2, tptr += 2) {   // tptr trails bptr by `term` pairs
+                bptr [0] = apply_weight (weight_A, tptr [0]) + (sam_A = bptr [0]);
+                update_weight (weight_A, delta, tptr [0], sam_A);
+
+                bptr [1] = apply_weight (weight_B, tptr [1]) + (sam_A = bptr [1]);
+                update_weight (weight_B, delta, tptr [1], sam_A);
+            }
+
+            for (k = dpp->term - 1, i = 8; i--; k--) {      // copy the last 8 pairs, newest first, into the circular history
+                dpp->samples_B [k & (MAX_TERM - 1)] = *--bptr;
+                dpp->samples_A [k & (MAX_TERM - 1)] = *--bptr;
+            }
+
+            break;
+
+        case -1:
+            for (bptr = buffer; bptr < eptr; bptr += 2) {
+                bptr [0] = apply_weight (weight_A, bptr [-1]) + (sam_A = bptr [0]);
+                update_weight_clip (weight_A, delta, bptr [-1], sam_A);
+                bptr [1] = apply_weight (weight_B, bptr [0]) + (sam_A = bptr [1]);
+                update_weight_clip (weight_B, delta, bptr [0], sam_A);
+            }
+
+            dpp->samples_A [0] = bptr [-1];
+            break;
+
+        case -2:
+            for (bptr = buffer; bptr < eptr; bptr += 2) {
+                bptr [1] = apply_weight (weight_B, bptr [-2]) + (sam_A = bptr [1]);
+                update_weight_clip (weight_B, delta, bptr [-2], sam_A);
+                bptr [0] = apply_weight (weight_A, bptr [1]) + (sam_A = bptr [0]);
+                update_weight_clip (weight_A, delta, bptr [1], sam_A);
+            }
+
+            dpp->samples_B [0] = bptr [-2];
+            break;
+
+        case -3:
+            for (bptr = buffer; bptr < eptr; bptr += 2) {
+                bptr [0] = apply_weight (weight_A, bptr [-1]) + (sam_A = bptr [0]);
+                update_weight_clip (weight_A, delta, bptr [-1], sam_A);
+                bptr [1] = apply_weight (weight_B, bptr [-2]) + (sam_A = bptr [1]);
+                update_weight_clip (weight_B, delta, bptr [-2], sam_A);
+            }
+
+            dpp->samples_A [0] = bptr [-1];
+            dpp->samples_B [0] = bptr [-2];
+            break;
+    }
+
+    dpp->weight_A = weight_A;
+    dpp->weight_B = weight_B;
+}
+
+#endif
+/*
+ * decorr_mono_pass() is the single-channel counterpart of
+ * decorr_stereo_pass(): it runs one decorrelation pass in place over
+ * `sample_count` values in `buffer`, using only weight_A and samples_A of
+ * `dpp`. Terms 17 and 18 predict from the two most recent outputs; every
+ * other term value takes the default path, which predicts from the circular
+ * MAX_TERM-entry history. The weight is cached in a local and written back
+ * to `dpp` on exit.
+ */
+static void decorr_mono_pass (struct decorr_pass *dpp, int32_t *buffer, int32_t sample_count)
+{
+    int32_t delta = dpp->delta, weight_A = dpp->weight_A;
+    int32_t *bptr, *eptr = buffer + sample_count, sam_A;
+    int m, k;
+
+    switch (dpp->term) {
+
+        case 17:
+            for (bptr = buffer; bptr < eptr; bptr++) {
+                sam_A = 2 * dpp->samples_A [0] - dpp->samples_A [1];
+                dpp->samples_A [1] = dpp->samples_A [0];
+                dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0];
+                update_weight (weight_A, delta, sam_A, bptr [0]);
+                bptr [0] = dpp->samples_A [0];
+            }
+
+            break;
+
+        case 18:
+            for (bptr = buffer; bptr < eptr; bptr++) {
+                sam_A = (3 * dpp->samples_A [0] - dpp->samples_A [1]) >> 1;
+                dpp->samples_A [1] = dpp->samples_A [0];
+                dpp->samples_A [0] = apply_weight (weight_A, sam_A) + bptr [0];
+                update_weight (weight_A, delta, sam_A, bptr [0]);
+                bptr [0] = dpp->samples_A [0];
+            }
+
+            break;
+
+        default:
+            for (m = 0, k = dpp->term & (MAX_TERM - 1), bptr = buffer; bptr < eptr; bptr++) {   // m = read slot, k = write slot
+                sam_A = dpp->samples_A [m];
+                dpp->samples_A [k] = apply_weight (weight_A, sam_A) + bptr [0];
+                update_weight (weight_A, delta, sam_A, bptr [0]);
+                bptr [0] = dpp->samples_A [k];
+                m = (m + 1) & (MAX_TERM - 1);
+                k = (k + 1) & (MAX_TERM - 1);
+            }
+
+            if (m) {    // rotate the history so the next call can start reading at index 0 again
+                int32_t temp_samples [MAX_TERM];
+
+                memcpy (temp_samples, dpp->samples_A, sizeof (dpp->samples_A));
+
+                for (k = 0; k < MAX_TERM; k++, m++)
+                    dpp->samples_A [k] = temp_samples [m & (MAX_TERM - 1)];
+            }
+
+            break;
+    }
+
+    dpp->weight_A = weight_A;
+}
+
+
+// This is a helper function for unpack_samples() that applies the final
+// operations to `sample_count` decoded samples in `buffer` (two values per
+// sample unless MONO_FLAG is set):
+//
+//  1. 32-bit float data is handed to float_values() in the float.c module
+//     (whether lossy or lossless) and nothing else is done here.
+//  2. For extended integer data (INT32_DATA) in a non-hybrid block with no
+//     separately sent bits, the low bits described by int32_zeros,
+//     int32_ones or int32_dups are restored directly; otherwise those
+//     counts are simply added to the final shift.
+//  3. Hybrid data is clipped to the range implied by BYTES_STORED and
+//     shifted in a single operation; any other data just gets the final
+//     shift (if any).
+//
+// Every left shift is performed on a uint32_t and converted back, because
+// left-shifting a negative signed integer is undefined behavior in C. The
+// resulting bit patterns are the ones the plain signed shifts produce on
+// two's complement targets, so decoded output is unchanged.
+//
+// NOTE(review): nothing in this function keeps `shift` below 32 once the
+// int32 counts have been added; presumably the metadata reader guarantees
+// that - confirm.
+
+static void fixup_samples (WavpackStream *wps, int32_t *buffer, uint32_t sample_count)
+{
+    uint32_t flags = wps->wphdr.flags;
+    int shift = (flags & SHIFT_MASK) >> SHIFT_LSB;
+
+    if (flags & FLOAT_DATA) {
+        float_values (wps, buffer, (flags & MONO_FLAG) ? sample_count : sample_count * 2);
+        return;
+    }
+
+    if (flags & INT32_DATA) {
+        uint32_t count = (flags & MONO_FLAG) ? sample_count : sample_count * 2;
+        int sent_bits = wps->int32_sent_bits, zeros = wps->int32_zeros;
+        int ones = wps->int32_ones, dups = wps->int32_dups;
+        int32_t *dptr = buffer;
+
+        if (!(flags & HYBRID_FLAG) && !sent_bits && (zeros + ones + dups)) {
+            while (count--) {
+                // work on the raw bit pattern so negative samples shift safely
+                uint32_t value = (uint32_t) *dptr;
+
+                if (zeros)
+                    value <<= zeros;                        // low bits become 0
+                else if (ones)
+                    value = ((value + 1) << ones) - 1;      // low bits become 1
+                else if (dups) {
+                    uint32_t low_bit = value & 1;           // low bits copy the lsb
+
+                    value = ((value + low_bit) << dups) - low_bit;
+                }
+
+                *dptr++ = (int32_t) value;
+            }
+        }
+        else
+            shift += zeros + sent_bits + ones + dups;
+    }
+
+    if (flags & HYBRID_FLAG) {
+        int32_t min_value, max_value, min_shifted, max_shifted;
+
+        // clip limits expressed in pre-shift units for the stored sample width
+        switch (flags & BYTES_STORED) {
+            case 0:
+                min_value = -128 >> shift;
+                max_value = 127 >> shift;
+                break;
+
+            case 1:
+                min_value = -32768 >> shift;
+                max_value = 32767 >> shift;
+                break;
+
+            case 2:
+                min_value = -8388608 >> shift;
+                max_value = 8388607 >> shift;
+                break;
+
+            case 3:
+            default:
+                min_value = (int32_t) 0x80000000 >> shift;
+                max_value = (int32_t) 0x7FFFFFFF >> shift;
+                break;
+        }
+
+        // the same limits after the final shift, used for out-of-range samples
+        min_shifted = (int32_t) ((uint32_t) min_value << shift);
+        max_shifted = (int32_t) ((uint32_t) max_value << shift);
+
+        if (!(flags & MONO_FLAG))
+            sample_count *= 2;
+
+        while (sample_count--) {
+            if (*buffer < min_value)
+                *buffer = min_shifted;
+            else if (*buffer > max_value)
+                *buffer = max_shifted;
+            else
+                *buffer = (int32_t) ((uint32_t) *buffer << shift);
+
+            buffer++;
+        }
+    }
+    else if (shift) {
+        if (!(flags & MONO_FLAG))
+            sample_count *= 2;
+
+        while (sample_count--) {
+            *buffer = (int32_t) ((uint32_t) *buffer << shift);
+            buffer++;
+        }
+    }
+}
+
+// Compare the crc accumulated while unpacking the current block (wps->crc)
+// with the crc stored in that block's header (wps->wphdr.crc) and return the
+// number of mismatches, which here is either 0 or 1. The answer is only
+// meaningful once the block has been completely unpacked. Note that this crc
+// is the simple multiply-by-3-and-add checksum built up in unpack_samples(),
+// not a CCITT polynomial.
+
+int check_crc_error (WavpackContext *wpc)
+{
+    const WavpackStream *stream = &wpc->stream;
+    int error_count = (stream->crc != stream->wphdr.crc) ? 1 : 0;
+
+    return error_count;
+}
diff --git a/src/engine/external/wavpack/wavpack.h b/src/engine/external/wavpack/wavpack.h
index 29e6af75..2e22fc51 100644
--- a/src/engine/external/wavpack/wavpack.h
+++ b/src/engine/external/wavpack/wavpack.h
@@ -5,56 +5,56 @@
 /*                          All Rights Reserved.                          // */
 /*      Distributed under the BSD Software License (see license.txt)      // */
 /*////////////////////////////////////////////////////////////////////////// */
-

+
 /* wavpack.h */
-

-#include <sys/types.h>

-

+
+#include <sys/types.h>
+
 /* This header file contains all the definitions required by WavPack. */
-

-#ifdef __BORLANDC__

-typedef unsigned long uint32_t;

-typedef long int32_t;

-#elif defined(_WIN32) && !defined(__MINGW32__)

-#include <stdlib.h>

-typedef unsigned __int64 uint64_t;

-typedef unsigned __int32 uint32_t;

-typedef __int64 int64_t;

-typedef __int32 int32_t;

-#else

-#include <inttypes.h>

-#endif

-

-typedef unsigned char   uchar;

-

-#if !defined(__GNUC__) || defined(WIN32)

-typedef unsigned short  ushort;

-typedef unsigned int    uint;

-#endif

-

-#include <stdio.h>

-

-#define FALSE 0

-#define TRUE 1

-

+
+#ifdef __BORLANDC__
+typedef unsigned long uint32_t;
+typedef long int32_t;
+#elif defined(_WIN32) && !defined(__MINGW32__)
+#include <stdlib.h>
+typedef unsigned __int64 uint64_t;
+typedef unsigned __int32 uint32_t;
+typedef __int64 int64_t;
+typedef __int32 int32_t;
+#else
+#include <inttypes.h>
+#endif
+
+typedef unsigned char   uchar;
+
+#if !defined(__GNUC__) || defined(WIN32)
+typedef unsigned short  ushort;
+typedef unsigned int    uint;
+#endif
+
+#include <stdio.h>
+
+#define FALSE 0
+#define TRUE 1
+
 /*//////////////////////////// WavPack Header ///////////////////////////////// */
-

+
 /* Note that this is the ONLY structure that is written to (or read from) */
 /* WavPack 4.0 files, and is the preamble to every block in both the .wv */
 /* and .wvc files. */
-

-typedef struct {

-    char ckID [4];

-    uint32_t ckSize;

-    short version;

-    uchar track_no, index_no;

-    uint32_t total_samples, block_index, block_samples, flags, crc;

-} WavpackHeader;

-

-#define WavpackHeaderFormat "4LS2LLLLL"

-

+
+typedef struct {
+    char ckID [4];
+    uint32_t ckSize;
+    short version;
+    uchar track_no, index_no;
+    uint32_t total_samples, block_index, block_samples, flags, crc;
+} WavpackHeader;
+
+#define WavpackHeaderFormat "4LS2LLLLL"
+
 /* or-values for "flags" */
-

+
 #define BYTES_STORED    3       /* 1-4 bytes/sample */
 #define MONO_FLAG       4       /* not stereo */
 #define HYBRID_FLAG     8       /* hybrid mode */
@@ -62,83 +62,83 @@ typedef struct {
 #define CROSS_DECORR    0x20    /* no-delay cross decorrelation */
 #define HYBRID_SHAPE    0x40    /* noise shape (hybrid mode only) */
 #define FLOAT_DATA      0x80    /* ieee 32-bit floating point data */
-

+
 #define INT32_DATA      0x100   /* special extended int handling */
 #define HYBRID_BITRATE  0x200   /* bitrate noise (hybrid mode only) */
 #define HYBRID_BALANCE  0x400   /* balance noise (hybrid stereo mode only) */
-

+
 #define INITIAL_BLOCK   0x800   /* initial block of multichannel segment */
 #define FINAL_BLOCK     0x1000  /* final block of multichannel segment */
-

-#define SHIFT_LSB       13

-#define SHIFT_MASK      (0x1fL << SHIFT_LSB)

-

-#define MAG_LSB         18

-#define MAG_MASK        (0x1fL << MAG_LSB)

-

-#define SRATE_LSB       23

-#define SRATE_MASK      (0xfL << SRATE_LSB)

-

+
+#define SHIFT_LSB       13
+#define SHIFT_MASK      (0x1fL << SHIFT_LSB)
+
+#define MAG_LSB         18
+#define MAG_MASK        (0x1fL << MAG_LSB)
+
+#define SRATE_LSB       23
+#define SRATE_MASK      (0xfL << SRATE_LSB)
+
 #define FALSE_STEREO    0x40000000      /* block is stereo, but data is mono */
-

+
 #define IGNORED_FLAGS   0x18000000      /* reserved, but ignore if encountered */
 #define NEW_SHAPING     0x20000000      /* use IIR filter for negative shaping */
 #define UNKNOWN_FLAGS   0x80000000      /* also reserved, but refuse decode if */
                                         /*  encountered */
-

-#define MONO_DATA (MONO_FLAG | FALSE_STEREO)

-

+
+#define MONO_DATA (MONO_FLAG | FALSE_STEREO)
+
 #define MIN_STREAM_VERS     0x402       /* lowest stream version we'll decode */
 #define MAX_STREAM_VERS     0x410       /* highest stream version we'll decode */
-

+
 /*////////////////////////// WavPack Metadata ///////////////////////////////// */
-

+
 /* This is an internal representation of metadata. */
-

-typedef struct {

-    int32_t byte_length;

-    void *data;

-    uchar id;

-} WavpackMetadata;

-

-#define ID_OPTIONAL_DATA        0x20

-#define ID_ODD_SIZE             0x40

-#define ID_LARGE                0x80

-

-#define ID_DUMMY                0x0

-#define ID_ENCODER_INFO         0x1

-#define ID_DECORR_TERMS         0x2

-#define ID_DECORR_WEIGHTS       0x3

-#define ID_DECORR_SAMPLES       0x4

-#define ID_ENTROPY_VARS         0x5

-#define ID_HYBRID_PROFILE       0x6

-#define ID_SHAPING_WEIGHTS      0x7

-#define ID_FLOAT_INFO           0x8

-#define ID_INT32_INFO           0x9

-#define ID_WV_BITSTREAM         0xa

-#define ID_WVC_BITSTREAM        0xb

-#define ID_WVX_BITSTREAM        0xc

-#define ID_CHANNEL_INFO         0xd

-

-#define ID_RIFF_HEADER          (ID_OPTIONAL_DATA | 0x1)

-#define ID_RIFF_TRAILER         (ID_OPTIONAL_DATA | 0x2)

-#define ID_REPLAY_GAIN          (ID_OPTIONAL_DATA | 0x3)

-#define ID_CUESHEET             (ID_OPTIONAL_DATA | 0x4)

-#define ID_CONFIG_BLOCK         (ID_OPTIONAL_DATA | 0x5)

-#define ID_MD5_CHECKSUM         (ID_OPTIONAL_DATA | 0x6)

-

+
+typedef struct {
+    int32_t byte_length;
+    void *data;
+    uchar id;
+} WavpackMetadata;
+
+#define ID_OPTIONAL_DATA        0x20
+#define ID_ODD_SIZE             0x40
+#define ID_LARGE                0x80
+
+#define ID_DUMMY                0x0
+#define ID_ENCODER_INFO         0x1
+#define ID_DECORR_TERMS         0x2
+#define ID_DECORR_WEIGHTS       0x3
+#define ID_DECORR_SAMPLES       0x4
+#define ID_ENTROPY_VARS         0x5
+#define ID_HYBRID_PROFILE       0x6
+#define ID_SHAPING_WEIGHTS      0x7
+#define ID_FLOAT_INFO           0x8
+#define ID_INT32_INFO           0x9
+#define ID_WV_BITSTREAM         0xa
+#define ID_WVC_BITSTREAM        0xb
+#define ID_WVX_BITSTREAM        0xc
+#define ID_CHANNEL_INFO         0xd
+
+#define ID_RIFF_HEADER          (ID_OPTIONAL_DATA | 0x1)
+#define ID_RIFF_TRAILER         (ID_OPTIONAL_DATA | 0x2)
+#define ID_REPLAY_GAIN          (ID_OPTIONAL_DATA | 0x3)
+#define ID_CUESHEET             (ID_OPTIONAL_DATA | 0x4)
+#define ID_CONFIG_BLOCK         (ID_OPTIONAL_DATA | 0x5)
+#define ID_MD5_CHECKSUM         (ID_OPTIONAL_DATA | 0x6)
+
 /*/////////////////////// WavPack Configuration /////////////////////////////// */
-

+
 /* This internal structure is used during encode to provide configuration to */
 /* the encoding engine and during decoding to provide file information back to */
 /* the higher level functions. Not all fields are used in both modes. */
-

-typedef struct {

-    int bits_per_sample, bytes_per_sample;

-    int num_channels, float_norm_exp;

-    uint32_t flags, sample_rate, channel_mask;

-} WavpackConfig;

-

+
+typedef struct {
+    int bits_per_sample, bytes_per_sample;
+    int num_channels, float_norm_exp;
+    uint32_t flags, sample_rate, channel_mask;
+} WavpackConfig;
+
 #define CONFIG_BYTES_STORED     3       /* 1-4 bytes/sample */
 #define CONFIG_MONO_FLAG        4       /* not stereo */
 #define CONFIG_HYBRID_FLAG      8       /* hybrid mode */
@@ -146,7 +146,7 @@ typedef struct {
 #define CONFIG_CROSS_DECORR     0x20    /* no-delay cross decorrelation */
 #define CONFIG_HYBRID_SHAPE     0x40    /* noise shape (hybrid mode only) */
 #define CONFIG_FLOAT_DATA       0x80    /* ieee 32-bit floating point data */
-

+
 #define CONFIG_FAST_FLAG        0x200   /* fast mode */
 #define CONFIG_HIGH_FLAG        0x800   /* high quality mode */
 #define CONFIG_VERY_HIGH_FLAG   0x1000  /* very high */
@@ -163,127 +163,127 @@ typedef struct {
 #define CONFIG_SKIP_WVX         0x4000000 /* no wvx stream w/ floats & big ints */
 #define CONFIG_MD5_CHECKSUM     0x8000000 /* compute & store MD5 signature */
 #define CONFIG_OPTIMIZE_MONO    0x80000000 /* optimize for mono streams posing as stereo */
-

+
 /*////////////////////////////// WavPack Stream /////////////////////////////// */
-

+
 /* This internal structure contains everything required to handle a WavPack */
 /* "stream", which is defined as a stereo or mono stream of audio samples. For */
 /* multichannel audio several of these would be required. Each stream contains */
 /* pointers to hold a complete allocated block of WavPack data, although it's */
 /* possible to decode WavPack blocks without buffering an entire block. */
-

-typedef int32_t (*read_stream)(void *, int32_t);

-

-typedef struct bs {

-    uchar *buf, *end, *ptr;

-    void (*wrap)(struct bs *bs);

-    uint32_t file_bytes, sr;

-    int error, bc;

-    read_stream file;

-} Bitstream;

-

-#define MAX_NTERMS 16

-#define MAX_TERM 8

-

-struct decorr_pass {

-    short term, delta, weight_A, weight_B;

-    int32_t samples_A [MAX_TERM], samples_B [MAX_TERM];

-};

-

-struct entropy_data {

-    uint32_t median [3], slow_level, error_limit;

-};

-

-struct words_data {

-    uint32_t bitrate_delta [2], bitrate_acc [2];

-    uint32_t pend_data, holding_one, zeros_acc;

-    int holding_zero, pend_count;

-    struct entropy_data c [2];

-};

-

-typedef struct {

-    WavpackHeader wphdr;

-    Bitstream wvbits;

-

-    struct words_data w;

-

-    int num_terms, mute_error;

-    uint32_t sample_index, crc;

-

-    uchar int32_sent_bits, int32_zeros, int32_ones, int32_dups;

-    uchar float_flags, float_shift, float_max_exp, float_norm_exp;

- 

-    struct decorr_pass decorr_passes [MAX_NTERMS];

-

-} WavpackStream;

-

+
+typedef int32_t (*read_stream)(void *, int32_t);
+
+typedef struct bs {
+    uchar *buf, *end, *ptr;
+    void (*wrap)(struct bs *bs);
+    uint32_t file_bytes, sr;
+    int error, bc;
+    read_stream file;
+} Bitstream;
+
+#define MAX_NTERMS 16
+#define MAX_TERM 8
+
+struct decorr_pass {
+    short term, delta, weight_A, weight_B;
+    int32_t samples_A [MAX_TERM], samples_B [MAX_TERM];
+};
+
+struct entropy_data {
+    uint32_t median [3], slow_level, error_limit;
+};
+
+struct words_data {
+    uint32_t bitrate_delta [2], bitrate_acc [2];
+    uint32_t pend_data, holding_one, zeros_acc;
+    int holding_zero, pend_count;
+    struct entropy_data c [2];
+};
+
+typedef struct {
+    WavpackHeader wphdr;
+    Bitstream wvbits;
+
+    struct words_data w;
+
+    int num_terms, mute_error;
+    uint32_t sample_index, crc;
+
+    uchar int32_sent_bits, int32_zeros, int32_ones, int32_dups;
+    uchar float_flags, float_shift, float_max_exp, float_norm_exp;
+ 
+    struct decorr_pass decorr_passes [MAX_NTERMS];
+
+} WavpackStream;
+
 /* flags for float_flags: */
-

+
 #define FLOAT_SHIFT_ONES 1      /* bits left-shifted into float = '1' */
 #define FLOAT_SHIFT_SAME 2      /* bits left-shifted into float are the same */
 #define FLOAT_SHIFT_SENT 4      /* bits shifted into float are sent literally */
 #define FLOAT_ZEROS_SENT 8      /* "zeros" are not all real zeros */
 #define FLOAT_NEG_ZEROS  0x10   /* contains negative zeros */
 #define FLOAT_EXCEPTIONS 0x20   /* contains exceptions (inf, nan, etc.) */
-

+
 /*///////////////////////////// WavPack Context /////////////////////////////// */
-

+
 /* This internal structure holds everything required to encode or decode WavPack */
 /* files. It is recommended that direct access to this structure be minimized */
 /* and the provided utilities used instead. */
-

-typedef struct {

-    WavpackConfig config;

-    WavpackStream stream;

-

-    uchar read_buffer [1024];

-    char error_message [80];

-

-    read_stream infile;

-    uint32_t total_samples, crc_errors, first_flags;

-    int open_flags, norm_offset, reduced_channels, lossy_blocks;

-

-} WavpackContext;

-

+
+typedef struct {
+    WavpackConfig config;
+    WavpackStream stream;
+
+    uchar read_buffer [1024];
+    char error_message [80];
+
+    read_stream infile;
+    uint32_t total_samples, crc_errors, first_flags;
+    int open_flags, norm_offset, reduced_channels, lossy_blocks;
+
+} WavpackContext;
+
 /*////////////////////// function prototypes and macros ////////////////////// */
-

-#define CLEAR(destin) memset (&destin, 0, sizeof (destin));

-

+
+#define CLEAR(destin) memset (&destin, 0, sizeof (destin));
+
 /* bits.c */
-

-void bs_open_read (Bitstream *bs, uchar *buffer_start, uchar *buffer_end, read_stream file, uint32_t file_bytes);

-

-#define bs_is_open(bs) ((bs)->ptr != NULL)

-

-#define getbit(bs) ( \

-    (((bs)->bc) ? \

-        ((bs)->bc--, (bs)->sr & 1) : \

-            (((++((bs)->ptr) != (bs)->end) ? (void) 0 : (bs)->wrap (bs)), (bs)->bc = 7, ((bs)->sr = *((bs)->ptr)) & 1) \

-    ) ? \

-        ((bs)->sr >>= 1, 1) : \

-        ((bs)->sr >>= 1, 0) \

-)

-

-#define getbits(value, nbits, bs) { \

-    while ((nbits) > (bs)->bc) { \

-        if (++((bs)->ptr) == (bs)->end) (bs)->wrap (bs); \

-        (bs)->sr |= (int32_t)*((bs)->ptr) << (bs)->bc; \

-        (bs)->bc += 8; \

-    } \

-    *(value) = (bs)->sr; \

-    if ((bs)->bc > 32) { \

-        (bs)->bc -= (nbits); \

-        (bs)->sr = *((bs)->ptr) >> (8 - (bs)->bc); \

-    } \

-    else { \

-        (bs)->bc -= (nbits); \

-        (bs)->sr >>= (nbits); \

-    } \

-}

-

-void little_endian_to_native (void *data, char *format);

-void native_to_little_endian (void *data, char *format);

-

+
+void bs_open_read (Bitstream *bs, uchar *buffer_start, uchar *buffer_end, read_stream file, uint32_t file_bytes);
+
+#define bs_is_open(bs) ((bs)->ptr != NULL)
+
+#define getbit(bs) ( \
+    (((bs)->bc) ? \
+        ((bs)->bc--, (bs)->sr & 1) : \
+            (((++((bs)->ptr) != (bs)->end) ? (void) 0 : (bs)->wrap (bs)), (bs)->bc = 7, ((bs)->sr = *((bs)->ptr)) & 1) \
+    ) ? \
+        ((bs)->sr >>= 1, 1) : \
+        ((bs)->sr >>= 1, 0) \
+)
+
+#define getbits(value, nbits, bs) { \
+    while ((nbits) > (bs)->bc) { \
+        if (++((bs)->ptr) == (bs)->end) (bs)->wrap (bs); \
+        (bs)->sr |= (int32_t)*((bs)->ptr) << (bs)->bc; \
+        (bs)->bc += 8; \
+    } \
+    *(value) = (bs)->sr; \
+    if ((bs)->bc > 32) { \
+        (bs)->bc -= (nbits); \
+        (bs)->sr = *((bs)->ptr) >> (8 - (bs)->bc); \
+    } \
+    else { \
+        (bs)->bc -= (nbits); \
+        (bs)->sr >>= (nbits); \
+    } \
+}
+
+void little_endian_to_native (void *data, char *format);
+void native_to_little_endian (void *data, char *format);
+
 /* These macros implement the weight application and update operations */
 /* that are at the heart of the decorrelation loops. Note that when there */
 /* are several alternative versions of the same macro (marked with PERFCOND) */
@@ -291,94 +291,94 @@ void native_to_little_endian (void *data, char *format);
 /* decoding and the user should choose the one that provides the best */
 /* performance. This may be easier to check when NOT using the assembly */
 /* language optimizations. */
-

+
 #if 1   /* PERFCOND */
-#define apply_weight_i(weight, sample) ((weight * sample + 512) >> 10)

-#else

-#define apply_weight_i(weight, sample) ((((weight * sample) >> 8) + 2) >> 2)

-#endif

-

-#define apply_weight_f(weight, sample) (((((sample & 0xffffL) * weight) >> 9) + \

-    (((sample & ~0xffffL) >> 9) * weight) + 1) >> 1)

-

+#define apply_weight_i(weight, sample) ((weight * sample + 512) >> 10)
+#else
+#define apply_weight_i(weight, sample) ((((weight * sample) >> 8) + 2) >> 2)
+#endif
+
+#define apply_weight_f(weight, sample) (((((sample & 0xffffL) * weight) >> 9) + \
+    (((sample & ~0xffffL) >> 9) * weight) + 1) >> 1)
+
 #if 1   /* PERFCOND */
-#define apply_weight(weight, sample) (sample != (short) sample ? \

-    apply_weight_f (weight, sample) : apply_weight_i (weight, sample))

-#else

-#define apply_weight(weight, sample) ((int32_t)((weight * (int64_t) sample + 512) >> 10))

-#endif

-

+#define apply_weight(weight, sample) (sample != (short) sample ? \
+    apply_weight_f (weight, sample) : apply_weight_i (weight, sample))
+#else
+#define apply_weight(weight, sample) ((int32_t)((weight * (int64_t) sample + 512) >> 10))
+#endif
+
 #if 0   /* PERFCOND */
-#define update_weight(weight, delta, source, result) \

-    if (source && result) { int32_t s = (int32_t) (source ^ result) >> 31; weight = (delta ^ s) + (weight - s); }

-#elif 1

-#define update_weight(weight, delta, source, result) \

-    if (source && result) weight += (((source ^ result) >> 30) | 1) * delta

-#else

-#define update_weight(weight, delta, source, result) \

-    if (source && result) (source ^ result) < 0 ? (weight -= delta) : (weight += delta)

-#endif

-

-#define update_weight_clip(weight, delta, source, result) \

-    if (source && result && ((source ^ result) < 0 ? (weight -= delta) < -1024 : (weight += delta) > 1024)) \

-        weight = weight < 0 ? -1024 : 1024

-

+#define update_weight(weight, delta, source, result) \
+    if (source && result) { int32_t s = (int32_t) (source ^ result) >> 31; weight = (delta ^ s) + (weight - s); }
+#elif 1
+#define update_weight(weight, delta, source, result) \
+    if (source && result) weight += (((source ^ result) >> 30) | 1) * delta
+#else
+#define update_weight(weight, delta, source, result) \
+    if (source && result) (source ^ result) < 0 ? (weight -= delta) : (weight += delta)
+#endif
+
+#define update_weight_clip(weight, delta, source, result) \
+    if (source && result && ((source ^ result) < 0 ? (weight -= delta) < -1024 : (weight += delta) > 1024)) \
+        weight = weight < 0 ? -1024 : 1024
+
 /* unpack.c */
-

-int unpack_init (WavpackContext *wpc);

-int init_wv_bitstream (WavpackContext *wpc, WavpackMetadata *wpmd);

-int read_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd);

-int read_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd);

-int read_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd);

-int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd);

-int read_int32_info (WavpackStream *wps, WavpackMetadata *wpmd);

-int read_channel_info (WavpackContext *wpc, WavpackMetadata *wpmd);

-int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd);

-int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count);

-int check_crc_error (WavpackContext *wpc);

-

+
+int unpack_init (WavpackContext *wpc);
+int init_wv_bitstream (WavpackContext *wpc, WavpackMetadata *wpmd);
+int read_decorr_terms (WavpackStream *wps, WavpackMetadata *wpmd);
+int read_decorr_weights (WavpackStream *wps, WavpackMetadata *wpmd);
+int read_decorr_samples (WavpackStream *wps, WavpackMetadata *wpmd);
+int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd);
+int read_int32_info (WavpackStream *wps, WavpackMetadata *wpmd);
+int read_channel_info (WavpackContext *wpc, WavpackMetadata *wpmd);
+int read_config_info (WavpackContext *wpc, WavpackMetadata *wpmd);
+int32_t unpack_samples (WavpackContext *wpc, int32_t *buffer, uint32_t sample_count);
+int check_crc_error (WavpackContext *wpc);
+
 /* metadata.c stuff */
-

-int read_metadata_buff (WavpackContext *wpc, WavpackMetadata *wpmd);

-int process_metadata (WavpackContext *wpc, WavpackMetadata *wpmd);

-

+
+int read_metadata_buff (WavpackContext *wpc, WavpackMetadata *wpmd);
+int process_metadata (WavpackContext *wpc, WavpackMetadata *wpmd);
+
 /* words.c stuff */
-

-int read_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd);

-int read_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd);

-int32_t get_words (int32_t *buffer, int nsamples, uint32_t flags,

-                struct words_data *w, Bitstream *bs);

-int32_t exp2s (int log);

-int restore_weight (signed char weight);

-

-#define WORD_EOF (1L << 31)

-

+
+int read_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd);
+int read_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd);
+int32_t get_words (int32_t *buffer, int nsamples, uint32_t flags,
+                struct words_data *w, Bitstream *bs);
+int32_t exp2s (int log);
+int restore_weight (signed char weight);
+
+#define WORD_EOF (1L << 31)
+
 /* float.c */
-

-int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd);

-void float_values (WavpackStream *wps, int32_t *values, int32_t num_values);

-

+
+int read_float_info (WavpackStream *wps, WavpackMetadata *wpmd);
+void float_values (WavpackStream *wps, int32_t *values, int32_t num_values);
+
 /* wputils.c */
-

-WavpackContext *WavpackOpenFileInput (read_stream infile, char *error);

-

-int WavpackGetMode (WavpackContext *wpc);

-

-#define MODE_WVC        0x1

-#define MODE_LOSSLESS   0x2

-#define MODE_HYBRID     0x4

-#define MODE_FLOAT      0x8

-#define MODE_VALID_TAG  0x10

-#define MODE_HIGH       0x20

-#define MODE_FAST       0x40

-

-uint32_t WavpackUnpackSamples (WavpackContext *wpc, int32_t *buffer, uint32_t samples);

-uint32_t WavpackGetNumSamples (WavpackContext *wpc);

-uint32_t WavpackGetSampleIndex (WavpackContext *wpc);

-int WavpackGetNumErrors (WavpackContext *wpc);

-int WavpackLossyBlocks (WavpackContext *wpc);

-uint32_t WavpackGetSampleRate (WavpackContext *wpc);

-int WavpackGetBitsPerSample (WavpackContext *wpc);

-int WavpackGetBytesPerSample (WavpackContext *wpc);

-int WavpackGetNumChannels (WavpackContext *wpc);

-int WavpackGetReducedChannels (WavpackContext *wpc);

+
+WavpackContext *WavpackOpenFileInput (read_stream infile, char *error);
+
+int WavpackGetMode (WavpackContext *wpc);
+
+#define MODE_WVC        0x1
+#define MODE_LOSSLESS   0x2
+#define MODE_HYBRID     0x4
+#define MODE_FLOAT      0x8
+#define MODE_VALID_TAG  0x10
+#define MODE_HIGH       0x20
+#define MODE_FAST       0x40
+
+uint32_t WavpackUnpackSamples (WavpackContext *wpc, int32_t *buffer, uint32_t samples);
+uint32_t WavpackGetNumSamples (WavpackContext *wpc);
+uint32_t WavpackGetSampleIndex (WavpackContext *wpc);
+int WavpackGetNumErrors (WavpackContext *wpc);
+int WavpackLossyBlocks (WavpackContext *wpc);
+uint32_t WavpackGetSampleRate (WavpackContext *wpc);
+int WavpackGetBitsPerSample (WavpackContext *wpc);
+int WavpackGetBytesPerSample (WavpackContext *wpc);
+int WavpackGetNumChannels (WavpackContext *wpc);
+int WavpackGetReducedChannels (WavpackContext *wpc);
diff --git a/src/engine/external/wavpack/words.c b/src/engine/external/wavpack/words.c
index 0e5a3db7..d39563b0 100644
--- a/src/engine/external/wavpack/words.c
+++ b/src/engine/external/wavpack/words.c
@@ -1,560 +1,560 @@
-////////////////////////////////////////////////////////////////////////////

-//                           **** WAVPACK ****                            //

-//                  Hybrid Lossless Wavefile Compressor                   //

-//              Copyright (c) 1998 - 2006 Conifer Software.               //

-//                          All Rights Reserved.                          //

-//      Distributed under the BSD Software License (see license.txt)      //

-////////////////////////////////////////////////////////////////////////////

-

-// words.c

-

-// This module provides entropy word encoding and decoding functions using

-// a variation on the Rice method.  This was introduced in version 3.93

-// because it allows splitting the data into a "lossy" stream and a

-// "correction" stream in a very efficient manner and is therefore ideal

-// for the "hybrid" mode.  For 4.0, the efficiency of this method was

-// significantly improved by moving away from the normal Rice restriction of

-// using powers of two for the modulus divisions and now the method can be

-// used for both hybrid and pure lossless encoding.

-

-// Samples are divided by median probabilities at 5/7 (71.43%), 10/49 (20.41%),

-// and 20/343 (5.83%). Each zone has 3.5 times fewer samples than the

-// previous. Using standard Rice coding on this data would result in 1.4

-// bits per sample average (not counting sign bit). However, there is a

-// very simple encoding that is over 99% efficient with this data and

-// results in about 1.22 bits per sample.

-

-#include "wavpack.h"

-

-#include <string.h>

-

-//////////////////////////////// local macros /////////////////////////////////

-

-#define LIMIT_ONES 16   // maximum consecutive 1s sent for "div" data

-

-// these control the time constant "slow_level" which is used for hybrid mode

-// that controls bitrate as a function of residual level (HYBRID_BITRATE).

-#define SLS 8

-#define SLO ((1 << (SLS - 1)))

-

-// these control the time constant of the 3 median level breakpoints

-#define DIV0 128        // 5/7 of samples

-#define DIV1 64         // 10/49 of samples

-#define DIV2 32         // 20/343 of samples

-

-// this macro retrieves the specified median breakpoint (without frac; min = 1)

-#define GET_MED(med) (((c->median [med]) >> 4) + 1)

-

-// These macros update the specified median breakpoints. Note that the median

-// is incremented when the sample is higher than the median, else decremented.

-// They are designed so that the median will never drop below 1 and the value

-// is essentially stationary if there are 2 increments for every 5 decrements.

-

-#define INC_MED0() (c->median [0] += ((c->median [0] + DIV0) / DIV0) * 5)

-#define DEC_MED0() (c->median [0] -= ((c->median [0] + (DIV0-2)) / DIV0) * 2)

-#define INC_MED1() (c->median [1] += ((c->median [1] + DIV1) / DIV1) * 5)

-#define DEC_MED1() (c->median [1] -= ((c->median [1] + (DIV1-2)) / DIV1) * 2)

-#define INC_MED2() (c->median [2] += ((c->median [2] + DIV2) / DIV2) * 5)

-#define DEC_MED2() (c->median [2] -= ((c->median [2] + (DIV2-2)) / DIV2) * 2)

-

-#define count_bits(av) ( \

- (av) < (1 << 8) ? nbits_table [av] : \

-  ( \

-   (av) < (1L << 16) ? nbits_table [(av) >> 8] + 8 : \

-   ((av) < (1L << 24) ? nbits_table [(av) >> 16] + 16 : nbits_table [(av) >> 24] + 24) \

-  ) \

-)

-

-///////////////////////////// local table storage ////////////////////////////

-

-const char nbits_table [] = {

-    0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,     // 0 - 15

-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     // 16 - 31

-    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,     // 32 - 47

-    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,     // 48 - 63

-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     // 64 - 79

-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     // 80 - 95

-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     // 96 - 111

-    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     // 112 - 127

-    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 128 - 143

-    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 144 - 159

-    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 160 - 175

-    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 176 - 191

-    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 192 - 207

-    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 208 - 223

-    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 224 - 239

-    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8      // 240 - 255

-};

-

-static const uchar log2_table [] = {

-    0x00, 0x01, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, 0x10, 0x11, 0x12, 0x14, 0x15,

-    0x16, 0x18, 0x19, 0x1a, 0x1c, 0x1d, 0x1e, 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a,

-    0x2c, 0x2d, 0x2e, 0x2f, 0x31, 0x32, 0x33, 0x34, 0x36, 0x37, 0x38, 0x39, 0x3b, 0x3c, 0x3d, 0x3e,

-    0x3f, 0x41, 0x42, 0x43, 0x44, 0x45, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4d, 0x4e, 0x4f, 0x50, 0x51,

-    0x52, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63,

-    0x64, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x74, 0x75,

-    0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85,

-    0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95,

-    0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,

-    0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb2,

-    0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc0,

-    0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcb, 0xcc, 0xcd, 0xce,

-    0xcf, 0xd0, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd8, 0xd9, 0xda, 0xdb,

-    0xdc, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe4, 0xe5, 0xe6, 0xe7, 0xe7,

-    0xe8, 0xe9, 0xea, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xee, 0xef, 0xf0, 0xf1, 0xf1, 0xf2, 0xf3, 0xf4,

-    0xf4, 0xf5, 0xf6, 0xf7, 0xf7, 0xf8, 0xf9, 0xf9, 0xfa, 0xfb, 0xfc, 0xfc, 0xfd, 0xfe, 0xff, 0xff

-};

-

-static const uchar exp2_table [] = {

-    0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b,

-    0x0b, 0x0c, 0x0d, 0x0e, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x12, 0x13, 0x13, 0x14, 0x15, 0x16, 0x16,

-    0x17, 0x18, 0x19, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1d, 0x1e, 0x1f, 0x20, 0x20, 0x21, 0x22, 0x23,

-    0x24, 0x24, 0x25, 0x26, 0x27, 0x28, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,

-    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3a, 0x3b, 0x3c, 0x3d,

-    0x3e, 0x3f, 0x40, 0x41, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x48, 0x49, 0x4a, 0x4b,

-    0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a,

-    0x5b, 0x5c, 0x5d, 0x5e, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,

-    0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,

-    0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x87, 0x88, 0x89, 0x8a,

-    0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b,

-    0x9c, 0x9d, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad,

-    0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0,

-    0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc8, 0xc9, 0xca, 0xcb, 0xcd, 0xce, 0xcf, 0xd0, 0xd2, 0xd3, 0xd4,

-    0xd6, 0xd7, 0xd8, 0xd9, 0xdb, 0xdc, 0xdd, 0xde, 0xe0, 0xe1, 0xe2, 0xe4, 0xe5, 0xe6, 0xe8, 0xe9,

-    0xea, 0xec, 0xed, 0xee, 0xf0, 0xf1, 0xf2, 0xf4, 0xf5, 0xf6, 0xf8, 0xf9, 0xfa, 0xfc, 0xfd, 0xff

-};

-

-static const char ones_count_table [] = {

-    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,

-    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,

-    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,

-    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,

-    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,

-    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,

-    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,

-    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8

-};

-

-///////////////////////////// executable code ////////////////////////////////

-

-void init_words (WavpackStream *wps)

-{

-    CLEAR (wps->w);

-}

-

-static int mylog2 (uint32_t avalue);

-

-// Read the median log2 values from the specifed metadata structure, convert

-// them back to 32-bit unsigned values and store them. If length is not

-// exactly correct then we flag and return an error.

-

-int read_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd)

-{

-    uchar *byteptr = wpmd->data;

-

-    if (wpmd->byte_length != ((wps->wphdr.flags & MONO_DATA) ? 6 : 12))

-        return FALSE;

-

-    wps->w.c [0].median [0] = exp2s (byteptr [0] + (byteptr [1] << 8));

-    wps->w.c [0].median [1] = exp2s (byteptr [2] + (byteptr [3] << 8));

-    wps->w.c [0].median [2] = exp2s (byteptr [4] + (byteptr [5] << 8));

-

-    if (!(wps->wphdr.flags & MONO_DATA)) {

-        wps->w.c [1].median [0] = exp2s (byteptr [6] + (byteptr [7] << 8));

-        wps->w.c [1].median [1] = exp2s (byteptr [8] + (byteptr [9] << 8));

-        wps->w.c [1].median [2] = exp2s (byteptr [10] + (byteptr [11] << 8));

-    }

-

-    return TRUE;

-}

-

-// Read the hybrid related values from the specifed metadata structure, convert

-// them back to their internal formats and store them. The extended profile

-// stuff is not implemented yet, so return an error if we get more data than

-// we know what to do with.

-

-int read_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd)

-{

-    uchar *byteptr = wpmd->data;

-    uchar *endptr = byteptr + wpmd->byte_length;

-

-    if (wps->wphdr.flags & HYBRID_BITRATE) {

-        wps->w.c [0].slow_level = exp2s (byteptr [0] + (byteptr [1] << 8));

-        byteptr += 2;

-

-        if (!(wps->wphdr.flags & MONO_DATA)) {

-            wps->w.c [1].slow_level = exp2s (byteptr [0] + (byteptr [1] << 8));

-            byteptr += 2;

-        }

-    }

-

-    wps->w.bitrate_acc [0] = (int32_t)(byteptr [0] + (byteptr [1] << 8)) << 16;

-    byteptr += 2;

-

-    if (!(wps->wphdr.flags & MONO_DATA)) {

-        wps->w.bitrate_acc [1] = (int32_t)(byteptr [0] + (byteptr [1] << 8)) << 16;

-        byteptr += 2;

-    }

-

-    if (byteptr < endptr) {

-        wps->w.bitrate_delta [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8)));

-        byteptr += 2;

-

-        if (!(wps->wphdr.flags & MONO_DATA)) {

-            wps->w.bitrate_delta [1] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8)));

-            byteptr += 2;

-        }

-

-        if (byteptr < endptr)

-            return FALSE;

-    }

-    else

-        wps->w.bitrate_delta [0] = wps->w.bitrate_delta [1] = 0;

-

-    return TRUE;

-}

-

-// This function is called during both encoding and decoding of hybrid data to

-// update the "error_limit" variable which determines the maximum sample error

-// allowed in the main bitstream. In the HYBRID_BITRATE mode (which is the only

-// currently implemented) this is calculated from the slow_level values and the

-// bitrate accumulators. Note that the bitrate accumulators can be changing.

-

-void update_error_limit (struct words_data *w, uint32_t flags)

-{

-    int bitrate_0 = (w->bitrate_acc [0] += w->bitrate_delta [0]) >> 16;

-

-    if (flags & MONO_DATA) {

-        if (flags & HYBRID_BITRATE) {

-            int slow_log_0 = (w->c [0].slow_level + SLO) >> SLS;

-

-            if (slow_log_0 - bitrate_0 > -0x100)

-                w->c [0].error_limit = exp2s (slow_log_0 - bitrate_0 + 0x100);

-            else

-                w->c [0].error_limit = 0;

-        }

-        else

-            w->c [0].error_limit = exp2s (bitrate_0);

-    }

-    else {

-        int bitrate_1 = (w->bitrate_acc [1] += w->bitrate_delta [1]) >> 16;

-

-        if (flags & HYBRID_BITRATE) {

-            int slow_log_0 = (w->c [0].slow_level + SLO) >> SLS;

-            int slow_log_1 = (w->c [1].slow_level + SLO) >> SLS;

-

-            if (flags & HYBRID_BALANCE) {

-                int balance = (slow_log_1 - slow_log_0 + bitrate_1 + 1) >> 1;

-

-                if (balance > bitrate_0) {

-                    bitrate_1 = bitrate_0 * 2;

-                    bitrate_0 = 0;

-                }

-                else if (-balance > bitrate_0) {

-                    bitrate_0 = bitrate_0 * 2;

-                    bitrate_1 = 0;

-                }

-                else {

-                    bitrate_1 = bitrate_0 + balance;

-                    bitrate_0 = bitrate_0 - balance;

-                }

-            }

-

-            if (slow_log_0 - bitrate_0 > -0x100)

-                w->c [0].error_limit = exp2s (slow_log_0 - bitrate_0 + 0x100);

-            else

-                w->c [0].error_limit = 0;

-

-            if (slow_log_1 - bitrate_1 > -0x100)

-                w->c [1].error_limit = exp2s (slow_log_1 - bitrate_1 + 0x100);

-            else

-                w->c [1].error_limit = 0;

-        }

-        else {

-            w->c [0].error_limit = exp2s (bitrate_0);

-            w->c [1].error_limit = exp2s (bitrate_1);

-        }

-    }

-}

-

-static uint32_t read_code (Bitstream *bs, uint32_t maxcode);

-

-// Read the next word from the bitstream "wvbits" and return the value. This

-// function can be used for hybrid or lossless streams, but since an

-// optimized version is available for lossless this function would normally

-// be used for hybrid only. If a hybrid lossless stream is being read then

-// the "correction" offset is written at the specified pointer. A return value

-// of WORD_EOF indicates that the end of the bitstream was reached (all 1s) or

-// some other error occurred.

-

-int32_t get_words (int32_t *buffer, int nsamples, uint32_t flags,

-                struct words_data *w, Bitstream *bs)

-{

-    register struct entropy_data *c = w->c;

-    int csamples;

-

-    if (!(flags & MONO_DATA))

-        nsamples *= 2;

-

-    for (csamples = 0; csamples < nsamples; ++csamples) {

-        uint32_t ones_count, low, mid, high;

-

-        if (!(flags & MONO_DATA))

-            c = w->c + (csamples & 1);

-

-        if (!(w->c [0].median [0] & ~1) && !w->holding_zero && !w->holding_one && !(w->c [1].median [0] & ~1)) {

-            uint32_t mask;

-            int cbits;

-

-            if (w->zeros_acc) {

-                if (--w->zeros_acc) {

-                    c->slow_level -= (c->slow_level + SLO) >> SLS;

-                    *buffer++ = 0;

-                    continue;

-                }

-            }

-            else {

-                for (cbits = 0; cbits < 33 && getbit (bs); ++cbits);

-

-                if (cbits == 33)

-                    break;

-

-                if (cbits < 2)

-                    w->zeros_acc = cbits;

-                else {

-                    for (mask = 1, w->zeros_acc = 0; --cbits; mask <<= 1)

-                        if (getbit (bs))

-                            w->zeros_acc |= mask;

-

-                    w->zeros_acc |= mask;

-                }

-

-                if (w->zeros_acc) {

-                    c->slow_level -= (c->slow_level + SLO) >> SLS;

-                    CLEAR (w->c [0].median);

-                    CLEAR (w->c [1].median);

-                    *buffer++ = 0;

-                    continue;

-                }

-            }

-        }

-

-        if (w->holding_zero)

-            ones_count = w->holding_zero = 0;

-        else {

-            int next8;

-

-            if (bs->bc < 8) {

-                if (++(bs->ptr) == bs->end)

-                    bs->wrap (bs);

-

-                next8 = (bs->sr |= *(bs->ptr) << bs->bc) & 0xff;

-                bs->bc += 8;

-            }

-            else

-                next8 = bs->sr & 0xff;

-

-            if (next8 == 0xff) {

-                bs->bc -= 8;

-                bs->sr >>= 8;

-

-                for (ones_count = 8; ones_count < (LIMIT_ONES + 1) && getbit (bs); ++ones_count);

-

-                if (ones_count == (LIMIT_ONES + 1))

-                    break;

-

-                if (ones_count == LIMIT_ONES) {

-                    uint32_t mask;

-                    int cbits;

-

-                    for (cbits = 0; cbits < 33 && getbit (bs); ++cbits);

-

-                    if (cbits == 33)

-                        break;

-

-                    if (cbits < 2)

-                        ones_count = cbits;

-                    else {

-                        for (mask = 1, ones_count = 0; --cbits; mask <<= 1)

-                            if (getbit (bs))

-                                ones_count |= mask;

-

-                        ones_count |= mask;

-                    }

-

-                    ones_count += LIMIT_ONES;

-                }

-            }

-            else {

-                bs->bc -= (ones_count = ones_count_table [next8]) + 1;

-                bs->sr >>= ones_count + 1;

-            }

-

-            if (w->holding_one) {

-                w->holding_one = ones_count & 1;

-                ones_count = (ones_count >> 1) + 1;

-            }

-            else {

-                w->holding_one = ones_count & 1;

-                ones_count >>= 1;

-            }

-

-            w->holding_zero = ~w->holding_one & 1;

-        }

-

-        if ((flags & HYBRID_FLAG) && ((flags & MONO_DATA) || !(csamples & 1)))

-            update_error_limit (w, flags);

-

-        if (ones_count == 0) {

-            low = 0;

-            high = GET_MED (0) - 1;

-            DEC_MED0 ();

-        }

-        else {

-            low = GET_MED (0);

-            INC_MED0 ();

-

-            if (ones_count == 1) {

-                high = low + GET_MED (1) - 1;

-                DEC_MED1 ();

-            }

-            else {

-                low += GET_MED (1);

-                INC_MED1 ();

-

-                if (ones_count == 2) {

-                    high = low + GET_MED (2) - 1;

-                    DEC_MED2 ();

-                }

-                else {

-                    low += (ones_count - 2) * GET_MED (2);

-                    high = low + GET_MED (2) - 1;

-                    INC_MED2 ();

-                }

-            }

-        }

-

-        mid = (high + low + 1) >> 1;

-

-        if (!c->error_limit)

-            mid = read_code (bs, high - low) + low;

-        else while (high - low > c->error_limit) {

-            if (getbit (bs))

-                mid = (high + (low = mid) + 1) >> 1;

-            else

-                mid = ((high = mid - 1) + low + 1) >> 1;

-        }

-

-        *buffer++ = getbit (bs) ? ~mid : mid;

-

-        if (flags & HYBRID_BITRATE)

-            c->slow_level = c->slow_level - ((c->slow_level + SLO) >> SLS) + mylog2 (mid);

-    }

-

-    return (flags & MONO_DATA) ? csamples : (csamples / 2);

-}

-

-// Read a single unsigned value from the specified bitstream with a value

-// from 0 to maxcode. If there are exactly a power of two number of possible

-// codes then this will read a fixed number of bits; otherwise it reads the

-// minimum number of bits and then determines whether another bit is needed

-// to define the code.

-

-static uint32_t read_code (Bitstream *bs, uint32_t maxcode)

-{

-    int bitcount = count_bits (maxcode);

-    uint32_t extras = (1L << bitcount) - maxcode - 1, code;

-

-    if (!bitcount)

-        return 0;

-

-    getbits (&code, bitcount - 1, bs);

-    code &= (1L << (bitcount - 1)) - 1;

-

-    if (code >= extras) {

-        code = (code << 1) - extras;

-

-        if (getbit (bs))

-            ++code;

-    }

-

-    return code;

-}

-

-// The concept of a base 2 logarithm is used in many parts of WavPack. It is

-// a way of sufficiently accurately representing 32-bit signed and unsigned

-// values storing only 16 bits (actually fewer). It is also used in the hybrid

-// mode for quickly comparing the relative magnitude of large values (i.e.

-// division) and providing smooth exponentials using only addition.

-

-// These are not strict logarithms in that they become linear around zero and

-// can therefore represent both zero and negative values. They have 8 bits

-// of precision and in "roundtrip" conversions the total error never exceeds 1

-// part in 225 except for the cases of +/-115 and +/-195 (which error by 1).

-

-

-// This function returns the log2 for the specified 32-bit unsigned value.

-// The maximum value allowed is about 0xff800000 and returns 8447.

-

-static int mylog2 (uint32_t avalue)

-{

-    int dbits;

-

-    if ((avalue += avalue >> 9) < (1 << 8)) {

-        dbits = nbits_table [avalue];

-        return (dbits << 8) + log2_table [(avalue << (9 - dbits)) & 0xff];

-    }

-    else {

-        if (avalue < (1L << 16))

-            dbits = nbits_table [avalue >> 8] + 8;

-        else if (avalue < (1L << 24))

-            dbits = nbits_table [avalue >> 16] + 16;

-        else

-            dbits = nbits_table [avalue >> 24] + 24;

-

-        return (dbits << 8) + log2_table [(avalue >> (dbits - 9)) & 0xff];

-    }

-}

-

-// This function returns the log2 for the specified 32-bit signed value.

-// All input values are valid and the return values are in the range of

-// +/- 8192.

-

-int log2s (int32_t value)

-{

-    return (value < 0) ? -mylog2 (-value) : mylog2 (value);

-}

-

-// This function returns the original integer represented by the supplied

-// logarithm (at least within the provided accuracy). The log is signed,

-// but since a full 32-bit value is returned this can be used for unsigned

-// conversions as well (i.e. the input range is -8192 to +8447).

-

-int32_t exp2s (int log)

-{

-    uint32_t value;

-

-    if (log < 0)

-        return -exp2s (-log);

-

-    value = exp2_table [log & 0xff] | 0x100;

-

-    if ((log >>= 8) <= 9)

-        return value >> (9 - log);

-    else

-        return value << (log - 9);

-}

-

-// These two functions convert internal weights (which are normally +/-1024)

-// to and from an 8-bit signed character version for storage in metadata. The

-// weights are clipped here in the case that they are outside that range.

-

-int restore_weight (signed char weight)

-{

-    int result;

-

-    if ((result = (int) weight << 3) > 0)

-        result += (result + 64) >> 7;

-

-    return result;

-}

+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2006 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// words.c
+
+// This module provides entropy word encoding and decoding functions using
+// a variation on the Rice method.  This was introduced in version 3.93
+// because it allows splitting the data into a "lossy" stream and a
+// "correction" stream in a very efficient manner and is therefore ideal
+// for the "hybrid" mode.  For 4.0, the efficiency of this method was
+// significantly improved by moving away from the normal Rice restriction of
+// using powers of two for the modulus divisions and now the method can be
+// used for both hybrid and pure lossless encoding.
+
+// Samples are divided by median probabilities at 5/7 (71.43%), 10/49 (20.41%),
+// and 20/343 (5.83%). Each zone has 3.5 times fewer samples than the
+// previous. Using standard Rice coding on this data would result in 1.4
+// bits per sample average (not counting sign bit). However, there is a
+// very simple encoding that is over 99% efficient with this data and
+// results in about 1.22 bits per sample.
+
+#include "wavpack.h"
+
+#include <string.h>
+
+//////////////////////////////// local macros /////////////////////////////////
+
+#define LIMIT_ONES 16   // maximum consecutive 1s sent for "div" data
+
+// these control the time constant "slow_level" which is used for hybrid mode
+// that controls bitrate as a function of residual level (HYBRID_BITRATE).
+#define SLS 8
+#define SLO ((1 << (SLS - 1)))
+
+// these control the time constant of the 3 median level breakpoints
+#define DIV0 128        // 5/7 of samples
+#define DIV1 64         // 10/49 of samples
+#define DIV2 32         // 20/343 of samples
+
+// this macro retrieves the specified median breakpoint (without frac; min = 1)
+#define GET_MED(med) (((c->median [med]) >> 4) + 1)
+
+// These macros update the specified median breakpoints. Note that the median
+// is incremented when the sample is higher than the median, else decremented.
+// They are designed so that the median will never drop below 1 and the value
+// is essentially stationary if there are 2 increments for every 5 decrements.
+
+#define INC_MED0() (c->median [0] += ((c->median [0] + DIV0) / DIV0) * 5)
+#define DEC_MED0() (c->median [0] -= ((c->median [0] + (DIV0-2)) / DIV0) * 2)
+#define INC_MED1() (c->median [1] += ((c->median [1] + DIV1) / DIV1) * 5)
+#define DEC_MED1() (c->median [1] -= ((c->median [1] + (DIV1-2)) / DIV1) * 2)
+#define INC_MED2() (c->median [2] += ((c->median [2] + DIV2) / DIV2) * 5)
+#define DEC_MED2() (c->median [2] -= ((c->median [2] + (DIV2-2)) / DIV2) * 2)
+
+#define count_bits(av) ( \
+ (av) < (1 << 8) ? nbits_table [av] : \
+  ( \
+   (av) < (1L << 16) ? nbits_table [(av) >> 8] + 8 : \
+   ((av) < (1L << 24) ? nbits_table [(av) >> 16] + 16 : nbits_table [(av) >> 24] + 24) \
+  ) \
+)
+
+///////////////////////////// local table storage ////////////////////////////
+
+const char nbits_table [] = {
+    0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,     // 0 - 15
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,     // 16 - 31
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,     // 32 - 47
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,     // 48 - 63
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     // 64 - 79
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     // 80 - 95
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     // 96 - 111
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,     // 112 - 127
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 128 - 143
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 144 - 159
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 160 - 175
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 176 - 191
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 192 - 207
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 208 - 223
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,     // 224 - 239
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8      // 240 - 255
+};
+
+static const uchar log2_table [] = {
+    0x00, 0x01, 0x03, 0x04, 0x06, 0x07, 0x09, 0x0a, 0x0b, 0x0d, 0x0e, 0x10, 0x11, 0x12, 0x14, 0x15,
+    0x16, 0x18, 0x19, 0x1a, 0x1c, 0x1d, 0x1e, 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a,
+    0x2c, 0x2d, 0x2e, 0x2f, 0x31, 0x32, 0x33, 0x34, 0x36, 0x37, 0x38, 0x39, 0x3b, 0x3c, 0x3d, 0x3e,
+    0x3f, 0x41, 0x42, 0x43, 0x44, 0x45, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4d, 0x4e, 0x4f, 0x50, 0x51,
+    0x52, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63,
+    0x64, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x74, 0x75,
+    0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85,
+    0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95,
+    0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
+    0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb2,
+    0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc0,
+    0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcb, 0xcc, 0xcd, 0xce,
+    0xcf, 0xd0, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd8, 0xd9, 0xda, 0xdb,
+    0xdc, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe4, 0xe5, 0xe6, 0xe7, 0xe7,
+    0xe8, 0xe9, 0xea, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xee, 0xef, 0xf0, 0xf1, 0xf1, 0xf2, 0xf3, 0xf4,
+    0xf4, 0xf5, 0xf6, 0xf7, 0xf7, 0xf8, 0xf9, 0xf9, 0xfa, 0xfb, 0xfc, 0xfc, 0xfd, 0xfe, 0xff, 0xff
+};
+
+static const uchar exp2_table [] = {
+    0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08, 0x09, 0x0a, 0x0b,
+    0x0b, 0x0c, 0x0d, 0x0e, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x12, 0x13, 0x13, 0x14, 0x15, 0x16, 0x16,
+    0x17, 0x18, 0x19, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1d, 0x1e, 0x1f, 0x20, 0x20, 0x21, 0x22, 0x23,
+    0x24, 0x24, 0x25, 0x26, 0x27, 0x28, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
+    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3a, 0x3b, 0x3c, 0x3d,
+    0x3e, 0x3f, 0x40, 0x41, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x48, 0x49, 0x4a, 0x4b,
+    0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a,
+    0x5b, 0x5c, 0x5d, 0x5e, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+    0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+    0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x87, 0x88, 0x89, 0x8a,
+    0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b,
+    0x9c, 0x9d, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad,
+    0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0,
+    0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc8, 0xc9, 0xca, 0xcb, 0xcd, 0xce, 0xcf, 0xd0, 0xd2, 0xd3, 0xd4,
+    0xd6, 0xd7, 0xd8, 0xd9, 0xdb, 0xdc, 0xdd, 0xde, 0xe0, 0xe1, 0xe2, 0xe4, 0xe5, 0xe6, 0xe8, 0xe9,
+    0xea, 0xec, 0xed, 0xee, 0xf0, 0xf1, 0xf2, 0xf4, 0xf5, 0xf6, 0xf8, 0xf9, 0xfa, 0xfc, 0xfd, 0xff
+};
+
+static const char ones_count_table [] = {
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,
+    0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,8
+};
+
+///////////////////////////// executable code ////////////////////////////////
+
+void init_words (WavpackStream *wps)
+{
+    CLEAR (wps->w);
+}
+
+static int mylog2 (uint32_t avalue);
+
+// Read the median log2 values from the specifed metadata structure, convert
+// them back to 32-bit unsigned values and store them. If length is not
+// exactly correct then we flag and return an error.
+
+int read_entropy_vars (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    uchar *byteptr = wpmd->data;
+
+    if (wpmd->byte_length != ((wps->wphdr.flags & MONO_DATA) ? 6 : 12))
+        return FALSE;
+
+    wps->w.c [0].median [0] = exp2s (byteptr [0] + (byteptr [1] << 8));
+    wps->w.c [0].median [1] = exp2s (byteptr [2] + (byteptr [3] << 8));
+    wps->w.c [0].median [2] = exp2s (byteptr [4] + (byteptr [5] << 8));
+
+    if (!(wps->wphdr.flags & MONO_DATA)) {
+        wps->w.c [1].median [0] = exp2s (byteptr [6] + (byteptr [7] << 8));
+        wps->w.c [1].median [1] = exp2s (byteptr [8] + (byteptr [9] << 8));
+        wps->w.c [1].median [2] = exp2s (byteptr [10] + (byteptr [11] << 8));
+    }
+
+    return TRUE;
+}
+
+// Read the hybrid related values from the specifed metadata structure, convert
+// them back to their internal formats and store them. The extended profile
+// stuff is not implemented yet, so return an error if we get more data than
+// we know what to do with.
+
+int read_hybrid_profile (WavpackStream *wps, WavpackMetadata *wpmd)
+{
+    uchar *byteptr = wpmd->data;
+    uchar *endptr = byteptr + wpmd->byte_length;
+
+    if (wps->wphdr.flags & HYBRID_BITRATE) {
+        wps->w.c [0].slow_level = exp2s (byteptr [0] + (byteptr [1] << 8));
+        byteptr += 2;
+
+        if (!(wps->wphdr.flags & MONO_DATA)) {
+            wps->w.c [1].slow_level = exp2s (byteptr [0] + (byteptr [1] << 8));
+            byteptr += 2;
+        }
+    }
+
+    wps->w.bitrate_acc [0] = (int32_t)(byteptr [0] + (byteptr [1] << 8)) << 16;
+    byteptr += 2;
+
+    if (!(wps->wphdr.flags & MONO_DATA)) {
+        wps->w.bitrate_acc [1] = (int32_t)(byteptr [0] + (byteptr [1] << 8)) << 16;
+        byteptr += 2;
+    }
+
+    if (byteptr < endptr) {
+        wps->w.bitrate_delta [0] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8)));
+        byteptr += 2;
+
+        if (!(wps->wphdr.flags & MONO_DATA)) {
+            wps->w.bitrate_delta [1] = exp2s ((short)(byteptr [0] + (byteptr [1] << 8)));
+            byteptr += 2;
+        }
+
+        if (byteptr < endptr)
+            return FALSE;
+    }
+    else
+        wps->w.bitrate_delta [0] = wps->w.bitrate_delta [1] = 0;
+
+    return TRUE;
+}
+
+// This function is called during both encoding and decoding of hybrid data to
+// update the "error_limit" variable which determines the maximum sample error
+// allowed in the main bitstream. In the HYBRID_BITRATE mode (which is the only
+// currently implemented) this is calculated from the slow_level values and the
+// bitrate accumulators. Note that the bitrate accumulators can be changing.
+
+void update_error_limit (struct words_data *w, uint32_t flags)
+{
+    int bitrate_0 = (w->bitrate_acc [0] += w->bitrate_delta [0]) >> 16;
+
+    if (flags & MONO_DATA) {
+        if (flags & HYBRID_BITRATE) {
+            int slow_log_0 = (w->c [0].slow_level + SLO) >> SLS;
+
+            if (slow_log_0 - bitrate_0 > -0x100)
+                w->c [0].error_limit = exp2s (slow_log_0 - bitrate_0 + 0x100);
+            else
+                w->c [0].error_limit = 0;
+        }
+        else
+            w->c [0].error_limit = exp2s (bitrate_0);
+    }
+    else {
+        int bitrate_1 = (w->bitrate_acc [1] += w->bitrate_delta [1]) >> 16;
+
+        if (flags & HYBRID_BITRATE) {
+            int slow_log_0 = (w->c [0].slow_level + SLO) >> SLS;
+            int slow_log_1 = (w->c [1].slow_level + SLO) >> SLS;
+
+            if (flags & HYBRID_BALANCE) {
+                int balance = (slow_log_1 - slow_log_0 + bitrate_1 + 1) >> 1;
+
+                if (balance > bitrate_0) {
+                    bitrate_1 = bitrate_0 * 2;
+                    bitrate_0 = 0;
+                }
+                else if (-balance > bitrate_0) {
+                    bitrate_0 = bitrate_0 * 2;
+                    bitrate_1 = 0;
+                }
+                else {
+                    bitrate_1 = bitrate_0 + balance;
+                    bitrate_0 = bitrate_0 - balance;
+                }
+            }
+
+            if (slow_log_0 - bitrate_0 > -0x100)
+                w->c [0].error_limit = exp2s (slow_log_0 - bitrate_0 + 0x100);
+            else
+                w->c [0].error_limit = 0;
+
+            if (slow_log_1 - bitrate_1 > -0x100)
+                w->c [1].error_limit = exp2s (slow_log_1 - bitrate_1 + 0x100);
+            else
+                w->c [1].error_limit = 0;
+        }
+        else {
+            w->c [0].error_limit = exp2s (bitrate_0);
+            w->c [1].error_limit = exp2s (bitrate_1);
+        }
+    }
+}
+
+static uint32_t read_code (Bitstream *bs, uint32_t maxcode);
+
+// Read the next word from the bitstream "wvbits" and return the value. This
+// function can be used for hybrid or lossless streams, but since an
+// optimized version is available for lossless this function would normally
+// be used for hybrid only. If a hybrid lossless stream is being read then
+// the "correction" offset is written at the specified pointer. A return value
+// of WORD_EOF indicates that the end of the bitstream was reached (all 1s) or
+// some other error occurred.
+
+int32_t get_words (int32_t *buffer, int nsamples, uint32_t flags,
+                struct words_data *w, Bitstream *bs)
+{
+    register struct entropy_data *c = w->c;
+    int csamples;
+
+    if (!(flags & MONO_DATA))
+        nsamples *= 2;
+
+    for (csamples = 0; csamples < nsamples; ++csamples) {
+        uint32_t ones_count, low, mid, high;
+
+        if (!(flags & MONO_DATA))
+            c = w->c + (csamples & 1);
+
+        if (!(w->c [0].median [0] & ~1) && !w->holding_zero && !w->holding_one && !(w->c [1].median [0] & ~1)) {
+            uint32_t mask;
+            int cbits;
+
+            if (w->zeros_acc) {
+                if (--w->zeros_acc) {
+                    c->slow_level -= (c->slow_level + SLO) >> SLS;
+                    *buffer++ = 0;
+                    continue;
+                }
+            }
+            else {
+                for (cbits = 0; cbits < 33 && getbit (bs); ++cbits);
+
+                if (cbits == 33)
+                    break;
+
+                if (cbits < 2)
+                    w->zeros_acc = cbits;
+                else {
+                    for (mask = 1, w->zeros_acc = 0; --cbits; mask <<= 1)
+                        if (getbit (bs))
+                            w->zeros_acc |= mask;
+
+                    w->zeros_acc |= mask;
+                }
+
+                if (w->zeros_acc) {
+                    c->slow_level -= (c->slow_level + SLO) >> SLS;
+                    CLEAR (w->c [0].median);
+                    CLEAR (w->c [1].median);
+                    *buffer++ = 0;
+                    continue;
+                }
+            }
+        }
+
+        if (w->holding_zero)
+            ones_count = w->holding_zero = 0;
+        else {
+            int next8;
+
+            if (bs->bc < 8) {
+                if (++(bs->ptr) == bs->end)
+                    bs->wrap (bs);
+
+                next8 = (bs->sr |= *(bs->ptr) << bs->bc) & 0xff;
+                bs->bc += 8;
+            }
+            else
+                next8 = bs->sr & 0xff;
+
+            if (next8 == 0xff) {
+                bs->bc -= 8;
+                bs->sr >>= 8;
+
+                for (ones_count = 8; ones_count < (LIMIT_ONES + 1) && getbit (bs); ++ones_count);
+
+                if (ones_count == (LIMIT_ONES + 1))
+                    break;
+
+                if (ones_count == LIMIT_ONES) {
+                    uint32_t mask;
+                    int cbits;
+
+                    for (cbits = 0; cbits < 33 && getbit (bs); ++cbits);
+
+                    if (cbits == 33)
+                        break;
+
+                    if (cbits < 2)
+                        ones_count = cbits;
+                    else {
+                        for (mask = 1, ones_count = 0; --cbits; mask <<= 1)
+                            if (getbit (bs))
+                                ones_count |= mask;
+
+                        ones_count |= mask;
+                    }
+
+                    ones_count += LIMIT_ONES;
+                }
+            }
+            else {
+                bs->bc -= (ones_count = ones_count_table [next8]) + 1;
+                bs->sr >>= ones_count + 1;
+            }
+
+            if (w->holding_one) {
+                w->holding_one = ones_count & 1;
+                ones_count = (ones_count >> 1) + 1;
+            }
+            else {
+                w->holding_one = ones_count & 1;
+                ones_count >>= 1;
+            }
+
+            w->holding_zero = ~w->holding_one & 1;
+        }
+
+        if ((flags & HYBRID_FLAG) && ((flags & MONO_DATA) || !(csamples & 1)))
+            update_error_limit (w, flags);
+
+        if (ones_count == 0) {
+            low = 0;
+            high = GET_MED (0) - 1;
+            DEC_MED0 ();
+        }
+        else {
+            low = GET_MED (0);
+            INC_MED0 ();
+
+            if (ones_count == 1) {
+                high = low + GET_MED (1) - 1;
+                DEC_MED1 ();
+            }
+            else {
+                low += GET_MED (1);
+                INC_MED1 ();
+
+                if (ones_count == 2) {
+                    high = low + GET_MED (2) - 1;
+                    DEC_MED2 ();
+                }
+                else {
+                    low += (ones_count - 2) * GET_MED (2);
+                    high = low + GET_MED (2) - 1;
+                    INC_MED2 ();
+                }
+            }
+        }
+
+        mid = (high + low + 1) >> 1;
+
+        if (!c->error_limit)
+            mid = read_code (bs, high - low) + low;
+        else while (high - low > c->error_limit) {
+            if (getbit (bs))
+                mid = (high + (low = mid) + 1) >> 1;
+            else
+                mid = ((high = mid - 1) + low + 1) >> 1;
+        }
+
+        *buffer++ = getbit (bs) ? ~mid : mid;
+
+        if (flags & HYBRID_BITRATE)
+            c->slow_level = c->slow_level - ((c->slow_level + SLO) >> SLS) + mylog2 (mid);
+    }
+
+    return (flags & MONO_DATA) ? csamples : (csamples / 2);
+}
+
+// Read a single unsigned value from the specified bitstream with a value
+// from 0 to maxcode. If there are exactly a power of two number of possible
+// codes then this will read a fixed number of bits; otherwise it reads the
+// minimum number of bits and then determines whether another bit is needed
+// to define the code.
+
+static uint32_t read_code (Bitstream *bs, uint32_t maxcode)
+{
+    int bitcount = count_bits (maxcode);
+    uint32_t extras = (1L << bitcount) - maxcode - 1, code;
+
+    if (!bitcount)
+        return 0;
+
+    getbits (&code, bitcount - 1, bs);
+    code &= (1L << (bitcount - 1)) - 1;
+
+    if (code >= extras) {
+        code = (code << 1) - extras;
+
+        if (getbit (bs))
+            ++code;
+    }
+
+    return code;
+}
+
+// The concept of a base 2 logarithm is used in many parts of WavPack. It is
+// a way of sufficiently accurately representing 32-bit signed and unsigned
+// values storing only 16 bits (actually fewer). It is also used in the hybrid
+// mode for quickly comparing the relative magnitude of large values (i.e.
+// division) and providing smooth exponentials using only addition.
+
+// These are not strict logarithms in that they become linear around zero and
+// can therefore represent both zero and negative values. They have 8 bits
+// of precision and in "roundtrip" conversions the total error never exceeds 1
+// part in 225 except for the cases of +/-115 and +/-195 (which error by 1).
+
+
+// This function returns the log2 for the specified 32-bit unsigned value.
+// The maximum value allowed is about 0xff800000 and returns 8447.
+
+static int mylog2 (uint32_t avalue)
+{
+    int dbits;
+
+    if ((avalue += avalue >> 9) < (1 << 8)) {
+        dbits = nbits_table [avalue];
+        return (dbits << 8) + log2_table [(avalue << (9 - dbits)) & 0xff];
+    }
+    else {
+        if (avalue < (1L << 16))
+            dbits = nbits_table [avalue >> 8] + 8;
+        else if (avalue < (1L << 24))
+            dbits = nbits_table [avalue >> 16] + 16;
+        else
+            dbits = nbits_table [avalue >> 24] + 24;
+
+        return (dbits << 8) + log2_table [(avalue >> (dbits - 9)) & 0xff];
+    }
+}
+
+// This function returns the log2 for the specified 32-bit signed value.
+// All input values are valid and the return values are in the range of
+// +/- 8192.
+
+int log2s (int32_t value)
+{
+    return (value < 0) ? -mylog2 (-value) : mylog2 (value);
+}
+
+// This function returns the original integer represented by the supplied
+// logarithm (at least within the provided accuracy). The log is signed,
+// but since a full 32-bit value is returned this can be used for unsigned
+// conversions as well (i.e. the input range is -8192 to +8447).
+
+int32_t exp2s (int log)
+{
+    uint32_t value;
+
+    if (log < 0)
+        return -exp2s (-log);
+
+    value = exp2_table [log & 0xff] | 0x100;
+
+    if ((log >>= 8) <= 9)
+        return value >> (9 - log);
+    else
+        return value << (log - 9);
+}
+
+// These two functions convert internal weights (which are normally +/-1024)
+// to and from an 8-bit signed character version for storage in metadata. The
+// weights are clipped here in the case that they are outside that range.
+
+int restore_weight (signed char weight)
+{
+    int result;
+
+    if ((result = (int) weight << 3) > 0)
+        result += (result + 64) >> 7;
+
+    return result;
+}
diff --git a/src/engine/external/wavpack/wputils.c b/src/engine/external/wavpack/wputils.c
index 0d71af53..1bc65610 100644
--- a/src/engine/external/wavpack/wputils.c
+++ b/src/engine/external/wavpack/wputils.c
@@ -1,351 +1,351 @@
-////////////////////////////////////////////////////////////////////////////

-//                           **** WAVPACK ****                            //

-//                  Hybrid Lossless Wavefile Compressor                   //

-//              Copyright (c) 1998 - 2006 Conifer Software.               //

-//                          All Rights Reserved.                          //

-//      Distributed under the BSD Software License (see license.txt)      //

-////////////////////////////////////////////////////////////////////////////

-

-// wputils.c

-

-// This module provides a high-level interface for decoding WavPack 4.0 audio

-// streams and files. WavPack data is read with a stream reading callback. No

-// direct seeking is provided for, but it is possible to start decoding

-// anywhere in a WavPack stream. In this case, WavPack will be able to provide

-// the sample-accurate position when it synchs with the data and begins

-// decoding.

-

-#include "wavpack.h"

-

-#include <string.h>

-

-///////////////////////////// local table storage ////////////////////////////

-

-const uint32_t sample_rates [] = { 6000, 8000, 9600, 11025, 12000, 16000, 22050,

-    24000, 32000, 44100, 48000, 64000, 88200, 96000, 192000 };

-

-///////////////////////////// executable code ////////////////////////////////

-

-static uint32_t read_next_header (read_stream infile, WavpackHeader *wphdr);

-        

-// This function reads data from the specified stream in search of a valid

-// WavPack 4.0 audio block. If this fails in 1 megabyte (or an invalid or

-// unsupported WavPack block is encountered) then an appropriate message is

-// copied to "error" and NULL is returned, otherwise a pointer to a

-// WavpackContext structure is returned (which is used to call all other

-// functions in this module). This can be initiated at the beginning of a

-// WavPack file, or anywhere inside a WavPack file. To determine the exact

-// position within the file use WavpackGetSampleIndex(). For demonstration

-// purposes this uses a single static copy of the WavpackContext structure,

-// so obviously it cannot be used for more than one file at a time. Also,

-// this function will not handle "correction" files, plays only the first

-// two channels of multi-channel files, and is limited in resolution in some

-// large integer or floating point files (but always provides at least 24 bits

-// of resolution).

-

-static WavpackContext wpc;

-

-WavpackContext *WavpackOpenFileInput (read_stream infile, char *error)

-{

-    WavpackStream *wps = &wpc.stream;

-    uint32_t bcount;

-

-    CLEAR (wpc);

-    wpc.infile = infile;

-    wpc.total_samples = (uint32_t) -1;

-    wpc.norm_offset = 0;

-    wpc.open_flags = 0;

-

-    // open the source file for reading and store the size

-

-    while (!wps->wphdr.block_samples) {

-

-        bcount = read_next_header (wpc.infile, &wps->wphdr);

-

-        if (bcount == (uint32_t) -1) {

-            strcpy (error, "not compatible with this version of WavPack file!");

-            return NULL;

-        }

-

-        if (wps->wphdr.block_samples && wps->wphdr.total_samples != (uint32_t) -1)

-            wpc.total_samples = wps->wphdr.total_samples;

-

-        if (!unpack_init (&wpc)) {

-            strcpy (error, wpc.error_message [0] ? wpc.error_message :

-                "not compatible with this version of WavPack file!");

-

-            return NULL;

-        }

-    }

-

-    wpc.config.flags &= ~0xff;

-    wpc.config.flags |= wps->wphdr.flags & 0xff;

-    wpc.config.bytes_per_sample = (wps->wphdr.flags & BYTES_STORED) + 1;

-    wpc.config.float_norm_exp = wps->float_norm_exp;

-

-    wpc.config.bits_per_sample = (wpc.config.bytes_per_sample * 8) - 

-        ((wps->wphdr.flags & SHIFT_MASK) >> SHIFT_LSB);

-

-    if (wpc.config.flags & FLOAT_DATA) {

-        wpc.config.bytes_per_sample = 3;

-        wpc.config.bits_per_sample = 24;

-    }

-

-    if (!wpc.config.sample_rate) {

-        if (!wps || !wps->wphdr.block_samples || (wps->wphdr.flags & SRATE_MASK) == SRATE_MASK)

-            wpc.config.sample_rate = 44100;

-        else

-            wpc.config.sample_rate = sample_rates [(wps->wphdr.flags & SRATE_MASK) >> SRATE_LSB];

-    }

-

-    if (!wpc.config.num_channels) {

-        wpc.config.num_channels = (wps->wphdr.flags & MONO_FLAG) ? 1 : 2;

-        wpc.config.channel_mask = 0x5 - wpc.config.num_channels;

-    }

-

-    if (!(wps->wphdr.flags & FINAL_BLOCK))

-        wpc.reduced_channels = (wps->wphdr.flags & MONO_FLAG) ? 1 : 2;

-

-    return &wpc;

-}

-

-// This function obtains general information about an open file and returns

-// a mask with the following bit values:

-

-// MODE_LOSSLESS:  file is lossless (pure lossless only)

-// MODE_HYBRID:  file is hybrid mode (lossy part only)

-// MODE_FLOAT:  audio data is 32-bit ieee floating point (but will provided

-//               in 24-bit integers for convenience)

-// MODE_HIGH:  file was created in "high" mode (information only)

-// MODE_FAST:  file was created in "fast" mode (information only)

-

-int WavpackGetMode (WavpackContext *wpc)

-{

-    int mode = 0;

-

-    if (wpc) {

-        if (wpc->config.flags & CONFIG_HYBRID_FLAG)

-            mode |= MODE_HYBRID;

-        else if (!(wpc->config.flags & CONFIG_LOSSY_MODE))

-            mode |= MODE_LOSSLESS;

-

-        if (wpc->lossy_blocks)

-            mode &= ~MODE_LOSSLESS;

-

-        if (wpc->config.flags & CONFIG_FLOAT_DATA)

-            mode |= MODE_FLOAT;

-

-        if (wpc->config.flags & CONFIG_HIGH_FLAG)

-            mode |= MODE_HIGH;

-

-        if (wpc->config.flags & CONFIG_FAST_FLAG)

-            mode |= MODE_FAST;

-    }

-

-    return mode;

-}

-

-// Unpack the specified number of samples from the current file position.

-// Note that "samples" here refers to "complete" samples, which would be

-// 2 longs for stereo files. The audio data is returned right-justified in

-// 32-bit longs in the endian mode native to the executing processor. So,

-// if the original data was 16-bit, then the values returned would be

-// +/-32k. Floating point data will be returned as 24-bit integers (and may

-// also be clipped). The actual number of samples unpacked is returned,

-// which should be equal to the number requested unless the end of fle is

-// encountered or an error occurs.

-

-uint32_t WavpackUnpackSamples (WavpackContext *wpc, int32_t *buffer, uint32_t samples)

-{

-    WavpackStream *wps = &wpc->stream;

-    uint32_t bcount, samples_unpacked = 0, samples_to_unpack;

-    int num_channels = wpc->config.num_channels;

-

-    while (samples) {

-        if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) ||

-            wps->sample_index >= wps->wphdr.block_index + wps->wphdr.block_samples) {

-                bcount = read_next_header (wpc->infile, &wps->wphdr);

-

-                if (bcount == (uint32_t) -1)

-                    break;

-

-                if (!wps->wphdr.block_samples || wps->sample_index == wps->wphdr.block_index)

-                    if (!unpack_init (wpc))

-                        break;

-        }

-

-        if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) ||

-            wps->sample_index >= wps->wphdr.block_index + wps->wphdr.block_samples)

-                continue;

-

-        if (wps->sample_index < wps->wphdr.block_index) {

-            samples_to_unpack = wps->wphdr.block_index - wps->sample_index;

-

-            if (samples_to_unpack > samples)

-                samples_to_unpack = samples;

-

-            wps->sample_index += samples_to_unpack;

-            samples_unpacked += samples_to_unpack;

-            samples -= samples_to_unpack;

-

-            if (wpc->reduced_channels)

-                samples_to_unpack *= wpc->reduced_channels;

-            else

-                samples_to_unpack *= num_channels;

-

-            while (samples_to_unpack--)

-                *buffer++ = 0;

-

-            continue;

-        }

-

-        samples_to_unpack = wps->wphdr.block_index + wps->wphdr.block_samples - wps->sample_index;

-

-        if (samples_to_unpack > samples)

-            samples_to_unpack = samples;

-

-        unpack_samples (wpc, buffer, samples_to_unpack);

-

-        if (wpc->reduced_channels)

-            buffer += samples_to_unpack * wpc->reduced_channels;

-        else

-            buffer += samples_to_unpack * num_channels;

-

-        samples_unpacked += samples_to_unpack;

-        samples -= samples_to_unpack;

-

-        if (wps->sample_index == wps->wphdr.block_index + wps->wphdr.block_samples) {

-            if (check_crc_error (wpc))

-                wpc->crc_errors++;

-        }

-

-        if (wps->sample_index == wpc->total_samples)

-            break;

-    }

-

-    return samples_unpacked;

-}

-

-// Get total number of samples contained in the WavPack file, or -1 if unknown

-

-uint32_t WavpackGetNumSamples (WavpackContext *wpc)

-{

-    return wpc ? wpc->total_samples : (uint32_t) -1;

-}

-

-// Get the current sample index position, or -1 if unknown

-

-uint32_t WavpackGetSampleIndex (WavpackContext *wpc)

-{

-    if (wpc)

-        return wpc->stream.sample_index;

-

-    return (uint32_t) -1;

-}

-

-// Get the number of errors encountered so far

-

-int WavpackGetNumErrors (WavpackContext *wpc)

-{

-    return wpc ? wpc->crc_errors : 0;

-}

-

-// return TRUE if any uncorrected lossy blocks were actually written or read

-

-int WavpackLossyBlocks (WavpackContext *wpc)

-{

-    return wpc ? wpc->lossy_blocks : 0;

-}

-

-// Returns the sample rate of the specified WavPack file

-

-uint32_t WavpackGetSampleRate (WavpackContext *wpc)

-{

-    return wpc ? wpc->config.sample_rate : 44100;

-}

-

-// Returns the number of channels of the specified WavPack file. Note that

-// this is the actual number of channels contained in the file, but this

-// version can only decode the first two.

-

-int WavpackGetNumChannels (WavpackContext *wpc)

-{

-    return wpc ? wpc->config.num_channels : 2;

-}

-

-// Returns the actual number of valid bits per sample contained in the

-// original file, which may or may not be a multiple of 8. Floating data

-// always has 32 bits, integers may be from 1 to 32 bits each. When this

-// value is not a multiple of 8, then the "extra" bits are located in the

-// LSBs of the results. That is, values are right justified when unpacked

-// into longs, but are left justified in the number of bytes used by the

-// original data.

-

-int WavpackGetBitsPerSample (WavpackContext *wpc)

-{

-    return wpc ? wpc->config.bits_per_sample : 16;

-}

-

-// Returns the number of bytes used for each sample (1 to 4) in the original

-// file. This is required information for the user of this module because the

-// audio data is returned in the LOWER bytes of the long buffer and must be

-// left-shifted 8, 16, or 24 bits if normalized longs are required.

-

-int WavpackGetBytesPerSample (WavpackContext *wpc)

-{

-    return wpc ? wpc->config.bytes_per_sample : 2;

-}

-

-// This function will return the actual number of channels decoded from the

-// file (which may or may not be less than the actual number of channels, but

-// will always be 1 or 2). Normally, this will be the front left and right

-// channels of a multi-channel file.

-

-int WavpackGetReducedChannels (WavpackContext *wpc)

-{

-    if (wpc)

-        return wpc->reduced_channels ? wpc->reduced_channels : wpc->config.num_channels;

-    else

-        return 2;

-}

-

-// Read from current file position until a valid 32-byte WavPack 4.0 header is

-// found and read into the specified pointer. The number of bytes skipped is

-// returned. If no WavPack header is found within 1 meg, then a -1 is returned

-// to indicate the error. No additional bytes are read past the header and it

-// is returned in the processor's native endian mode. Seeking is not required.

-

-static uint32_t read_next_header (read_stream infile, WavpackHeader *wphdr)

-{

-    char buffer [sizeof (*wphdr)], *sp = buffer + sizeof (*wphdr), *ep = sp;

-    uint32_t bytes_skipped = 0;

-    int bleft;

-

-    while (1) {

-        if (sp < ep) {

-            bleft = ep - sp;

-            memcpy (buffer, sp, bleft);

-        }

-        else

-            bleft = 0;

-

-        if (infile (buffer + bleft, sizeof (*wphdr) - bleft) != (int32_t) sizeof (*wphdr) - bleft)

-            return -1;

-

-        sp = buffer;

-

-        if (*sp++ == 'w' && *sp == 'v' && *++sp == 'p' && *++sp == 'k' &&

-            !(*++sp & 1) && sp [2] < 16 && !sp [3] && sp [5] == 4 &&

-            sp [4] >= (MIN_STREAM_VERS & 0xff) && sp [4] <= (MAX_STREAM_VERS & 0xff)) {

-                memcpy (wphdr, buffer, sizeof (*wphdr));

-                little_endian_to_native (wphdr, WavpackHeaderFormat);

-                return bytes_skipped;

-            }

-

-        while (sp < ep && *sp != 'w')

-            sp++;

-

-        if ((bytes_skipped += sp - buffer) > 1048576L)

-            return -1;

-    }

-}

+////////////////////////////////////////////////////////////////////////////
+//                           **** WAVPACK ****                            //
+//                  Hybrid Lossless Wavefile Compressor                   //
+//              Copyright (c) 1998 - 2006 Conifer Software.               //
+//                          All Rights Reserved.                          //
+//      Distributed under the BSD Software License (see license.txt)      //
+////////////////////////////////////////////////////////////////////////////
+
+// wputils.c
+
+// This module provides a high-level interface for decoding WavPack 4.0 audio
+// streams and files. WavPack data is read with a stream reading callback. No
+// direct seeking is provided for, but it is possible to start decoding
+// anywhere in a WavPack stream. In this case, WavPack will be able to provide
+// the sample-accurate position when it synchs with the data and begins
+// decoding.
+
+#include "wavpack.h"
+
+#include <string.h>
+
+///////////////////////////// local table storage ////////////////////////////
+
+const uint32_t sample_rates [] = { 6000, 8000, 9600, 11025, 12000, 16000, 22050,    // indexed by the header's sample-rate field
+    24000, 32000, 44100, 48000, 64000, 88200, 96000, 192000 };                      // 15 entries, valid indices 0-14
+
+///////////////////////////// executable code ////////////////////////////////
+
+static uint32_t read_next_header (read_stream infile, WavpackHeader *wphdr);
+        
+// This function reads data from the specified stream in search of a valid
+// WavPack 4.0 audio block. If this fails in 1 megabyte (or an invalid or
+// unsupported WavPack block is encountered) then an appropriate message is
+// copied to "error" and NULL is returned, otherwise a pointer to a
+// WavpackContext structure is returned (which is used to call all other
+// functions in this module). This can be initiated at the beginning of a
+// WavPack file, or anywhere inside a WavPack file. To determine the exact
+// position within the file use WavpackGetSampleIndex(). For demonstration
+// purposes this uses a single static copy of the WavpackContext structure,
+// so obviously it cannot be used for more than one file at a time. Also,
+// this function will not handle "correction" files, plays only the first
+// two channels of multi-channel files, and is limited in resolution in some
+// large integer or floating point files (but always provides at least 24 bits
+// of resolution).
+
+static WavpackContext wpc;              // the single shared decoder context (one open stream at a time)
+
+WavpackContext *WavpackOpenFileInput (read_stream infile, char *error)
+{
+    WavpackStream *wps = &wpc.stream;
+    uint32_t bcount;
+
+    CLEAR (wpc);
+    wpc.infile = infile;
+    wpc.total_samples = (uint32_t) -1;  // "unknown" until a block header supplies the real count
+    wpc.norm_offset = 0;
+    wpc.open_flags = 0;
+
+    // read block headers until one that actually carries audio samples has been found
+
+    while (!wps->wphdr.block_samples) {
+
+        bcount = read_next_header (wpc.infile, &wps->wphdr);
+
+        if (bcount == (uint32_t) -1) {  // read failed, or no header within the search limit
+            strcpy (error, "not compatible with this version of WavPack file!");   // unbounded copy: caller's buffer must be large enough
+            return NULL;
+        }
+
+        if (wps->wphdr.block_samples && wps->wphdr.total_samples != (uint32_t) -1)
+            wpc.total_samples = wps->wphdr.total_samples;
+
+        if (!unpack_init (&wpc)) {
+            strcpy (error, wpc.error_message [0] ? wpc.error_message :             // prefer the message left in the context, if any
+                "not compatible with this version of WavPack file!");
+
+            return NULL;
+        }
+    }
+
+    wpc.config.flags &= ~0xff;                                  // replace the low 8 config flag bits...
+    wpc.config.flags |= wps->wphdr.flags & 0xff;                // ...with the low 8 bits of the block header flags
+    wpc.config.bytes_per_sample = (wps->wphdr.flags & BYTES_STORED) + 1;
+    wpc.config.float_norm_exp = wps->float_norm_exp;
+
+    wpc.config.bits_per_sample = (wpc.config.bytes_per_sample * 8) - 
+        ((wps->wphdr.flags & SHIFT_MASK) >> SHIFT_LSB);         // stored bits minus the header's shift count
+
+    if (wpc.config.flags & FLOAT_DATA) {                        // float data is reported as 3 bytes / 24 bits
+        wpc.config.bytes_per_sample = 3;
+        wpc.config.bits_per_sample = 24;
+    }
+
+    if (!wpc.config.sample_rate) {                              // only fill in a rate if none has been set yet
+        if (!wps || !wps->wphdr.block_samples || (wps->wphdr.flags & SRATE_MASK) == SRATE_MASK)   // "!wps" can never be true here
+            wpc.config.sample_rate = 44100;                     // fallback when the header field has all bits set
+        else
+            wpc.config.sample_rate = sample_rates [(wps->wphdr.flags & SRATE_MASK) >> SRATE_LSB];
+    }
+
+    if (!wpc.config.num_channels) {                             // only fill in channel info if none has been set yet
+        wpc.config.num_channels = (wps->wphdr.flags & MONO_FLAG) ? 1 : 2;
+        wpc.config.channel_mask = 0x5 - wpc.config.num_channels;   // yields 0x4 for mono, 0x3 for stereo
+    }
+
+    if (!(wps->wphdr.flags & FINAL_BLOCK))                      // presumably a multichannel stream: more blocks follow this one
+        wpc.reduced_channels = (wps->wphdr.flags & MONO_FLAG) ? 1 : 2;
+
+    return &wpc;                                                // always the address of the static context
+}
+
+// This function obtains general information about an open file and returns
+// a mask with the following bit values:
+
+// MODE_LOSSLESS:  file is lossless (pure lossless only)
+// MODE_HYBRID:  file is hybrid mode (lossy part only)
+// MODE_FLOAT:  audio data is 32-bit IEEE floating point (but will be provided
+//               in 24-bit integers for convenience)
+// MODE_HIGH:  file was created in "high" mode (information only)
+// MODE_FAST:  file was created in "fast" mode (information only)
+
+int WavpackGetMode (WavpackContext *wpc)
+{
+    int mode = 0;                       // returned unchanged (0) when wpc is NULL
+
+    if (wpc) {
+        if (wpc->config.flags & CONFIG_HYBRID_FLAG)
+            mode |= MODE_HYBRID;
+        else if (!(wpc->config.flags & CONFIG_LOSSY_MODE))
+            mode |= MODE_LOSSLESS;      // neither the hybrid nor the lossy flag is set
+
+        if (wpc->lossy_blocks)
+            mode &= ~MODE_LOSSLESS;     // lossy blocks were recorded, so withdraw the lossless bit
+
+        if (wpc->config.flags & CONFIG_FLOAT_DATA)
+            mode |= MODE_FLOAT;
+
+        if (wpc->config.flags & CONFIG_HIGH_FLAG)
+            mode |= MODE_HIGH;
+
+        if (wpc->config.flags & CONFIG_FAST_FLAG)
+            mode |= MODE_FAST;
+    }
+
+    return mode;                        // bitwise OR of the MODE_* values collected above
+}
+
+// Unpack the specified number of samples from the current file position.
+// Note that "samples" here refers to "complete" samples, which would be
+// 2 longs for stereo files. The audio data is returned right-justified in
+// 32-bit longs in the endian mode native to the executing processor. So,
+// if the original data was 16-bit, then the values returned would be
+// +/-32k. Floating point data will be returned as 24-bit integers (and may
+// also be clipped). The actual number of samples unpacked is returned,
+// which should be equal to the number requested unless the end of file is
+// encountered or an error occurs.
+
+uint32_t WavpackUnpackSamples (WavpackContext *wpc, int32_t *buffer, uint32_t samples)
+{
+    WavpackStream *wps = &wpc->stream;      // note: wpc is dereferenced without a NULL check
+    uint32_t bcount, samples_unpacked = 0, samples_to_unpack;
+    int num_channels = wpc->config.num_channels;
+
+    while (samples) {
+        if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) ||      // no usable block, or...
+            wps->sample_index >= wps->wphdr.block_index + wps->wphdr.block_samples) { // ...current block fully consumed
+                bcount = read_next_header (wpc->infile, &wps->wphdr);
+
+                if (bcount == (uint32_t) -1)
+                    break;                  // no further header available: return what was decoded so far
+
+                if (!wps->wphdr.block_samples || wps->sample_index == wps->wphdr.block_index)
+                    if (!unpack_init (wpc))
+                        break;              // block could not be set up for decoding
+        }
+
+        if (!wps->wphdr.block_samples || !(wps->wphdr.flags & INITIAL_BLOCK) ||
+            wps->sample_index >= wps->wphdr.block_index + wps->wphdr.block_samples)
+                continue;                   // the new block is still not decodable: go fetch the next header
+
+        if (wps->sample_index < wps->wphdr.block_index) {       // position is before this block: fill the gap with zeros
+            samples_to_unpack = wps->wphdr.block_index - wps->sample_index;
+
+            if (samples_to_unpack > samples)
+                samples_to_unpack = samples;                    // never write more than the caller asked for
+
+            wps->sample_index += samples_to_unpack;
+            samples_unpacked += samples_to_unpack;
+            samples -= samples_to_unpack;
+
+            if (wpc->reduced_channels)                          // from here samples_to_unpack counts int32 values, not complete samples
+                samples_to_unpack *= wpc->reduced_channels;
+            else
+                samples_to_unpack *= num_channels;
+
+            while (samples_to_unpack--)
+                *buffer++ = 0;
+
+            continue;
+        }
+
+        samples_to_unpack = wps->wphdr.block_index + wps->wphdr.block_samples - wps->sample_index;   // samples left in this block
+
+        if (samples_to_unpack > samples)
+            samples_to_unpack = samples;
+
+        unpack_samples (wpc, buffer, samples_to_unpack);        // return value ignored; presumably advances wps->sample_index (defined elsewhere)
+
+        if (wpc->reduced_channels)                              // step past the values just written, one per decoded channel
+            buffer += samples_to_unpack * wpc->reduced_channels;
+        else
+            buffer += samples_to_unpack * num_channels;
+
+        samples_unpacked += samples_to_unpack;
+        samples -= samples_to_unpack;
+
+        if (wps->sample_index == wps->wphdr.block_index + wps->wphdr.block_samples) {   // reached the end of the block
+            if (check_crc_error (wpc))
+                wpc->crc_errors++;                              // count the failure; decoding still continues
+        }
+
+        if (wps->sample_index == wpc->total_samples)
+            break;                                              // reached the total sample count recorded for the stream
+    }
+
+    return samples_unpacked;                                    // complete samples delivered, including zero-filled ones
+}
+
+// Get total number of samples contained in the WavPack file, or -1 if unknown
+
+uint32_t WavpackGetNumSamples (WavpackContext *wpc)
+{
+    return wpc ? wpc->total_samples : (uint32_t) -1;    // a NULL context is reported as "unknown" (all bits set)
+}
+
+// Get the current sample index position, or -1 if unknown
+
+uint32_t WavpackGetSampleIndex (WavpackContext *wpc)
+{
+    if (wpc)
+        return wpc->stream.sample_index;    // position kept in the context's stream state
+
+    return (uint32_t) -1;                   // no context: position unknown
+}
+
+// Get the number of errors encountered so far
+
+int WavpackGetNumErrors (WavpackContext *wpc)
+{
+    return wpc ? wpc->crc_errors : 0;       // crc_errors counter of the context; 0 when no context is given
+}
+
+// return TRUE if any uncorrected lossy blocks were actually written or read
+
+int WavpackLossyBlocks (WavpackContext *wpc)
+{
+    return wpc ? wpc->lossy_blocks : 0;     // 0 (FALSE) when no context is given
+}
+
+// Returns the sample rate of the specified WavPack file
+
+uint32_t WavpackGetSampleRate (WavpackContext *wpc)
+{
+    return wpc ? wpc->config.sample_rate : 44100;   // falls back to 44100 when no context is given
+}
+
+// Returns the number of channels of the specified WavPack file. Note that
+// this is the actual number of channels contained in the file, but this
+// version can only decode the first two.
+
+int WavpackGetNumChannels (WavpackContext *wpc)
+{
+    return wpc ? wpc->config.num_channels : 2;      // falls back to 2 when no context is given
+}
+
+// Returns the actual number of valid bits per sample contained in the
+// original file, which may or may not be a multiple of 8. Floating data is
+// reported as 24 bits here; integers may be from 1 to 32 bits each. When this
+// value is not a multiple of 8, then the "extra" bits are located in the
+// LSBs of the results. That is, values are right justified when unpacked
+// into longs, but are left justified in the number of bytes used by the
+// original data.
+
+int WavpackGetBitsPerSample (WavpackContext *wpc)
+{
+    return wpc ? wpc->config.bits_per_sample : 16;  // falls back to 16 when no context is given
+}
+
+// Returns the number of bytes used for each sample (1 to 4) in the original
+// file. This is required information for the user of this module because the
+// audio data is returned in the LOWER bytes of the long buffer and must be
+// left-shifted 8, 16, or 24 bits if normalized longs are required.
+
+int WavpackGetBytesPerSample (WavpackContext *wpc)
+{
+    return wpc ? wpc->config.bytes_per_sample : 2;  // falls back to 2 when no context is given
+}
+
+// This function will return the actual number of channels decoded from the
+// file (which may or may not be less than the actual number of channels, but
+// will always be 1 or 2). Normally, this will be the front left and right
+// channels of a multi-channel file.
+
+int WavpackGetReducedChannels (WavpackContext *wpc)
+{
+    if (wpc)
+        return wpc->reduced_channels ? wpc->reduced_channels : wpc->config.num_channels;   // 0 means "not reduced": use the full count
+    else
+        return 2;                           // falls back to 2 when no context is given
+}
+
+// Read from current file position until a valid 32-byte WavPack 4.0 header is
+// found and read into the specified pointer. The number of bytes skipped is
+// returned. If no WavPack header is found within 1 meg, then a -1 is returned
+// to indicate the error. No additional bytes are read past the header and it
+// is returned in the processor's native endian mode. Seeking is not required.
+
+static uint32_t read_next_header (read_stream infile, WavpackHeader *wphdr)
+{
+    unsigned char buffer [sizeof (*wphdr)], *sp = buffer + sizeof (*wphdr), *ep = sp;   // unsigned: bytes >= 0x80 must fail the "< 16" test
+    uint32_t bytes_skipped = 0;
+    int bleft;
+
+    while (1) {
+        if (sp < ep) {                      // unscanned tail is left over, starting at a candidate 'w'
+            bleft = ep - sp;
+            memmove (buffer, sp, bleft);    // source and destination lie in the same array and may overlap
+        }
+        else
+            bleft = 0;
+
+        if (infile (buffer + bleft, sizeof (*wphdr) - bleft) != (int32_t) sizeof (*wphdr) - bleft)
+            return -1;                      // callback did not deliver the requested byte count
+
+        sp = buffer;
+
+        if (*sp++ == 'w' && *sp == 'v' && *++sp == 'p' && *++sp == 'k' &&                 // "wvpk" signature in bytes 0-3
+            !(*++sp & 1) && sp [2] < 16 && !sp [3] && sp [5] == 4 &&                      // sp is buffer + 4 for the indexed tests
+            sp [4] >= (MIN_STREAM_VERS & 0xff) && sp [4] <= (MAX_STREAM_VERS & 0xff)) {
+                memcpy (wphdr, buffer, sizeof (*wphdr));                                  // distinct objects, so memcpy is fine here
+                little_endian_to_native (wphdr, WavpackHeaderFormat);
+                return bytes_skipped;
+            }
+
+        while (sp < ep && *sp != 'w')       // advance to the next byte that could start a signature
+            sp++;
+
+        if ((bytes_skipped += sp - buffer) > 1048576L)
+            return -1;                      // more than 1 MB discarded without finding a header
+    }
+}