about summary refs log tree commit diff
path: root/src/wavpack/arml.S
diff options
context:
space:
mode:
Diffstat (limited to 'src/wavpack/arml.S')
-rw-r--r--src/wavpack/arml.S491
1 files changed, 0 insertions, 491 deletions
diff --git a/src/wavpack/arml.S b/src/wavpack/arml.S
deleted file mode 100644
index 39de5383..00000000
--- a/src/wavpack/arml.S
+++ /dev/null
@@ -1,491 +0,0 @@
-////////////////////////////////////////////////////////////////////////////

-//                           **** WAVPACK ****                            //

-//                  Hybrid Lossless Wavefile Compressor                   //

-//              Copyright (c) 1998 - 2006 Conifer Software.               //

-//                          All Rights Reserved.                          //

-//      Distributed under the BSD Software License (see license.txt)      //

-////////////////////////////////////////////////////////////////////////////

-

-/* This is an assembly optimized version of the following WavPack function:

- *

- * void decorr_stereo_pass_cont (struct decorr_pass *dpp,

- *                               long *buffer, long sample_count);

- *

- * It performs a single pass of stereo decorrelation on the provided buffer.

- * Note that this version of the function requires that the 8 previous stereo

- * samples are visible and correct. In other words, it ignores the "samples_*"

- * fields in the decorr_pass structure and gets the history data directly

- * from the buffer. It does, however, return the appropriate history samples

- * to the decorr_pass structure before returning.

- *

- * This is written to work on an ARM7TDMI processor. This version uses the

- * 64-bit multiply-accumulate instruction and so can be used with all

- * WavPack files. However, for optimum performance with 16-bit WavPack

- * files, there is a faster version that only uses the 32-bit MLA

- * instruction.

- */

-

-        .text

-        .align

-        .global         decorr_stereo_pass_cont_arml

-

-/*

- * on entry:

- *

- * r0 = struct decorr_pass *dpp

- * r1 = long *buffer

- * r2 = long sample_count

- */

-

-decorr_stereo_pass_cont_arml:

-

-        stmfd   sp!, {r4 - r8, r10, r11, lr}

-        mov     r5, r0                  @ r5 = dpp

-        mov     r11, #512               @ (not relied on: every loop reloads r11)

-        ldrsh   r6, [r0, #2]            @ r6 = dpp->delta

-        ldrsh   r4, [r0, #4]            @ r4 = dpp->weight_A

-        ldrsh   r0, [r0, #6]            @ r0 = dpp->weight_B

-        mov     r0, r0, asl #18         @ for 64-bit math we use weights << 18

-        mov     r4, r4, asl #18         @  (scaled BEFORE the zero-count test since

-        mov     r6, r6, asl #18         @  common_exit always shifts weights back)

-

-        cmp     r2, #0                  @ exit if no samples to process

-        beq     common_exit             @  (weights are stored back unchanged)

-        add     r7, r1, r2, asl #3      @ r7 = buffer ending position

-        ldrsh   r2, [r5, #0]            @ r2 = dpp->term

-        cmp     r2, #0

-        blt     minus_term              @ negative terms are dispatched separately

-

-        ldr     lr, [r1, #-16]          @ load 2 sample history from buffer

-        ldr     r10, [r1, #-12]         @  for terms 2, 17, and 18

-        ldr     r8, [r1, #-8]           @  (lr/r10 = 2nd previous left/right,

-        ldr     r3, [r1, #-4]           @   r8/r3 = previous left/right)

-

-        cmp     r2, #18

-        beq     term_18_loop            @ term 18 uses the history unshifted

-        mov     lr, lr, asl #4          @ terms 2 and 17 keep 2nd previous

-        mov     r10, r10, asl #4        @  samples pre-shifted << 4

-        cmp     r2, #2

-        beq     term_2_loop

-        cmp     r2, #17

-        beq     term_17_loop

-        b       term_default_loop       @ every other non-negative term

-

-minus_term:

-        mov     r10, #(1024 << 18)      @ r10 = -1024 << 18 for weight clipping

-        rsb     r10, r10, #0            @  (only used for negative terms)

-        cmn     r2, #1                  @ term == -1 ?

-        beq     term_minus_1

-        cmn     r2, #2                  @ term == -2 ?

-        beq     term_minus_2

-        cmn     r2, #3                  @ term == -3 ?

-        beq     term_minus_3

-        b       common_exit             @ any other negative term: nothing to do

-

-/*

- ******************************************************************************

- * Loop to handle term = 17 condition

- *

- * r0 = dpp->weight_B           r8 = previous left sample

- * r1 = bptr                    r9 = 

- * r2 = current sample          r10 = second previous right sample << 4

- * r3 = previous right sample   r11 = lo accumulator (for rounding)

- * r4 = dpp->weight_A           ip = current decorrelation value

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr = second previous left sample << 4

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_17_loop:

-        rsbs    ip, lr, r8, asl #5      @ decorr value = (2 * prev) - 2nd prev

-        mov     lr, r8, asl #4          @ previous becomes 2nd previous

-        ldr     r2, [r1], #4            @ get sample & update pointer

-        mov     r11, #0x80000000        @ lo word = 1/2 so the hi word is rounded

-        mov     r8, r2                  @ hi word of accumulator = sample

-        smlalne r11, r8, r4, ip         @ if decorr != 0: r8:r11 += weight_A * decorr

-        strne   r8, [r1, #-4]           @ if change possible, store sample back

-        cmpne   r2, #0                  @ Z set if decorr == 0 or sample == 0

-        beq     .L325                   @  -> leave weight_A alone

-        teq     ip, r2                  @ update weight based on signs

-        submi   r4, r4, r6              @ signs differ: weight_A -= delta

-        addpl   r4, r4, r6              @ signs agree:  weight_A += delta

-

-.L325:  rsbs    ip, r10, r3, asl #5     @ do same thing for right channel

-        mov     r10, r3, asl #4         @ previous becomes 2nd previous

-        ldr     r2, [r1], #4            @ get sample & update pointer

-        mov     r11, #0x80000000        @ lo word = 1/2 so the hi word is rounded

-        mov     r3, r2                  @ hi word of accumulator = sample

-        smlalne r11, r3, r0, ip         @ if decorr != 0: r3:r11 += weight_B * decorr

-        strne   r3, [r1, #-4]           @ if change possible, store sample back

-        cmpne   r2, #0                  @ Z set if decorr == 0 or sample == 0

-        beq     .L329                   @  -> leave weight_B alone

-        teq     ip, r2                  @ update weight based on signs

-        submi   r0, r0, r6              @ signs differ: weight_B -= delta

-        addpl   r0, r0, r6              @ signs agree:  weight_B += delta

-

-.L329:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_17_loop

-        mov     lr, lr, asr #4          @ undo the << 4 on the 2nd previous samples

-        mov     r10, r10, asr #4        @  before they are stored to the struct

-        b       store_1718              @ common exit for terms 17 & 18

-

-/*

- ******************************************************************************

- * Loop to handle term = 18 condition

- *

- * r0 = dpp->weight_B           r8 = previous left sample

- * r1 = bptr                    r9 = 

- * r2 = current sample          r10 = second previous right sample

- * r3 = previous right sample   r11 = lo accumulator (for rounding)

- * r4 = dpp->weight_A           ip = decorrelation value

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr = second previous left sample

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_18_loop:

-        rsb     ip, lr, r8              @ decorr value =

-        mov     lr, r8                  @  ((3 * prev) - 2nd prev) >> 1

-        add     ip, lr, ip, asr #1      @  computed as prev + ((prev - 2nd prev) >> 1)

-        movs    ip, ip, asl #4          @ scale decorr << 4 and test it for zero

-        ldr     r2, [r1], #4            @ get sample & update pointer

-        mov     r11, #0x80000000        @ lo word = 1/2 so the hi word is rounded

-        mov     r8, r2                  @ hi word of accumulator = sample

-        smlalne r11, r8, r4, ip         @ if decorr != 0: r8:r11 += weight_A * decorr

-        strne   r8, [r1, #-4]           @ if change possible, store sample back

-        cmpne   r2, #0                  @ Z set if decorr == 0 or sample == 0

-        beq     .L337                   @  -> leave weight_A alone

-        teq     ip, r2                  @ update weight based on signs

-        submi   r4, r4, r6              @ signs differ: weight_A -= delta

-        addpl   r4, r4, r6              @ signs agree:  weight_A += delta

-

-.L337:  rsb     ip, r10, r3             @ do same thing for right channel

-        mov     r10, r3                 @ previous becomes 2nd previous

-        add     ip, r10, ip, asr #1     @ prev + ((prev - 2nd prev) >> 1)

-        movs    ip, ip, asl #4          @ scale decorr << 4 and test it for zero

-        ldr     r2, [r1], #4            @ get sample & update pointer

-        mov     r11, #0x80000000        @ lo word = 1/2 so the hi word is rounded

-        mov     r3, r2                  @ hi word of accumulator = sample

-        smlalne r11, r3, r0, ip         @ if decorr != 0: r3:r11 += weight_B * decorr

-        strne   r3, [r1, #-4]           @ if change possible, store sample back

-        cmpne   r2, #0                  @ Z set if decorr == 0 or sample == 0

-        beq     .L341                   @  -> leave weight_B alone

-        teq     ip, r2                  @ update weight based on signs

-        submi   r0, r0, r6              @ signs differ: weight_B -= delta

-        addpl   r0, r0, r6              @ signs agree:  weight_B += delta

-

-.L341:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_18_loop

-

-/* common exit for terms 17 & 18 (term 18 falls through, term 17 branches here) */

-

-store_1718:

-        str     r3, [r5, #40]           @ store sample history into struct

-        str     r8, [r5, #8]            @  (r8 / r3 = previous left / right,

-        str     r10, [r5, #44]          @   lr / r10 = 2nd previous left / right;

-        str     lr, [r5, #12]           @   left at dpp+8/+12, right at dpp+40/+44)

-        b       common_exit             @ and return

-

-/*

- ******************************************************************************

- * Loop to handle term = 2 condition

- * (note that this case can be handled by the default term handler (1-8), but

- * this special case is faster because it doesn't have to read memory twice)

- *

- * r0 = dpp->weight_B           r8 = previous left sample

- * r1 = bptr                    r9 = 

- * r2 = current sample          r10 = second previous right sample << 4

- * r3 = previous right sample   r11 = lo accumulator (for rounding)

- * r4 = dpp->weight_A           ip = decorrelation value

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr = second previous left sample << 4

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_2_loop:

-        movs    ip, lr                  @ get decorrelation value & test

-        ldr     r2, [r1], #4            @ get sample & update pointer

-        mov     lr, r8, asl #4          @ previous becomes 2nd previous

-        mov     r11, #0x80000000        @ lo word = 1/2 so the hi word is rounded

-        mov     r8, r2                  @ hi word of accumulator = sample

-        smlalne r11, r8, r4, ip         @ if decorr != 0: r8:r11 += weight_A * decorr

-        strne   r8, [r1, #-4]           @ if change possible, store sample back

-        cmpne   r2, #0                  @ Z set if decorr == 0 or sample == 0

-        beq     .L225                   @  -> leave weight_A alone

-        teq     ip, r2                  @ update weight based on signs

-        submi   r4, r4, r6              @ signs differ: weight_A -= delta

-        addpl   r4, r4, r6              @ signs agree:  weight_A += delta

-

-.L225:  movs    ip, r10                 @ do same thing for right channel

-        ldr     r2, [r1], #4            @ get sample & update pointer

-        mov     r10, r3, asl #4         @ previous becomes 2nd previous

-        mov     r11, #0x80000000        @ lo word = 1/2 so the hi word is rounded

-        mov     r3, r2                  @ hi word of accumulator = sample

-        smlalne r11, r3, r0, ip         @ if decorr != 0: r3:r11 += weight_B * decorr

-        strne   r3, [r1, #-4]           @ if change possible, store sample back

-        cmpne   r2, #0                  @ Z set if decorr == 0 or sample == 0

-        beq     .L229                   @  -> leave weight_B alone

-        teq     ip, r2                  @ update weight based on signs

-        submi   r0, r0, r6              @ signs differ: weight_B -= delta

-        addpl   r0, r0, r6              @ signs agree:  weight_B += delta

-

-.L229:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_2_loop

-

-        b       default_term_exit       @ this exit updates all dpp->samples

-

-/*

- ******************************************************************************

- * Loop to handle default term condition

- *

- * r0 = dpp->weight_B           r8 = result accumulator

- * r1 = bptr                    r9 = 

- * r2 = dpp->term               r10 =

- * r3 = decorrelation value     r11 = lo accumulator (for rounding)

- * r4 = dpp->weight_A           ip = current sample

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr =

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_default_loop:

-        ldr     r3, [r1, -r2, asl #3]   @ get decorrelation value based on term

-        ldr     ip, [r1], #4            @ get original sample and bump ptr

-        movs    r3, r3, asl #4          @ scale decorr << 4 and test it for zero

-        mov     r11, #0x80000000        @ lo word = 1/2 so the hi word is rounded

-        mov     r8, ip                  @ hi word of accumulator = sample

-        smlalne r11, r8, r4, r3         @ if decorr != 0: r8:r11 += weight_A * decorr

-        strne   r8, [r1, #-4]           @ if possibly changed, store updated sample

-        cmpne   ip, #0                  @ Z set if decorr == 0 or sample == 0

-        beq     .L350                   @  -> leave weight_A alone

-        teq     ip, r3                  @ update weight based on signs

-        submi   r4, r4, r6              @ signs differ: weight_A -= delta

-        addpl   r4, r4, r6              @ signs agree:  weight_A += delta

-

-.L350:  ldr     r3, [r1, -r2, asl #3]   @ do the same thing for right channel

-        ldr     ip, [r1], #4            @ get original sample and bump ptr

-        movs    r3, r3, asl #4          @ scale decorr << 4 and test it for zero

-        mov     r11, #0x80000000        @ lo word = 1/2 so the hi word is rounded

-        mov     r8, ip                  @ hi word of accumulator = sample

-        smlalne r11, r8, r0, r3         @ if decorr != 0: r8:r11 += weight_B * decorr

-        strne   r8, [r1, #-4]           @ if possibly changed, store updated sample

-        cmpne   ip, #0                  @ Z set if decorr == 0 or sample == 0

-        beq     .L354                   @  -> leave weight_B alone

-        teq     ip, r3                  @ update weight based on signs

-        submi   r0, r0, r6              @ signs differ: weight_B -= delta

-        addpl   r0, r0, r6              @ signs agree:  weight_B += delta

-

-.L354:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_default_loop

-

-/*

- * This exit is used by terms 1-8 to store the previous 8 samples into the decorr

- * structure (even if they are not all used for the given term)

- */

-

-default_term_exit:

-        ldrsh   r3, [r5, #0]            @ r3 = dpp->term

-        sub     ip, r3, #1              @ ip = term - 1: slot for the newest sample

-        mov     lr, #7                  @ lr counts 7 down to 0 (8 stereo samples)

-

-.L358:  and     r3, ip, #7              @ wrap slot index into 0..7

-        add     r3, r5, r3, asl #2      @ r3 = dpp + slot * 4

-        ldr     r2, [r1, #-4]           @ right sample just before bptr

-        str     r2, [r3, #40]           @  -> right history (dpp + 40 + slot * 4)

-        ldr     r2, [r1, #-8]!          @ left sample; bptr steps back one stereo pair

-        str     r2, [r3, #8]            @  -> left history (dpp + 8 + slot * 4)

-        sub     ip, ip, #1              @ next older sample goes in the previous slot

-        sub     lr, lr, #1

-        cmn     lr, #1                  @ finished once the counter reaches -1

-        bne     .L358

-        b       common_exit

-

-/*

- ******************************************************************************

- * Loop to handle term = -1 condition

- *

- * r0 = dpp->weight_B           r8 =

- * r1 = bptr                    r9 = 

- * r2 = intermediate result     r10 = -1024 << 18 (for clipping)

- * r3 = previous right sample   r11 = lo accumulator (for rounding)

- * r4 = dpp->weight_A           ip = current sample

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr = updated left sample

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_minus_1:

-        ldr     r3, [r1, #-4]           @ r3 = previous right sample

-

-term_minus_1_loop:

-        ldr     ip, [r1], #8            @ for left channel the decorrelation value

-        movs    r3, r3, asl #4          @  is the previous right sample (in r3)

-        mov     r11, #0x80000000        @ lo word = 1/2 so the hi word is rounded

-        mov     lr, ip                  @ hi word of accumulator = left sample

-        smlalne r11, lr, r4, r3         @ if decorr != 0: lr:r11 += weight_A * decorr

-        strne   lr, [r1, #-8]           @ if change possible, store left sample back

-        cmpne   ip, #0                  @ Z set if decorr == 0 or sample == 0

-        beq     .L361                   @  -> leave weight_A alone

-        teq     ip, r3                  @ update weight based on signs

-        submi   r4, r4, r6              @ signs differ: weight_A -= delta

-        addpl   r4, r4, r6              @ signs agree:  weight_A += delta

-        cmp     r4, #(1024 << 18)       @ then clip weight to +/-1024

-        movgt   r4, #(1024 << 18)

-        cmp     r4, r10                 @ r10 = -1024 << 18

-        movlt   r4, r10

-

-.L361:  ldr     r2, [r1, #-4]           @ for right channel the decorrelation value

-        movs    lr, lr, asl #4          @  is the updated left sample (in lr)

-        mov     r11, #0x80000000        @ lo word = 1/2 so the hi word is rounded

-        mov     r3, r2                  @ hi word of accumulator = right sample

-        smlalne r11, r3, r0, lr         @ if decorr != 0: r3:r11 += weight_B * decorr

-        strne   r3, [r1, #-4]           @ if change possible, store right sample back

-        cmpne   r2, #0                  @ Z set if decorr == 0 or sample == 0

-        beq     .L369                   @  -> leave weight_B alone

-        teq     r2, lr                  @ update weight based on signs

-        submi   r0, r0, r6              @ signs differ: weight_B -= delta

-        addpl   r0, r0, r6              @ signs agree:  weight_B += delta

-        cmp     r0, #(1024 << 18)               @ then clip weight to +/-1024

-        movgt   r0, #(1024 << 18)

-        cmp     r0, r10                 @ r10 = -1024 << 18

-        movlt   r0, r10

-

-.L369:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_minus_1_loop

-

-        str     r3, [r5, #8]            @ else store right sample and exit

-        b       common_exit

-

-/*

- ******************************************************************************

- * Loop to handle term = -2 condition

- * (note that the channels are processed in the reverse order here)

- *

- * r0 = dpp->weight_B           r8 =

- * r1 = bptr                    r9 = 

- * r2 = intermediate result     r10 = -1024 << 18 (for clipping)

- * r3 = previous left sample    r11 = lo accumulator (for rounding)

- * r4 = dpp->weight_A           ip = current sample

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr = updated right sample

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_minus_2:

-        ldr     r3, [r1, #-8]           @ r3 = previous left sample

-

-term_minus_2_loop:

-        ldr     ip, [r1, #4]            @ for right channel the decorrelation value

-        movs    r3, r3, asl #4          @  is the previous left sample (in r3)

-        mov     r11, #0x80000000        @ lo word = 1/2 so the hi word is rounded

-        mov     lr, ip                  @ hi word of accumulator = right sample

-        smlalne r11, lr, r0, r3         @ if decorr != 0: lr:r11 += weight_B * decorr

-        strne   lr, [r1, #4]            @ if change possible, store right sample back

-        cmpne   ip, #0                  @ Z set if decorr == 0 or sample == 0

-        beq     .L380                   @  -> leave weight_B alone

-        teq     ip, r3                  @ update weight based on signs

-        submi   r0, r0, r6              @ signs differ: weight_B -= delta

-        addpl   r0, r0, r6              @ signs agree:  weight_B += delta

-        cmp     r0, #(1024 << 18)               @ then clip weight to +/-1024

-        movgt   r0, #(1024 << 18)

-        cmp     r0, r10                 @ r10 = -1024 << 18

-        movlt   r0, r10

-

-.L380:  ldr     r2, [r1], #8            @ for left channel the decorrelation value

-        movs    lr, lr, asl #4          @  is the updated right sample (in lr)

-        mov     r11, #0x80000000        @ lo word = 1/2 so the hi word is rounded

-        mov     r3, r2                  @ hi word of accumulator = left sample

-        smlalne r11, r3, r4, lr         @ if decorr != 0: r3:r11 += weight_A * decorr

-        strne   r3, [r1, #-8]           @ if change possible, store left sample back

-        cmpne   r2, #0                  @ Z set if decorr == 0 or sample == 0

-        beq     .L388                   @  -> leave weight_A alone

-        teq     r2, lr                  @ update weight based on signs

-        submi   r4, r4, r6              @ signs differ: weight_A -= delta

-        addpl   r4, r4, r6              @ signs agree:  weight_A += delta

-        cmp     r4, #(1024 << 18)       @ then clip weight to +/-1024

-        movgt   r4, #(1024 << 18)

-        cmp     r4, r10                 @ r10 = -1024 << 18

-        movlt   r4, r10

-

-.L388:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_minus_2_loop

-

-        str     r3, [r5, #40]           @ else store left channel and exit

-        b       common_exit

-

-/*

- ******************************************************************************

- * Loop to handle term = -3 condition

- *

- * r0 = dpp->weight_B           r8 = previous left sample

- * r1 = bptr                    r9 = 

- * r2 = current left sample     r10 = -1024 << 18 (for clipping)

- * r3 = previous right sample   r11 = lo accumulator (for rounding)

- * r4 = dpp->weight_A           ip = intermediate result

- * r5 = dpp                     sp =

- * r6 = dpp->delta              lr =

- * r7 = eptr                    pc =

- *******************************************************************************

- */

-

-term_minus_3:

-        ldr     r3, [r1, #-4]           @ load previous samples

-        ldr     r8, [r1, #-8]           @  (r3 = previous right, r8 = previous left)

-

-term_minus_3_loop:

-        ldr     ip, [r1], #4            @ get left sample & update pointer

-        movs    r3, r3, asl #4          @ left decorr value = previous right << 4

-        mov     r11, #0x80000000        @ lo word = 1/2 so the hi word is rounded

-        mov     r2, ip                  @ hi word of accumulator = left sample

-        smlalne r11, r2, r4, r3         @ if decorr != 0: r2:r11 += weight_A * decorr

-        strne   r2, [r1, #-4]           @ if change possible, store left sample back

-        cmpne   ip, #0                  @ Z set if decorr == 0 or sample == 0

-        beq     .L399                   @  -> leave weight_A alone

-        teq     ip, r3                  @ update weight based on signs

-        submi   r4, r4, r6              @ signs differ: weight_A -= delta

-        addpl   r4, r4, r6              @ signs agree:  weight_A += delta

-        cmp     r4, #(1024 << 18)       @ then clip weight to +/-1024

-        movgt   r4, #(1024 << 18)

-        cmp     r4, r10                 @ r10 = -1024 << 18

-        movlt   r4, r10

-

-.L399:  movs    ip, r8, asl #4          @ ip = previous left we use now

-        mov     r8, r2                  @ r8 = current left we use next time

-        ldr     r2, [r1], #4            @ get right sample & update pointer

-        mov     r11, #0x80000000        @ lo word = 1/2 so the hi word is rounded

-        mov     r3, r2                  @ hi word of accumulator = right sample

-        smlalne r11, r3, r0, ip         @ if decorr != 0: r3:r11 += weight_B * decorr

-        strne   r3, [r1, #-4]           @ if change possible, store right sample back

-        cmpne   r2, #0                  @ Z set if decorr == 0 or sample == 0

-        beq     .L407                   @  -> leave weight_B alone

-        teq     ip, r2                  @ update weight based on signs

-        submi   r0, r0, r6              @ signs differ: weight_B -= delta

-        addpl   r0, r0, r6              @ signs agree:  weight_B += delta

-        cmp     r0, #(1024 << 18)       @ then clip weight to +/-1024

-        movgt   r0, #(1024 << 18)

-        cmp     r0, r10                 @ r10 = -1024 << 18

-        movlt   r0, r10

-

-.L407:  cmp     r7, r1                  @ loop back if more samples to do

-        bhi     term_minus_3_loop

-

-        str     r3, [r5, #8]            @ else store previous samples & exit

-        str     r8, [r5, #40]           @  (right at dpp+8, left at dpp+40)

-

-/*

- * Before finally exiting we must store weights back for next time

- */

-

-common_exit:

-        mov     r0, r0, asr #18         @ restore weights to real magnitude

-        mov     r4, r4, asr #18         @  (assumes both were scaled << 18 on entry)

-        strh    r4, [r5, #4]            @ dpp->weight_A = r4

-        strh    r0, [r5, #6]            @ dpp->weight_B = r0

-        ldmfd   sp!, {r4 - r8, r10, r11, pc}    @ restore saved registers and return

-