diff options
| author | Magnus Auvinen <magnus.auvinen@gmail.com> | 2007-08-22 07:52:33 +0000 |
|---|---|---|
| committer | Magnus Auvinen <magnus.auvinen@gmail.com> | 2007-08-22 07:52:33 +0000 |
| commit | 8b3c16e6152a527f9aec1a88a9eed74119de7000 (patch) | |
| tree | f0bde5cea15e696e42cade06a3b12ff6b13acc57 /src/wavpack/arml.S | |
| parent | 9899666a7ce6679a3b9667ab09f615f4d0769c16 (diff) | |
| download | zcatch-8b3c16e6152a527f9aec1a88a9eed74119de7000.tar.gz zcatch-8b3c16e6152a527f9aec1a88a9eed74119de7000.zip | |
major engine cleanup. dependency on baselib removed. engine is now C code (not ansi tho). some other cruft removed aswell
Diffstat (limited to 'src/wavpack/arml.S')
| -rw-r--r-- | src/wavpack/arml.S | 491 |
1 files changed, 0 insertions, 491 deletions
diff --git a/src/wavpack/arml.S b/src/wavpack/arml.S deleted file mode 100644 index 39de5383..00000000 --- a/src/wavpack/arml.S +++ /dev/null @@ -1,491 +0,0 @@ -//////////////////////////////////////////////////////////////////////////// -// **** WAVPACK **** // -// Hybrid Lossless Wavefile Compressor // -// Copyright (c) 1998 - 2006 Conifer Software. // -// All Rights Reserved. // -// Distributed under the BSD Software License (see license.txt) // -//////////////////////////////////////////////////////////////////////////// - -/* This is an assembly optimized version of the following WavPack function: - * - * void decorr_stereo_pass_cont (struct decorr_pass *dpp, - * long *buffer, long sample_count); - * - * It performs a single pass of stereo decorrelation on the provided buffer. - * Note that this version of the function requires that the 8 previous stereo - * samples are visible and correct. In other words, it ignores the "samples_*" - * fields in the decorr_pass structure and gets the history data directly - * from the buffer. It does, however, return the appropriate history samples - * to the decorr_pass structure before returning. - * - * This is written to work on a ARM7TDMI processor. This version uses the - * 64-bit multiply-accumulate instruction and so can be used with all - * WavPack files. However, for optimum performance with 16-bit WavPack - * files, there is a faster version that only uses the 32-bit MLA - * instruction. - */ - - .text - .align - .global decorr_stereo_pass_cont_arml - -/* - * on entry: - * - * r0 = struct decorr_pass *dpp - * r1 = long *buffer - * r2 = long sample_count - */ - -decorr_stereo_pass_cont_arml: - - stmfd sp!, {r4 - r8, r10, r11, lr} - mov r5, r0 @ r5 = dpp - mov r11, #512 @ r11 = 512 for rounding - ldrsh r6, [r0, #2] @ r6 = dpp->delta - ldrsh r4, [r0, #4] @ r4 = dpp->weight_A - ldrsh r0, [r0, #6] @ r0 = dpp->weight_B - cmp r2, #0 @ exit if no samples to process - beq common_exit - - mov r0, r0, asl #18 @ for 64-bit math we use weights << 18 - mov r4, r4, asl #18 - mov r6, r6, asl #18 - add r7, r1, r2, asl #3 @ r7 = buffer ending position - ldrsh r2, [r5, #0] @ r2 = dpp->term - cmp r2, #0 - blt minus_term - - ldr lr, [r1, #-16] @ load 2 sample history from buffer - ldr r10, [r1, #-12] @ for terms 2, 17, and 18 - ldr r8, [r1, #-8] - ldr r3, [r1, #-4] - - cmp r2, #18 - beq term_18_loop - mov lr, lr, asl #4 - mov r10, r10, asl #4 - cmp r2, #2 - beq term_2_loop - cmp r2, #17 - beq term_17_loop - b term_default_loop - -minus_term: - mov r10, #(1024 << 18) @ r10 = -1024 << 18 for weight clipping - rsb r10, r10, #0 @ (only used for negative terms) - cmn r2, #1 - beq term_minus_1 - cmn r2, #2 - beq term_minus_2 - cmn r2, #3 - beq term_minus_3 - b common_exit - -/* - ****************************************************************************** - * Loop to handle term = 17 condition - * - * r0 = dpp->weight_B r8 = previous left sample - * r1 = bptr r9 = - * r2 = current sample r10 = second previous left sample << 4 - * r3 = previous right sample r11 = lo accumulator (for rounding) - * r4 = dpp->weight_A ip = current decorrelation value - * r5 = dpp sp = - * r6 = dpp->delta lr = second previous right sample << 4 - * r7 = eptr pc = - ******************************************************************************* - */ - -term_17_loop: - rsbs ip, lr, r8, asl #5 @ decorr value = (2 * prev) - 2nd prev - mov lr, r8, asl #4 @ previous becomes 2nd previous - ldr r2, [r1], #4 @ get sample & update pointer - mov r11, #0x80000000 - mov r8, r2 - smlalne r11, r8, r4, ip - strne r8, [r1, #-4] @ if change possible, store sample back - cmpne r2, #0 - beq .L325 - teq ip, r2 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - -.L325: rsbs ip, r10, r3, asl #5 @ do same thing for right channel - mov r10, r3, asl #4 - ldr r2, [r1], #4 - mov r11, #0x80000000 - mov r3, r2 - smlalne r11, r3, r0, ip - strne r3, [r1, #-4] - cmpne r2, #0 - beq .L329 - teq ip, r2 - submi r0, r0, r6 - addpl r0, r0, r6 - -.L329: cmp r7, r1 @ loop back if more samples to do - bhi term_17_loop - mov lr, lr, asr #4 - mov r10, r10, asr #4 - b store_1718 @ common exit for terms 17 & 18 - -/* - ****************************************************************************** - * Loop to handle term = 18 condition - * - * r0 = dpp->weight_B r8 = previous left sample - * r1 = bptr r9 = - * r2 = current sample r10 = second previous left sample - * r3 = previous right sample r11 = lo accumulator (for rounding) - * r4 = dpp->weight_A ip = decorrelation value - * r5 = dpp sp = - * r6 = dpp->delta lr = second previous right sample - * r7 = eptr pc = - ******************************************************************************* - */ - -term_18_loop: - rsb ip, lr, r8 @ decorr value = - mov lr, r8 @ ((3 * prev) - 2nd prev) >> 1 - add ip, lr, ip, asr #1 - movs ip, ip, asl #4 - ldr r2, [r1], #4 @ get sample & update pointer - mov r11, #0x80000000 - mov r8, r2 - smlalne r11, r8, r4, ip - strne r8, [r1, #-4] @ if change possible, store sample back - cmpne r2, #0 - beq .L337 - teq ip, r2 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - -.L337: rsb ip, r10, r3 @ do same thing for right channel - mov r10, r3 - add ip, r10, ip, asr #1 - movs ip, ip, asl #4 - ldr r2, [r1], #4 - mov r11, #0x80000000 - mov r3, r2 - smlalne r11, r3, r0, ip - strne r3, [r1, #-4] - cmpne r2, #0 - beq .L341 - teq ip, r2 - submi r0, r0, r6 - addpl r0, r0, r6 - -.L341: cmp r7, r1 @ loop back if more samples to do - bhi term_18_loop - -/* common exit for terms 17 & 18 */ - -store_1718: - str r3, [r5, #40] @ store sample history into struct - str r8, [r5, #8] - str r10, [r5, #44] - str lr, [r5, #12] - b common_exit @ and return - -/* - ****************************************************************************** - * Loop to handle term = 2 condition - * (note that this case can be handled by the default term handler (1-8), but - * this special case is faster because it doesn't have to read memory twice) - * - * r0 = dpp->weight_B r8 = previous left sample - * r1 = bptr r9 = - * r2 = current sample r10 = second previous left sample << 4 - * r3 = previous right sample r11 = lo accumulator (for rounding) - * r4 = dpp->weight_A ip = decorrelation value - * r5 = dpp sp = - * r6 = dpp->delta lr = second previous right sample << 4 - * r7 = eptr pc = - ******************************************************************************* - */ - -term_2_loop: - movs ip, lr @ get decorrelation value & test - ldr r2, [r1], #4 @ get sample & update pointer - mov lr, r8, asl #4 @ previous becomes 2nd previous - mov r11, #0x80000000 - mov r8, r2 - smlalne r11, r8, r4, ip - strne r8, [r1, #-4] @ if change possible, store sample back - cmpne r2, #0 - beq .L225 - teq ip, r2 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - -.L225: movs ip, r10 @ do same thing for right channel - ldr r2, [r1], #4 - mov r10, r3, asl #4 - mov r11, #0x80000000 - mov r3, r2 - smlalne r11, r3, r0, ip - strne r3, [r1, #-4] - cmpne r2, #0 - beq .L229 - teq ip, r2 - submi r0, r0, r6 - addpl r0, r0, r6 - -.L229: cmp r7, r1 @ loop back if more samples to do - bhi term_2_loop - - b default_term_exit @ this exit updates all dpp->samples - -/* - ****************************************************************************** - * Loop to handle default term condition - * - * r0 = dpp->weight_B r8 = result accumulator - * r1 = bptr r9 = - * r2 = dpp->term r10 = - * r3 = decorrelation value r11 = lo accumulator (for rounding) - * r4 = dpp->weight_A ip = current sample - * r5 = dpp sp = - * r6 = dpp->delta lr = - * r7 = eptr pc = - ******************************************************************************* - */ - -term_default_loop: - ldr r3, [r1, -r2, asl #3] @ get decorrelation value based on term - ldr ip, [r1], #4 @ get original sample and bump ptr - movs r3, r3, asl #4 - mov r11, #0x80000000 - mov r8, ip - smlalne r11, r8, r4, r3 - strne r8, [r1, #-4] @ if possibly changed, store updated sample - cmpne ip, #0 - beq .L350 - teq ip, r3 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - -.L350: ldr r3, [r1, -r2, asl #3] @ do the same thing for right channel - ldr ip, [r1], #4 - movs r3, r3, asl #4 - mov r11, #0x80000000 - mov r8, ip - smlalne r11, r8, r0, r3 - strne r8, [r1, #-4] - cmpne ip, #0 - beq .L354 - teq ip, r3 - submi r0, r0, r6 - addpl r0, r0, r6 - -.L354: cmp r7, r1 @ loop back if more samples to do - bhi term_default_loop - -/* - * This exit is used by terms 1-8 to store the previous 8 samples into the decorr - * structure (even if they are not all used for the given term) - */ - -default_term_exit: - ldrsh r3, [r5, #0] - sub ip, r3, #1 - mov lr, #7 - -.L358: and r3, ip, #7 - add r3, r5, r3, asl #2 - ldr r2, [r1, #-4] - str r2, [r3, #40] - ldr r2, [r1, #-8]! - str r2, [r3, #8] - sub ip, ip, #1 - sub lr, lr, #1 - cmn lr, #1 - bne .L358 - b common_exit - -/* - ****************************************************************************** - * Loop to handle term = -1 condition - * - * r0 = dpp->weight_B r8 = - * r1 = bptr r9 = - * r2 = intermediate result r10 = -1024 (for clipping) - * r3 = previous right sample r11 = lo accumulator (for rounding) - * r4 = dpp->weight_A ip = current sample - * r5 = dpp sp = - * r6 = dpp->delta lr = updated left sample - * r7 = eptr pc = - ******************************************************************************* - */ - -term_minus_1: - ldr r3, [r1, #-4] - -term_minus_1_loop: - ldr ip, [r1], #8 @ for left channel the decorrelation value - movs r3, r3, asl #4 @ is the previous right sample (in r3) - mov r11, #0x80000000 - mov lr, ip - smlalne r11, lr, r4, r3 - strne lr, [r1, #-8] - cmpne ip, #0 - beq .L361 - teq ip, r3 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - cmp r4, #(1024 << 18) - movgt r4, #(1024 << 18) - cmp r4, r10 - movlt r4, r10 - -.L361: ldr r2, [r1, #-4] @ for right channel the decorrelation value - movs lr, lr, asl #4 - mov r11, #0x80000000 - mov r3, r2 - smlalne r11, r3, r0, lr - strne r3, [r1, #-4] - cmpne r2, #0 - beq .L369 - teq r2, lr - submi r0, r0, r6 - addpl r0, r0, r6 - cmp r0, #(1024 << 18) @ then clip weight to +/-1024 - movgt r0, #(1024 << 18) - cmp r0, r10 - movlt r0, r10 - -.L369: cmp r7, r1 @ loop back if more samples to do - bhi term_minus_1_loop - - str r3, [r5, #8] @ else store right sample and exit - b common_exit - -/* - ****************************************************************************** - * Loop to handle term = -2 condition - * (note that the channels are processed in the reverse order here) - * - * r0 = dpp->weight_B r8 = - * r1 = bptr r9 = - * r2 = intermediate result r10 = -1024 (for clipping) - * r3 = previous left sample r11 = lo accumulator (for rounding) - * r4 = dpp->weight_A ip = current sample - * r5 = dpp sp = - * r6 = dpp->delta lr = updated right sample - * r7 = eptr pc = - ******************************************************************************* - */ - -term_minus_2: - ldr r3, [r1, #-8] - -term_minus_2_loop: - ldr ip, [r1, #4] @ for right channel the decorrelation value - movs r3, r3, asl #4 @ is the previous left sample (in r3) - mov r11, #0x80000000 - mov lr, ip - smlalne r11, lr, r0, r3 - strne lr, [r1, #4] - cmpne ip, #0 - beq .L380 - teq ip, r3 @ update weight based on signs - submi r0, r0, r6 - addpl r0, r0, r6 - cmp r0, #(1024 << 18) @ then clip weight to +/-1024 - movgt r0, #(1024 << 18) - cmp r0, r10 - movlt r0, r10 - -.L380: ldr r2, [r1], #8 @ for left channel the decorrelation value - movs lr, lr, asl #4 - mov r11, #0x80000000 - mov r3, r2 - smlalne r11, r3, r4, lr - strne r3, [r1, #-8] - cmpne r2, #0 - beq .L388 - teq r2, lr - submi r4, r4, r6 - addpl r4, r4, r6 - cmp r4, #(1024 << 18) - movgt r4, #(1024 << 18) - cmp r4, r10 - movlt r4, r10 - -.L388: cmp r7, r1 @ loop back if more samples to do - bhi term_minus_2_loop - - str r3, [r5, #40] @ else store left channel and exit - b common_exit - -/* - ****************************************************************************** - * Loop to handle term = -3 condition - * - * r0 = dpp->weight_B r8 = previous left sample - * r1 = bptr r9 = - * r2 = current left sample r10 = -1024 (for clipping) - * r3 = previous right sample r11 = lo accumulator (for rounding) - * r4 = dpp->weight_A ip = intermediate result - * r5 = dpp sp = - * r6 = dpp->delta lr = - * r7 = eptr pc = - ******************************************************************************* - */ - -term_minus_3: - ldr r3, [r1, #-4] @ load previous samples - ldr r8, [r1, #-8] - -term_minus_3_loop: - ldr ip, [r1], #4 - movs r3, r3, asl #4 - mov r11, #0x80000000 - mov r2, ip - smlalne r11, r2, r4, r3 - strne r2, [r1, #-4] - cmpne ip, #0 - beq .L399 - teq ip, r3 @ update weight based on signs - submi r4, r4, r6 - addpl r4, r4, r6 - cmp r4, #(1024 << 18) @ then clip weight to +/-1024 - movgt r4, #(1024 << 18) - cmp r4, r10 - movlt r4, r10 - -.L399: movs ip, r8, asl #4 @ ip = previous left we use now - mov r8, r2 @ r8 = current left we use next time - ldr r2, [r1], #4 - mov r11, #0x80000000 - mov r3, r2 - smlalne r11, r3, r0, ip - strne r3, [r1, #-4] - cmpne r2, #0 - beq .L407 - teq ip, r2 - submi r0, r0, r6 - addpl r0, r0, r6 - cmp r0, #(1024 << 18) - movgt r0, #(1024 << 18) - cmp r0, r10 - movlt r0, r10 - -.L407: cmp r7, r1 @ loop back if more samples to do - bhi term_minus_3_loop - - str r3, [r5, #8] @ else store previous samples & exit - str r8, [r5, #40] - -/* - * Before finally exiting we must store weights back for next time - */ - -common_exit: - mov r0, r0, asr #18 @ restore weights to real magnitude - mov r4, r4, asr #18 - strh r4, [r5, #4] - strh r0, [r5, #6] - ldmfd sp!, {r4 - r8, r10, r11, pc} - |