dos2unix it, remove unmatched single quotes in comments (whatever way ProjectBuilder calls "as" chokes on them)

Josh Coalson 2004-07-28 01:32:59 +00:00
parent a7ffb0ce9c
commit 52f8f77844


@@ -1,428 +1,428 @@
; libFLAC - Free Lossless Audio Codec library
; Copyright (C) 2004 Josh Coalson
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
;
; - Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
;
; - Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; - Neither the name of the Xiph.org Foundation nor the names of its
; contributors may be used to endorse or promote products derived from
; this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	.text
	.align 2
	.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16
	.globl _FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8
_FLAC__lpc_restore_signal_asm_ppc_altivec_16:
; r3: residual[]
; r4: data_len
; r5: qlp_coeff[]
; r6: order
; r7: lp_quantization
; r8: data[]
; see src/libFLAC/lpc.c:FLAC__lpc_restore_signal()
; this is a PowerPC/AltiVec assembly version which requires bps<=16 (or actual
; bps<=15 for mid-side coding, since that uses an extra bit)
; it should be fast; the inner loop is unrolled (it takes no more than
; 3*(order/4) instructions, all of which are arithmetic), and all of the
; coefficients and all relevant history stay in registers, so the outer loop
; has only one load from memory (the residual)
; I have not yet run this through simg4, so there may be some avoidable stalls,
; and there may be a somewhat more clever way to do the outer loop
; the branch mechanism may prevent dynamic loading; I still need to examine
; this issue, and there may be a more elegant method
	stmw r31,-4(r1)
	addi r9,r1,-28
	li r31,0xf
	andc r9,r9,r31 ; for quadword-aligned stack data
	slwi r6,r6,2 ; adjust for word size
	slwi r4,r4,2
	add r4,r4,r8 ; r4 = data+data_len
	mfspr r0,256 ; cache old vrsave
	addis r31,0,hi16(0xfffffc00)
	ori r31,r31,lo16(0xfffffc00)
	mtspr 256,r31 ; declare VRs in vrsave
	cmplw cr0,r8,r4 ; i<data_len
	bc 4,0,L1400
; load coefficients into v0-v7 and initial history into v8-v15
	li r31,0xf
	and r31,r8,r31 ; r31: data%4
	li r11,16
	subf r31,r31,r11 ; r31: 4-(data%4)
	slwi r31,r31,3 ; convert to bits for vsro
	li r10,-4
	stw r31,-4(r9)
	lvewx v0,r10,r9
	vspltisb v18,-1
	vsro v18,v18,v0 ; v18: mask vector
	li r31,0x8
	lvsl v0,0,r31
	vsldoi v0,v0,v0,12
	li r31,0xc
	lvsl v1,0,r31
	vspltisb v2,0
	vspltisb v3,-1
	vmrglw v2,v2,v3
	vsel v0,v1,v0,v2 ; v0: reversal permutation vector
	add r10,r5,r6
	lvsl v17,0,r5 ; v17: coefficient alignment permutation vector
	vperm v17,v17,v17,v0 ; v17: reversal coefficient alignment permutation vector
	mr r11,r8
	lvsl v16,0,r11 ; v16: history alignment permutation vector
	lvx v0,0,r5
	addi r5,r5,16
	lvx v1,0,r5
	vperm v0,v0,v1,v17
	lvx v8,0,r11
	addi r11,r11,-16
	lvx v9,0,r11
	vperm v8,v9,v8,v16
	cmplw cr0,r5,r10
	bc 12,0,L1101
	vand v0,v0,v18
	addis r31,0,hi16(L1307)
	ori r31,r31,lo16(L1307)
	b L1199
L1101:
	addi r5,r5,16
	lvx v2,0,r5
	vperm v1,v1,v2,v17
	addi r11,r11,-16
	lvx v10,0,r11
	vperm v9,v10,v9,v16
	cmplw cr0,r5,r10
	bc 12,0,L1102
	vand v1,v1,v18
	addis r31,0,hi16(L1306)
	ori r31,r31,lo16(L1306)
	b L1199
L1102:
	addi r5,r5,16
	lvx v3,0,r5
	vperm v2,v2,v3,v17
	addi r11,r11,-16
	lvx v11,0,r11
	vperm v10,v11,v10,v16
	cmplw cr0,r5,r10
	bc 12,0,L1103
	vand v2,v2,v18
	addis r31,0,hi16(L1305)
	ori r31,r31,lo16(L1305)
	b L1199
L1103:
	addi r5,r5,16
	lvx v4,0,r5
	vperm v3,v3,v4,v17
	addi r11,r11,-16
	lvx v12,0,r11
	vperm v11,v12,v11,v16
	cmplw cr0,r5,r10
	bc 12,0,L1104
	vand v3,v3,v18
	addis r31,0,hi16(L1304)
	ori r31,r31,lo16(L1304)
	b L1199
L1104:
	addi r5,r5,16
	lvx v5,0,r5
	vperm v4,v4,v5,v17
	addi r11,r11,-16
	lvx v13,0,r11
	vperm v12,v13,v12,v16
	cmplw cr0,r5,r10
	bc 12,0,L1105
	vand v4,v4,v18
	addis r31,0,hi16(L1303)
	ori r31,r31,lo16(L1303)
	b L1199
L1105:
	addi r5,r5,16
	lvx v6,0,r5
	vperm v5,v5,v6,v17
	addi r11,r11,-16
	lvx v14,0,r11
	vperm v13,v14,v13,v16
	cmplw cr0,r5,r10
	bc 12,0,L1106
	vand v5,v5,v18
	addis r31,0,hi16(L1302)
	ori r31,r31,lo16(L1302)
	b L1199
L1106:
	addi r5,r5,16
	lvx v7,0,r5
	vperm v6,v6,v7,v17
	addi r11,r11,-16
	lvx v15,0,r11
	vperm v14,v15,v14,v16
	cmplw cr0,r5,r10
	bc 12,0,L1107
	vand v6,v6,v18
	addis r31,0,hi16(L1301)
	ori r31,r31,lo16(L1301)
	b L1199
L1107:
	addi r5,r5,16
	lvx v19,0,r5
	vperm v7,v7,v19,v17
	addi r11,r11,-16
	lvx v19,0,r11
	vperm v15,v19,v15,v16
	vand v7,v7,v18
	addis r31,0,hi16(L1300)
	ori r31,r31,lo16(L1300)
L1199:
	mtctr r31
; set up invariant vectors
	vspltish v16,0 ; v16: zero vector
	li r10,-12
	lvsr v17,r10,r8 ; v17: result shift vector
	lvsl v18,r10,r3 ; v18: residual shift back vector
	li r10,-4
	stw r7,-4(r9)
	lvewx v19,r10,r9 ; v19: lp_quantization vector
L1200:
	vmulosh v20,v0,v8 ; v20: sum vector
	bcctr 20,0
L1300:
	vmulosh v21,v7,v15
	vsldoi v15,v15,v14,4 ; increment history
	vaddsws v20,v20,v21
L1301:
	vmulosh v21,v6,v14
	vsldoi v14,v14,v13,4
	vaddsws v20,v20,v21
L1302:
	vmulosh v21,v5,v13
	vsldoi v13,v13,v12,4
	vaddsws v20,v20,v21
L1303:
	vmulosh v21,v4,v12
	vsldoi v12,v12,v11,4
	vaddsws v20,v20,v21
L1304:
	vmulosh v21,v3,v11
	vsldoi v11,v11,v10,4
	vaddsws v20,v20,v21
L1305:
	vmulosh v21,v2,v10
	vsldoi v10,v10,v9,4
	vaddsws v20,v20,v21
L1306:
	vmulosh v21,v1,v9
	vsldoi v9,v9,v8,4
	vaddsws v20,v20,v21
L1307:
	vsumsws v20,v20,v16 ; v20[3]: sum
	vsraw v20,v20,v19 ; v20[3]: sum >> lp_quantization
	lvewx v21,0,r3 ; v21[n]: *residual
	vperm v21,v21,v21,v18 ; v21[3]: *residual
	vaddsws v20,v21,v20 ; v20[3]: *residual + (sum >> lp_quantization)
	vsldoi v18,v18,v18,4 ; increment shift vector
	vperm v21,v20,v20,v17 ; v21[n]: shift for storage
	vsldoi v17,v17,v17,12 ; increment shift vector
	stvewx v21,0,r8
	vsldoi v20,v20,v20,12
	vsldoi v8,v8,v20,4 ; insert value onto history
	addi r3,r3,4
	addi r8,r8,4
	cmplw cr0,r8,r4 ; i<data_len
	bc 12,0,L1200
L1400:
	mtspr 256,r0 ; restore old vrsave
	lmw r31,-4(r1)
	blr
_FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8:
; r3: residual[]
; r4: data_len
; r5: qlp_coeff[]
; r6: order
; r7: lp_quantization
; r8: data[]
; see _FLAC__lpc_restore_signal_asm_ppc_altivec_16() above
; this version assumes order<=8; it uses fewer vector registers, which should
; save time in context switches, and has less code, which may improve
; instruction caching
	stmw r31,-4(r1)
	addi r9,r1,-28
	li r31,0xf
	andc r9,r9,r31 ; for quadword-aligned stack data
	slwi r6,r6,2 ; adjust for word size
	slwi r4,r4,2
	add r4,r4,r8 ; r4 = data+data_len
	mfspr r0,256 ; cache old vrsave
	addis r31,0,hi16(0xffc00000)
	ori r31,r31,lo16(0xffc00000)
	mtspr 256,r31 ; declare VRs in vrsave
	cmplw cr0,r8,r4 ; i<data_len
	bc 4,0,L2400
; load coefficients into v0-v1 and initial history into v2-v3
	li r31,0xf
	and r31,r8,r31 ; r31: data%4
	li r11,16
	subf r31,r31,r11 ; r31: 4-(data%4)
	slwi r31,r31,3 ; convert to bits for vsro
	li r10,-4
	stw r31,-4(r9)
	lvewx v0,r10,r9
	vspltisb v6,-1
	vsro v6,v6,v0 ; v6: mask vector
	li r31,0x8
	lvsl v0,0,r31
	vsldoi v0,v0,v0,12
	li r31,0xc
	lvsl v1,0,r31
	vspltisb v2,0
	vspltisb v3,-1
	vmrglw v2,v2,v3
	vsel v0,v1,v0,v2 ; v0: reversal permutation vector
	add r10,r5,r6
	lvsl v5,0,r5 ; v5: coefficient alignment permutation vector
	vperm v5,v5,v5,v0 ; v5: reversal coefficient alignment permutation vector
	mr r11,r8
	lvsl v4,0,r11 ; v4: history alignment permutation vector
	lvx v0,0,r5
	addi r5,r5,16
	lvx v1,0,r5
	vperm v0,v0,v1,v5
	lvx v2,0,r11
	addi r11,r11,-16
	lvx v3,0,r11
	vperm v2,v3,v2,v4
	cmplw cr0,r5,r10
	bc 12,0,L2101
	vand v0,v0,v6
	addis r31,0,hi16(L2301)
	ori r31,r31,lo16(L2301)
	b L2199
L2101:
	addi r5,r5,16
	lvx v7,0,r5
	vperm v1,v1,v7,v5
	addi r11,r11,-16
	lvx v7,0,r11
	vperm v3,v7,v3,v4
	vand v1,v1,v6
	addis r31,0,hi16(L2300)
	ori r31,r31,lo16(L2300)
L2199:
	mtctr r31
; set up invariant vectors
	vspltish v4,0 ; v4: zero vector
	li r10,-12
	lvsr v5,r10,r8 ; v5: result shift vector
	lvsl v6,r10,r3 ; v6: residual shift back vector
	li r10,-4
	stw r7,-4(r9)
	lvewx v7,r10,r9 ; v7: lp_quantization vector
L2200:
	vmulosh v8,v0,v2 ; v8: sum vector
	bcctr 20,0
L2300:
	vmulosh v9,v1,v3
	vsldoi v3,v3,v2,4
	vaddsws v8,v8,v9
L2301:
	vsumsws v8,v8,v4 ; v8[3]: sum
	vsraw v8,v8,v7 ; v8[3]: sum >> lp_quantization
	lvewx v9,0,r3 ; v9[n]: *residual
	vperm v9,v9,v9,v6 ; v9[3]: *residual
	vaddsws v8,v9,v8 ; v8[3]: *residual + (sum >> lp_quantization)
	vsldoi v6,v6,v6,4 ; increment shift vector
	vperm v9,v8,v8,v5 ; v9[n]: shift for storage
	vsldoi v5,v5,v5,12 ; increment shift vector
	stvewx v9,0,r8
	vsldoi v8,v8,v8,12
	vsldoi v2,v2,v8,4 ; insert value onto history
	addi r3,r3,4
	addi r8,r8,4
	cmplw cr0,r8,r4 ; i<data_len
	bc 12,0,L2200
L2400:
	mtspr 256,r0 ; restore old vrsave
	lmw r31,-4(r1)
	blr
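
For readers comparing the assembly against the scalar routine its comments reference (src/libFLAC/lpc.c:FLAC__lpc_restore_signal()), the computation is the standard LPC reconstruction: each output sample is the residual plus the quantized-coefficient dot product over the previous `order` samples, shifted down by lp_quantization. Below is a minimal C sketch reconstructed from the register comments above; the function name and loop variables are illustrative, not the actual lpc.c source. Note the AltiVec version uses saturating adds (vaddsws/vsumsws), while plain C addition matches only while no overflow occurs, which the bps<=16 precondition is meant to ensure.

#include <stdint.h>

/* Sketch of the scalar equivalent. data[] must point just past `order`
 * warm-up samples, so data[-1] .. data[-order] are valid history
 * (hence the signed loop indices). */
static void lpc_restore_signal_sketch(const int32_t residual[], unsigned data_len,
                                      const int32_t qlp_coeff[], unsigned order,
                                      int lp_quantization, int32_t data[])
{
	for (int i = 0; i < (int)data_len; i++) {
		int32_t sum = 0;
		/* dot product of the quantized coefficients with the most
		 * recent `order` output samples, newest first */
		for (int j = 0; j < (int)order; j++)
			sum += qlp_coeff[j] * data[i - j - 1];
		/* relies on arithmetic right shift of signed values,
		 * matching the assembly's vsraw */
		data[i] = residual[i] + (sum >> lp_quantization);
	}
}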