@@ -1,206 +0,0 @@
-/*
- * Copyright (c) 2013 RISC OS Open Ltd
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Author: Ben Avison <bavison@riscosopen.org>
- */
-
-#include "libavutil/arm/asm.S"
-
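-@ Core register aliases. r0-r2 carry the incoming arguments and are renamed
-@ (I, P_SB2_UP, OLDFPSCR) once those arguments have been consumed.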
-IMDCT           .req    r0
-ORIG_P_SB       .req    r1
-P_SB_OFF        .req    r2
-I               .req    r0
-P_SB2_UP        .req    r1
-OLDFPSCR        .req    r2
-P_SB2_DN        .req    r3
-P_WIN_DN        .req    r4
-P_OUT_DN        .req    r5
-P_SB            .req    r6
-J_WRAP          .req    r7
-P_WIN_UP        .req    r12
-P_OUT_UP        .req    r14
-
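-@ VFP register aliases, used as length-4 short vectors (only elements 0 and
-@ 3 of each vector are named). VA/VB and VC/VD share s8-s15: A/B are the
-@ first pass's accumulators, C/D the second's, and the passes never overlap.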
-SCALE           .req    s0
-SBUF_DAT_REV0   .req    s4
-SBUF_DAT_REV1   .req    s5
-SBUF_DAT_REV2   .req    s6
-SBUF_DAT_REV3   .req    s7
-VA0             .req    s8
-VA3             .req    s11
-VB0             .req    s12
-VB3             .req    s15
-VC0             .req    s8
-VC3             .req    s11
-VD0             .req    s12
-VD3             .req    s15
-SBUF_DAT0       .req    s16
-SBUF_DAT1       .req    s17
-SBUF_DAT2       .req    s18
-SBUF_DAT3       .req    s19
-SBUF_DAT_ALT0   .req    s20
-SBUF_DAT_ALT1   .req    s21
-SBUF_DAT_ALT2   .req    s22
-SBUF_DAT_ALT3   .req    s23
-WIN_DN_DAT0     .req    s24
-WIN_UP_DAT0     .req    s28
-
-
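-@ inner_loop: one software-pipelined step of the windowing loop.
-@   half - "ab" accumulates VA/VB (first pass), "cd" accumulates VC/VD
-@   tail - if non-empty, retire the previous step's down-window MAC
-@   head - if non-empty, start this step: load data and do the up-window MAC
-@ Successive steps alternate between SBUF_DAT and SBUF_DAT_ALT so a new head
-@ can load sample data while the reversed copy from the last step is still
-@ in use.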
-.macro inner_loop half, tail, head
- .if (OFFSET & (64*4)) == 0             @ even numbered call
- SBUF_DAT_THIS0 .req SBUF_DAT0
- SBUF_DAT_THIS1 .req SBUF_DAT1
- SBUF_DAT_THIS2 .req SBUF_DAT2
- SBUF_DAT_THIS3 .req SBUF_DAT3
-  .ifnc "\head",""
-        vldr    d8, [P_SB, #OFFSET]     @ d8 = SBUF_DAT
-        vldr    d9, [P_SB, #OFFSET+8]
-  .endif
- .else
- SBUF_DAT_THIS0 .req SBUF_DAT_ALT0
- SBUF_DAT_THIS1 .req SBUF_DAT_ALT1
- SBUF_DAT_THIS2 .req SBUF_DAT_ALT2
- SBUF_DAT_THIS3 .req SBUF_DAT_ALT3
-  .ifnc "\head",""
-        vldr    d10, [P_SB, #OFFSET]    @ d10 = SBUF_DAT_ALT
-        vldr    d11, [P_SB, #OFFSET+8]
-  .endif
- .endif
- .ifnc "\tail",""
-  .ifc "\half","ab"
-        vmls.f  VA0, SBUF_DAT_REV0, WIN_DN_DAT0 @ all operands treated as vectors
-  .else
-        vmla.f  VD0, SBUF_DAT_REV0, WIN_DN_DAT0 @ all operands treated as vectors
-  .endif
- .endif
- .ifnc "\head",""
-        vldr    d14, [P_WIN_UP, #OFFSET] @ d14 = WIN_UP_DAT
-        vldr    d15, [P_WIN_UP, #OFFSET+8]
-        vldr    d12, [P_WIN_DN, #OFFSET] @ d12 = WIN_DN_DAT
-        vldr    d13, [P_WIN_DN, #OFFSET+8]
-        vmov    SBUF_DAT_REV3, SBUF_DAT_THIS0
-        vmov    SBUF_DAT_REV2, SBUF_DAT_THIS1
-        vmov    SBUF_DAT_REV1, SBUF_DAT_THIS2
-        vmov    SBUF_DAT_REV0, SBUF_DAT_THIS3
-  .ifc "\half","ab"
-        vmla.f  VB0, SBUF_DAT_THIS0, WIN_UP_DAT0
-  .else
-        vmla.f  VC0, SBUF_DAT_THIS0, WIN_UP_DAT0
-  .endif
-        teq     J_WRAP, #J
-        bne     2f                      @ strongly predictable, so better than cond exec in this case
-        sub     P_SB, P_SB, #512*4
-2:
-  .set J, J - 64
-  .set OFFSET, OFFSET + 64*4
- .endif
- .unreq SBUF_DAT_THIS0
- .unreq SBUF_DAT_THIS1
- .unreq SBUF_DAT_THIS2
- .unreq SBUF_DAT_THIS3
-.endm
-
-
-/* void ff_synth_filter_float_vfp(FFTContext *imdct,
- *                                float *synth_buf_ptr, int *synth_buf_offset,
- *                                float synth_buf2[32], const float window[512],
- *                                float out[32], const float in[32], float scale)
- */
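-/* synth_buf_ptr is a 512-sample ring buffer indexed through *synth_buf_offset,
- * which is decremented by 32 (modulo 512) on each call; every call consumes
- * 32 coefficients from in[], produces 32 samples in out[] and refreshes the
- * synth_buf2[] overlap state. */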
-function ff_synth_filter_float_vfp, export=1
-        push    {r3-r7,lr}
-        vpush   {s16-s31}
-        ldr     lr, [P_SB_OFF]
-        add     a2, ORIG_P_SB, lr, LSL #2 @ calculate synth_buf to pass to imdct_half
-        mov     P_SB, a2                @ and keep a copy for ourselves
-        bic     J_WRAP, lr, #63         @ mangled to make testing for wrap easier in inner loop
-        sub     lr, lr, #32
-        and     lr, lr, #512-32
-        str     lr, [P_SB_OFF]          @ rotate offset, modulo buffer size, ready for next call
-        ldr     a3, [sp, #(16+6+2)*4]   @ fetch in from stack, to pass to imdct_half
-VFP     vmov    s16, SCALE              @ imdct_half is free to corrupt s0, but in the hardfp case s0 holds one of our arguments
-        bl      ff_imdct_half_c
-VFP     vmov    SCALE, s16
-
-        vmrs    OLDFPSCR, FPSCR
-        ldr     lr, =0x03030000         @ RunFast mode, short vectors of length 4, stride 1
-        vmsr    FPSCR, lr
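-@ 0x03030000 sets DN (bit 25) and FZ (bit 24) for RunFast mode, and the LEN
-@ field (bits 18:16) to 3, i.e. short vectors of length 4; STRIDE stays 0,
-@ giving the stride of 1.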
-        ldr     P_SB2_DN, [sp, #16*4]
-        ldr     P_WIN_DN, [sp, #(16+6+0)*4]
-        ldr     P_OUT_DN, [sp, #(16+6+1)*4]
-NOVFP   vldr    SCALE, [sp, #(16+6+3)*4]
-
-#define IMM_OFF_SKEW 956 /* also a valid immediate constant when you add 16*4 */
-        add     P_SB, P_SB, #IMM_OFF_SKEW @ so we can use negative offsets to exploit the full immediate offset range
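-@ vldr can only encode immediate offsets in the range -1020..+1020, in
-@ multiples of 4; skewing the base pointers by 956 keeps every OFFSET used
-@ by the unrolled inner loop within that range.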
-        add     P_SB2_UP, P_SB2_DN, #16*4
-        add     P_WIN_UP, P_WIN_DN, #16*4+IMM_OFF_SKEW
-        add     P_OUT_UP, P_OUT_DN, #16*4
-        add     P_SB2_DN, P_SB2_DN, #16*4
-        add     P_WIN_DN, P_WIN_DN, #12*4+IMM_OFF_SKEW
-        add     P_OUT_DN, P_OUT_DN, #16*4
-        mov     I, #4
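-@ First pass: window synth_buf and accumulate the 32 output samples,
-@ out[0..15] via VA (stored descending), out[16..31] via VB (ascending).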
-1:
-        vldmia  P_SB2_UP!, {VB0-VB3}
-        vldmdb  P_SB2_DN!, {VA0-VA3}
- .set J, 512 - 64
- .set OFFSET, -IMM_OFF_SKEW
- inner_loop ab,, head
- .rept 7
-  inner_loop ab, tail, head
- .endr
- inner_loop ab, tail
-        add     P_WIN_UP, P_WIN_UP, #4*4
-        sub     P_WIN_DN, P_WIN_DN, #4*4
-        vmul.f  VB0, VB0, SCALE         @ SCALE treated as scalar
-        add     P_SB, P_SB, #(512+4)*4
-        subs    I, I, #1
-        vmul.f  VA0, VA0, SCALE
-        vstmia  P_OUT_UP!, {VB0-VB3}
-        vstmdb  P_OUT_DN!, {VA0-VA3}
-        bne     1b
-
-        add     P_SB2_DN, P_SB2_DN, #(16+28-12)*4
-        sub     P_SB2_UP, P_SB2_UP, #(16+16)*4
-        add     P_WIN_DN, P_WIN_DN, #(32+16+28-12)*4
-        mov     I, #4
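-@ Second pass: the same windowing loop, but accumulating from zero and
-@ writing the results back into synth_buf2 rather than out.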
-1:
-        vldr.d  d4, zero                @ d4 = VC0
-        vldr.d  d5, zero
-        vldr.d  d6, zero                @ d6 = VD0
-        vldr.d  d7, zero
- .set J, 512 - 64
- .set OFFSET, -IMM_OFF_SKEW
- inner_loop cd,, head
- .rept 7
-  inner_loop cd, tail, head
- .endr
- inner_loop cd, tail
-        add     P_WIN_UP, P_WIN_UP, #4*4
-        sub     P_WIN_DN, P_WIN_DN, #4*4
-        add     P_SB, P_SB, #(512+4)*4
-        subs    I, I, #1
-        vstmia  P_SB2_UP!, {VC0-VC3}
-        vstmdb  P_SB2_DN!, {VD0-VD3}
-        bne     1b
-
-        vmsr    FPSCR, OLDFPSCR
-        vpop    {s16-s31}
-        pop     {r3-r7,pc}
-endfunc
-
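-@ vmov's floating-point immediate form cannot encode 0.0, so the VC/VD
-@ accumulators are zeroed by loading this doubleword literal instead.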
-        .align  3
-zero:   .word   0, 0