12 ani în urmă · f6fed079c9
--- a/package/ffmpeg/Makefile
+++ b/package/ffmpeg/Makefile
@@ -5,7 +5,7 @@ include ${TOPDIR}/rules.mk
 
				 
			
 
				 PKG_NAME:=		ffmpeg
			
 
				 PKG_VERSION:=		2.0.2
			
 
				-PKG_RELEASE:=		1
			
 
				+PKG_RELEASE:=		2
			
 
				 PKG_MD5SUM:=		6c5cfed204d8a108325d1fc439ab734a
			
 
				 PKG_DESCR:=		record, convert and stream audio & video
			
 
				 PKG_SECTION:=		libs
			
--- a/package/ffmpeg/patches/patch-libavcodec_arm_Makefile
+++ b/package/ffmpeg/patches/patch-libavcodec_arm_Makefile
@@ -0,0 +1,12 @@
 
				+--- ffmpeg-2.0.1.orig/libavcodec/arm/Makefile	2013-08-11 01:23:24.000000000 +0200
			
 
				++++ ffmpeg-2.0.1/libavcodec/arm/Makefile	2013-10-14 17:47:19.000000000 +0200
			
 
				+@@ -52,7 +52,8 @@ ARMV6-OBJS-$(CONFIG_VP8_DECODER)       +
			
 
				+                                           arm/vp8dsp_init_armv6.o       \
			
 
				+                                           arm/vp8dsp_armv6.o
			
 
				+ 
			
 
				+-VFP-OBJS-$(HAVE_ARMV6)                 += arm/fmtconvert_vfp.o
			
 
				++VFP-OBJS-$(HAVE_ARMV6)                 += arm/fmtconvert_vfp.o \
			
 
				++					  arm/synth_filter_vfp.o	
			
 
				+ 
			
 
				+ NEON-OBJS                              += arm/fmtconvert_neon.o
			
 
				+ 
			
--- a/package/ffmpeg/patches/patch-libavcodec_arm_fft_init_arm_c
+++ b/package/ffmpeg/patches/patch-libavcodec_arm_fft_init_arm_c
@@ -0,0 +1,25 @@
 
				+--- ffmpeg-2.0.2.orig/libavcodec/arm/fft_init_arm.c	2013-10-08 19:52:31.000000000 +0200
			
 
				++++ ffmpeg-2.0.2/libavcodec/arm/fft_init_arm.c	2013-10-16 12:59:04.000000000 +0200
			
 
				+@@ -32,6 +32,12 @@ void ff_mdct_calc_neon(FFTContext *s, FF
			
 
				+ 
			
 
				+ void ff_rdft_calc_neon(struct RDFTContext *s, FFTSample *z);
			
 
				+ 
			
 
				++void ff_synth_filter_float_vfp(FFTContext *imdct,
			
 
				++                               float *synth_buf_ptr, int *synth_buf_offset,
			
 
				++                               float synth_buf2[32], const float window[512],
			
 
				++                               float out[32], const float in[32],
			
 
				++                               float scale);
			
 
				++
			
 
				+ void ff_synth_filter_float_neon(FFTContext *imdct,
			
 
				+                                 float *synth_buf_ptr, int *synth_buf_offset,
			
 
				+                                 float synth_buf2[32], const float window[512],
			
 
				+@@ -71,6 +77,9 @@ av_cold void ff_synth_filter_init_arm(Sy
			
 
				+ {
			
 
				+     int cpu_flags = av_get_cpu_flags();
			
 
				+ 
			
 
				++    if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) /* routine runs in VFP short-vector mode, deprecated from VFPv3 on */
			
 
				++        s->synth_filter_float = ff_synth_filter_float_vfp;
			
 
				++
			
 
				+     if (have_neon(cpu_flags))
			
 
				+         s->synth_filter_float = ff_synth_filter_float_neon;
			
 
				+ }
			
--- a/package/ffmpeg/patches/patch-libavcodec_arm_fft_init_arm_c.orig
+++ b/package/ffmpeg/patches/patch-libavcodec_arm_fft_init_arm_c.orig
@@ -0,0 +1,22 @@
 
				+--- ffmpeg-2.0.1.orig/libavcodec/arm/fft_init_arm.c	2013-08-11 01:23:24.000000000 +0200
			
 
				++++ ffmpeg-2.0.1/libavcodec/arm/fft_init_arm.c	2013-10-14 17:45:32.000000000 +0200
			
 
				+@@ -32,6 +32,12 @@ void ff_mdct_calc_neon(FFTContext *s, FF
			
 
				+ 
			
 
				+ void ff_rdft_calc_neon(struct RDFTContext *s, FFTSample *z);
			
 
				+ 
			
 
				++void ff_synth_filter_float_vfp(FFTContext *imdct,
			
 
				++                               float *synth_buf_ptr, int *synth_buf_offset,
			
 
				++                               float synth_buf2[32], const float window[512],
			
 
				++                               float out[32], const float in[32],
			
 
				++                               float scale);
			
 
				++
			
 
				+ void ff_synth_filter_float_neon(FFTContext *imdct,
			
 
				+                                 float *synth_buf_ptr, int *synth_buf_offset,
			
 
				+                                 float synth_buf2[32], const float window[512],
			
 
				+@@ -73,5 +79,6 @@ av_cold void ff_synth_filter_init_arm(Sy
			
 
				+ 
			
 
				+     if (have_neon(cpu_flags))
			
 
				+         s->synth_filter_float = ff_synth_filter_float_neon;
			
 
				++    s->synth_filter_float = ff_synth_filter_float_vfp;
			
 
				+ }
			
 
				+ #endif
			
--- a/package/ffmpeg/src/libavcodec/arm/synth_filter_vfp.S
+++ b/package/ffmpeg/src/libavcodec/arm/synth_filter_vfp.S
@@ -0,0 +1,206 @@
 
				+/*
			
 
				+ * Copyright (c) 2013 RISC OS Open Ltd
			
 
				+ *
			
 
				+ * This file is part of FFmpeg.
			
 
				+ *
			
 
				+ * FFmpeg is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU Lesser General Public
			
 
				+ * License as published by the Free Software Foundation; either
			
 
				+ * version 2.1 of the License, or (at your option) any later version.
			
 
				+ *
			
 
				+ * FFmpeg is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
			
 
				+ * Lesser General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU Lesser General Public
			
 
				+ * License along with FFmpeg; if not, write to the Free Software
			
 
				+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
			
 
				+ *
			
 
				+ * Author: Ben Avison <bavison@riscosopen.org>
			
 
				+ */
			
 
				+
			
 
				+#include "libavutil/arm/asm.S"
			
 
				+
			
 
				+IMDCT         .req    r0             @ entry: imdct argument, left in r0 for the ff_imdct_half_c call
			
 
				+ORIG_P_SB     .req    r1             @ entry: synth_buf_ptr argument
			
 
				+P_SB_OFF      .req    r2             @ entry: synth_buf_offset pointer (read, then updated)
			
 
				+I             .req    r0             @ outer loop counter (r0 reused after the imdct call)
			
 
				+P_SB2_UP      .req    r1             @ ascending pointer into synth_buf2
			
 
				+OLDFPSCR      .req    r2             @ FPSCR value on entry to the vector section, restored before return
			
 
				+P_SB2_DN      .req    r3             @ descending pointer into synth_buf2
			
 
				+P_WIN_DN      .req    r4             @ window pointer, stepped down by 4 floats per outer iteration
			
 
				+P_OUT_DN      .req    r5             @ descending pointer into out
			
 
				+P_SB          .req    r6             @ synth_buf + offset, later biased by IMM_OFF_SKEW
			
 
				+J_WRAP        .req    r7             @ offset with low 6 bits cleared; compared with J to detect wrap
			
 
				+P_WIN_UP      .req    r12            @ window pointer, stepped up by 4 floats per outer iteration
			
 
				+P_OUT_UP      .req    r14            @ ascending pointer into out (lr reused as a data pointer)
			
 
				+
			
 
				+SCALE         .req    s0             @ scale factor applied to the output
			
 
				+SBUF_DAT_REV0 .req    s4             @ s4-s7: current synth_buf quad in reversed element order
			
 
				+SBUF_DAT_REV1 .req    s5
			
 
				+SBUF_DAT_REV2 .req    s6
			
 
				+SBUF_DAT_REV3 .req    s7
			
 
				+VA0           .req    s8             @ s8-s11: accumulator A (first pass)
			
 
				+VA3           .req    s11
			
 
				+VB0           .req    s12            @ s12-s15: accumulator B (first pass)
			
 
				+VB3           .req    s15
			
 
				+VC0           .req    s8             @ s8-s11 again: accumulator C (second pass, same registers as A)
			
 
				+VC3           .req    s11
			
 
				+VD0           .req    s12            @ s12-s15 again: accumulator D (second pass, same registers as B)
			
 
				+VD3           .req    s15
			
 
				+SBUF_DAT0     .req    s16            @ s16-s19 (d8,d9): synth_buf quad for even-numbered steps
			
 
				+SBUF_DAT1     .req    s17
			
 
				+SBUF_DAT2     .req    s18
			
 
				+SBUF_DAT3     .req    s19
			
 
				+SBUF_DAT_ALT0 .req    s20            @ s20-s23 (d10,d11): synth_buf quad for odd-numbered steps
			
 
				+SBUF_DAT_ALT1 .req    s21
			
 
				+SBUF_DAT_ALT2 .req    s22
			
 
				+SBUF_DAT_ALT3 .req    s23
			
 
				+WIN_DN_DAT0   .req    s24            @ first of s24-s27 (d12,d13): window data read via P_WIN_DN
			
 
				+WIN_UP_DAT0   .req    s28            @ first of s28-s31 (d14,d15): window data read via P_WIN_UP
			
 
				+
			
 
				+
			
 
				+.macro inner_loop  half, tail, head       @ one pipelined step; half = ab or cd, non-empty tail/head enables that stage
			
 
				+ .if (OFFSET & (64*4)) == 0                @ even numbered call
			
 
				+        SBUF_DAT_THIS0 .req SBUF_DAT0      @ even steps load synth_buf data into s16-s19
			
 
				+        SBUF_DAT_THIS1 .req SBUF_DAT1
			
 
				+        SBUF_DAT_THIS2 .req SBUF_DAT2
			
 
				+        SBUF_DAT_THIS3 .req SBUF_DAT3
			
 
				+  .ifnc "\head",""
			
 
				+        vldr    d8, [P_SB, #OFFSET]        @ d8 = SBUF_DAT
			
 
				+        vldr    d9, [P_SB, #OFFSET+8]
			
 
				+  .endif
			
 
				+ .else
			
 
				+        SBUF_DAT_THIS0 .req SBUF_DAT_ALT0  @ odd steps load synth_buf data into s20-s23
			
 
				+        SBUF_DAT_THIS1 .req SBUF_DAT_ALT1
			
 
				+        SBUF_DAT_THIS2 .req SBUF_DAT_ALT2
			
 
				+        SBUF_DAT_THIS3 .req SBUF_DAT_ALT3
			
 
				+  .ifnc "\head",""
			
 
				+        vldr    d10, [P_SB, #OFFSET]       @ d10 = SBUF_DAT_ALT
			
 
				+        vldr    d11, [P_SB, #OFFSET+8]
			
 
				+  .endif
			
 
				+ .endif
			
 
				+ .ifnc "\tail",""                          @ tail stage: uses SBUF_DAT_REV and WIN_DN_DAT left by the previous step's head
			
 
				+  .ifc "\half","ab"
			
 
				+        vmls.f  VA0, SBUF_DAT_REV0, WIN_DN_DAT0  @ all operands treated as vectors
			
 
				+  .else
			
 
				+        vmla.f  VD0, SBUF_DAT_REV0, WIN_DN_DAT0  @ all operands treated as vectors
			
 
				+  .endif
			
 
				+ .endif
			
 
				+ .ifnc "\head",""                          @ head stage: load window data, reverse the quad, accumulate the WIN_UP product
			
 
				+        vldr    d14, [P_WIN_UP, #OFFSET]   @ d14 = WIN_UP_DAT
			
 
				+        vldr    d15, [P_WIN_UP, #OFFSET+8]
			
 
				+        vldr    d12, [P_WIN_DN, #OFFSET]   @ d12 = WIN_DN_DAT
			
 
				+        vldr    d13, [P_WIN_DN, #OFFSET+8]
			
 
				+        vmov    SBUF_DAT_REV3, SBUF_DAT_THIS0  @ copy the quad into s4-s7 in reversed element order
			
 
				+        vmov    SBUF_DAT_REV2, SBUF_DAT_THIS1
			
 
				+        vmov    SBUF_DAT_REV1, SBUF_DAT_THIS2
			
 
				+        vmov    SBUF_DAT_REV0, SBUF_DAT_THIS3
			
 
				+  .ifc "\half","ab"
			
 
				+        vmla.f  VB0, SBUF_DAT_THIS0, WIN_UP_DAT0  @ first pass: B += synth_buf quad * WIN_UP quad
			
 
				+  .else
			
 
				+        vmla.f  VC0, SBUF_DAT_THIS0, WIN_UP_DAT0  @ second pass: C += synth_buf quad * WIN_UP quad
			
 
				+  .endif
			
 
				+        teq     J_WRAP, #J                 @ is this the step at which the circular buffer wraps?
			
 
				+        bne     2f             @ strongly predictable, so better than cond exec in this case
			
 
				+        sub     P_SB, P_SB, #512*4         @ wrap: step P_SB back by the 512-float buffer length
			
 
				+2:
			
 
				+  .set J, J - 64                           @ assembly-time step counter, 64 floats per step
			
 
				+  .set OFFSET, OFFSET + 64*4               @ next step reads 64 floats (256 bytes) further on
			
 
				+ .endif
			
 
				+        .unreq  SBUF_DAT_THIS0             @ release the per-step aliases so the next expansion can rebind them
			
 
				+        .unreq  SBUF_DAT_THIS1
			
 
				+        .unreq  SBUF_DAT_THIS2
			
 
				+        .unreq  SBUF_DAT_THIS3
			
 
				+.endm
			
 
				+
			
 
				+
			
 
				+/* void ff_synth_filter_float_vfp(FFTContext *imdct,
			
 
				+ *                                float *synth_buf_ptr, int *synth_buf_offset,
			
 
				+ *                                float synth_buf2[32], const float window[512],
			
 
				+ *                                float out[32], const float in[32], float scale)
			
 
				+ */
			
 
				+function ff_synth_filter_float_vfp, export=1
			
 
				+        push    {r3-r7,lr}                @ 6 words; the saved r3 (synth_buf2) is reloaded from the stack later
			
 
				+        vpush   {s16-s31}                 @ 16 words, so stacked arguments now start at sp + (16+6)*4
			
 
				+        ldr     lr, [P_SB_OFF]            @ lr = *synth_buf_offset
			
 
				+        add     a2, ORIG_P_SB, lr, LSL #2 @ calculate synth_buf to pass to imdct_half
			
 
				+        mov     P_SB, a2                  @ and keep a copy for ourselves
			
 
				+        bic     J_WRAP, lr, #63           @ mangled to make testing for wrap easier in inner loop
			
 
				+        sub     lr, lr, #32
			
 
				+        and     lr, lr, #512-32
			
 
				+        str     lr, [P_SB_OFF]            @ rotate offset, modulo buffer size, ready for next call
			
 
				+        ldr     a3, [sp, #(16+6+2)*4]     @ fetch in from stack, to pass to imdct_half
			
 
				+VFP     vmov    s16, SCALE                @ imdct_half is free to corrupt s0, but it contains one of our arguments in hardfp case
			
 
				+        bl      ff_imdct_half_c
			
 
				+VFP     vmov    SCALE, s16                @ recover scale saved in s16 before the call
			
 
				+@ Switch the VFP into short-vector mode and fetch the remaining arguments.
			
 
				+        vmrs    OLDFPSCR, FPSCR           @ remember the current FPSCR so it can be restored on exit
			
 
				+        ldr     lr, =0x03030000           @ RunFast mode, short vectors of length 4, stride 1
			
 
				+        vmsr    FPSCR, lr
			
 
				+        ldr     P_SB2_DN, [sp, #16*4]     @ reload synth_buf2 from the r3 slot saved by the push above
			
 
				+        ldr     P_WIN_DN, [sp, #(16+6+0)*4] @ first stacked argument: window
			
 
				+        ldr     P_OUT_DN, [sp, #(16+6+1)*4] @ second stacked argument: out
			
 
				+NOVFP   vldr    SCALE, [sp, #(16+6+3)*4]  @ fourth stacked argument: scale (NOVFP comes from asm.S; presumably the softfp case)
			
 
				+
			
 
				+#define IMM_OFF_SKEW 956                   /* also valid immediate constant when you add 16*4 */
			
 
				+        add     P_SB, P_SB, #IMM_OFF_SKEW  @ so we can use -ve offsets to use full immediate offset range
			
 
				+        add     P_SB2_UP, P_SB2_DN, #16*4  @ both synth_buf2 pointers start 16 floats in
			
 
				+        add     P_WIN_UP, P_WIN_DN, #16*4+IMM_OFF_SKEW  @ window + 16 floats, with the same skew as P_SB
			
 
				+        add     P_OUT_UP, P_OUT_DN, #16*4  @ both out pointers start 16 floats in
			
 
				+        add     P_SB2_DN, P_SB2_DN, #16*4
			
 
				+        add     P_WIN_DN, P_WIN_DN, #12*4+IMM_OFF_SKEW  @ window + 12 floats, with the same skew as P_SB
			
 
				+        add     P_OUT_DN, P_OUT_DN, #16*4
			
 
				+        mov     I, #4                      @ first pass: 4 iterations, each producing 4 floats upwards and 4 downwards
			
 
				+1:
			
 
				+        vldmia  P_SB2_UP!, {VB0-VB3}       @ seed accumulator B from synth_buf2, walking upwards
			
 
				+        vldmdb  P_SB2_DN!, {VA0-VA3}       @ seed accumulator A from synth_buf2, walking downwards
			
 
				+ .set J, 512 - 64                          @ restart the assembly-time wrap counter for this unrolled sequence
			
 
				+ .set OFFSET, -IMM_OFF_SKEW                @ first load offset cancels the skew added to the pointers
			
 
				+        inner_loop  ab,, head
			
 
				+ .rept 7                                   @ 1 head-only + 7 full + 1 tail-only expansions = 8 pipelined steps
			
 
				+        inner_loop  ab, tail, head
			
 
				+ .endr
			
 
				+        inner_loop  ab, tail
			
 
				+        add     P_WIN_UP, P_WIN_UP, #4*4   @ advance the upward window pointer by 4 floats
			
 
				+        sub     P_WIN_DN, P_WIN_DN, #4*4   @ retreat the downward window pointer by 4 floats
			
 
				+        vmul.f  VB0, VB0, SCALE      @ SCALE treated as scalar
			
 
				+        add     P_SB, P_SB, #(512+4)*4     @ undo the wrap subtraction made in the unrolled steps and advance 4 floats
			
 
				+        subs    I, I, #1
			
 
				+        vmul.f  VA0, VA0, SCALE
			
 
				+        vstmia  P_OUT_UP!, {VB0-VB3}       @ write scaled B to out, walking upwards
			
 
				+        vstmdb  P_OUT_DN!, {VA0-VA3}       @ write scaled A to out, walking downwards
			
 
				+        bne     1b                         @ flags still from the subs above (the VFP ops in between do not alter them)
			
 
				+@ Second pass: rebuild synth_buf2 from zeroed accumulators C and D.
			
 
				+        add     P_SB2_DN, P_SB2_DN, #(16+28-12)*4  @ reposition the synth_buf2 pointers for the second pass
			
 
				+        sub     P_SB2_UP, P_SB2_UP, #(16+16)*4
			
 
				+        add     P_WIN_DN, P_WIN_DN, #(32+16+28-12)*4  @ reposition the downward window pointer for the second pass
			
 
				+        mov     I, #4                      @ second pass: again 4 iterations
			
 
				+1:
			
 
				+        vldr.d  d4, zero             @ d4 = VC0
			
 
				+        vldr.d  d5, zero
			
 
				+        vldr.d  d6, zero             @ d6 = VD0
			
 
				+        vldr.d  d7, zero
			
 
				+ .set J, 512 - 64                          @ restart the assembly-time wrap counter for this unrolled sequence
			
 
				+ .set OFFSET, -IMM_OFF_SKEW                @ first load offset cancels the skew added to the pointers
			
 
				+        inner_loop  cd,, head
			
 
				+ .rept 7                                   @ same 8-step pipeline as the first pass, accumulating into C and D
			
 
				+        inner_loop  cd, tail, head
			
 
				+ .endr
			
 
				+        inner_loop  cd, tail
			
 
				+        add     P_WIN_UP, P_WIN_UP, #4*4   @ advance the upward window pointer by 4 floats
			
 
				+        sub     P_WIN_DN, P_WIN_DN, #4*4   @ retreat the downward window pointer by 4 floats
			
 
				+        add     P_SB, P_SB, #(512+4)*4     @ undo the wrap subtraction made in the unrolled steps and advance 4 floats
			
 
				+        subs    I, I, #1
			
 
				+        vstmia  P_SB2_UP!, {VC0-VC3}       @ store C into synth_buf2, walking upwards (no scaling in this pass)
			
 
				+        vstmdb  P_SB2_DN!, {VD0-VD3}       @ store D into synth_buf2, walking downwards
			
 
				+        bne     1b
			
 
				+
			
 
				+        vmsr    FPSCR, OLDFPSCR            @ leave short-vector mode: restore the FPSCR saved earlier
			
 
				+        vpop    {s16-s31}
			
 
				+        pop     {r3-r7,pc}                 @ restore core registers and return
			
 
				+endfunc
			
 
				+
			
 
				+        .align  3                          @ align the constant below, which is read with 64-bit vldr.d loads
			
 
				+zero:   .word   0, 0                       @ 64 bits of zero, used to clear the second-pass accumulators