mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 12:50:49 +08:00
avutil/arm: Add armv6 optimizations for pixelutils
Adapted from the corresponding me_cmp code. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
OBJS += arm/cpu.o \
|
||||
arm/float_dsp_init_arm.o \
|
||||
|
||||
ARMV6-OBJS-$(CONFIG_PIXELUTILS) += arm/pixelutils_armv6.o
|
||||
|
||||
VFP-OBJS += arm/float_dsp_init_vfp.o \
|
||||
arm/float_dsp_vfp.o \
|
||||
|
||||
|
||||
46
libavutil/arm/pixelutils.h
Normal file
46
libavutil/arm/pixelutils.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_ARM_PIXELUTILS_H
|
||||
#define AVUTIL_ARM_PIXELUTILS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "cpu.h"
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/pixelutils.h"
|
||||
|
||||
/**
 * ARMv6 assembly kernel (pixelutils_armv6.S): sum of absolute differences
 * between two blocks of 16 rows of 16 bytes.
 *
 * @param src1    first block; read with multi-word loads in the assembly
 * @param stride1 byte distance between consecutive rows of src1
 * @param src2    second block; read one 32-bit word at a time
 * @param stride2 byte distance between consecutive rows of src2
 * @return the accumulated sum of absolute byte differences
 */
int ff_pixelutils_sad16_armv6(const uint8_t *src1,
                              ptrdiff_t stride1,
                              const uint8_t *src2,
                              ptrdiff_t stride2);

/**
 * ARMv6 assembly kernel (pixelutils_armv6.S): sum of absolute differences
 * between two blocks of 8 rows of 8 bytes.
 *
 * Parameters and return value have the same meaning as for
 * ff_pixelutils_sad16_armv6().
 */
int ff_pixelutils_sad8_armv6(const uint8_t *src1,
                             ptrdiff_t stride1,
                             const uint8_t *src2,
                             ptrdiff_t stride2);
|
||||
|
||||
/**
 * Install the ARMv6 SAD kernels into a pixelutils dispatch table.
 *
 * Entries 2 and 3 of the table are replaced by the 8-wide and 16-wide
 * ARMv6 kernels. Nothing is written when the running CPU does not report
 * ARMv6 support or when the caller passes aligned == 0.
 *
 * @param sad     table of SAD function pointers to patch in place
 * @param aligned alignment hint from the caller; the kernels are installed
 *                only for nonzero values
 */
static inline av_cold void ff_pixelutils_sad_init_arm(av_pixelutils_sad_fn *sad, int aligned)
{
    const int flags = av_get_cpu_flags();

    /* Keep whatever is already in the table unless both conditions hold. */
    if (!have_armv6(flags) || !aligned)
        return;

    sad[2] = ff_pixelutils_sad8_armv6;
    sad[3] = ff_pixelutils_sad16_armv6;
}
|
||||
#endif
|
||||
80
libavutil/arm/pixelutils_armv6.S
Normal file
80
libavutil/arm/pixelutils_armv6.S
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "asm.S"
|
||||
|
||||
/*
 * int ff_pixelutils_sad16_armv6(const uint8_t *src1, ptrdiff_t stride1,
 *                               const uint8_t *src2, ptrdiff_t stride2)
 *
 * Sum of absolute differences over 16 rows of 16 bytes.
 * Register use: r0 = src1, r1 = stride1, r2 = src2, r3 = stride2,
 * r10 = rows left, r12 and lr = two independent partial sums.
 * The result is returned in r0.
 *
 * src1 is fetched with ldm and is therefore expected to be word-aligned;
 * src2 is fetched with plain ldr word loads.
 */
function ff_pixelutils_sad16_armv6, export=1
        push            {r4-r10, lr}
        mov             r12, #0                 @ partial sum A
        mov             r10, #16                @ row counter
        mov             lr,  #0                 @ partial sum B
        ldm             r0,  {r4-r7}            @ src1 row, bytes 0-15
        ldr             r8,  [r2]               @ src2 row, bytes 0-3
1:
        ldr             r9,  [r2, #4]           @ src2 row, bytes 4-7
        pld             [r0, r1]                @ prefetch next src1 row
        usada8          r12, r4,  r8,  r12      @ A += SAD(bytes 0-3)
        ldr             r8,  [r2, #8]           @ src2 row, bytes 8-11
        pld             [r2, r3]                @ prefetch next src2 row
        usada8          lr,  r5,  r9,  lr       @ B += SAD(bytes 4-7)
        ldr             r9,  [r2, #12]          @ src2 row, bytes 12-15
        usada8          r12, r6,  r8,  r12      @ A += SAD(bytes 8-11)
        subs            r10, r10, #1            @ Z is set after the last row
        usada8          lr,  r7,  r9,  lr       @ B += SAD(bytes 12-15)
        beq             2f                      @ flags still come from subs
        add             r0,  r0,  r1            @ advance src1 by one row
        ldm             r0,  {r4-r7}
        add             r2,  r2,  r3            @ advance src2 by one row
        ldr             r8,  [r2]
        b               1b
2:
        add             r0,  r12, lr            @ return A + B
        pop             {r4-r10, pc}
endfunc
|
||||
|
||||
/*
 * int ff_pixelutils_sad8_armv6(const uint8_t *src1, ptrdiff_t stride1,
 *                              const uint8_t *src2, ptrdiff_t stride2)
 *
 * Sum of absolute differences over 8 rows of 8 bytes, two rows per loop
 * iteration. Register use: r0 = src1, r1 = stride1, r2 = src2,
 * r3 = stride2, r10 = rows left, r12 and lr = two independent partial sums.
 * The result is returned in r0.
 *
 * ldrd_post and ldr_post are macros from asm.S; NOTE(review): they are
 * presumably a load followed by a post-increment of the base register by
 * the last operand - confirm against asm.S.
 */
function ff_pixelutils_sad8_armv6, export=1
        pld             [r2, r3]
        push            {r4-r10, lr}
        mov             r10, #8                 @ row counter
        mov             r12, #0                 @ partial sum A
        mov             lr,  #0                 @ partial sum B
        ldrd_post       r4,  r5,  r0,  r1       @ src1, even row
1:
        subs            r10, r10, #2            @ Z is set on the last pair
        ldr             r7,  [r2, #4]           @ src2, even row, bytes 4-7
        ldr_post        r6,  r2,  r3            @ src2, even row, bytes 0-3
        ldrd_post       r8,  r9,  r0,  r1       @ src1, odd row
        usada8          r12, r4,  r6,  r12      @ A += SAD(even row, 0-3)
        pld             [r2, r3]
        usada8          lr,  r5,  r7,  lr       @ B += SAD(even row, 4-7)
        ldr             r7,  [r2, #4]           @ src2, odd row, bytes 4-7
        ldr_post        r6,  r2,  r3            @ src2, odd row, bytes 0-3
        beq             2f                      @ flags still come from subs
        ldrd_post       r4,  r5,  r0,  r1       @ src1, next even row
        usada8          r12, r8,  r6,  r12      @ A += SAD(odd row, 0-3)
        pld             [r2, r3]
        usada8          lr,  r9,  r7,  lr       @ B += SAD(odd row, 4-7)
        b               1b
2:
        usada8          r12, r8,  r6,  r12      @ last odd row, bytes 0-3
        usada8          lr,  r9,  r7,  lr       @ last odd row, bytes 4-7
        add             r0,  r12, lr            @ return A + B
        pop             {r4-r10, pc}
endfunc
|
||||
@@ -30,6 +30,8 @@
|
||||
|
||||
#if ARCH_AARCH64 && HAVE_NEON
|
||||
#include "aarch64/pixelutils.h"
|
||||
#elif ARCH_ARM && HAVE_ARMV6
|
||||
#include "arm/pixelutils.h"
|
||||
#elif ARCH_RISCV
|
||||
#include "riscv/pixelutils.h"
|
||||
#elif ARCH_X86 && HAVE_X86ASM
|
||||
@@ -94,6 +96,8 @@ av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, int aligne
|
||||
|
||||
#if ARCH_AARCH64 && HAVE_NEON
    ff_pixelutils_sad_init_aarch64(sad, aligned);
#elif ARCH_ARM && HAVE_ARMV6
    /* Same condition as the arm/pixelutils.h include: without HAVE_ARMV6
     * the init function is not declared, so it must not be called. */
    ff_pixelutils_sad_init_arm(sad, aligned);
#elif ARCH_RISCV
    ff_pixelutils_init_riscv(sad, aligned);
#elif ARCH_X86 && HAVE_X86ASM
|
||||
|
||||
Reference in New Issue
Block a user