mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 12:50:49 +08:00
avutil/riscv: Add rvv optimizations for pixelutils
Adapted from the corresponding me_cmp code. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -30,6 +30,8 @@
|
||||
|
||||
#if ARCH_AARCH64 && HAVE_NEON
|
||||
#include "aarch64/pixelutils.h"
|
||||
#elif ARCH_RISCV
|
||||
#include "riscv/pixelutils.h"
|
||||
#elif ARCH_X86 && HAVE_X86ASM
|
||||
#include "x86/pixelutils.h"
|
||||
#endif
|
||||
@@ -92,6 +94,8 @@ av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, int aligne
|
||||
|
||||
#if ARCH_AARCH64 && HAVE_NEON
|
||||
ff_pixelutils_sad_init_aarch64(sad, aligned);
|
||||
#elif ARCH_RISCV
|
||||
ff_pixelutils_init_riscv(sad, aligned);
|
||||
#elif ARCH_X86 && HAVE_X86ASM
|
||||
ff_pixelutils_sad_init_x86(sad, aligned);
|
||||
#endif
|
||||
|
||||
@@ -6,3 +6,4 @@ OBJS += riscv/float_dsp_init.o \
|
||||
RVV-OBJS += riscv/float_dsp_rvv.o \
|
||||
riscv/fixed_dsp_rvv.o \
|
||||
riscv/lls_rvv.o
|
||||
RVV-OBJS-$(CONFIG_PIXELUTILS) += riscv/pixelutils_rvv.o
|
||||
|
||||
48
libavutil/riscv/pixelutils.h
Normal file
48
libavutil/riscv/pixelutils.h
Normal file
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_RISCV_PIXELUTILS_H
|
||||
#define AVUTIL_RISCV_PIXELUTILS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "cpu.h"
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/pixelutils.h"
|
||||
|
||||
/**
 * RVV sum of absolute differences over 16 rows of 16 bytes each
 * (implemented in pixelutils_rvv.S).
 *
 * @param src1    first block; advanced by stride1 bytes after every row
 * @param stride1 byte distance between consecutive rows of src1
 * @param src2    second block; advanced by stride2 bytes after every row
 * @param stride2 byte distance between consecutive rows of src2
 * @return the accumulated sum of |src1[x] - src2[x]| over the block
 */
int ff_pixelutils_sad16_rvv(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
/**
 * RVV sum of absolute differences over 8 rows of 8 bytes each
 * (implemented in pixelutils_rvv.S). Parameters and return value have
 * the same meaning as for ff_pixelutils_sad16_rvv().
 */
int ff_pixelutils_sad8_rvv (const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
|
||||
/**
 * Install the RVV SAD routines into the SAD function table.
 *
 * Nothing is changed unless the build has HAVE_RVV set, the CPU reports
 * AV_CPU_FLAG_RVV_I32 and ff_rv_vlen_least(128) succeeds.
 *
 * @param sad     function table to patch; entries 2 and 3 receive the
 *                8-wide and 16-wide routines respectively
 *                (NOTE(review): presumably indexed by log2(block size) - 1;
 *                confirm against libavutil/pixelutils.c)
 * @param aligned not consulted by this implementation
 */
static inline av_cold void ff_pixelutils_init_riscv(av_pixelutils_sad_fn *sad, int aligned)
{
#if HAVE_RVV
    const int cpu_flags = av_get_cpu_flags();

    /* Bail out first on the cheap flag test so that the vector-length
     * query below only runs when RVV is reported as available. */
    if (!(cpu_flags & AV_CPU_FLAG_RVV_I32))
        return;
    if (!ff_rv_vlen_least(128))
        return;

    sad[2] = ff_pixelutils_sad8_rvv;
    sad[3] = ff_pixelutils_sad16_rvv;
#endif
}
|
||||
#endif
|
||||
71
libavutil/riscv/pixelutils_rvv.S
Normal file
71
libavutil/riscv/pixelutils_rvv.S
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "asm.S"
|
||||
|
||||
// Common epilogue for the SAD routines below: returns the 32-bit
// accumulator held in element 0 of v0 as the function result in a0.
.macro pix_abs_ret
|
||||
vsetivli zero, 1, e32, m1, ta, ma // vl = 1, 32-bit elements
|
||||
vmv.x.s a0, v0 // a0 = v0[0]
|
||||
ret
|
||||
.endm
|
||||
|
||||
// int ff_pixelutils_sad16_rvv(const uint8_t *src1, ptrdiff_t stride1,
//                             const uint8_t *src2, ptrdiff_t stride2)
// Sum of absolute differences over 16 rows of 16 bytes.
// Registers (arguments in prototype order under the standard RISC-V
// calling convention): a0 = src1, a1 = stride1, a2 = src2, a3 = stride2.
// a4 counts the remaining rows; v0[0] holds the running 32-bit sum.
// Each row is processed with vl = 16 at e8/m1, which needs VLEN >= 128
// for vl not to be clamped.
func ff_pixelutils_sad16_rvv, zve32x
|
||||
lpad 0 // NOTE(review): macro from asm.S, presumably an indirect-call landing pad -- confirm
|
||||
li a4, 16 // 16 rows to process
|
||||
vsetivli zero, 1, e32, m1, ta, ma
|
||||
vmv.s.x v0, zero // accumulator v0[0] = 0
|
||||
1:
|
||||
vsetivli zero, 16, e8, m1, tu, ma // one row: 16 elements of 8 bits
|
||||
vle8.v v4, (a0) // v4 = row of src1
|
||||
vle8.v v12, (a2) // v12 = row of src2
|
||||
addi a4, a4, -1 // one row fewer to go
|
||||
vwsubu.vv v16, v4, v12 // v16 = src1 - src2, widened to 16 bits
|
||||
add a0, a0, a1 // src1 += stride1
|
||||
vwsubu.vv v20, v12, v4 // v20 = src2 - src1, widened to 16 bits
|
||||
vsetvli zero, zero, e16, m2, tu, ma // same vl, now on the 16-bit differences
|
||||
vmax.vv v16, v16, v20 // signed max of d and -d gives |d|
|
||||
add a2, a2, a3 // src2 += stride2
|
||||
vwredsum.vs v0, v16, v0 // v0[0] += sum of |d|, widened to 32 bits
|
||||
bnez a4, 1b // loop until all rows are done
|
||||
|
||||
pix_abs_ret
|
||||
endfunc
|
||||
|
||||
// int ff_pixelutils_sad8_rvv(const uint8_t *src1, ptrdiff_t stride1,
//                            const uint8_t *src2, ptrdiff_t stride2)
// Sum of absolute differences over 8 rows of 8 bytes.
// Registers (arguments in prototype order under the standard RISC-V
// calling convention): a0 = src1, a1 = stride1, a2 = src2, a3 = stride2.
// a4 counts the remaining rows; v0[0] holds the running 32-bit sum.
// Each row is processed with vl = 8 at e8/mf2, which needs VLEN >= 128
// for vl not to be clamped.
func ff_pixelutils_sad8_rvv, zve32x
|
||||
lpad 0 // NOTE(review): macro from asm.S, presumably an indirect-call landing pad -- confirm
|
||||
li a4, 8 // 8 rows to process
|
||||
vsetivli zero, 1, e32, m1, ta, ma
|
||||
vmv.s.x v0, zero // accumulator v0[0] = 0
|
||||
1:
|
||||
vsetivli zero, 8, e8, mf2, tu, ma // one row: 8 elements of 8 bits
|
||||
vle8.v v4, (a0) // v4 = row of src1
|
||||
vle8.v v12, (a2) // v12 = row of src2
|
||||
addi a4, a4, -1 // one row fewer to go
|
||||
vwsubu.vv v16, v4, v12 // v16 = src1 - src2, widened to 16 bits
|
||||
add a0, a0, a1 // src1 += stride1
|
||||
vwsubu.vv v20, v12, v4 // v20 = src2 - src1, widened to 16 bits
|
||||
vsetvli zero, zero, e16, m1, tu, ma // same vl, now on the 16-bit differences
|
||||
vmax.vv v16, v16, v20 // signed max of d and -d gives |d|
|
||||
add a2, a2, a3 // src2 += stride2
|
||||
vwredsum.vs v0, v16, v0 // v0[0] += sum of |d|, widened to 32 bits
|
||||
bnez a4, 1b // loop until all rows are done
|
||||
|
||||
pix_abs_ret
|
||||
endfunc
|
||||
Reference in New Issue
Block a user