mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 21:00:41 +08:00
avutil/mips: Add msa optimizations for pixelutils
Adapted from the corresponding me_cmp code. Only the width-16 function has been adapted, because the width-8 function appears to actually read 16 bytes per line.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -1 +1,3 @@
|
||||
OBJS += mips/float_dsp_mips.o mips/cpu.o
|
||||
|
||||
MSA-OBJS-$(CONFIG_PIXELUTILS) += mips/pixelutils_msa.o
|
||||
|
||||
41
libavutil/mips/pixelutils.h
Normal file
41
libavutil/mips/pixelutils.h
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef AVUTIL_MIPS_PIXELUTILS_H
|
||||
#define AVUTIL_MIPS_PIXELUTILS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "cpu.h"
|
||||
#include "libavutil/attributes.h"
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/pixelutils.h"
|
||||
|
||||
int ff_pixelutils_sad16_msa(const uint8_t *src1, ptrdiff_t stride1,
|
||||
const uint8_t *src2, ptrdiff_t stride2);
|
||||
|
||||
/**
 * Hook the MIPS MSA SAD routine into a pixelutils SAD function table.
 *
 * The table is left untouched unless the runtime CPU flags report MSA
 * support. When they do, only entry 3 is replaced, with
 * ff_pixelutils_sad16_msa(); every other entry keeps whatever the caller
 * put there.
 *
 * @param sad     function table to patch; must have at least 4 entries
 * @param aligned not consulted by this initializer
 */
static inline av_cold void ff_pixelutils_sad_init_mips(av_pixelutils_sad_fn *sad, int aligned)
{
    const int flags = av_get_cpu_flags();

    /* Nothing to install without MSA. */
    if (!have_msa(flags))
        return;

    sad[3] = ff_pixelutils_sad16_msa;
}
|
||||
#endif
|
||||
48
libavutil/mips/pixelutils_msa.c
Normal file
48
libavutil/mips/pixelutils_msa.c
Normal file
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2015 Parag Salasakar (Parag.Salasakar@imgtec.com)
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "generic_macros_msa.h"
|
||||
#include "pixelutils.h"
|
||||
|
||||
int ff_pixelutils_sad16_msa(const uint8_t *src, ptrdiff_t src_stride,
|
||||
const uint8_t *ref, ptrdiff_t ref_stride)
|
||||
{
|
||||
int32_t ht_cnt = 16/4;
|
||||
v16u8 src0, src1, ref0, ref1;
|
||||
v8u16 sad = { 0 };
|
||||
|
||||
for (; ht_cnt--; ) {
|
||||
LD_UB2(src, src_stride, src0, src1);
|
||||
src += (2 * src_stride);
|
||||
LD_UB2(ref, ref_stride, ref0, ref1);
|
||||
ref += (2 * ref_stride);
|
||||
sad += SAD_UB2_UH(src0, src1, ref0, ref1);
|
||||
|
||||
LD_UB2(src, src_stride, src0, src1);
|
||||
src += (2 * src_stride);
|
||||
LD_UB2(ref, ref_stride, ref0, ref1);
|
||||
ref += (2 * ref_stride);
|
||||
sad += SAD_UB2_UH(src0, src1, ref0, ref1);
|
||||
}
|
||||
return (HADD_UH_U32(sad));
|
||||
}
|
||||
@@ -32,6 +32,8 @@
|
||||
#include "aarch64/pixelutils.h"
|
||||
#elif ARCH_ARM && HAVE_ARMV6
|
||||
#include "arm/pixelutils.h"
|
||||
#elif ARCH_MIPS && HAVE_MSA
|
||||
#include "mips/pixelutils.h"
|
||||
#elif ARCH_RISCV
|
||||
#include "riscv/pixelutils.h"
|
||||
#elif ARCH_X86 && HAVE_X86ASM
|
||||
@@ -98,6 +100,8 @@ av_pixelutils_sad_fn av_pixelutils_get_sad_fn(int w_bits, int h_bits, int aligne
|
||||
ff_pixelutils_sad_init_aarch64(sad, aligned);
|
||||
#elif ARCH_ARM
|
||||
ff_pixelutils_sad_init_arm(sad, aligned);
|
||||
#elif ARCH_MIPS && HAVE_MSA
|
||||
ff_pixelutils_sad_init_mips(sad, aligned);
|
||||
#elif ARCH_RISCV
|
||||
ff_pixelutils_init_riscv(sad, aligned);
|
||||
#elif ARCH_X86 && HAVE_X86ASM
|
||||
|
||||
Reference in New Issue
Block a user