mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 21:00:41 +08:00
avutil/cpu: add aarch64 CPU feature flag for PMULL and EOR3
This commit is contained in:
committed by
Martin Storsjö
parent
fb088f224b
commit
b19bd0de6c
14
configure
vendored
14
configure
vendored
@@ -482,6 +482,8 @@ Optimization options (experts only):
|
||||
--disable-arm-crc disable ARM/AArch64 CRC optimizations
|
||||
--disable-dotprod disable DOTPROD optimizations
|
||||
--disable-i8mm disable I8MM optimizations
|
||||
--disable-pmull disable PMULL optimizations
|
||||
--disable-eor3 disable EOR3 optimizations
|
||||
--disable-sve disable SVE optimizations
|
||||
--disable-sve2 disable SVE2 optimizations
|
||||
--disable-sme disable SME optimizations
|
||||
@@ -2299,6 +2301,8 @@ ARCH_EXT_LIST_ARM="
|
||||
arm_crc
|
||||
dotprod
|
||||
i8mm
|
||||
pmull
|
||||
eor3
|
||||
neon
|
||||
vfp
|
||||
vfpv3
|
||||
@@ -2575,6 +2579,8 @@ TOOLCHAIN_FEATURES="
|
||||
as_archext_crc_directive
|
||||
as_archext_dotprod_directive
|
||||
as_archext_i8mm_directive
|
||||
as_archext_sha3_directive
|
||||
as_archext_aes_directive
|
||||
as_archext_sve_directive
|
||||
as_archext_sve2_directive
|
||||
as_archext_sme_directive
|
||||
@@ -2918,6 +2924,8 @@ setend_deps="arm"
|
||||
arm_crc_deps="aarch64"
|
||||
dotprod_deps="aarch64 neon"
|
||||
i8mm_deps="aarch64 neon"
|
||||
pmull_deps="aarch64 neon"
|
||||
eor3_deps="aarch64 neon"
|
||||
sve_deps="aarch64 neon"
|
||||
sve2_deps="aarch64 neon sve"
|
||||
sme_deps="aarch64 neon sve sve2"
|
||||
@@ -6561,8 +6569,10 @@ if enabled aarch64; then
|
||||
# internal assembler in clang 3.3 does not support this instruction
|
||||
enabled neon && check_insn neon 'ext v0.8B, v0.8B, v1.8B, #1'
|
||||
|
||||
archext_list="arm_crc dotprod i8mm sve sve2 sme sme_i16i64 sme2"
|
||||
archext_list="arm_crc dotprod i8mm pmull eor3 sve sve2 sme sme_i16i64 sme2"
|
||||
enabled arm_crc && check_archext_name_insn arm_crc crc 'crc32x w0, w0, x0'
|
||||
enabled pmull && check_archext_name_insn pmull aes 'pmull v0.1q, v0.1d, v0.1d'
|
||||
enabled eor3 && check_archext_name_insn eor3 sha3 'eor3 v0.16b, v1.16b, v2.16b, v3.16b'
|
||||
enabled dotprod && check_archext_insn dotprod 'udot v0.4s, v0.16b, v0.16b'
|
||||
enabled i8mm && check_archext_insn i8mm 'usdot v0.4s, v0.16b, v0.16b'
|
||||
enabled sve && check_archext_insn sve 'whilelt p0.s, x0, x1'
|
||||
@@ -8400,6 +8410,8 @@ if enabled aarch64; then
|
||||
echo "NEON enabled ${neon-no}"
|
||||
echo "DOTPROD enabled ${dotprod-no}"
|
||||
echo "I8MM enabled ${i8mm-no}"
|
||||
echo "PMULL enabled ${pmull-no}"
|
||||
echo "EOR3 enabled ${eor3-no}"
|
||||
echo "SVE enabled ${sve-no}"
|
||||
echo "SVE2 enabled ${sve2-no}"
|
||||
echo "SME enabled ${sme-no}"
|
||||
|
||||
@@ -64,6 +64,22 @@
|
||||
#define DISABLE_I8MM
|
||||
#endif
|
||||
|
||||
/*
 * ENABLE_PMULL / DISABLE_PMULL
 *
 * When HAVE_AS_ARCHEXT_AES_DIRECTIVE is nonzero, these expand to the
 * assembler directives ".arch_extension aes" and ".arch_extension noaes"
 * respectively. Otherwise both macros are defined empty, so using them
 * emits nothing.
 *
 * Note: the PMULL macros are keyed on the "aes" extension name, not on a
 * name called "pmull".
 */
#if HAVE_AS_ARCHEXT_AES_DIRECTIVE
|
||||
#define ENABLE_PMULL .arch_extension aes
|
||||
#define DISABLE_PMULL .arch_extension noaes
|
||||
#else
|
||||
/* Fallback: empty definitions when the directive check is false. */
#define ENABLE_PMULL
|
||||
#define DISABLE_PMULL
|
||||
#endif
|
||||
|
||||
/*
 * ENABLE_EOR3 / DISABLE_EOR3
 *
 * When HAVE_AS_ARCHEXT_SHA3_DIRECTIVE is nonzero, these expand to the
 * assembler directives ".arch_extension sha3" and ".arch_extension nosha3"
 * respectively. Otherwise both macros are defined empty, so using them
 * emits nothing.
 *
 * Note: the EOR3 macros are keyed on the "sha3" extension name, not on a
 * name called "eor3".
 */
#if HAVE_AS_ARCHEXT_SHA3_DIRECTIVE
|
||||
#define ENABLE_EOR3 .arch_extension sha3
|
||||
#define DISABLE_EOR3 .arch_extension nosha3
|
||||
#else
|
||||
/* Fallback: empty definitions when the directive check is false. */
#define ENABLE_EOR3
|
||||
#define DISABLE_EOR3
|
||||
#endif
|
||||
|
||||
#if HAVE_AS_ARCHEXT_SVE_DIRECTIVE
|
||||
#define ENABLE_SVE .arch_extension sve
|
||||
#define DISABLE_SVE .arch_extension nosve
|
||||
@@ -105,6 +121,8 @@
|
||||
#endif
|
||||
|
||||
DISABLE_ARM_CRC
|
||||
DISABLE_PMULL
|
||||
DISABLE_EOR3
|
||||
DISABLE_DOTPROD
|
||||
DISABLE_I8MM
|
||||
DISABLE_SVE
|
||||
|
||||
@@ -24,7 +24,9 @@
|
||||
#include <stdint.h>
|
||||
#include <sys/auxv.h>
|
||||
|
||||
/*
 * Single-bit masks for CPU capability words.
 *
 * In detect_flags() the PMULL, SHA3, CRC32 and ASIMDDP masks are ANDed with
 * the value of ff_getauxval(AT_HWCAP); a set bit adds AV_CPU_FLAG_PMULL,
 * AV_CPU_FLAG_EOR3 and AV_CPU_FLAG_ARM_CRC for the first three.
 * NOTE(review): the tests using HWCAP_AARCH64_SVE and HWCAP2_AARCH64_SVE2
 * are not visible in this excerpt; the HWCAP2_ prefix suggests the latter
 * applies to the AT_HWCAP2 value -- confirm against the full file.
 */
#define HWCAP_AARCH64_PMULL (1 << 4)
|
||||
#define HWCAP_AARCH64_CRC32 (1 << 7)
|
||||
/* Tested to set AV_CPU_FLAG_EOR3 (the flag is named EOR3, the mask SHA3). */
#define HWCAP_AARCH64_SHA3 (1 << 17)
|
||||
#define HWCAP_AARCH64_ASIMDDP (1 << 20)
|
||||
#define HWCAP_AARCH64_SVE (1 << 22)
|
||||
#define HWCAP2_AARCH64_SVE2 (1 << 1)
|
||||
@@ -40,6 +42,10 @@ static int detect_flags(void)
|
||||
unsigned long hwcap = ff_getauxval(AT_HWCAP);
|
||||
unsigned long hwcap2 = ff_getauxval(AT_HWCAP2);
|
||||
|
||||
if (hwcap & HWCAP_AARCH64_PMULL)
|
||||
flags |= AV_CPU_FLAG_PMULL;
|
||||
if (hwcap & HWCAP_AARCH64_SHA3)
|
||||
flags |= AV_CPU_FLAG_EOR3;
|
||||
if (hwcap & HWCAP_AARCH64_CRC32)
|
||||
flags |= AV_CPU_FLAG_ARM_CRC;
|
||||
if (hwcap & HWCAP_AARCH64_ASIMDDP)
|
||||
@@ -85,6 +91,10 @@ static int detect_flags(void)
|
||||
flags |= AV_CPU_FLAG_SME_I16I64;
|
||||
if (have_feature("hw.optional.armv8_crc32"))
|
||||
flags |= AV_CPU_FLAG_ARM_CRC;
|
||||
if (have_feature("hw.optional.arm.FEAT_PMULL"))
|
||||
flags |= AV_CPU_FLAG_PMULL;
|
||||
if (have_feature("hw.optional.armv8_2_sha3"))
|
||||
flags |= AV_CPU_FLAG_EOR3;
|
||||
if (have_feature("hw.optional.arm.FEAT_SME2"))
|
||||
flags |= AV_CPU_FLAG_SME2;
|
||||
|
||||
@@ -115,6 +125,10 @@ static int detect_flags(void)
|
||||
flags |= AV_CPU_FLAG_DOTPROD;
|
||||
if (ID_AA64ISAR0_CRC32(isar0) >= ID_AA64ISAR0_CRC32_BASE)
|
||||
flags |= AV_CPU_FLAG_ARM_CRC;
|
||||
if (ID_AA64ISAR0_AES(isar0) >= ID_AA64ISAR0_AES_PMULL)
|
||||
flags |= AV_CPU_FLAG_PMULL;
|
||||
if (ID_AA64ISAR0_SHA3(isar0) >= ID_AA64ISAR0_SHA3_IMPL)
|
||||
flags |= AV_CPU_FLAG_EOR3;
|
||||
}
|
||||
|
||||
mib[0] = CTL_MACHDEP;
|
||||
@@ -141,6 +155,14 @@ static int detect_flags(void)
|
||||
if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
|
||||
flags |= AV_CPU_FLAG_ARM_CRC;
|
||||
#endif
|
||||
#ifdef PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE
|
||||
if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
|
||||
flags |= AV_CPU_FLAG_PMULL;
|
||||
#endif
|
||||
#ifdef PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE
|
||||
if (IsProcessorFeaturePresent(PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE))
|
||||
flags |= AV_CPU_FLAG_EOR3;
|
||||
#endif
|
||||
#ifdef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
|
||||
if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
|
||||
flags |= AV_CPU_FLAG_DOTPROD;
|
||||
@@ -203,6 +225,12 @@ int ff_get_cpu_flags_aarch64(void)
|
||||
#ifdef __ARM_FEATURE_CRC32
|
||||
flags |= AV_CPU_FLAG_ARM_CRC;
|
||||
#endif
|
||||
#ifdef __ARM_FEATURE_AES
|
||||
flags |= AV_CPU_FLAG_PMULL;
|
||||
#endif
|
||||
#ifdef __ARM_FEATURE_SHA3
|
||||
flags |= AV_CPU_FLAG_EOR3;
|
||||
#endif
|
||||
#ifdef __ARM_FEATURE_SME_I16I64
|
||||
flags |= AV_CPU_FLAG_SME_I16I64;
|
||||
#endif
|
||||
|
||||
@@ -22,10 +22,12 @@
|
||||
#include "libavutil/cpu.h"
|
||||
#include "libavutil/cpu_internal.h"
|
||||
|
||||
#define have_armv8(flags) CPUEXT(flags, ARMV8)
|
||||
#define have_neon(flags) CPUEXT(flags, NEON)
|
||||
#define have_vfp(flags) CPUEXT(flags, VFP)
|
||||
#define have_armv8(flags) CPUEXT(flags, ARMV8)
|
||||
#define have_neon(flags) CPUEXT(flags, NEON)
|
||||
#define have_vfp(flags) CPUEXT(flags, VFP)
|
||||
#define have_arm_crc(flags) CPUEXT(flags, ARM_CRC)
|
||||
#define have_pmull(flags) CPUEXT(flags, PMULL)
|
||||
#define have_eor3(flags) CPUEXT(flags, EOR3)
|
||||
#define have_dotprod(flags) CPUEXT(flags, DOTPROD)
|
||||
#define have_i8mm(flags) CPUEXT(flags, I8MM)
|
||||
#define have_sve(flags) CPUEXT(flags, SVE)
|
||||
|
||||
@@ -190,8 +190,9 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
|
||||
{ "sme", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SME }, .unit = "flags" },
|
||||
{ "crc", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARM_CRC }, .unit = "flags" },
|
||||
{ "sme_i16i64", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SME_I16I64 }, .unit = "flags" },
|
||||
|
||||
{ "sme2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SME2 }, .unit = "flags" },
|
||||
{ "pmull", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_PMULL }, .unit = "flags" },
|
||||
{ "eor3", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_EOR3 }, .unit = "flags" },
|
||||
#elif ARCH_MIPS
|
||||
{ "mmi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI }, .unit = "flags" },
|
||||
{ "msa", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA }, .unit = "flags" },
|
||||
|
||||
@@ -82,6 +82,8 @@
|
||||
#define AV_CPU_FLAG_SME2 (1 <<14)
|
||||
#define AV_CPU_FLAG_SME_I16I64 (1 <<15)
|
||||
#define AV_CPU_FLAG_SETEND (1 <<16)
|
||||
#define AV_CPU_FLAG_PMULL (1 <<17)
|
||||
#define AV_CPU_FLAG_EOR3 (1 <<18)
|
||||
|
||||
#define AV_CPU_FLAG_MMI (1 << 0)
|
||||
#define AV_CPU_FLAG_MSA (1 << 1)
|
||||
|
||||
@@ -52,6 +52,8 @@ static const struct {
|
||||
{ AV_CPU_FLAG_SME_I16I64, "sme_i16i64" },
|
||||
{ AV_CPU_FLAG_ARM_CRC, "crc" },
|
||||
{ AV_CPU_FLAG_SME2, "sme2" },
|
||||
{ AV_CPU_FLAG_PMULL, "pmull" },
|
||||
{ AV_CPU_FLAG_EOR3, "eor3" },
|
||||
#elif ARCH_ARM
|
||||
{ AV_CPU_FLAG_ARMV5TE, "armv5te" },
|
||||
{ AV_CPU_FLAG_ARMV6, "armv6" },
|
||||
|
||||
@@ -383,6 +383,7 @@ static const struct {
|
||||
{ "SME-I16I64", "sme_i16i64", AV_CPU_FLAG_SME_I16I64 },
|
||||
{ "CRC", "crc", AV_CPU_FLAG_ARM_CRC },
|
||||
{ "SME2", "sme2", AV_CPU_FLAG_SME2 },
|
||||
{ "PMULL", "pmull_eor3", AV_CPU_FLAG_PMULL|AV_CPU_FLAG_EOR3 },
|
||||
#elif ARCH_ARM
|
||||
{ "ARMV5TE", "armv5te", AV_CPU_FLAG_ARMV5TE },
|
||||
{ "ARMV6", "armv6", AV_CPU_FLAG_ARMV6 },
|
||||
|
||||
Reference in New Issue
Block a user