avutil/cpu: add aarch64 CPU feature flag for PMULL and EOR3

This commit is contained in:
Shreesh Adiga
2026-02-05 18:22:17 +05:30
committed by Martin Storsjö
parent fb088f224b
commit b19bd0de6c
8 changed files with 71 additions and 5 deletions

14
configure vendored
View File

@@ -482,6 +482,8 @@ Optimization options (experts only):
--disable-arm-crc disable ARM/AArch64 CRC optimizations
--disable-dotprod disable DOTPROD optimizations
--disable-i8mm disable I8MM optimizations
--disable-pmull disable PMULL optimizations
--disable-eor3 disable EOR3 optimizations
--disable-sve disable SVE optimizations
--disable-sve2 disable SVE2 optimizations
--disable-sme disable SME optimizations
@@ -2299,6 +2301,8 @@ ARCH_EXT_LIST_ARM="
arm_crc
dotprod
i8mm
pmull
eor3
neon
vfp
vfpv3
@@ -2575,6 +2579,8 @@ TOOLCHAIN_FEATURES="
as_archext_crc_directive
as_archext_dotprod_directive
as_archext_i8mm_directive
as_archext_sha3_directive
as_archext_aes_directive
as_archext_sve_directive
as_archext_sve2_directive
as_archext_sme_directive
@@ -2918,6 +2924,8 @@ setend_deps="arm"
arm_crc_deps="aarch64"
dotprod_deps="aarch64 neon"
i8mm_deps="aarch64 neon"
pmull_deps="aarch64 neon"
eor3_deps="aarch64 neon"
sve_deps="aarch64 neon"
sve2_deps="aarch64 neon sve"
sme_deps="aarch64 neon sve sve2"
@@ -6561,8 +6569,10 @@ if enabled aarch64; then
# internal assembler in clang 3.3 does not support this instruction
enabled neon && check_insn neon 'ext v0.8B, v0.8B, v1.8B, #1'
archext_list="arm_crc dotprod i8mm sve sve2 sme sme_i16i64 sme2"
archext_list="arm_crc dotprod i8mm pmull eor3 sve sve2 sme sme_i16i64 sme2"
enabled arm_crc && check_archext_name_insn arm_crc crc 'crc32x w0, w0, x0'
enabled pmull && check_archext_name_insn pmull aes 'pmull v0.1q, v0.1d, v0.1d'
enabled eor3 && check_archext_name_insn eor3 sha3 'eor3 v0.16b, v1.16b, v2.16b, v3.16b'
enabled dotprod && check_archext_insn dotprod 'udot v0.4s, v0.16b, v0.16b'
enabled i8mm && check_archext_insn i8mm 'usdot v0.4s, v0.16b, v0.16b'
enabled sve && check_archext_insn sve 'whilelt p0.s, x0, x1'
@@ -8400,6 +8410,8 @@ if enabled aarch64; then
echo "NEON enabled ${neon-no}"
echo "DOTPROD enabled ${dotprod-no}"
echo "I8MM enabled ${i8mm-no}"
echo "PMULL enabled ${pmull-no}"
echo "EOR3 enabled ${eor3-no}"
echo "SVE enabled ${sve-no}"
echo "SVE2 enabled ${sve2-no}"
echo "SME enabled ${sme-no}"

View File

@@ -64,6 +64,22 @@
#define DISABLE_I8MM
#endif
/* PMULL toggles. When HAVE_AS_ARCHEXT_AES_DIRECTIVE is set, these expand to
 * `.arch_extension` directives that switch the assembler's "aes" extension
 * on and off; otherwise they expand to nothing. */
#if HAVE_AS_ARCHEXT_AES_DIRECTIVE
#define ENABLE_PMULL .arch_extension aes
#define DISABLE_PMULL .arch_extension noaes
#else
#define ENABLE_PMULL
#define DISABLE_PMULL
#endif
/* EOR3 toggles. When HAVE_AS_ARCHEXT_SHA3_DIRECTIVE is set, these expand to
 * `.arch_extension` directives that switch the assembler's "sha3" extension
 * on and off; otherwise they expand to nothing. */
#if HAVE_AS_ARCHEXT_SHA3_DIRECTIVE
#define ENABLE_EOR3 .arch_extension sha3
#define DISABLE_EOR3 .arch_extension nosha3
#else
#define ENABLE_EOR3
#define DISABLE_EOR3
#endif
#if HAVE_AS_ARCHEXT_SVE_DIRECTIVE
#define ENABLE_SVE .arch_extension sve
#define DISABLE_SVE .arch_extension nosve
@@ -105,6 +121,8 @@
#endif
DISABLE_ARM_CRC
DISABLE_PMULL
DISABLE_EOR3
DISABLE_DOTPROD
DISABLE_I8MM
DISABLE_SVE

View File

@@ -24,7 +24,9 @@
#include <stdint.h>
#include <sys/auxv.h>
/* Feature bit masks for the auxiliary-vector capability words.
 * The PMULL, SHA3, CRC32 and ASIMDDP masks are tested against the AT_HWCAP
 * word in detect_flags(). */
#define HWCAP_AARCH64_PMULL   (1 <<  4)
#define HWCAP_AARCH64_CRC32   (1 <<  7)
#define HWCAP_AARCH64_SHA3    (1 << 17)
#define HWCAP_AARCH64_ASIMDDP (1 << 20)
#define HWCAP_AARCH64_SVE     (1 << 22)
/* NOTE(review): presumably matched against the AT_HWCAP2 word - confirm at the use site. */
#define HWCAP2_AARCH64_SVE2   (1 <<  1)
@@ -40,6 +42,10 @@ static int detect_flags(void)
unsigned long hwcap = ff_getauxval(AT_HWCAP);
unsigned long hwcap2 = ff_getauxval(AT_HWCAP2);
if (hwcap & HWCAP_AARCH64_PMULL)
flags |= AV_CPU_FLAG_PMULL;
if (hwcap & HWCAP_AARCH64_SHA3)
flags |= AV_CPU_FLAG_EOR3;
if (hwcap & HWCAP_AARCH64_CRC32)
flags |= AV_CPU_FLAG_ARM_CRC;
if (hwcap & HWCAP_AARCH64_ASIMDDP)
@@ -85,6 +91,10 @@ static int detect_flags(void)
flags |= AV_CPU_FLAG_SME_I16I64;
if (have_feature("hw.optional.armv8_crc32"))
flags |= AV_CPU_FLAG_ARM_CRC;
if (have_feature("hw.optional.arm.FEAT_PMULL"))
flags |= AV_CPU_FLAG_PMULL;
if (have_feature("hw.optional.armv8_2_sha3"))
flags |= AV_CPU_FLAG_EOR3;
if (have_feature("hw.optional.arm.FEAT_SME2"))
flags |= AV_CPU_FLAG_SME2;
@@ -115,6 +125,10 @@ static int detect_flags(void)
flags |= AV_CPU_FLAG_DOTPROD;
if (ID_AA64ISAR0_CRC32(isar0) >= ID_AA64ISAR0_CRC32_BASE)
flags |= AV_CPU_FLAG_ARM_CRC;
if (ID_AA64ISAR0_AES(isar0) >= ID_AA64ISAR0_AES_PMULL)
flags |= AV_CPU_FLAG_PMULL;
if (ID_AA64ISAR0_SHA3(isar0) >= ID_AA64ISAR0_SHA3_IMPL)
flags |= AV_CPU_FLAG_EOR3;
}
mib[0] = CTL_MACHDEP;
@@ -141,6 +155,14 @@ static int detect_flags(void)
if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
flags |= AV_CPU_FLAG_ARM_CRC;
#endif
#ifdef PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE
if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
flags |= AV_CPU_FLAG_PMULL;
#endif
#ifdef PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE
if (IsProcessorFeaturePresent(PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE))
flags |= AV_CPU_FLAG_EOR3;
#endif
#ifdef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
flags |= AV_CPU_FLAG_DOTPROD;
@@ -203,6 +225,12 @@ int ff_get_cpu_flags_aarch64(void)
#ifdef __ARM_FEATURE_CRC32
flags |= AV_CPU_FLAG_ARM_CRC;
#endif
#ifdef __ARM_FEATURE_AES
flags |= AV_CPU_FLAG_PMULL;
#endif
#ifdef __ARM_FEATURE_SHA3
flags |= AV_CPU_FLAG_EOR3;
#endif
#ifdef __ARM_FEATURE_SME_I16I64
flags |= AV_CPU_FLAG_SME_I16I64;
#endif

View File

@@ -22,10 +22,12 @@
#include "libavutil/cpu.h"
#include "libavutil/cpu_internal.h"
#define have_armv8(flags) CPUEXT(flags, ARMV8)
#define have_neon(flags) CPUEXT(flags, NEON)
#define have_vfp(flags) CPUEXT(flags, VFP)
#define have_armv8(flags) CPUEXT(flags, ARMV8)
#define have_neon(flags) CPUEXT(flags, NEON)
#define have_vfp(flags) CPUEXT(flags, VFP)
#define have_arm_crc(flags) CPUEXT(flags, ARM_CRC)
#define have_pmull(flags) CPUEXT(flags, PMULL)
#define have_eor3(flags) CPUEXT(flags, EOR3)
#define have_dotprod(flags) CPUEXT(flags, DOTPROD)
#define have_i8mm(flags) CPUEXT(flags, I8MM)
#define have_sve(flags) CPUEXT(flags, SVE)

View File

@@ -190,8 +190,9 @@ int av_parse_cpu_caps(unsigned *flags, const char *s)
{ "sme", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SME }, .unit = "flags" },
{ "crc", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_ARM_CRC }, .unit = "flags" },
{ "sme_i16i64", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SME_I16I64 }, .unit = "flags" },
{ "sme2", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SME2 }, .unit = "flags" },
{ "pmull", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_PMULL }, .unit = "flags" },
{ "eor3", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_EOR3 }, .unit = "flags" },
#elif ARCH_MIPS
{ "mmi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI }, .unit = "flags" },
{ "msa", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA }, .unit = "flags" },

View File

@@ -82,6 +82,8 @@
#define AV_CPU_FLAG_SME2 (1 <<14)
#define AV_CPU_FLAG_SME_I16I64 (1 <<15)
#define AV_CPU_FLAG_SETEND (1 <<16)
#define AV_CPU_FLAG_PMULL (1 <<17) ///< AArch64 PMULL instruction
#define AV_CPU_FLAG_EOR3 (1 <<18) ///< AArch64 EOR3 instruction
#define AV_CPU_FLAG_MMI (1 << 0)
#define AV_CPU_FLAG_MSA (1 << 1)

View File

@@ -52,6 +52,8 @@ static const struct {
{ AV_CPU_FLAG_SME_I16I64, "sme_i16i64" },
{ AV_CPU_FLAG_ARM_CRC, "crc" },
{ AV_CPU_FLAG_SME2, "sme2" },
{ AV_CPU_FLAG_PMULL, "pmull" },
{ AV_CPU_FLAG_EOR3, "eor3" },
#elif ARCH_ARM
{ AV_CPU_FLAG_ARMV5TE, "armv5te" },
{ AV_CPU_FLAG_ARMV6, "armv6" },

View File

@@ -383,6 +383,7 @@ static const struct {
{ "SME-I16I64", "sme_i16i64", AV_CPU_FLAG_SME_I16I64 },
{ "CRC", "crc", AV_CPU_FLAG_ARM_CRC },
{ "SME2", "sme2", AV_CPU_FLAG_SME2 },
{ "PMULL", "pmull_eor3", AV_CPU_FLAG_PMULL|AV_CPU_FLAG_EOR3 },
#elif ARCH_ARM
{ "ARMV5TE", "armv5te", AV_CPU_FLAG_ARMV5TE },
{ "ARMV6", "armv6", AV_CPU_FLAG_ARMV6 },