diff --git a/libavutil/aarch64/crc.S b/libavutil/aarch64/crc.S
index 892c7ff229..6ff109aa71 100644
--- a/libavutil/aarch64/crc.S
+++ b/libavutil/aarch64/crc.S
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2025 Zhao Zhili <quinkblack@foxmail.com>
+ * Copyright (c) 2026 Shreesh Adiga <16567adigashreesh@gmail.com>
  *
  * This file is part of FFmpeg.
  *
@@ -67,3 +68,267 @@ endfunc
 
 DISABLE_ARM_CRC
 #endif
+
+#if HAVE_PMULL && HAVE_EOR3
+ENABLE_PMULL
+ENABLE_EOR3
+
+const reverse_shuffle, align=4
+        .byte  15, 14, 13, 12 // TBL index vector: output byte i takes input byte 15 - i
+        .byte  11, 10,  9,  8
+        .byte   7,  6,  5,  4
+        .byte   3,  2,  1,  0 // loaded into v2 by the non-LE variant only
+endconst
+
+const partial_bytes_shuf_tab, align=4
+        .byte  255, 254, 253, 252 // first 16 entries have the top bit set (negative for CMGE #0)
+        .byte  251, 250, 249, 248
+        .byte  247, 246, 245, 244
+        .byte  243, 242, 241, 240 // and are out of range for a one-register TBL, which yields 0
+        .byte   0,  1,  2,  3 // last 16 entries are the identity permutation
+        .byte   4,  5,  6,  7
+        .byte   8,  9, 10, 11
+        .byte  12, 13, 14, 15 // the code loads 16 bytes at a byte offset derived from a length
+endconst
+
+// performs Vfold = pmull(Vfold, Vconst) xor pmull2(Vfold, Vconst) xor Vdata
+.macro FOLD_SINGLE Vfold, Vconst, Vdata, Vtemp
+        pmull           \Vtemp\().1q, \Vconst\().1d, \Vfold\().1d // product of the low 64-bit lanes, kept in Vtemp
+        pmull2          \Vfold\().1q, \Vconst\().2d, \Vfold\().2d // product of the high 64-bit lanes, overwrites Vfold
+        eor3            \Vfold\().16b, \Vfold\().16b, \Vdata\().16b, \Vtemp\().16b // three-way XOR of both products and Vdata
+.endm
+
+// assume Vfold is v16 and v0 is filled with 0; the result is left in v16
+// uses v17 as temp and rotates Vconst by 8 bytes (swaps its 64-bit halves)
+.macro FOLD_128_TO_64 le, Vconst
+.if ! \le
+        fmov            d17, d16
+        pmull2          v16.1q, v16.2d, \Vconst\().2d
+        ext             v17.16b, v0.16b, v17.16b, #12
+        eor             v16.16b, v17.16b, v16.16b
+        ext             \Vconst\().16b, \Vconst\().16b, \Vconst\().16b, #8 // swap the 64-bit halves of Vconst
+        pmull2          v17.1q, \Vconst\().2d, v16.2d
+        eor             v16.16b, v16.16b, v17.16b
+.else
+        ext             v17.16b, v16.16b, v0.16b, #8
+        pmull           v16.1q, v16.1d, \Vconst\().1d
+        eor             v16.16b, v16.16b, v17.16b
+        ext             v17.16b, v0.16b, v16.16b, #12
+        ext             \Vconst\().16b, \Vconst\().16b, \Vconst\().16b, #8 // swap the 64-bit halves of Vconst
+        pmull           v17.1q, \Vconst\().1d, v17.1d
+        eor             v16.16b, v16.16b, v17.16b
+.endif
+.endm
+
+// assume Vfold is v16 (v0 is not referenced here); the 32-bit result is placed in w0
+// uses v17 as temp; the LE variant also swaps the 64-bit halves of Vconst
+.macro FOLD_64_TO_32 le, Vconst
+.if ! \le
+        pmull           v17.1q, v16.1d, \Vconst\().1d
+        pmull2          v17.1q, v17.2d, \Vconst\().2d
+        eor             v16.16b, v16.16b, v17.16b
+        fmov            w0, s16
+        rev             w0, w0 // byte-swap the 32-bit value before returning it
+.else
+        mov             v16.s[0], wzr // clear the lowest 32-bit lane
+        pmull           v17.1q, v16.1d, \Vconst\().1d
+        eor             v17.16b, v17.16b, v16.16b
+        ext             \Vconst\().16b, \Vconst\().16b, \Vconst\().16b, #8 // swap the 64-bit halves of Vconst
+        pmull           v17.1q, v17.1d, \Vconst\().1d
+        eor             v16.16b, v16.16b, v17.16b
+        mov             w0, v16.s[2] // result is taken from 32-bit lane 2
+.endif
+.endm
+
+#define CTX_OFFSET 4 /* byte offset from x0 (ctx) of the first constant loaded into q3; presumably skips the ctx[0] tag (confirm sizeof(AVCRC)) */
+
+// ff_crc[_le]_neon_pmull(const AVCRC *ctx, uint32_t crc, const uint8_t *buffer, size_t length)
+// x0 - ctx; pre-computed fold constants, see crc_init_aarch64 for details on the structure.
+// x1 - crc initial value
+// x2 - buffer pointer
+// x3 - length of buffer
+//
+// Additional registers used:
+// x10 - buffer + length; points to end of buffer
+// x9, x11-x15 used as temp registers as needed; x1 is reused as a temp after crc is copied to v1
+//
+// v0 - zero register to perform vector shift during reduction using EXT
+// v1 - crc input from x1
+// v2 - holds the reverse shuffle values 15, 14 ... 0 to reverse vector using TBL
+// v3 - holds the constant values used in PMULL/PMULL2 loaded via x0 ctx
+// v4-v7 - used as temp for PMULL/PMULL2 output
+// v16 - holds the primary fold register later reduced to 32 bits
+// Registers v16-v19 are used in parallel 4x fold loop
+// Registers v20-23 are used to load the 4 next data chunks
+// Registers v17-v21 are used as temp elsewhere
+.macro crc_fn_template le
+.if \le
+function ff_crc_le_neon_pmull, export=1
+.else
+function ff_crc_neon_pmull, export=1
+        movrel          x9, reverse_shuffle
+        ldr             q2, [x9] // v2 = byte-reversal TBL indices (non-LE variant only)
+.endif
+        add             x10, x2, x3 // x10 = buffer + length
+        movi            v0.2d, #0
+        mov             v1.16b, v0.16b
+        mov             v1.s[0], w1 // v1 = crc in 32-bit lane 0, all other lanes zero
+        cmp             x3, #64
+        b.lo            8f // less than 64 bytes
+        ld1             {v16.16b-v19.16b}, [x2], #64
+        eor             v16.16b, v16.16b, v1.16b // XOR the initial crc into the first 16 bytes
+.if ! \le
+        tbl             v16.16b, { v16.16b }, v2.16b
+        tbl             v17.16b, { v17.16b }, v2.16b
+        tbl             v18.16b, { v18.16b }, v2.16b
+        tbl             v19.16b, { v19.16b }, v2.16b
+.endif
+        sub             x11, x10, x2 // x11 = bytes left after the first 64
+        cmp             x11, #64
+        b.lo            2f // reduce 4x to 1x
+        ldr             q3, [x0, #(CTX_OFFSET + 0)]
+
+1: // fold 4x loop
+        ld1             {v20.16b-v23.16b}, [x2], #64
+.if ! \le
+        tbl             v20.16b, { v20.16b }, v2.16b
+        tbl             v21.16b, { v21.16b }, v2.16b
+        tbl             v22.16b, { v22.16b }, v2.16b
+        tbl             v23.16b, { v23.16b }, v2.16b
+.endif
+        pmull           v4.1q, v16.1d, v3.1d
+        pmull           v5.1q, v17.1d, v3.1d
+        pmull           v6.1q, v18.1d, v3.1d
+        pmull           v7.1q, v19.1d, v3.1d
+        pmull2          v16.1q, v16.2d, v3.2d
+        pmull2          v17.1q, v17.2d, v3.2d
+        pmull2          v18.1q, v18.2d, v3.2d
+        pmull2          v19.1q, v19.2d, v3.2d
+        eor3            v16.16b, v16.16b, v4.16b, v20.16b
+        eor3            v17.16b, v17.16b, v5.16b, v21.16b
+        eor3            v18.16b, v18.16b, v6.16b, v22.16b
+        eor3            v19.16b, v19.16b, v7.16b, v23.16b
+        sub             x11, x10, x2 // x11 = bytes left
+        cmp             x11, #64
+        b.hs            1b // fold 4x loop
+
+2: // reduce 4x to 1x
+        ldr             q3, [x0, #(CTX_OFFSET + 16)]
+        FOLD_SINGLE     v16, v3, v17, v4
+        FOLD_SINGLE     v16, v3, v18, v4
+        FOLD_SINGLE     v16, v3, v19, v4
+
+3: // fold 1x pre
+        sub             x11, x10, x2 // x11 = bytes left
+        cmp             x11, #16
+        b.lo            5f // partial_block
+
+4: // fold 1x loop
+        ldr             q17, [x2], #16
+.if ! \le
+        tbl             v17.16b, { v17.16b }, v2.16b
+.endif
+        FOLD_SINGLE     v16, v3, v17, v4
+        sub             x11, x10, x2 // x11 = bytes left
+        cmp             x11, #16
+        b.hs            4b // fold 1x loop
+
+5: // partial_block
+        cmp             x10, x2
+        b.eq            6f // reduce 128 to 64
+        ldr             q17, [x10, #-16] // last 16 bytes of the buffer
+        movrel          x1, partial_bytes_shuf_tab // x1 is free here: crc was copied to v1 on entry
+        ldr             q18, [x1, x11] // 16 table bytes starting at offset = bytes left
+.if ! \le
+        tbl             v16.16b, { v16.16b }, v2.16b
+.endif
+        mvn             v19.16b, v18.16b
+        tbl             v20.16b, { v16.16b }, v19.16b
+        cmge            v21.16b, v18.16b, #0 // mask: lanes whose table entry is non-negative
+        bsl             v21.16b, v17.16b, v20.16b // take v17 where the mask is set, v20 elsewhere
+        tbl             v16.16b, { v16.16b }, v18.16b
+.if ! \le
+        tbl             v16.16b, { v16.16b }, v2.16b
+        tbl             v21.16b, { v21.16b }, v2.16b
+.endif
+        FOLD_SINGLE     v16, v3, v21, v4
+
+6: // reduce 128 to 64
+        ldr             q3, [x0, #(CTX_OFFSET + 32)]
+        FOLD_128_TO_64  \le, v3
+
+7: // reduce 64 to 32
+        ldr             q3, [x0, #(CTX_OFFSET + 48)]
+        FOLD_64_TO_32   \le, v3
+        ret
+
+8: // less than 64 bytes
+        cmp             x3, #16
+        b.lo            9f // less than 16 bytes
+        ldr             q16, [x2], #16
+        eor             v16.16b, v16.16b, v1.16b // XOR the initial crc into the first 16 bytes
+.if ! \le
+        tbl             v16.16b, { v16.16b }, v2.16b
+.endif
+        ldr             q3, [x0, #(CTX_OFFSET + 16)]
+        b               3b // fold 1x pre
+
+9: // less than 16 bytes
+        movrel          x15, partial_bytes_shuf_tab
+        add             x15, x15, x3 // x15 = table + length
+        str             q0, [sp, #-16]! // push 16 zero bytes as a scratch slot
+        add             x9, sp, x3 // x9 = scratch slot + length
+        // memcpy 0 to 15 bytes from src to stack and load it to v16 with zero extension
+        tbz             x3, #3, 10f // bit 3 set: length is 8..15
+        ldr             x11, [x2]
+        ldr             x12, [x10, #-8] // last 8 bytes, may overlap the first 8
+        str             x11, [sp]
+        str             x12, [x9, #-8]
+        b               12f
+10:
+        tbz             x3, #2, 11f // bit 2 set: length is 4..7
+        ldr             w11, [x2]
+        ldr             w12, [x10, #-4] // last 4 bytes, may overlap the first 4
+        str             w11, [sp]
+        str             w12, [x9, #-4]
+        b               12f
+11:
+        cbz             x3, 12f // length 0: nothing to copy
+        lsr             x11, x3, #1 // length is 1..3: copy first, middle (length / 2) and last byte
+        ldrb            w12, [x2]
+        ldrb            w14, [x10, #-1]
+        ldrb            w13, [x2, x11]
+        strb            w12, [sp]
+        strb            w13, [sp, x11]
+        strb            w14, [x9, #-1]
+12:
+        ldr             q16, [sp], #16 // load the scratch slot and pop it
+        eor             v16.16b, v16.16b, v1.16b
+        cmp             x3, #5
+        b.lo            13f // less than 5 bytes
+        ldr             q17, [x15]
+        tbl             v16.16b, { v16.16b }, v17.16b
+.if ! \le
+        tbl             v16.16b, { v16.16b }, v2.16b
+.endif
+        b               6b // reduce 128 to 64
+
+13: // less than 5 bytes
+.if ! \le
+        ldr             q17, [x15, #12]
+        tbl             v16.16b, { v16.16b }, v17.16b
+        rev64           v16.16b, v16.16b
+.else
+        ldr             q17, [x15, #8]
+        tbl             v16.16b, { v16.16b }, v17.16b
+.endif
+        b               7b // reduce 64 to 32
+endfunc
+.endm
+
+crc_fn_template 0
+crc_fn_template 1
+
+DISABLE_PMULL
+DISABLE_EOR3
+#endif
diff --git a/libavutil/aarch64/crc.h b/libavutil/aarch64/crc.h
index 08efdd28f3..e31625606a 100644
--- a/libavutil/aarch64/crc.h
+++ b/libavutil/aarch64/crc.h
@@ -29,26 +29,165 @@
 #include "libavutil/avassert.h"
 #include "libavutil/cpu.h"
 #include "libavutil/crc.h"
+#include "libavutil/intreadwrite.h"
 
+#if HAVE_ARM_CRC
 FF_VISIBILITY_PUSH_HIDDEN
 uint32_t ff_crc32_aarch64(const AVCRC *ctx, uint32_t crc, const uint8_t *buffer,
                           size_t length);
 FF_VISIBILITY_POP_HIDDEN
+#endif
+
+#if HAVE_PMULL && HAVE_EOR3
+#include "libavutil/crc_internal.h"
+
+FF_VISIBILITY_PUSH_HIDDEN
+uint32_t ff_crc_neon_pmull(const AVCRC *ctx, uint32_t crc, const uint8_t *buffer,
+                           size_t length);
+uint32_t ff_crc_le_neon_pmull(const AVCRC *ctx, uint32_t crc, const uint8_t *buffer,
+                              size_t length);
+FF_VISIBILITY_POP_HIDDEN
+
+enum {
+    CRC_C    = 0, // not referenced in the code shown here; presumably the generic C path (confirm)
+    PMULL_BE,     // ctx[0] tag that makes ff_crc_aarch64 call ff_crc_neon_pmull
+    PMULL_LE,     // ctx[0] tag that makes ff_crc_aarch64 call ff_crc_le_neon_pmull
+};
+
+static const AVCRC crc_table_pmull[AV_CRC_MAX][17] = { // one row per AVCRCId: [0] is the PMULL_BE/PMULL_LE tag, [1..16] are constants
+    [AV_CRC_8_ATM] = {
+        PMULL_BE,
+        0xbc000000, 0x0, 0x32000000, 0x0,
+        0x94000000, 0x0, 0xc4000000, 0x0,
+        0x62000000, 0x0, 0x79000000, 0x0,
+        0x07156a16, 0x1, 0x07000000, 0x1,
+    },
+    [AV_CRC_8_EBU] = {
+        PMULL_BE,
+        0xf3000000, 0x0, 0xb5000000, 0x0,
+        0x0d000000, 0x0, 0xfc000000, 0x0,
+        0x6a000000, 0x0, 0x65000000, 0x0,
+        0x1c4b8192, 0x1, 0x1d000000, 0x1,
+    },
+    [AV_CRC_16_ANSI] = {
+        PMULL_BE,
+        0x807d0000, 0x0, 0xf9e30000, 0x0,
+        0xff830000, 0x0, 0xf9130000, 0x0,
+        0x807b0000, 0x0, 0x86630000, 0x0,
+        0xfffbffe7, 0x1, 0x80050000, 0x1,
+    },
+    [AV_CRC_16_CCITT] = {
+        PMULL_BE,
+        0x59b00000, 0x0, 0x60190000, 0x0,
+        0x45630000, 0x0, 0xd5f60000, 0x0,
+        0xaa510000, 0x0, 0xeb230000, 0x0,
+        0x11303471, 0x1, 0x10210000, 0x1,
+    },
+    [AV_CRC_24_IEEE] = {
+        PMULL_BE,
+        0x467d2400, 0x0, 0x1f428700, 0x0,
+        0x64e4d700, 0x0, 0x2c8c9d00, 0x0,
+        0xd9fe8c00, 0x0, 0xfd7e0c00, 0x0,
+        0xf845fe24, 0x1, 0x864cfb00, 0x1,
+    },
+    [AV_CRC_32_IEEE] = {
+        PMULL_BE,
+        0xe6228b11, 0x0, 0x8833794c, 0x0,
+        0xe8a45605, 0x0, 0xc5b9cd4c, 0x0,
+        0x490d678d, 0x0, 0xf200aa66, 0x0,
+        0x04d101df, 0x1, 0x04c11db7, 0x1,
+    },
+    [AV_CRC_32_IEEE_LE] = {
+        PMULL_LE,
+        0x54442bd4, 0x1, 0xc6e41596, 0x1,
+        0x751997d0, 0x1, 0xccaa009e, 0x0,
+        0xccaa009e, 0x0, 0x63cd6124, 0x1,
+        0xf7011640, 0x1, 0xdb710641, 0x1,
+    },
+    [AV_CRC_16_ANSI_LE] = {
+        PMULL_LE,
+        0x1b0c2, 0x0, 0x0000bffa, 0x0,
+        0x1d0c2, 0x0, 0x00018cc2, 0x0,
+        0x00018cc2, 0x0, 0x1bc02, 0x0,
+        0xcfffbffe, 0x1, 0x14003, 0x0,
+    },
+}; // NOTE(review): values are presumably precomputed in the layout crc_init_aarch64 writes - confirm
+
+
+static inline void crc_init_aarch64(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size) // fills ctx[0] with a tag and 64 bytes after it; ctx_size is not read here
+{
+    uint64_t poly_;
+    if (le) {
+        // convert the reversed representation to regular form
+        poly = reverse(poly, bits) >> 1;
+    }
+    // convert to 32 degree polynomial
+    poly_ = ((uint64_t)poly) << (32 - bits);
+
+    uint64_t div; // handed by address to every xnmodp() call; presumably receives a quotient (confirm)
+    uint8_t *dst = (uint8_t*)(ctx + 1); // the eight 64-bit values are stored right after ctx[0]
+    if (le) {
+        ctx[0] = PMULL_LE; // tag that ff_crc_aarch64 switches on
+        AV_WN64(dst +  0, xnmodp(4 * 128 + 32, poly_, 32, &div, le));
+        AV_WN64(dst +  8, xnmodp(4 * 128 - 32, poly_, 32, &div, le));
+        uint64_t tmp = xnmodp(128 - 32, poly_, 32, &div, le); // stored twice below, at offsets 24 and 32
+        AV_WN64(dst + 16, xnmodp(128 + 32, poly_, 32, &div, le));
+        AV_WN64(dst + 24, tmp);
+        AV_WN64(dst + 32, tmp);
+        AV_WN64(dst + 40, xnmodp(64, poly_, 32, &div, le));
+        AV_WN64(dst + 48, div); // div as left by the xnmodp(64, ...) call just above
+        AV_WN64(dst + 56, reverse(poly_ | (1ULL << 32), 32));
+    } else {
+        ctx[0] = PMULL_BE; // tag that ff_crc_aarch64 switches on
+        AV_WN64(dst +  0, xnmodp(4 * 128, poly_, 32, &div, le));
+        AV_WN64(dst +  8, xnmodp(4 * 128 + 64, poly_, 32, &div, le));
+        AV_WN64(dst + 16, xnmodp(128, poly_, 32, &div, le));
+        AV_WN64(dst + 24, xnmodp(128 + 64, poly_, 32, &div, le));
+        AV_WN64(dst + 32, xnmodp(64, poly_, 32, &div, le));
+        AV_WN64(dst + 48, div); // written before offset 40: the next xnmodp() call is also handed &div
+        AV_WN64(dst + 40, xnmodp(96, poly_, 32, &div, le));
+        AV_WN64(dst + 56, poly_ | (1ULL << 32));
+    }
+}
+#endif
+
+static inline av_cold int ff_crc_init_aarch64(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size) // returns 1 if ctx was initialised here, 0 otherwise
+{
+#if HAVE_PMULL && HAVE_EOR3
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_pmull(cpu_flags) && have_eor3(cpu_flags)) { // both features must be reported at runtime
+        crc_init_aarch64(ctx, le, bits, poly, ctx_size);
+        return 1; // av_crc_init() returns 0 right after a non-zero result
+    }
+#endif
+    return 0; // av_crc_init() continues past this call when 0 is returned
+}
 
 static inline uint32_t ff_crc_aarch64(const AVCRC *ctx, uint32_t crc,
                                       const uint8_t *buffer, size_t length)
 {
-#if HAVE_ARM_CRC
-    av_assert2(ctx[0] == AV_CRC_32_IEEE_LE + 1);
-    return ff_crc32_aarch64(ctx, crc, buffer, length);
-#else
-    av_unreachable("AARCH64 has only AV_CRC_32_IEEE_LE arch-specific CRC code");
-    return 0;
+    switch (ctx[0]) { // ctx[0] holds the tag that selects the implementation
+#if HAVE_PMULL && HAVE_EOR3
+    case PMULL_BE: return ff_crc_neon_pmull(ctx, crc, buffer, length);
+    case PMULL_LE: return ff_crc_le_neon_pmull(ctx, crc, buffer, length);
 #endif
+#if HAVE_ARM_CRC
+    case (AV_CRC_32_IEEE_LE + 1): return ff_crc32_aarch64(ctx, crc, buffer, length); // same value as the first element of crc32_ieee_le_ctx
+#endif
+    default: av_unreachable("AARCH64 has PMULL_LE, PMULL_BE and AV_CRC_32_IEEE_LE arch-specific CRC code");
+    }
+    return 0;
 }
 
 static inline const AVCRC *ff_crc_get_table_aarch64(AVCRCId crc_id)
 {
+    int cpu_flags = av_get_cpu_flags();
+#if HAVE_PMULL && HAVE_EOR3
+    if (have_pmull(cpu_flags) && have_eor3(cpu_flags)) {
+        return crc_table_pmull[crc_id];
+    }
+#endif
 #if HAVE_ARM_CRC
     static const AVCRC crc32_ieee_le_ctx[] = {
         AV_CRC_32_IEEE_LE + 1
@@ -57,7 +196,6 @@ static inline const AVCRC *ff_crc_get_table_aarch64(AVCRCId crc_id)
     if (crc_id != AV_CRC_32_IEEE_LE)
         return NULL;
 
-    int cpu_flags = av_get_cpu_flags();
     if (have_arm_crc(cpu_flags)) {
         return crc32_ieee_le_ctx;
     }
diff --git a/libavutil/crc.c b/libavutil/crc.c
index c8ccaf8162..335448f8db 100644
--- a/libavutil/crc.c
+++ b/libavutil/crc.c
@@ -357,6 +357,10 @@ int av_crc_init(AVCRC *ctx, int le, int bits, uint32_t poly, int ctx_size)
     int done = ff_crc_init_x86(ctx, le, bits, poly, ctx_size);
     if (done)
         return 0;
+#elif ARCH_AARCH64
+    int done = ff_crc_init_aarch64(ctx, le, bits, poly, ctx_size);
+    if (done)
+        return 0;
 #endif
 
     for (i = 0; i < 256; i++) {