From 10928524068fa00b63bc60d8b297c06dbb4a72c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kacper=20Michaj=C5=82ow?= <kasper93@gmail.com>
Date: Thu, 9 Apr 2026 19:40:48 +0200
Subject: [PATCH] swscale/ops: remove type from continuation functions

The glue code doesn't care about types, so long as the functions are
chained correctly. Let's not pretend there is any type safety there, as
the function pointers were cast anyway from unrelated types.
In particular, some f32 and u32 functions are shared.

This fixes errors like so:
src/libswscale/ops_tmpl_int.c:471:1: runtime error: call to function linear_diagoff3_f32 through pointer to incorrect function type 'void (*)(struct SwsOpIter *, const struct SwsOpImpl *, unsigned int *, unsigned int *, unsigned int *, unsigned int *)'
libswscale/ops_tmpl_float.c:208: note: linear_diagoff3_f32 defined here

Fixes: #22332
---
 libswscale/ops_backend.h     |  27 +++++----
 libswscale/ops_tmpl_common.c |  26 ++++----
 libswscale/ops_tmpl_float.c  |  14 ++---
 libswscale/ops_tmpl_int.c    | 111 +++++++++++++++++++----------------
 4 files changed, 92 insertions(+), 86 deletions(-)

diff --git a/libswscale/ops_backend.h b/libswscale/ops_backend.h
index 7aadc9d32e..f97b4303aa 100644
--- a/libswscale/ops_backend.h
+++ b/libswscale/ops_backend.h
@@ -102,17 +102,21 @@ typedef struct SwsOpIter {
                (pixel_t *) iter->out[2], (pixel_t *) iter->out[3], __VA_ARGS__)
 
 /* Helper macros to declare continuation functions */
-#define DECL_IMPL(NAME)                                                         \
-    static void fn(NAME)(SwsOpIter *restrict iter,                              \
-                         const SwsOpImpl *restrict impl,                        \
-                         block_t x, block_t y,                                  \
-                         block_t z, block_t w)
+#define DECL_IMPL(FUNC, NAME, ...)                                              \
+    static void av_flatten fn(NAME)(SwsOpIter *restrict iter,                   \
+                                    const SwsOpImpl *restrict impl,             \
+                                    void *restrict x, void *restrict y,         \
+                                    void *restrict z, void *restrict w)         \
+    {                                                                           \
+        CALL(FUNC, __VA_ARGS__);                                                \
+    }
 
-/* Helper macro to call into the next continuation with a given type */
-#define CONTINUE(TYPE, ...)                                                     \
+/* Helper macro to call into the next continuation */
+#define CONTINUE(X, Y, Z, W)                                                    \
     ((void (*)(SwsOpIter *, const SwsOpImpl *,                                  \
-               TYPE x, TYPE y, TYPE z, TYPE w)) impl->cont)                     \
-        (iter, &impl[1], __VA_ARGS__)
+               void *restrict, void *restrict,                                  \
+               void *restrict, void *restrict)) impl->cont)                     \
+        (iter, &impl[1], (X), (Y), (Z), (W))
 
 /* Helper macros for common op setup code */
 #define DECL_SETUP(NAME, PARAMS, OUT)                                           \
@@ -139,10 +143,7 @@ static inline int ff_setup_memdup(const void *c, size_t size, SwsImplResult *out
     DECL_FUNC(NAME, const bool X, const bool Y, const bool Z, const bool W)
 
 #define WRAP_PATTERN(FUNC, X, Y, Z, W, ...)                                     \
-    DECL_IMPL(FUNC##_##X##Y##Z##W)                                              \
-    {                                                                           \
-        CALL(FUNC, X, Y, Z, W);                                                 \
-    }                                                                           \
+    DECL_IMPL(FUNC, FUNC##_##X##Y##Z##W, X, Y, Z, W)                            \
                                                                                 \
     DECL_ENTRY(FUNC##_##X##Y##Z##W,                                             \
         .unused = { !X, !Y, !Z, !W },                                           \
diff --git a/libswscale/ops_tmpl_common.c b/libswscale/ops_tmpl_common.c
index 3817a437e5..0763a08b1c 100644
--- a/libswscale/ops_tmpl_common.c
+++ b/libswscale/ops_tmpl_common.c
@@ -41,7 +41,7 @@ DECL_PATTERN(convert_uint##N)
             wu[i] = w[i];                                                       \
     }                                                                           \
                                                                                 \
-    CONTINUE(u##N##block_t, xu, yu, zu, wu);                                    \
+    CONTINUE(xu, yu, zu, wu);                                                   \
 }                                                                               \
                                                                                 \
 WRAP_COMMON_PATTERNS(convert_uint##N,                                           \
@@ -75,14 +75,11 @@ DECL_PATTERN(clear)
             w[i] = impl->priv.px[3];
     }
 
-    CONTINUE(block_t, x, y, z, w);
+    CONTINUE(x, y, z, w);
 }
 
 #define WRAP_CLEAR(X, Y, Z, W)                                                  \
-DECL_IMPL(clear##_##X##Y##Z##W)                                                 \
-{                                                                               \
-    CALL(clear, X, Y, Z, W);                                                    \
-}                                                                               \
+DECL_IMPL(clear, clear##_##X##Y##Z##W, X, Y, Z, W)                              \
                                                                                 \
 DECL_ENTRY(clear##_##X##Y##Z##W,                                                \
     .setup = ff_sws_setup_clear,                                                \
@@ -119,7 +116,7 @@ DECL_PATTERN(min)
             w[i] = FFMIN(w[i], impl->priv.px[3]);
     }
 
-    CONTINUE(block_t, x, y, z, w);
+    CONTINUE(x, y, z, w);
 }
 
 DECL_PATTERN(max)
@@ -136,7 +133,7 @@ DECL_PATTERN(max)
             w[i] = FFMAX(w[i], impl->priv.px[3]);
     }
 
-    CONTINUE(block_t, x, y, z, w);
+    CONTINUE(x, y, z, w);
 }
 
 WRAP_COMMON_PATTERNS(min,
@@ -167,7 +164,7 @@ DECL_PATTERN(scale)
             w[i] *= scale;
     }
 
-    CONTINUE(block_t, x, y, z, w);
+    CONTINUE(x, y, z, w);
 }
 
 WRAP_COMMON_PATTERNS(scale,
@@ -239,7 +236,7 @@ DECL_READ(filter_v, const int elems)
     for (int i = 0; i < elems; i++)
         iter->in[i] += sizeof(block_t);
 
-    CONTINUE(f32block_t, xs, ys, zs, ws);
+    CONTINUE(xs, ys, zs, ws);
 }
 
 DECL_SETUP(setup_filter_h, params, out)
@@ -292,11 +289,14 @@ DECL_READ(filter_h, const int elems)
         weights += filter_size;
     }
 
-    CONTINUE(f32block_t, xs, ys, zs, ws);
+    CONTINUE(xs, ys, zs, ws);
 }
 
 #define WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX)                                   \
-DECL_IMPL(FUNC##ELEMS##SUFFIX)                                                  \
+static av_flatten void fn(FUNC##ELEMS##SUFFIX)(SwsOpIter *restrict iter,        \
+                                             const SwsOpImpl *restrict impl,    \
+                                             void *restrict x, void *restrict y,\
+                                             void *restrict z, void *restrict w)\
 {                                                                               \
     CALL_READ(FUNC##SUFFIX, ELEMS);                                             \
 }                                                                               \
@@ -337,7 +337,7 @@ static void fn(process)(const SwsOpExec *exec, const void *priv,
     for (iter->y = y_start; iter->y < y_end; iter->y++) {
         for (int block = bx_start; block < bx_end; block++) {
             iter->x = block * SWS_BLOCK_SIZE;
-            CONTINUE(block_t, (void *) x, (void *) y, (void *) z, (void *) w);
+            CONTINUE(x, y, z, w);
         }
 
         const int y_bump = exec->in_bump_y ? exec->in_bump_y[iter->y] : 0;
diff --git a/libswscale/ops_tmpl_float.c b/libswscale/ops_tmpl_float.c
index 0d00714ff4..687b08b99b 100644
--- a/libswscale/ops_tmpl_float.c
+++ b/libswscale/ops_tmpl_float.c
@@ -98,14 +98,11 @@ DECL_FUNC(dither, const int size_log2)
     DITHER_COMP(z, 2)
     DITHER_COMP(w, 3)
 
-    CONTINUE(block_t, x, y, z, w);
+    CONTINUE(x, y, z, w);
 }
 
 #define WRAP_DITHER(N)                                                          \
-DECL_IMPL(dither##N)                                                            \
-{                                                                               \
-    CALL(dither, N);                                                            \
-}                                                                               \
+DECL_IMPL(dither, dither##N, N)                                                 \
                                                                                 \
 DECL_ENTRY(dither##N,                                                           \
     .op = SWS_OP_DITHER,                                                        \
@@ -185,14 +182,11 @@ DECL_FUNC(linear_mask, const uint32_t mask)
         w[i] += (mask & SWS_MASK(3, 3))  ? c.m[3][3] * ww : ww;
     }
 
-    CONTINUE(block_t, x, y, z, w);
+    CONTINUE(x, y, z, w);
 }
 
 #define WRAP_LINEAR(NAME, MASK)                                                 \
-DECL_IMPL(linear_##NAME)                                                        \
-{                                                                               \
-    CALL(linear_mask, MASK);                                                    \
-}                                                                               \
+DECL_IMPL(linear_mask, linear_##NAME, MASK)                                     \
                                                                                 \
 DECL_ENTRY(linear_##NAME,                                                       \
     .op    = SWS_OP_LINEAR,                                                     \
diff --git a/libswscale/ops_tmpl_int.c b/libswscale/ops_tmpl_int.c
index fea0430799..baa3526029 100644
--- a/libswscale/ops_tmpl_int.c
+++ b/libswscale/ops_tmpl_int.c
@@ -71,7 +71,7 @@ DECL_READ(read_planar, const int elems)
             w[i] = in3[i];
     }
 
-    CONTINUE(block_t, x, y, z, w);
+    CONTINUE(x, y, z, w);
 }
 
 DECL_READ(read_packed, const int elems)
@@ -87,7 +87,7 @@ DECL_READ(read_packed, const int elems)
             w[i] = in0[elems * i + 3];
     }
 
-    CONTINUE(block_t, x, y, z, w);
+    CONTINUE(x, y, z, w);
 }
 
 DECL_WRITE(write_planar, const int elems)
@@ -119,7 +119,10 @@ DECL_WRITE(write_packed, const int elems)
 }
 
 #define WRAP_READ(FUNC, ELEMS, FRAC, PACKED)                                    \
-DECL_IMPL(FUNC##ELEMS)                                                          \
+static av_flatten void fn(FUNC##ELEMS)(SwsOpIter *restrict iter,                \
+                                       const SwsOpImpl *restrict impl,          \
+                                       void *restrict x, void *restrict y,      \
+                                       void *restrict z, void *restrict w)      \
 {                                                                               \
     CALL_READ(FUNC, ELEMS);                                                     \
     for (int i = 0; i < (PACKED ? 1 : ELEMS); i++)                              \
@@ -144,7 +147,10 @@ WRAP_READ(read_packed, 3, 0, true)
 WRAP_READ(read_packed, 4, 0, true)
 
 #define WRAP_WRITE(FUNC, ELEMS, FRAC, PACKED)                                   \
-DECL_IMPL(FUNC##ELEMS)                                                          \
+static av_flatten void fn(FUNC##ELEMS)(SwsOpIter *restrict iter,                \
+                                       const SwsOpImpl *restrict impl,          \
+                                       void *restrict x, void *restrict y,      \
+                                       void *restrict z, void *restrict w)      \
 {                                                                               \
     CALL_WRITE(FUNC, ELEMS);                                                    \
     for (int i = 0; i < (PACKED ? 1 : ELEMS); i++)                              \
@@ -178,7 +184,7 @@ DECL_READ(read_nibbles, const int elems)
         x[i + 1] = val & 0xF; /* low nibble */
     }
 
-    CONTINUE(block_t, x, y, z, w);
+    CONTINUE(x, y, z, w);
 }
 
 DECL_READ(read_bits, const int elems)
@@ -196,7 +202,7 @@ DECL_READ(read_bits, const int elems)
         x[i + 7] = (val >> 0) & 1;
     }
 
-    CONTINUE(block_t, x, y, z, w);
+    CONTINUE(x, y, z, w);
 }
 
 WRAP_READ(read_nibbles, 1, 1, false)
@@ -243,7 +249,7 @@ DECL_PATTERN(swap_bytes)
             w[i] = SWAP_BYTES(w[i]);
     }
 
-    CONTINUE(block_t, x, y, z, w);
+    CONTINUE(x, y, z, w);
 }
 
 WRAP_COMMON_PATTERNS(swap_bytes, .op = SWS_OP_SWAP_BYTES);
@@ -266,7 +272,7 @@ DECL_PATTERN(expand16)
             w16[i] = w[i] << 8 | w[i];
     }
 
-    CONTINUE(u16block_t, x16, y16, z16, w16);
+    CONTINUE(x16, y16, z16, w16);
 }
 
 WRAP_COMMON_PATTERNS(expand16,
@@ -287,7 +293,7 @@ DECL_PATTERN(expand32)
         w32[i] = (uint32_t)w[i] << 24 | w[i] << 16 | w[i] << 8 | w[i];
     }
 
-    CONTINUE(u32block_t, x32, y32, z32, w32);
+    CONTINUE(x32, y32, z32, w32);
 }
 
 WRAP_COMMON_PATTERNS(expand32,
@@ -297,44 +303,48 @@ WRAP_COMMON_PATTERNS(expand32,
 );
 #endif
 
+DECL_FUNC(pack, const int bits0, const int bits1, const int bits2, const int bits3)
+{
+    SWS_LOOP
+    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
+        x[i] = x[i] << (bits1 + bits2 + bits3);
+        if (bits1)
+            x[i] |= y[i] << (bits2 + bits3);
+        if (bits2)
+            x[i] |= z[i] << bits3;
+        if (bits3)
+            x[i] |= w[i];
+    }
+
+    CONTINUE(x, y, z, w);
+}
+
+DECL_FUNC(unpack, const int bits0, const int bits1, const int bits2, const int bits3)
+{
+    SWS_LOOP
+    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
+        const pixel_t val = x[i];
+        x[i] = val >> (bits1 + bits2 + bits3);
+        if (bits1)
+            y[i] = (val >> (bits2 + bits3)) & ((1 << bits1) - 1);
+        if (bits2)
+            z[i] = (val >> bits3) & ((1 << bits2) - 1);
+        if (bits3)
+            w[i] = val & ((1 << bits3) - 1);
+    }
+
+    CONTINUE(x, y, z, w);
+}
+
 #define WRAP_PACK_UNPACK(X, Y, Z, W)                                            \
-inline DECL_IMPL(pack_##X##Y##Z##W)                                             \
-{                                                                               \
-    SWS_LOOP                                                                    \
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {                                  \
-        x[i] = x[i] << (Y+Z+W);                                                 \
-        if (Y)                                                                  \
-            x[i] |= y[i] << (Z+W);                                              \
-        if (Z)                                                                  \
-            x[i] |= z[i] << W;                                                  \
-        if (W)                                                                  \
-            x[i] |= w[i];                                                       \
-    }                                                                           \
-                                                                                \
-    CONTINUE(block_t, x, y, z, w);                                              \
-}                                                                               \
+DECL_IMPL(pack, pack_##X##Y##Z##W, X, Y, Z, W)                                  \
                                                                                 \
 DECL_ENTRY(pack_##X##Y##Z##W,                                                   \
     .op = SWS_OP_PACK,                                                          \
     .pack.pattern = { X, Y, Z, W },                                             \
 );                                                                              \
                                                                                 \
-inline DECL_IMPL(unpack_##X##Y##Z##W)                                           \
-{                                                                               \
-    SWS_LOOP                                                                    \
-    for (int i = 0; i < SWS_BLOCK_SIZE; i++) {                                  \
-        const pixel_t val = x[i];                                               \
-        x[i] = val >> (Y+Z+W);                                                  \
-        if (Y)                                                                  \
-            y[i] = (val >> (Z+W)) & ((1 << Y) - 1);                             \
-        if (Z)                                                                  \
-            z[i] = (val >> W) & ((1 << Z) - 1);                                 \
-        if (W)                                                                  \
-            w[i] = val & ((1 << W) - 1);                                        \
-    }                                                                           \
-                                                                                \
-    CONTINUE(block_t, x, y, z, w);                                              \
-}                                                                               \
+DECL_IMPL(unpack, unpack_##X##Y##Z##W, X, Y, Z, W)                              \
                                                                                 \
 DECL_ENTRY(unpack_##X##Y##Z##W,                                                 \
     .op = SWS_OP_UNPACK,                                                        \
@@ -363,7 +373,7 @@ DECL_PATTERN(lshift)
         w[i] <<= amount;
     }
 
-    CONTINUE(block_t, x, y, z, w);
+    CONTINUE(x, y, z, w);
 }
 
 DECL_PATTERN(rshift)
@@ -378,7 +388,7 @@ DECL_PATTERN(rshift)
         w[i] >>= amount;
     }
 
-    CONTINUE(block_t, x, y, z, w);
+    CONTINUE(x, y, z, w);
 }
 
 WRAP_COMMON_PATTERNS(lshift,
@@ -406,7 +416,7 @@ DECL_PATTERN(convert_float)
         wf[i] = w[i];
     }
 
-    CONTINUE(f32block_t, xf, yf, zf, wf);
+    CONTINUE(xf, yf, zf, wf);
 }
 
 WRAP_COMMON_PATTERNS(convert_float,
@@ -422,9 +432,10 @@ WRAP_COMMON_PATTERNS(convert_float,
 static void                                                                     \
 fn(swizzle_##X##Y##Z##W)(SwsOpIter *restrict iter,                              \
                          const SwsOpImpl *restrict impl,                        \
-                         block_t c0, block_t c1, block_t c2, block_t c3)        \
+                         void *restrict c0, void *restrict c1,                  \
+                         void *restrict c2, void *restrict c3)                  \
 {                                                                               \
-    CONTINUE(block_t, c##X, c##Y, c##Z, c##W);                                  \
+    CONTINUE(c##X, c##Y, c##Z, c##W);                                           \
 }                                                                               \
                                                                                 \
 DECL_ENTRY(swizzle_##X##Y##Z##W,                                                \
@@ -453,18 +464,18 @@ DECL_SWIZZLE(0, 3, 2, 1)
 
 /* Broadcast luma -> rgb (only used for y(a) -> rgb(a)) */
 #define DECL_EXPAND_LUMA(X, W, T0, T1)                                          \
-static void                                                                     \
-fn(expand_luma_##X##W)(SwsOpIter *restrict iter,                                \
-                       const SwsOpImpl *restrict impl,                          \
-                       block_t c0, block_t c1,  block_t c2, block_t c3)         \
+DECL_FUNC(expand_luma_##X##W##_impl,                                            \
+          block_t c0, block_t c1, block_t c2, block_t c3)                       \
 {                                                                               \
     SWS_LOOP                                                                    \
     for (int i = 0; i < SWS_BLOCK_SIZE; i++)                                    \
         T0[i] = T1[i] = c0[i];                                                  \
                                                                                 \
-    CONTINUE(block_t, c##X, T0, T1, c##W);                                      \
+    CONTINUE(c##X, T0, T1, c##W);                                               \
 }                                                                               \
                                                                                 \
+DECL_IMPL(expand_luma_##X##W##_impl, expand_luma_##X##W, x, y, z, w)            \
+                                                                                \
 DECL_ENTRY(expand_luma_##X##W,                                                  \
     .op = SWS_OP_SWIZZLE,                                                       \
     .swizzle.in = { X, 0, 0, W },                                               \