swscale/ops_chain: replace SwsOpEntry.unused by SwsCompMask

Needed to allow us to phase out SwsComps.unused altogether.

It's worth pointing out the change in semantics; while unused tracks the
unused *input* components, the mask is defined as representing the
computed *output* components.

This is 90% the same, except for read/write, pack/unpack, and clear, which
are the only operations that can change the number of components.

Signed-off-by: Niklas Haas <git@haasn.dev>
This commit is contained in:
Niklas Haas
2026-04-15 19:06:31 +02:00
parent 215cd90201
commit df4fe85ae3
7 changed files with 38 additions and 48 deletions

View File

@@ -131,10 +131,11 @@ static inline int ff_setup_memdup(const void *c, size_t size, SwsImplResult *out
}
/* Helper macro for declaring op table entries */
#define DECL_ENTRY(NAME, ...) \
#define DECL_ENTRY(NAME, MASK, ...) \
static const SwsOpEntry fn(op_##NAME) = { \
.func = (SwsFuncPtr) fn(NAME), \
.type = PIXEL_TYPE, \
.mask = (MASK), \
__VA_ARGS__ \
}
@@ -144,11 +145,7 @@ static inline int ff_setup_memdup(const void *c, size_t size, SwsImplResult *out
#define WRAP_PATTERN(FUNC, X, Y, Z, W, ...) \
DECL_IMPL(FUNC, FUNC##_##X##Y##Z##W, X, Y, Z, W) \
\
DECL_ENTRY(FUNC##_##X##Y##Z##W, \
.unused = { !X, !Y, !Z, !W }, \
__VA_ARGS__ \
)
DECL_ENTRY(FUNC##_##X##Y##Z##W, SWS_COMP_MASK(X, Y, Z, W), __VA_ARGS__)
#define WRAP_COMMON_PATTERNS(FUNC, ...) \
WRAP_PATTERN(FUNC, 1, 0, 0, 0, __VA_ARGS__); \

View File

@@ -64,10 +64,6 @@ int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func,
* Match an operation against a reference operation. Returns a score for how
* well the reference matches the operation, or 0 if there is no match.
*
* If `ref->comps` has any flags set, they must be set in `op` as well.
* Likewise, if `ref->comps` has any components marked as unused, they must be
* marked as unused in `ops` as well.
*
* For unfiltered SWS_OP_READ/SWS_OP_WRITE, SWS_OP_SWAP_BYTES and
* SWS_OP_SWIZZLE, the exact type is not checked, just the size.
*
@@ -99,14 +95,12 @@ static int op_match(const SwsOp *op, const SwsOpEntry *entry)
break;
}
for (int i = 0; i < 4; i++) {
if (entry->unused[i]) {
if (op->comps.unused[i])
score += 1; /* Operating on fewer components is better .. */
else
return 0; /* .. but not too few! */
}
}
const SwsCompMask needed = ff_sws_comp_mask_needed(op);
if (needed & ~entry->mask)
return 0; /* Entry doesn't compute all needed components */
/* Otherwise, operating on fewer components is better */
score += av_popcount(SWS_COMP_INV(entry->mask));
/* Flexible variants always match, but lower the score to prioritize more
* specific implementations if they exist */

View File

@@ -120,8 +120,8 @@ typedef struct SwsOpEntry {
/* Kernel metadata; reduced size subset of SwsOp */
SwsOpType op;
SwsPixelType type;
SwsCompMask mask; /* mask of active components (after operation) */
bool flexible; /* if true, only the type and op are matched */
bool unused[4]; /* for kernels which operate on a subset of components */
union { /* extra data defining the operation, unless `flexible` is true */
SwsReadWriteOp rw;

View File

@@ -81,7 +81,7 @@ DECL_PATTERN(clear)
#define WRAP_CLEAR(X, Y, Z, W) \
DECL_IMPL(clear, clear##_##X##Y##Z##W, X, Y, Z, W) \
\
DECL_ENTRY(clear##_##X##Y##Z##W, \
DECL_ENTRY(clear##_##X##Y##Z##W, SWS_COMP_ALL, \
.setup = ff_sws_setup_clear, \
.op = SWS_OP_CLEAR, \
.clear.mask = SWS_COMP_MASK(!X, !Y, !Z, !W), \
@@ -300,7 +300,7 @@ static av_flatten void fn(FUNC##ELEMS##SUFFIX)(SwsOpIter *restrict iter,
CALL_READ(FUNC##SUFFIX, ELEMS); \
} \
\
DECL_ENTRY(FUNC##ELEMS##SUFFIX, \
DECL_ENTRY(FUNC##ELEMS##SUFFIX, SWS_COMP_ELEMS(ELEMS), \
.op = SWS_OP_READ, \
.setup = fn(setup_filter##SUFFIX), \
.rw.elems = ELEMS, \

View File

@@ -104,7 +104,7 @@ DECL_FUNC(dither, const int size_log2)
#define WRAP_DITHER(N) \
DECL_IMPL(dither, dither##N, N) \
\
DECL_ENTRY(dither##N, \
DECL_ENTRY(dither##N, SWS_COMP_ALL, \
.op = SWS_OP_DITHER, \
.dither_size = N, \
.setup = fn(setup_dither), \
@@ -188,7 +188,7 @@ DECL_FUNC(linear_mask, const uint32_t mask)
#define WRAP_LINEAR(NAME, MASK) \
DECL_IMPL(linear_mask, linear_##NAME, MASK) \
\
DECL_ENTRY(linear_##NAME, \
DECL_ENTRY(linear_##NAME, SWS_COMP_ALL, \
.op = SWS_OP_LINEAR, \
.setup = fn(setup_linear), \
.linear_mask = (MASK), \

View File

@@ -129,7 +129,7 @@ static av_flatten void fn(FUNC##ELEMS)(SwsOpIter *restrict iter,
iter->in[i] += sizeof(block_t) * (PACKED ? ELEMS : 1) >> FRAC; \
} \
\
DECL_ENTRY(FUNC##ELEMS, \
DECL_ENTRY(FUNC##ELEMS, SWS_COMP_ELEMS(ELEMS), \
.op = SWS_OP_READ, \
.rw = { \
.elems = ELEMS, \
@@ -157,7 +157,7 @@ static av_flatten void fn(FUNC##ELEMS)(SwsOpIter *restrict iter,
iter->out[i] += sizeof(block_t) * (PACKED ? ELEMS : 1) >> FRAC; \
} \
\
DECL_ENTRY(FUNC##ELEMS, \
DECL_ENTRY(FUNC##ELEMS, SWS_COMP_ALL, \
.op = SWS_OP_WRITE, \
.rw = { \
.elems = ELEMS, \
@@ -339,14 +339,14 @@ DECL_FUNC(unpack, const int bits0, const int bits1, const int bits2, const int b
#define WRAP_PACK_UNPACK(X, Y, Z, W) \
DECL_IMPL(pack, pack_##X##Y##Z##W, X, Y, Z, W) \
\
DECL_ENTRY(pack_##X##Y##Z##W, \
DECL_ENTRY(pack_##X##Y##Z##W, SWS_COMP(0), \
.op = SWS_OP_PACK, \
.pack.pattern = { X, Y, Z, W }, \
); \
\
DECL_IMPL(unpack, unpack_##X##Y##Z##W, X, Y, Z, W) \
\
DECL_ENTRY(unpack_##X##Y##Z##W, \
DECL_ENTRY(unpack_##X##Y##Z##W, SWS_COMP_MASK(X, Y, Z, W), \
.op = SWS_OP_UNPACK, \
.pack.pattern = { X, Y, Z, W }, \
);
@@ -438,7 +438,7 @@ fn(swizzle_##X##Y##Z##W)(SwsOpIter *restrict iter,
CONTINUE(c##X, c##Y, c##Z, c##W); \
} \
\
DECL_ENTRY(swizzle_##X##Y##Z##W, \
DECL_ENTRY(swizzle_##X##Y##Z##W, SWS_COMP_ALL, \
.op = SWS_OP_SWIZZLE, \
.swizzle.in = { X, Y, Z, W }, \
);
@@ -476,7 +476,7 @@ DECL_FUNC(expand_luma_##X##W##_impl,
\
DECL_IMPL(expand_luma_##X##W##_impl, expand_luma_##X##W, x, y, z, w) \
\
DECL_ENTRY(expand_luma_##X##W, \
DECL_ENTRY(expand_luma_##X##W, SWS_COMP_ALL, \
.op = SWS_OP_SWIZZLE, \
.swizzle.in = { X, 0, 0, W }, \
);

View File

@@ -25,21 +25,21 @@
#include "../ops_chain.h"
#define DECL_ENTRY(TYPE, NAME, ...) \
#define DECL_ENTRY(TYPE, MASK, NAME, ...) \
static const SwsOpEntry op_##NAME = { \
.type = SWS_PIXEL_##TYPE, \
.mask = MASK, \
__VA_ARGS__ \
}
#define DECL_ASM(TYPE, NAME, ...) \
#define DECL_ASM(TYPE, MASK, NAME, ...) \
void ff_##NAME(void); \
DECL_ENTRY(TYPE, NAME, \
DECL_ENTRY(TYPE, MASK, NAME, \
.func = ff_##NAME, \
__VA_ARGS__)
#define DECL_PATTERN(TYPE, NAME, X, Y, Z, W, ...) \
DECL_ASM(TYPE, p##X##Y##Z##W##_##NAME, \
.unused = { !X, !Y, !Z, !W }, \
DECL_ASM(TYPE, SWS_COMP_MASK(X, Y, Z, W), p##X##Y##Z##W##_##NAME, \
__VA_ARGS__ \
)
@@ -74,7 +74,7 @@ static int setup_rw(const SwsImplParams *params, SwsImplResult *out)
}
#define DECL_RW(EXT, TYPE, NAME, OP, ELEMS, PACKED, FRAC) \
DECL_ASM(TYPE, NAME##ELEMS##EXT, \
DECL_ASM(TYPE, SWS_COMP_ELEMS(ELEMS), NAME##ELEMS##EXT, \
.op = SWS_OP_##OP, \
.rw = { .elems = ELEMS, .packed = PACKED, .frac = FRAC }, \
.setup = setup_rw, \
@@ -89,12 +89,12 @@ static int setup_rw(const SwsImplParams *params, SwsImplResult *out)
DECL_RW(EXT, U##DEPTH, write##DEPTH##_packed, WRITE, 4, true, 0) \
#define DECL_PACK_UNPACK(EXT, TYPE, X, Y, Z, W) \
DECL_ASM(TYPE, pack_##X##Y##Z##W##EXT, \
DECL_ASM(TYPE, SWS_COMP(0), pack_##X##Y##Z##W##EXT, \
.op = SWS_OP_PACK, \
.pack.pattern = {X, Y, Z, W}, \
); \
\
DECL_ASM(TYPE, unpack_##X##Y##Z##W##EXT, \
DECL_ASM(TYPE, SWS_COMP_MASK(X, Y, Z, W), unpack_##X##Y##Z##W##EXT, \
.op = SWS_OP_UNPACK, \
.pack.pattern = {X, Y, Z, W}, \
); \
@@ -108,22 +108,22 @@ static int setup_swap_bytes(const SwsImplParams *params, SwsImplResult *out)
}
#define DECL_SWAP_BYTES(EXT, TYPE, X, Y, Z, W) \
DECL_ENTRY(TYPE, p##X##Y##Z##W##_swap_bytes_##TYPE##EXT, \
DECL_ENTRY(TYPE, SWS_COMP_MASK(X, Y, Z, W), \
p##X##Y##Z##W##_swap_bytes_##TYPE##EXT, \
.op = SWS_OP_SWAP_BYTES, \
.unused = { !X, !Y, !Z, !W }, \
.func = ff_p##X##Y##Z##W##_shuffle##EXT, \
.setup = setup_swap_bytes, \
);
#define DECL_CLEAR_ALPHA(EXT, IDX) \
DECL_ASM(U8, clear_alpha##IDX##EXT, \
DECL_ASM(U8, SWS_COMP_ALL, clear_alpha##IDX##EXT, \
.op = SWS_OP_CLEAR, \
.clear.mask = SWS_COMP(IDX), \
.clear.value[IDX] = { -1, 1 }, \
); \
#define DECL_CLEAR_ZERO(EXT, IDX) \
DECL_ASM(U8, clear_zero##IDX##EXT, \
DECL_ASM(U8, SWS_COMP_ALL, clear_zero##IDX##EXT, \
.op = SWS_OP_CLEAR, \
.clear.mask = SWS_COMP(IDX), \
.clear.value[IDX] = { 0, 1 }, \
@@ -138,14 +138,14 @@ static int setup_clear(const SwsImplParams *params, SwsImplResult *out)
}
#define DECL_CLEAR(EXT, X, Y, Z, W) \
DECL_PATTERN(U8, clear##EXT, X, Y, Z, W, \
DECL_ASM(U8, SWS_COMP_ALL, p##X##Y##Z##W##_clear##EXT, \
.op = SWS_OP_CLEAR, \
.setup = setup_clear, \
.clear.mask = SWS_COMP_MASK(!X, !Y, !Z, !W), \
);
#define DECL_SWIZZLE(EXT, X, Y, Z, W) \
DECL_ASM(U8, swizzle_##X##Y##Z##W##EXT, \
DECL_ASM(U8, SWS_COMP_ALL, swizzle_##X##Y##Z##W##EXT, \
.op = SWS_OP_SWIZZLE, \
.swizzle.in = {X, Y, Z, W}, \
);
@@ -203,10 +203,9 @@ static int setup_shift(const SwsImplParams *params, SwsImplResult *out)
);
#define DECL_EXPAND_BITS(EXT, BITS) \
DECL_ASM(U##BITS, expand_bits##BITS##EXT, \
DECL_ASM(U##BITS, SWS_COMP(0), expand_bits##BITS##EXT, \
.op = SWS_OP_SCALE, \
.scale = { .num = ((1 << (BITS)) - 1), .den = 1 }, \
.unused = { false, true, true, true }, \
);
static int setup_dither(const SwsImplParams *params, SwsImplResult *out)
@@ -261,7 +260,7 @@ static int setup_dither(const SwsImplParams *params, SwsImplResult *out)
);
#define DECL_DITHER(EXT, SIZE) \
DECL_ASM(F32, dither##SIZE##EXT, \
DECL_ASM(F32, SWS_COMP_ALL, dither##SIZE##EXT, \
.op = SWS_OP_DITHER, \
.setup = setup_dither, \
.dither_size = SIZE, \
@@ -285,7 +284,7 @@ static int setup_linear(const SwsImplParams *params, SwsImplResult *out)
}
#define DECL_LINEAR(EXT, NAME, MASK) \
DECL_ASM(F32, NAME##EXT, \
DECL_ASM(F32, SWS_COMP_ALL, NAME##EXT, \
.op = SWS_OP_LINEAR, \
.setup = setup_linear, \
.linear_mask = (MASK), \
@@ -514,7 +513,7 @@ static int setup_filter_4x4_h(const SwsImplParams *params, SwsImplResult *out)
}
#define DECL_FILTER(EXT, TYPE, DIR, NAME, ELEMS, ...) \
DECL_ASM(TYPE, NAME##ELEMS##_##TYPE##EXT, \
DECL_ASM(TYPE, SWS_COMP_ELEMS(ELEMS), NAME##ELEMS##_##TYPE##EXT, \
.op = SWS_OP_READ, \
.rw.elems = ELEMS, \
.rw.filter = SWS_OP_FILTER_##DIR, \