diff --git a/libswscale/ops_backend.h b/libswscale/ops_backend.h index f97b4303aa..82eb92fc36 100644 --- a/libswscale/ops_backend.h +++ b/libswscale/ops_backend.h @@ -131,10 +131,11 @@ static inline int ff_setup_memdup(const void *c, size_t size, SwsImplResult *out } /* Helper macro for declaring op table entries */ -#define DECL_ENTRY(NAME, ...) \ +#define DECL_ENTRY(NAME, MASK, ...) \ static const SwsOpEntry fn(op_##NAME) = { \ .func = (SwsFuncPtr) fn(NAME), \ .type = PIXEL_TYPE, \ + .mask = (MASK), \ __VA_ARGS__ \ } @@ -144,11 +145,7 @@ static inline int ff_setup_memdup(const void *c, size_t size, SwsImplResult *out #define WRAP_PATTERN(FUNC, X, Y, Z, W, ...) \ DECL_IMPL(FUNC, FUNC##_##X##Y##Z##W, X, Y, Z, W) \ - \ - DECL_ENTRY(FUNC##_##X##Y##Z##W, \ - .unused = { !X, !Y, !Z, !W }, \ - __VA_ARGS__ \ - ) + DECL_ENTRY(FUNC##_##X##Y##Z##W, SWS_COMP_MASK(X, Y, Z, W), __VA_ARGS__) #define WRAP_COMMON_PATTERNS(FUNC, ...) \ WRAP_PATTERN(FUNC, 1, 0, 0, 0, __VA_ARGS__); \ diff --git a/libswscale/ops_chain.c b/libswscale/ops_chain.c index 7c9f4e853a..ff301d4a50 100644 --- a/libswscale/ops_chain.c +++ b/libswscale/ops_chain.c @@ -64,10 +64,10 @@ int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, * Match an operation against a reference operation. Returns a score for how * well the reference matches the operation, or 0 if there is no match. * - * If `ref->comps` has any flags set, they must be set in `op` as well. - * Likewise, if `ref->comps` has any components marked as unused, they must be - * marked as unused in `ops` as well. - * + * An entry only matches if its `mask` covers every component that + * ff_sws_comp_mask_needed() reports for `op`. Among matching entries, a mask + * with fewer components set yields a higher score. + * * For unfiltered SWS_OP_READ/SWS_OP_WRITE, SWS_OP_SWAP_BYTES and * SWS_OP_SWIZZLE, the exact type is not checked, just the size. * @@ -99,14 +99,12 @@ static int op_match(const SwsOp *op, const SwsOpEntry *entry) break; } - for (int i = 0; i < 4; i++) { - if (entry->unused[i]) { - if (op->comps.unused[i]) - score += 1; /* Operating on fewer components is better .. */ - else - return 0; /* .. but not too few! 
*/ - } - } + const SwsCompMask needed = ff_sws_comp_mask_needed(op); + if (needed & ~entry->mask) + return 0; /* Entry doesn't compute all needed components */ + + /* Otherwise, operating on fewer components is better */ + score += av_popcount(SWS_COMP_INV(entry->mask)); /* Flexible variants always match, but lower the score to prioritize more * specific implementations if they exist */ diff --git a/libswscale/ops_chain.h b/libswscale/ops_chain.h index 31e0e6a403..9fe2c06613 100644 --- a/libswscale/ops_chain.h +++ b/libswscale/ops_chain.h @@ -120,8 +120,8 @@ typedef struct SwsOpEntry { /* Kernel metadata; reduced size subset of SwsOp */ SwsOpType op; SwsPixelType type; + SwsCompMask mask; /* mask of active components (after operation) */ bool flexible; /* if true, only the type and op are matched */ - bool unused[4]; /* for kernels which operate on a subset of components */ union { /* extra data defining the operation, unless `flexible` is true */ SwsReadWriteOp rw; diff --git a/libswscale/ops_tmpl_common.c b/libswscale/ops_tmpl_common.c index 34016d062d..b8b202e6e6 100644 --- a/libswscale/ops_tmpl_common.c +++ b/libswscale/ops_tmpl_common.c @@ -81,7 +81,7 @@ DECL_PATTERN(clear) #define WRAP_CLEAR(X, Y, Z, W) \ DECL_IMPL(clear, clear##_##X##Y##Z##W, X, Y, Z, W) \ \ -DECL_ENTRY(clear##_##X##Y##Z##W, \ +DECL_ENTRY(clear##_##X##Y##Z##W, SWS_COMP_ALL, \ .setup = ff_sws_setup_clear, \ .op = SWS_OP_CLEAR, \ .clear.mask = SWS_COMP_MASK(!X, !Y, !Z, !W), \ @@ -300,7 +300,7 @@ static av_flatten void fn(FUNC##ELEMS##SUFFIX)(SwsOpIter *restrict iter, CALL_READ(FUNC##SUFFIX, ELEMS); \ } \ \ -DECL_ENTRY(FUNC##ELEMS##SUFFIX, \ +DECL_ENTRY(FUNC##ELEMS##SUFFIX, SWS_COMP_ELEMS(ELEMS), \ .op = SWS_OP_READ, \ .setup = fn(setup_filter##SUFFIX), \ .rw.elems = ELEMS, \ diff --git a/libswscale/ops_tmpl_float.c b/libswscale/ops_tmpl_float.c index 695de4ceb6..9e98534383 100644 --- a/libswscale/ops_tmpl_float.c +++ b/libswscale/ops_tmpl_float.c @@ -104,7 +104,7 @@ DECL_FUNC(dither, const int 
size_log2) #define WRAP_DITHER(N) \ DECL_IMPL(dither, dither##N, N) \ \ -DECL_ENTRY(dither##N, \ +DECL_ENTRY(dither##N, SWS_COMP_ALL, \ .op = SWS_OP_DITHER, \ .dither_size = N, \ .setup = fn(setup_dither), \ @@ -188,7 +188,7 @@ DECL_FUNC(linear_mask, const uint32_t mask) #define WRAP_LINEAR(NAME, MASK) \ DECL_IMPL(linear_mask, linear_##NAME, MASK) \ \ -DECL_ENTRY(linear_##NAME, \ +DECL_ENTRY(linear_##NAME, SWS_COMP_ALL, \ .op = SWS_OP_LINEAR, \ .setup = fn(setup_linear), \ .linear_mask = (MASK), \ diff --git a/libswscale/ops_tmpl_int.c b/libswscale/ops_tmpl_int.c index baa3526029..1cf6bc0f9d 100644 --- a/libswscale/ops_tmpl_int.c +++ b/libswscale/ops_tmpl_int.c @@ -129,7 +129,7 @@ static av_flatten void fn(FUNC##ELEMS)(SwsOpIter *restrict iter, iter->in[i] += sizeof(block_t) * (PACKED ? ELEMS : 1) >> FRAC; \ } \ \ -DECL_ENTRY(FUNC##ELEMS, \ +DECL_ENTRY(FUNC##ELEMS, SWS_COMP_ELEMS(ELEMS), \ .op = SWS_OP_READ, \ .rw = { \ .elems = ELEMS, \ @@ -157,7 +157,7 @@ static av_flatten void fn(FUNC##ELEMS)(SwsOpIter *restrict iter, iter->out[i] += sizeof(block_t) * (PACKED ? 
ELEMS : 1) >> FRAC; \ } \ \ -DECL_ENTRY(FUNC##ELEMS, \ +DECL_ENTRY(FUNC##ELEMS, SWS_COMP_ALL, \ .op = SWS_OP_WRITE, \ .rw = { \ .elems = ELEMS, \ @@ -339,14 +339,14 @@ DECL_FUNC(unpack, const int bits0, const int bits1, const int bits2, const int b #define WRAP_PACK_UNPACK(X, Y, Z, W) \ DECL_IMPL(pack, pack_##X##Y##Z##W, X, Y, Z, W) \ \ -DECL_ENTRY(pack_##X##Y##Z##W, \ +DECL_ENTRY(pack_##X##Y##Z##W, SWS_COMP(0), \ .op = SWS_OP_PACK, \ .pack.pattern = { X, Y, Z, W }, \ ); \ \ DECL_IMPL(unpack, unpack_##X##Y##Z##W, X, Y, Z, W) \ \ -DECL_ENTRY(unpack_##X##Y##Z##W, \ +DECL_ENTRY(unpack_##X##Y##Z##W, SWS_COMP_MASK(X, Y, Z, W), \ .op = SWS_OP_UNPACK, \ .pack.pattern = { X, Y, Z, W }, \ ); @@ -438,7 +438,7 @@ fn(swizzle_##X##Y##Z##W)(SwsOpIter *restrict iter, CONTINUE(c##X, c##Y, c##Z, c##W); \ } \ \ -DECL_ENTRY(swizzle_##X##Y##Z##W, \ +DECL_ENTRY(swizzle_##X##Y##Z##W, SWS_COMP_ALL, \ .op = SWS_OP_SWIZZLE, \ .swizzle.in = { X, Y, Z, W }, \ ); @@ -476,7 +476,7 @@ DECL_FUNC(expand_luma_##X##W##_impl, \ DECL_IMPL(expand_luma_##X##W##_impl, expand_luma_##X##W, x, y, z, w) \ \ -DECL_ENTRY(expand_luma_##X##W, \ +DECL_ENTRY(expand_luma_##X##W, SWS_COMP_ALL, \ .op = SWS_OP_SWIZZLE, \ .swizzle.in = { X, 0, 0, W }, \ ); diff --git a/libswscale/x86/ops.c b/libswscale/x86/ops.c index f3c20a9236..fa18c17104 100644 --- a/libswscale/x86/ops.c +++ b/libswscale/x86/ops.c @@ -25,21 +25,21 @@ #include "../ops_chain.h" -#define DECL_ENTRY(TYPE, NAME, ...) \ +#define DECL_ENTRY(TYPE, MASK, NAME, ...) \ static const SwsOpEntry op_##NAME = { \ .type = SWS_PIXEL_##TYPE, \ + .mask = (MASK), \ __VA_ARGS__ \ } -#define DECL_ASM(TYPE, NAME, ...) \ +#define DECL_ASM(TYPE, MASK, NAME, ...) \ void ff_##NAME(void); \ - DECL_ENTRY(TYPE, MASK, NAME, \ + DECL_ENTRY(TYPE, MASK, NAME, \ .func = ff_##NAME, \ __VA_ARGS__) #define DECL_PATTERN(TYPE, NAME, X, Y, Z, W, ...) 
\ - DECL_ASM(TYPE, p##X##Y##Z##W##_##NAME, \ - .unused = { !X, !Y, !Z, !W }, \ + DECL_ASM(TYPE, SWS_COMP_MASK(X, Y, Z, W), p##X##Y##Z##W##_##NAME, \ __VA_ARGS__ \ ) @@ -74,7 +74,7 @@ static int setup_rw(const SwsImplParams *params, SwsImplResult *out) } #define DECL_RW(EXT, TYPE, NAME, OP, ELEMS, PACKED, FRAC) \ - DECL_ASM(TYPE, NAME##ELEMS##EXT, \ + DECL_ASM(TYPE, SWS_COMP_ELEMS(ELEMS), NAME##ELEMS##EXT, \ .op = SWS_OP_##OP, \ .rw = { .elems = ELEMS, .packed = PACKED, .frac = FRAC }, \ .setup = setup_rw, \ @@ -89,12 +89,12 @@ static int setup_rw(const SwsImplParams *params, SwsImplResult *out) DECL_RW(EXT, U##DEPTH, write##DEPTH##_packed, WRITE, 4, true, 0) \ #define DECL_PACK_UNPACK(EXT, TYPE, X, Y, Z, W) \ - DECL_ASM(TYPE, pack_##X##Y##Z##W##EXT, \ + DECL_ASM(TYPE, SWS_COMP(0), pack_##X##Y##Z##W##EXT, \ .op = SWS_OP_PACK, \ .pack.pattern = {X, Y, Z, W}, \ ); \ \ - DECL_ASM(TYPE, unpack_##X##Y##Z##W##EXT, \ + DECL_ASM(TYPE, SWS_COMP_MASK(X, Y, Z, W), unpack_##X##Y##Z##W##EXT, \ .op = SWS_OP_UNPACK, \ .pack.pattern = {X, Y, Z, W}, \ ); \ @@ -108,22 +108,22 @@ static int setup_swap_bytes(const SwsImplParams *params, SwsImplResult *out) } #define DECL_SWAP_BYTES(EXT, TYPE, X, Y, Z, W) \ - DECL_ENTRY(TYPE, p##X##Y##Z##W##_swap_bytes_##TYPE##EXT, \ + DECL_ENTRY(TYPE, SWS_COMP_MASK(X, Y, Z, W), \ + p##X##Y##Z##W##_swap_bytes_##TYPE##EXT, \ .op = SWS_OP_SWAP_BYTES, \ - .unused = { !X, !Y, !Z, !W }, \ .func = ff_p##X##Y##Z##W##_shuffle##EXT, \ .setup = setup_swap_bytes, \ ); #define DECL_CLEAR_ALPHA(EXT, IDX) \ - DECL_ASM(U8, clear_alpha##IDX##EXT, \ + DECL_ASM(U8, SWS_COMP_ALL, clear_alpha##IDX##EXT, \ .op = SWS_OP_CLEAR, \ .clear.mask = SWS_COMP(IDX), \ .clear.value[IDX] = { -1, 1 }, \ ); \ #define DECL_CLEAR_ZERO(EXT, IDX) \ - DECL_ASM(U8, clear_zero##IDX##EXT, \ + DECL_ASM(U8, SWS_COMP_ALL, clear_zero##IDX##EXT, \ .op = SWS_OP_CLEAR, \ .clear.mask = SWS_COMP(IDX), \ .clear.value[IDX] = { 0, 1 }, \ @@ -138,14 +138,14 @@ static int setup_clear(const SwsImplParams 
*params, SwsImplResult *out) } #define DECL_CLEAR(EXT, X, Y, Z, W) \ - DECL_PATTERN(U8, clear##EXT, X, Y, Z, W, \ + DECL_ASM(U8, SWS_COMP_ALL, p##X##Y##Z##W##_clear##EXT, \ .op = SWS_OP_CLEAR, \ .setup = setup_clear, \ .clear.mask = SWS_COMP_MASK(!X, !Y, !Z, !W), \ ); #define DECL_SWIZZLE(EXT, X, Y, Z, W) \ - DECL_ASM(U8, swizzle_##X##Y##Z##W##EXT, \ + DECL_ASM(U8, SWS_COMP_ALL, swizzle_##X##Y##Z##W##EXT, \ .op = SWS_OP_SWIZZLE, \ .swizzle.in = {X, Y, Z, W}, \ ); @@ -203,10 +203,9 @@ static int setup_shift(const SwsImplParams *params, SwsImplResult *out) ); #define DECL_EXPAND_BITS(EXT, BITS) \ - DECL_ASM(U##BITS, expand_bits##BITS##EXT, \ + DECL_ASM(U##BITS, SWS_COMP(0), expand_bits##BITS##EXT, \ .op = SWS_OP_SCALE, \ .scale = { .num = ((1 << (BITS)) - 1), .den = 1 }, \ - .unused = { false, true, true, true }, \ ); static int setup_dither(const SwsImplParams *params, SwsImplResult *out) @@ -261,7 +260,7 @@ static int setup_dither(const SwsImplParams *params, SwsImplResult *out) ); #define DECL_DITHER(EXT, SIZE) \ - DECL_ASM(F32, dither##SIZE##EXT, \ + DECL_ASM(F32, SWS_COMP_ALL, dither##SIZE##EXT, \ .op = SWS_OP_DITHER, \ .setup = setup_dither, \ .dither_size = SIZE, \ @@ -285,7 +284,7 @@ static int setup_linear(const SwsImplParams *params, SwsImplResult *out) } #define DECL_LINEAR(EXT, NAME, MASK) \ - DECL_ASM(F32, NAME##EXT, \ + DECL_ASM(F32, SWS_COMP_ALL, NAME##EXT, \ .op = SWS_OP_LINEAR, \ .setup = setup_linear, \ .linear_mask = (MASK), \ @@ -514,7 +513,7 @@ static int setup_filter_4x4_h(const SwsImplParams *params, SwsImplResult *out) } #define DECL_FILTER(EXT, TYPE, DIR, NAME, ELEMS, ...) \ - DECL_ASM(TYPE, NAME##ELEMS##_##TYPE##EXT, \ + DECL_ASM(TYPE, SWS_COMP_ELEMS(ELEMS), NAME##ELEMS##_##TYPE##EXT, \ .op = SWS_OP_READ, \ .rw.elems = ELEMS, \ .rw.filter = SWS_OP_FILTER_##DIR, \