swscale/ops: add explicit clear mask to SwsClearOp

Track the components to clear in an explicit mask, instead of implicitly
testing for NaN values (i.e. {0, 0} rationals). This is mostly a
straightforward translation, but we need some slight extra boilerplate to
ensure the mask is correctly updated when e.g. commuting past a swizzle.

Signed-off-by: Niklas Haas <git@haasn.dev>
This commit is contained in:
Niklas Haas
2026-03-29 19:32:45 +02:00
parent 4020607f0a
commit cf2d40f65d
10 changed files with 55 additions and 38 deletions

View File

@@ -931,10 +931,15 @@ static SwsClearOp fmt_clear(enum AVPixelFormat fmt)
const bool has_alpha = desc->flags & AV_PIX_FMT_FLAG_ALPHA;
SwsClearOp c = {0};
if (!has_chroma)
if (!has_chroma) {
c.mask |= SWS_COMP(1) | SWS_COMP(2);
c.value[1] = c.value[2] = Q0;
if (!has_alpha)
}
if (!has_alpha) {
c.mask |= SWS_COMP(3);
c.value[3] = Q0;
}
return c;
}
@@ -1060,6 +1065,7 @@ int ff_sws_encode_pixfmt(SwsOpList *ops, enum AVPixelFormat fmt)
RET(ff_sws_op_list_append(ops, &(SwsOp) {
.op = SWS_OP_CLEAR,
.type = pixel_type,
.clear.mask = SWS_COMP(3),
.clear.value[3] = Q0,
}));
}

View File

@@ -217,7 +217,7 @@ void ff_sws_apply_op_q(const SwsOp *op, AVRational x[4])
return;
case SWS_OP_CLEAR:
for (int i = 0; i < 4; i++) {
if (op->clear.value[i].den)
if (SWS_COMP_TEST(op->clear.mask, i))
x[i] = op->clear.value[i];
}
return;
@@ -446,7 +446,7 @@ void ff_sws_op_list_update_comps(SwsOpList *ops)
}
case SWS_OP_CLEAR:
for (int i = 0; i < 4; i++) {
if (op->clear.value[i].den) {
if (SWS_COMP_TEST(op->clear.mask, i)) {
op->comps.flags[i] = 0;
if (op->clear.value[i].num == 0)
op->comps.flags[i] |= SWS_COMP_ZERO;
@@ -565,7 +565,7 @@ void ff_sws_op_list_update_comps(SwsOpList *ops)
break;
case SWS_OP_CLEAR:
for (int i = 0; i < 4; i++) {
if (!op->clear.value[i].den)
if (!SWS_COMP_TEST(op->clear.mask, i))
need_in[i] = need_out[i];
}
break;

View File

@@ -167,7 +167,8 @@ typedef struct SwsShiftOp {
} SwsShiftOp;
typedef struct SwsClearOp {
AVRational value[4]; /* value to set, or {0, 0} for no-op */
SwsCompMask mask; /* mask of components to clear */
AVRational value[4]; /* value to set */
} SwsClearOp;
typedef struct SwsConvertOp {

View File

@@ -117,7 +117,7 @@ static int op_match(const SwsOp *op, const SwsOpEntry *entry)
for (int i = 0; i < 4; i++) {
if (!SWS_OP_NEEDED(op, i))
continue;
if (entry->unused[i] != !!op->clear.value[i].den)
if (entry->unused[i] != SWS_COMP_TEST(op->clear.mask, i))
return 0;
}
}
@@ -149,7 +149,7 @@ static int op_match(const SwsOp *op, const SwsOpEntry *entry)
return score;
case SWS_OP_CLEAR:
for (int i = 0; i < 4; i++) {
if (!op->clear.value[i].den || !SWS_OP_NEEDED(op, i))
if (!SWS_COMP_TEST(op->clear.mask, i) || !SWS_OP_NEEDED(op, i))
continue;
if (av_cmp_q(op->clear.value[i], Q(entry->clear_value)))
return 0;

View File

@@ -99,7 +99,7 @@ static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
case SWS_OP_CLEAR:
for (int i = 0; i < 4; i++) {
if (!op->clear.value[i].den)
if (!SWS_COMP_TEST(op->clear.mask, i))
continue;
if (op->clear.value[i].den != 1)
return AVERROR(ENOTSUP);

View File

@@ -53,11 +53,14 @@ static bool op_commute_clear(SwsOp *op, SwsOp *next)
case SWS_OP_MAX:
case SWS_OP_SCALE:
case SWS_OP_READ:
case SWS_OP_SWIZZLE:
case SWS_OP_FILTER_H:
case SWS_OP_FILTER_V:
ff_sws_apply_op_q(next, op->clear.value);
return true;
case SWS_OP_SWIZZLE:
op->clear.mask = ff_sws_comp_mask_swizzle(op->clear.mask, next->swizzle);
ff_sws_apply_op_q(next, op->clear.value);
return true;
case SWS_OP_SWAP_BYTES:
switch (next->type) {
case SWS_PIXEL_U16:
@@ -65,7 +68,7 @@ static bool op_commute_clear(SwsOp *op, SwsOp *next)
return true;
case SWS_PIXEL_U32:
for (int i = 0; i < 4; i++) {
if (!op->clear.value[i].den)
if (!SWS_COMP_TEST(op->clear.mask, i))
continue;
uint32_t v = av_bswap32(op->clear.value[i].num);
if (v > INT_MAX)
@@ -277,6 +280,7 @@ static bool extract_constant_rows(SwsLinearOp *c, SwsComps prev,
(prev.flags[j] & SWS_COMP_ZERO); /* input is zero */
}
if (const_row && (c->mask & SWS_MASK_ROW(i))) {
clear.mask |= SWS_COMP(i);
clear.value[i] = c->m[i][4];
for (int j = 0; j < 5; j++)
c->m[i][j] = Q(i == j);
@@ -444,7 +448,7 @@ retry:
case SWS_OP_CLEAR:
for (int i = 0; i < 4; i++) {
if (!op->clear.value[i].den)
if (!SWS_COMP_TEST(op->clear.mask, i))
continue;
if ((prev->comps.flags[i] & SWS_COMP_ZERO) &&
@@ -452,11 +456,11 @@ retry:
op->clear.value[i].num == 0)
{
/* Redundant clear-to-zero of zero component */
op->clear.value[i].den = 0;
op->clear.mask ^= SWS_COMP(i);
} else if (!SWS_OP_NEEDED(op, i)) {
/* Unnecessary clear of unused component */
op->clear.value[i] = (AVRational) {0, 0};
} else if (op->clear.value[i].den) {
op->clear.mask ^= SWS_COMP(i);
} else {
noop = false;
}
}
@@ -469,9 +473,10 @@ retry:
/* Transitive clear */
if (next->op == SWS_OP_CLEAR) {
for (int i = 0; i < 4; i++) {
if (next->clear.value[i].den)
if (SWS_COMP_TEST(next->clear.mask, i))
op->clear.value[i] = next->clear.value[i];
}
op->clear.mask |= next->clear.mask;
ff_sws_op_list_remove_at(ops, n + 1, 1);
goto retry;
}
@@ -816,7 +821,7 @@ int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
case SWS_OP_CLEAR:
for (int i = 0; i < 4; i++) {
if (!op->clear.value[i].den)
if (!SWS_COMP_TEST(op->clear.mask, i))
continue;
if (op->clear.value[i].num != 0 || !clear_val)
return AVERROR(ENOTSUP);

View File

@@ -74,7 +74,7 @@ static int register_op(SwsContext *ctx, void *opaque, SwsOp *op)
break;
case SWS_OP_CLEAR:
for (int i = 0; i < 4; i++)
op->clear.value[i] = (AVRational) { 0, !!op->clear.value[i].den };
op->clear.value[i] = (AVRational) { 0, SWS_COMP_TEST(op->clear.mask, i) };
break;
case SWS_OP_DITHER:
/* Strip arbitrary offset */

View File

@@ -398,12 +398,14 @@ static void define_shader_consts(SwsOpList *ops, SPICtx *spi, SPIRVIDs *id)
break;
case SWS_OP_CLEAR:
for (int i = 0; i < 4; i++) {
if (!SWS_COMP_TEST(op->clear.mask, i))
continue;
AVRational cv = op->clear.value[i];
if (cv.den && op->type == SWS_PIXEL_F32) {
if (op->type == SWS_PIXEL_F32) {
float q = (float)cv.num/cv.den;
id->const_ids[id->nb_const_ids++] =
spi_OpConstantFloat(spi, f32_type, q);
} else if (op->clear.value[i].den) {
} else {
av_assert0(cv.den == 1);
id->const_ids[id->nb_const_ids++] =
spi_OpConstantUInt(spi, u32_type, cv.num);
@@ -1036,7 +1038,7 @@ static int add_ops_glsl(VulkanPriv *p, FFVulkanOpsCtx *s,
}
case SWS_OP_CLEAR: {
for (int i = 0; i < 4; i++) {
if (!op->clear.value[i].den)
if (!SWS_COMP_TEST(op->clear.mask, i))
continue;
av_bprintf(&shd->src, " %s.%c = %s"QSTR";\n", type_name,
"xyzw"[i], type_s, QTYPE(op->clear.value[i]));

View File

@@ -953,7 +953,7 @@ static void normalize_clear(SwsOp *op)
ff_sws_setup_clear(&(const SwsImplParams) { .op = op }, &res);
for (int i = 0; i < 4; i++) {
if (!op->clear.value[i].den)
if (!SWS_COMP_TEST(op->clear.mask, i))
continue;
switch (ff_sws_pixel_type_size(op->type)) {
case 1: c.u32 = 0x1010101U * res.priv.u8[i]; break;

View File

@@ -540,32 +540,34 @@ static void check_clear(void)
const AVRational zero = (AVRational) { 0, 1};
const AVRational none = {0};
const SwsClearOp patterns[] = {
const AVRational patterns[][4] = {
/* Zero only */
{{ none, none, none, zero }},
{{ zero, none, none, none }},
{ none, none, none, zero },
{ zero, none, none, none },
/* Alpha only */
{{ none, none, none, alpha }},
{{ alpha, none, none, none }},
{ none, none, none, alpha },
{ alpha, none, none, none },
/* Chroma only */
{{ chroma, chroma, none, none }},
{{ none, chroma, chroma, none }},
{{ none, none, chroma, chroma }},
{{ chroma, none, chroma, none }},
{{ none, chroma, none, chroma }},
{ chroma, chroma, none, none },
{ none, chroma, chroma, none },
{ none, none, chroma, chroma },
{ chroma, none, chroma, none },
{ none, chroma, none, chroma },
/* Alpha+chroma */
{{ chroma, chroma, none, alpha }},
{{ none, chroma, chroma, alpha }},
{{ alpha, none, chroma, chroma }},
{{ chroma, none, chroma, alpha }},
{{ alpha, chroma, none, chroma }},
{ chroma, chroma, none, alpha },
{ none, chroma, chroma, alpha },
{ alpha, none, chroma, chroma },
{ chroma, none, chroma, alpha },
{ alpha, chroma, none, chroma },
};
for (int i = 0; i < FF_ARRAY_ELEMS(patterns); i++) {
SwsClearOp clear = { .mask = ff_sws_comp_mask_q4(patterns[i]) };
memcpy(clear.value, patterns[i], sizeof(clear.value));
CHECK(FMT("clear_pattern_%s[%d]", type, i), 4, 4, t, t, {
.op = SWS_OP_CLEAR,
.type = t,
.clear = patterns[i],
.clear = clear,
});
}
} else if (!ff_sws_pixel_type_is_int(t)) {
@@ -574,6 +576,7 @@ static void check_clear(void)
.op = SWS_OP_CLEAR,
.type = t,
.clear.value[3] = { 0, 1 },
.clear.mask = SWS_COMP(3),
});
}
}