mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 21:00:41 +08:00
Mark the components to be cleared, not the components to be preserved. Signed-off-by: Niklas Haas <git@haasn.dev>
349 lines
11 KiB
C
349 lines
11 KiB
C
/**
|
|
* Copyright (C) 2025 Niklas Haas
|
|
*
|
|
* This file is part of FFmpeg.
|
|
*
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#include "ops_backend.h"
|
|
|
|
#ifndef BIT_DEPTH
|
|
# error Should only be included from ops_tmpl_*.c!
|
|
#endif
|
|
|
|
#define WRAP_CONVERT_UINT(N) \
|
|
DECL_PATTERN(convert_uint##N) \
|
|
{ \
|
|
u##N##block_t xu, yu, zu, wu; \
|
|
\
|
|
SWS_LOOP \
|
|
for (int i = 0; i < SWS_BLOCK_SIZE; i++) { \
|
|
if (X) \
|
|
xu[i] = x[i]; \
|
|
if (Y) \
|
|
yu[i] = y[i]; \
|
|
if (Z) \
|
|
zu[i] = z[i]; \
|
|
if (W) \
|
|
wu[i] = w[i]; \
|
|
} \
|
|
\
|
|
CONTINUE(xu, yu, zu, wu); \
|
|
} \
|
|
\
|
|
WRAP_COMMON_PATTERNS(convert_uint##N, \
|
|
.op = SWS_OP_CONVERT, \
|
|
.convert.to = SWS_PIXEL_U##N, \
|
|
);
|
|
|
|
#if BIT_DEPTH != 8
|
|
WRAP_CONVERT_UINT(8)
|
|
#endif
|
|
|
|
#if BIT_DEPTH != 16
|
|
WRAP_CONVERT_UINT(16)
|
|
#endif
|
|
|
|
#if BIT_DEPTH != 32 || defined(IS_FLOAT)
|
|
WRAP_CONVERT_UINT(32)
|
|
#endif
|
|
|
|
DECL_PATTERN(clear)
|
|
{
|
|
SWS_LOOP
|
|
for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
|
|
if (X)
|
|
x[i] = impl->priv.px[0];
|
|
if (Y)
|
|
y[i] = impl->priv.px[1];
|
|
if (Z)
|
|
z[i] = impl->priv.px[2];
|
|
if (W)
|
|
w[i] = impl->priv.px[3];
|
|
}
|
|
|
|
CONTINUE(x, y, z, w);
|
|
}
|
|
|
|
#define WRAP_CLEAR(X, Y, Z, W) \
|
|
DECL_IMPL(clear, clear##_##X##Y##Z##W, X, Y, Z, W) \
|
|
\
|
|
DECL_ENTRY(clear##_##X##Y##Z##W, SWS_COMP_ALL, \
|
|
.setup = ff_sws_setup_clear, \
|
|
.op = SWS_OP_CLEAR, \
|
|
.clear.mask = SWS_COMP_MASK(X, Y, Z, W), \
|
|
);
|
|
|
|
WRAP_CLEAR(0, 0, 0, 1) /* rgba alpha */
|
|
WRAP_CLEAR(1, 0, 0, 0) /* argb alpha */
|
|
WRAP_CLEAR(0, 1, 0, 0) /* ya alpha */
|
|
|
|
WRAP_CLEAR(1, 1, 0, 0) /* vuya chroma */
|
|
WRAP_CLEAR(0, 1, 1, 0) /* yuva chroma */
|
|
WRAP_CLEAR(0, 0, 1, 1) /* ayuv chroma */
|
|
WRAP_CLEAR(1, 0, 1, 0) /* uyva chroma */
|
|
WRAP_CLEAR(0, 1, 0, 1) /* xvyu chroma */
|
|
|
|
WRAP_CLEAR(0, 1, 1, 1) /* gray -> yuva */
|
|
WRAP_CLEAR(1, 0, 1, 1) /* gray -> ayuv */
|
|
WRAP_CLEAR(1, 1, 0, 1) /* gray -> vuya */
|
|
|
|
DECL_PATTERN(min)
|
|
{
|
|
SWS_LOOP
|
|
for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
|
|
if (X)
|
|
x[i] = FFMIN(x[i], impl->priv.px[0]);
|
|
if (Y)
|
|
y[i] = FFMIN(y[i], impl->priv.px[1]);
|
|
if (Z)
|
|
z[i] = FFMIN(z[i], impl->priv.px[2]);
|
|
if (W)
|
|
w[i] = FFMIN(w[i], impl->priv.px[3]);
|
|
}
|
|
|
|
CONTINUE(x, y, z, w);
|
|
}
|
|
|
|
DECL_PATTERN(max)
|
|
{
|
|
SWS_LOOP
|
|
for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
|
|
if (X)
|
|
x[i] = FFMAX(x[i], impl->priv.px[0]);
|
|
if (Y)
|
|
y[i] = FFMAX(y[i], impl->priv.px[1]);
|
|
if (Z)
|
|
z[i] = FFMAX(z[i], impl->priv.px[2]);
|
|
if (W)
|
|
w[i] = FFMAX(w[i], impl->priv.px[3]);
|
|
}
|
|
|
|
CONTINUE(x, y, z, w);
|
|
}
|
|
|
|
WRAP_COMMON_PATTERNS(min,
|
|
.op = SWS_OP_MIN,
|
|
.setup = ff_sws_setup_clamp,
|
|
.flexible = true,
|
|
);
|
|
|
|
WRAP_COMMON_PATTERNS(max,
|
|
.op = SWS_OP_MAX,
|
|
.setup = ff_sws_setup_clamp,
|
|
.flexible = true,
|
|
);
|
|
|
|
DECL_PATTERN(scale)
|
|
{
|
|
const pixel_t scale = impl->priv.px[0];
|
|
|
|
SWS_LOOP
|
|
for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
|
|
if (X)
|
|
x[i] *= scale;
|
|
if (Y)
|
|
y[i] *= scale;
|
|
if (Z)
|
|
z[i] *= scale;
|
|
if (W)
|
|
w[i] *= scale;
|
|
}
|
|
|
|
CONTINUE(x, y, z, w);
|
|
}
|
|
|
|
WRAP_COMMON_PATTERNS(scale,
|
|
.op = SWS_OP_SCALE,
|
|
.setup = ff_sws_setup_scale,
|
|
.flexible = true,
|
|
);
|
|
|
|
DECL_SETUP(setup_filter_v, params, out)
|
|
{
|
|
const SwsFilterWeights *filter = params->op->rw.kernel;
|
|
static_assert(sizeof(out->priv.ptr) <= sizeof(int32_t[2]),
|
|
">8 byte pointers not supported");
|
|
|
|
/* Pre-convert weights to float */
|
|
float *weights = av_calloc(filter->num_weights, sizeof(float));
|
|
if (!weights)
|
|
return AVERROR(ENOMEM);
|
|
|
|
for (int i = 0; i < filter->num_weights; i++)
|
|
weights[i] = (float) filter->weights[i] / SWS_FILTER_SCALE;
|
|
|
|
out->priv.ptr = weights;
|
|
out->priv.i32[2] = filter->filter_size;
|
|
out->free = ff_op_priv_free;
|
|
return 0;
|
|
}
|
|
|
|
/* Fully general vertical planar filter case */
|
|
DECL_READ(filter_v, const int elems)
|
|
{
|
|
const SwsOpExec *exec = iter->exec;
|
|
const float *restrict weights = impl->priv.ptr;
|
|
const int filter_size = impl->priv.i32[2];
|
|
weights += filter_size * iter->y;
|
|
|
|
f32block_t xs, ys, zs, ws;
|
|
memset(xs, 0, sizeof(xs));
|
|
if (elems > 1)
|
|
memset(ys, 0, sizeof(ys));
|
|
if (elems > 2)
|
|
memset(zs, 0, sizeof(zs));
|
|
if (elems > 3)
|
|
memset(ws, 0, sizeof(ws));
|
|
|
|
for (int j = 0; j < filter_size; j++) {
|
|
const float weight = weights[j];
|
|
|
|
SWS_LOOP
|
|
for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
|
|
xs[i] += weight * in0[i];
|
|
if (elems > 1)
|
|
ys[i] += weight * in1[i];
|
|
if (elems > 2)
|
|
zs[i] += weight * in2[i];
|
|
if (elems > 3)
|
|
ws[i] += weight * in3[i];
|
|
}
|
|
|
|
in0 = bump_ptr(in0, exec->in_stride[0]);
|
|
if (elems > 1)
|
|
in1 = bump_ptr(in1, exec->in_stride[1]);
|
|
if (elems > 2)
|
|
in2 = bump_ptr(in2, exec->in_stride[2]);
|
|
if (elems > 3)
|
|
in3 = bump_ptr(in3, exec->in_stride[3]);
|
|
}
|
|
|
|
for (int i = 0; i < elems; i++)
|
|
iter->in[i] += sizeof(block_t);
|
|
|
|
CONTINUE(xs, ys, zs, ws);
|
|
}
|
|
|
|
DECL_SETUP(setup_filter_h, params, out)
|
|
{
|
|
SwsFilterWeights *filter = params->op->rw.kernel;
|
|
out->priv.ptr = av_refstruct_ref(filter->weights);
|
|
out->priv.i32[2] = filter->filter_size;
|
|
out->free = ff_op_priv_unref;
|
|
return 0;
|
|
}
|
|
|
|
/* Fully general horizontal planar filter case */
|
|
DECL_READ(filter_h, const int elems)
|
|
{
|
|
const SwsOpExec *exec = iter->exec;
|
|
const int *restrict weights = impl->priv.ptr;
|
|
const int filter_size = impl->priv.i32[2];
|
|
const float scale = 1.0f / SWS_FILTER_SCALE;
|
|
const int xpos = iter->x;
|
|
weights += filter_size * iter->x;
|
|
|
|
f32block_t xs, ys, zs, ws;
|
|
for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
|
|
const int offset = exec->in_offset_x[xpos + i];
|
|
pixel_t *start0 = bump_ptr(in0, offset);
|
|
pixel_t *start1 = bump_ptr(in1, offset);
|
|
pixel_t *start2 = bump_ptr(in2, offset);
|
|
pixel_t *start3 = bump_ptr(in3, offset);
|
|
|
|
inter_t sx = 0, sy = 0, sz = 0, sw = 0;
|
|
for (int j = 0; j < filter_size; j++) {
|
|
const int weight = weights[j];
|
|
sx += weight * start0[j];
|
|
if (elems > 1)
|
|
sy += weight * start1[j];
|
|
if (elems > 2)
|
|
sz += weight * start2[j];
|
|
if (elems > 3)
|
|
sw += weight * start3[j];
|
|
}
|
|
|
|
xs[i] = (float) sx * scale;
|
|
if (elems > 1)
|
|
ys[i] = (float) sy * scale;
|
|
if (elems > 2)
|
|
zs[i] = (float) sz * scale;
|
|
if (elems > 3)
|
|
ws[i] = (float) sw * scale;
|
|
|
|
weights += filter_size;
|
|
}
|
|
|
|
CONTINUE(xs, ys, zs, ws);
|
|
}
|
|
|
|
#define WRAP_FILTER(FUNC, DIR, ELEMS, SUFFIX) \
|
|
static av_flatten void fn(FUNC##ELEMS##SUFFIX)(SwsOpIter *restrict iter, \
|
|
const SwsOpImpl *restrict impl, \
|
|
void *restrict x, void *restrict y,\
|
|
void *restrict z, void *restrict w)\
|
|
{ \
|
|
CALL_READ(FUNC##SUFFIX, ELEMS); \
|
|
} \
|
|
\
|
|
DECL_ENTRY(FUNC##ELEMS##SUFFIX, SWS_COMP_ELEMS(ELEMS), \
|
|
.op = SWS_OP_READ, \
|
|
.setup = fn(setup_filter##SUFFIX), \
|
|
.rw.elems = ELEMS, \
|
|
.rw.filter = SWS_OP_FILTER_##DIR, \
|
|
);
|
|
|
|
WRAP_FILTER(filter, V, 1, _v)
|
|
WRAP_FILTER(filter, V, 2, _v)
|
|
WRAP_FILTER(filter, V, 3, _v)
|
|
WRAP_FILTER(filter, V, 4, _v)
|
|
|
|
WRAP_FILTER(filter, H, 1, _h)
|
|
WRAP_FILTER(filter, H, 2, _h)
|
|
WRAP_FILTER(filter, H, 3, _h)
|
|
WRAP_FILTER(filter, H, 4, _h)
|
|
|
|
static void fn(process)(const SwsOpExec *exec, const void *priv,
|
|
const int bx_start, const int y_start,
|
|
int bx_end, int y_end)
|
|
{
|
|
const SwsOpChain *chain = priv;
|
|
const SwsOpImpl *impl = chain->impl;
|
|
u32block_t x, y, z, w; /* allocate enough space for any intermediate */
|
|
|
|
SwsOpIter iterdata;
|
|
SwsOpIter *iter = &iterdata; /* for CONTINUE() macro to work */
|
|
iter->exec = exec;
|
|
for (int i = 0; i < 4; i++) {
|
|
iter->in[i] = (uintptr_t) exec->in[i];
|
|
iter->out[i] = (uintptr_t) exec->out[i];
|
|
}
|
|
|
|
for (iter->y = y_start; iter->y < y_end; iter->y++) {
|
|
for (int block = bx_start; block < bx_end; block++) {
|
|
iter->x = block * SWS_BLOCK_SIZE;
|
|
CONTINUE(x, y, z, w);
|
|
}
|
|
|
|
const int y_bump = exec->in_bump_y ? exec->in_bump_y[iter->y] : 0;
|
|
for (int i = 0; i < 4; i++) {
|
|
iter->in[i] += exec->in_bump[i] + y_bump * exec->in_stride[i];
|
|
iter->out[i] += exec->out_bump[i];
|
|
}
|
|
}
|
|
}
|