mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-22 05:40:27 +08:00
Instead of implicitly relying on SwsComps.unused, which contains the exact same information. (cf. ff_sws_op_list_update_comps) Signed-off-by: Niklas Haas <git@haasn.dev>
234 lines
8.7 KiB
C
234 lines
8.7 KiB
C
/*
 * Copyright (C) 2026 Ramiro Polla
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
|
|
|
/**
 * NOTE: This file is #include'd directly by both the NEON backend and
 * the sws_ops_aarch64 tool.
 */
|
|
|
|
#include "libavutil/error.h"
|
|
#include "libavutil/rational.h"
|
|
#include "libswscale/ops.h"
|
|
|
|
#include "ops_impl.h"
|
|
|
|
/**
 * Translate a SwsPixelType into the corresponding AARCH64_PIXEL_* value.
 *
 * @param type pixel type to translate
 * @return the matching AARCH64_PIXEL_* value, or 0 for any type that has
 *         no explicit mapping here
 */
static uint8_t sws_pixel_to_aarch64(SwsPixelType type)
{
    uint8_t aarch64_type = 0;

    if (type == SWS_PIXEL_U8)
        aarch64_type = AARCH64_PIXEL_U8;
    else if (type == SWS_PIXEL_U16)
        aarch64_type = AARCH64_PIXEL_U16;
    else if (type == SWS_PIXEL_U32)
        aarch64_type = AARCH64_PIXEL_U32;
    else if (type == SWS_PIXEL_F32)
        aarch64_type = AARCH64_PIXEL_F32;

    return aarch64_type;
}
|
|
|
|
/**
 * SwsLinearOp.mask numbers its columns in affine transform order, where the
 * offset is the last element. SwsAArch64LinearOpMask, on the other hand,
 * numbers them in execution order, where the offset is the first element.
 *
 * @param idx column index in SwsLinearOp order; must be in the range 0..4
 * @return the corresponding column index in SwsAArch64LinearOpMask order
 */
static int linear_index_from_sws_op(int idx)
{
    /* The offset column (4) moves to the front; all others shift up by one. */
    return (idx + 1) % 5;
}
|
|
|
|
/**
|
|
* Convert SwsOp to a SwsAArch64OpImplParams. Read the comments regarding
|
|
* SwsAArch64OpImplParams in ops_impl.h for more information.
|
|
*/
|
|
static int convert_to_aarch64_impl(SwsContext *ctx, const SwsOpList *ops, int n,
|
|
int block_size, SwsAArch64OpImplParams *out)
|
|
{
|
|
const SwsOp *op = &ops->ops[n];
|
|
|
|
out->block_size = block_size;
|
|
|
|
/**
|
|
* Most SwsOp work on fields described by SWS_OP_NEEDED().
|
|
* The few that don't will override this field later.
|
|
*/
|
|
out->mask = 0;
|
|
for (int i = 0; i < 4; i++) {
|
|
if (SWS_OP_NEEDED(op, i))
|
|
MASK_SET(out->mask, i, 1);
|
|
}
|
|
|
|
out->type = sws_pixel_to_aarch64(op->type);
|
|
|
|
/* Map SwsOpType to SwsAArch64OpType */
|
|
switch (op->op) {
|
|
case SWS_OP_READ:
|
|
if (op->rw.filter)
|
|
return AVERROR(ENOTSUP);
|
|
/**
|
|
* The different types of read operations have been split into
|
|
* their own SwsAArch64OpType to simplify the implementation.
|
|
*/
|
|
if (op->rw.frac == 1)
|
|
out->op = AARCH64_SWS_OP_READ_NIBBLE;
|
|
else if (op->rw.frac == 3)
|
|
out->op = AARCH64_SWS_OP_READ_BIT;
|
|
else if (op->rw.packed && op->rw.elems != 1)
|
|
out->op = AARCH64_SWS_OP_READ_PACKED;
|
|
else
|
|
out->op = AARCH64_SWS_OP_READ_PLANAR;
|
|
break;
|
|
case SWS_OP_WRITE:
|
|
if (op->rw.filter)
|
|
return AVERROR(ENOTSUP);
|
|
/**
|
|
* The different types of write operations have been split into
|
|
* their own SwsAArch64OpType to simplify the implementation.
|
|
*/
|
|
if (op->rw.frac == 1)
|
|
out->op = AARCH64_SWS_OP_WRITE_NIBBLE;
|
|
else if (op->rw.frac == 3)
|
|
out->op = AARCH64_SWS_OP_WRITE_BIT;
|
|
else if (op->rw.packed && op->rw.elems != 1)
|
|
out->op = AARCH64_SWS_OP_WRITE_PACKED;
|
|
else
|
|
out->op = AARCH64_SWS_OP_WRITE_PLANAR;
|
|
break;
|
|
case SWS_OP_SWAP_BYTES: out->op = AARCH64_SWS_OP_SWAP_BYTES; break;
|
|
case SWS_OP_SWIZZLE: out->op = AARCH64_SWS_OP_SWIZZLE; break;
|
|
case SWS_OP_UNPACK: out->op = AARCH64_SWS_OP_UNPACK; break;
|
|
case SWS_OP_PACK: out->op = AARCH64_SWS_OP_PACK; break;
|
|
case SWS_OP_LSHIFT: out->op = AARCH64_SWS_OP_LSHIFT; break;
|
|
case SWS_OP_RSHIFT: out->op = AARCH64_SWS_OP_RSHIFT; break;
|
|
case SWS_OP_CLEAR: out->op = AARCH64_SWS_OP_CLEAR; break;
|
|
case SWS_OP_CONVERT:
|
|
out->op = op->convert.expand ? AARCH64_SWS_OP_EXPAND : AARCH64_SWS_OP_CONVERT;
|
|
break;
|
|
case SWS_OP_MIN: out->op = AARCH64_SWS_OP_MIN; break;
|
|
case SWS_OP_MAX: out->op = AARCH64_SWS_OP_MAX; break;
|
|
case SWS_OP_SCALE: out->op = AARCH64_SWS_OP_SCALE; break;
|
|
case SWS_OP_LINEAR: out->op = AARCH64_SWS_OP_LINEAR; break;
|
|
case SWS_OP_DITHER: out->op = AARCH64_SWS_OP_DITHER; break;
|
|
}
|
|
|
|
switch (out->op) {
|
|
case AARCH64_SWS_OP_READ_BIT:
|
|
case AARCH64_SWS_OP_READ_NIBBLE:
|
|
case AARCH64_SWS_OP_READ_PACKED:
|
|
case AARCH64_SWS_OP_READ_PLANAR:
|
|
case AARCH64_SWS_OP_WRITE_BIT:
|
|
case AARCH64_SWS_OP_WRITE_NIBBLE:
|
|
case AARCH64_SWS_OP_WRITE_PACKED:
|
|
case AARCH64_SWS_OP_WRITE_PLANAR:
|
|
switch (op->rw.elems) {
|
|
case 1: out->mask = 0x0001; break;
|
|
case 2: out->mask = 0x0011; break;
|
|
case 3: out->mask = 0x0111; break;
|
|
case 4: out->mask = 0x1111; break;
|
|
};
|
|
break;
|
|
case AARCH64_SWS_OP_SWAP_BYTES:
|
|
/* Only the element size matters, not the type. */
|
|
if (out->type == AARCH64_PIXEL_F32)
|
|
out->type = AARCH64_PIXEL_U32;
|
|
break;
|
|
case AARCH64_SWS_OP_SWIZZLE:
|
|
out->mask = 0;
|
|
MASK_SET(out->mask, 0, op->swizzle.in[0] != 0);
|
|
MASK_SET(out->mask, 1, op->swizzle.in[1] != 1);
|
|
MASK_SET(out->mask, 2, op->swizzle.in[2] != 2);
|
|
MASK_SET(out->mask, 3, op->swizzle.in[3] != 3);
|
|
MASK_SET(out->swizzle, 0, op->swizzle.in[0]);
|
|
MASK_SET(out->swizzle, 1, op->swizzle.in[1]);
|
|
MASK_SET(out->swizzle, 2, op->swizzle.in[2]);
|
|
MASK_SET(out->swizzle, 3, op->swizzle.in[3]);
|
|
/* The element size and type don't matter. */
|
|
out->block_size = block_size * ff_sws_pixel_type_size(op->type);
|
|
out->type = AARCH64_PIXEL_U8;
|
|
break;
|
|
case AARCH64_SWS_OP_UNPACK:
|
|
MASK_SET(out->pack, 0, op->pack.pattern[0]);
|
|
MASK_SET(out->pack, 1, op->pack.pattern[1]);
|
|
MASK_SET(out->pack, 2, op->pack.pattern[2]);
|
|
MASK_SET(out->pack, 3, op->pack.pattern[3]);
|
|
break;
|
|
case AARCH64_SWS_OP_PACK:
|
|
out->mask = 0;
|
|
for (int i = 0; i < 4 && op->pack.pattern[i]; i++)
|
|
MASK_SET(out->mask, i, 1);
|
|
MASK_SET(out->pack, 0, op->pack.pattern[0]);
|
|
MASK_SET(out->pack, 1, op->pack.pattern[1]);
|
|
MASK_SET(out->pack, 2, op->pack.pattern[2]);
|
|
MASK_SET(out->pack, 3, op->pack.pattern[3]);
|
|
break;
|
|
case AARCH64_SWS_OP_LSHIFT:
|
|
case AARCH64_SWS_OP_RSHIFT:
|
|
out->shift = op->shift.amount;
|
|
break;
|
|
case AARCH64_SWS_OP_CLEAR:
|
|
out->mask = 0;
|
|
MASK_SET(out->mask, 0, !!op->clear.value[0].den);
|
|
MASK_SET(out->mask, 1, !!op->clear.value[1].den);
|
|
MASK_SET(out->mask, 2, !!op->clear.value[2].den);
|
|
MASK_SET(out->mask, 3, !!op->clear.value[3].den);
|
|
break;
|
|
case AARCH64_SWS_OP_EXPAND:
|
|
case AARCH64_SWS_OP_CONVERT:
|
|
out->to_type = sws_pixel_to_aarch64(op->convert.to);
|
|
break;
|
|
case AARCH64_SWS_OP_LINEAR:
|
|
/**
|
|
* The out->linear.mask field packs the 4x5 matrix from SwsLinearOp as
|
|
* 2 bits per element:
|
|
* 00: m[i][j] == 0
|
|
* 01: m[i][j] == 1
|
|
* 11: m[i][j] is any other coefficient
|
|
*/
|
|
out->mask = 0;
|
|
for (int i = 0; i < 4; i++) {
|
|
/* Skip unused or identity rows */
|
|
if (!SWS_OP_NEEDED(op, i) || !(op->lin.mask & SWS_MASK_ROW(i)))
|
|
continue;
|
|
MASK_SET(out->mask, i, 1);
|
|
for (int j = 0; j < 5; j++) {
|
|
int jj = linear_index_from_sws_op(j);
|
|
if (!av_cmp_q(op->lin.m[i][j], av_make_q(1, 1)))
|
|
LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_1);
|
|
else if (av_cmp_q(op->lin.m[i][j], av_make_q(0, 1)))
|
|
LINEAR_MASK_SET(out->linear.mask, i, jj, LINEAR_MASK_X);
|
|
}
|
|
}
|
|
out->linear.fmla = !(ctx->flags & SWS_BITEXACT);
|
|
break;
|
|
case AARCH64_SWS_OP_DITHER:
|
|
out->mask = 0;
|
|
MASK_SET(out->mask, 0, op->dither.y_offset[0] >= 0);
|
|
MASK_SET(out->mask, 1, op->dither.y_offset[1] >= 0);
|
|
MASK_SET(out->mask, 2, op->dither.y_offset[2] >= 0);
|
|
MASK_SET(out->mask, 3, op->dither.y_offset[3] >= 0);
|
|
MASK_SET(out->dither.y_offset, 0, op->dither.y_offset[0]);
|
|
MASK_SET(out->dither.y_offset, 1, op->dither.y_offset[1]);
|
|
MASK_SET(out->dither.y_offset, 2, op->dither.y_offset[2]);
|
|
MASK_SET(out->dither.y_offset, 3, op->dither.y_offset[3]);
|
|
out->dither.size_log2 = op->dither.size_log2;
|
|
break;
|
|
}
|
|
|
|
return 0;
|
|
}
|