mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 21:00:41 +08:00
swscale/ops_backend: allocate block storage up-front
Allocate the block storage once, up-front in process(), instead of separately in each read() function. Not only is this slightly faster, due to promoting more tail calls, but it also allows us to have operation chains that don't start with a read. It also simplifies the implementations. Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <git@haasn.dev>
This commit is contained in:
@@ -53,18 +53,20 @@ static void process(const SwsOpExec *exec, const void *priv,
|
||||
{
|
||||
const SwsOpChain *chain = priv;
|
||||
const SwsOpImpl *impl = chain->impl;
|
||||
SwsOpIter iter;
|
||||
u32block_t x, y, z, w; /* allocate enough space for any intermediate */
|
||||
|
||||
for (iter.y = y_start; iter.y < y_end; iter.y++) {
|
||||
SwsOpIter iterdata;
|
||||
SwsOpIter *iter = &iterdata; /* for CONTINUE() macro to work */
|
||||
|
||||
for (iter->y = y_start; iter->y < y_end; iter->y++) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
iter.in[i] = exec->in[i] + (iter.y - y_start) * exec->in_stride[i];
|
||||
iter.out[i] = exec->out[i] + (iter.y - y_start) * exec->out_stride[i];
|
||||
iter->in[i] = exec->in[i] + (iter->y - y_start) * exec->in_stride[i];
|
||||
iter->out[i] = exec->out[i] + (iter->y - y_start) * exec->out_stride[i];
|
||||
}
|
||||
|
||||
for (int block = bx_start; block < bx_end; block++) {
|
||||
iter.x = block * SWS_BLOCK_SIZE;
|
||||
((void (*)(SwsOpIter *, const SwsOpImpl *)) impl->cont)
|
||||
(&iter, &impl[1]);
|
||||
iter->x = block * SWS_BLOCK_SIZE;
|
||||
CONTINUE(u32block_t, x, y, z, w);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,13 +78,9 @@ typedef struct SwsOpIter {
|
||||
__VA_ARGS__)
|
||||
|
||||
#define DECL_READ(NAME, ...) \
|
||||
static av_always_inline void fn(NAME)(SwsOpIter *restrict iter, \
|
||||
const SwsOpImpl *restrict impl, \
|
||||
const pixel_t *restrict in0, \
|
||||
const pixel_t *restrict in1, \
|
||||
const pixel_t *restrict in2, \
|
||||
const pixel_t *restrict in3, \
|
||||
__VA_ARGS__)
|
||||
DECL_FUNC(NAME, const pixel_t *restrict in0, const pixel_t *restrict in1, \
|
||||
const pixel_t *restrict in2, const pixel_t *restrict in3, \
|
||||
__VA_ARGS__)
|
||||
|
||||
#define DECL_WRITE(NAME, ...) \
|
||||
DECL_FUNC(NAME, pixel_t *restrict out0, pixel_t *restrict out1, \
|
||||
@@ -96,10 +92,9 @@ typedef struct SwsOpIter {
|
||||
fn(FUNC)(iter, impl, x, y, z, w, __VA_ARGS__)
|
||||
|
||||
#define CALL_READ(FUNC, ...) \
|
||||
fn(FUNC)(iter, impl, (const pixel_t *) iter->in[0], \
|
||||
(const pixel_t *) iter->in[1], \
|
||||
(const pixel_t *) iter->in[2], \
|
||||
(const pixel_t *) iter->in[3], __VA_ARGS__)
|
||||
CALL(FUNC, (const pixel_t *) iter->in[0], (const pixel_t *) iter->in[1], \
|
||||
(const pixel_t *) iter->in[2], (const pixel_t *) iter->in[3], \
|
||||
__VA_ARGS__)
|
||||
|
||||
#define CALL_WRITE(FUNC, ...) \
|
||||
CALL(FUNC, (pixel_t *) iter->out[0], (pixel_t *) iter->out[1], \
|
||||
@@ -112,10 +107,6 @@ typedef struct SwsOpIter {
|
||||
block_t x, block_t y, \
|
||||
block_t z, block_t w)
|
||||
|
||||
#define DECL_IMPL_READ(NAME) \
|
||||
static SWS_FUNC void fn(NAME)(SwsOpIter *restrict iter, \
|
||||
const SwsOpImpl *restrict impl)
|
||||
|
||||
/* Helper macro to call into the next continuation with a given type */
|
||||
#define CONTINUE(TYPE, ...) \
|
||||
((void (*)(SwsOpIter *, const SwsOpImpl *, \
|
||||
|
||||
@@ -58,8 +58,6 @@
|
||||
|
||||
DECL_READ(read_planar, const int elems)
|
||||
{
|
||||
block_t x, y, z, w;
|
||||
|
||||
SWS_LOOP
|
||||
for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
|
||||
x[i] = in0[i];
|
||||
@@ -76,8 +74,6 @@ DECL_READ(read_planar, const int elems)
|
||||
|
||||
DECL_READ(read_packed, const int elems)
|
||||
{
|
||||
block_t x, y, z, w;
|
||||
|
||||
SWS_LOOP
|
||||
for (int i = 0; i < SWS_BLOCK_SIZE; i++) {
|
||||
x[i] = in0[elems * i + 0];
|
||||
@@ -121,7 +117,7 @@ DECL_WRITE(write_packed, const int elems)
|
||||
}
|
||||
|
||||
#define WRAP_READ(FUNC, ELEMS, FRAC, PACKED) \
|
||||
DECL_IMPL_READ(FUNC##ELEMS) \
|
||||
DECL_IMPL(FUNC##ELEMS) \
|
||||
{ \
|
||||
CALL_READ(FUNC, ELEMS); \
|
||||
for (int i = 0; i < (PACKED ? 1 : ELEMS); i++) \
|
||||
@@ -173,8 +169,6 @@ WRAP_WRITE(write_packed, 4, 0, true)
|
||||
#if BIT_DEPTH == 8
|
||||
DECL_READ(read_nibbles, const int elems)
|
||||
{
|
||||
block_t x, y, z, w;
|
||||
|
||||
SWS_LOOP
|
||||
for (int i = 0; i < SWS_BLOCK_SIZE; i += 2) {
|
||||
const pixel_t val = ((const pixel_t *) in0)[i >> 1];
|
||||
@@ -187,8 +181,6 @@ DECL_READ(read_nibbles, const int elems)
|
||||
|
||||
DECL_READ(read_bits, const int elems)
|
||||
{
|
||||
block_t x, y, z, w;
|
||||
|
||||
SWS_LOOP
|
||||
for (int i = 0; i < SWS_BLOCK_SIZE; i += 8) {
|
||||
const pixel_t val = ((const pixel_t *) in0)[i >> 3];
|
||||
|
||||
Reference in New Issue
Block a user