diff --git a/libswscale/ops_backend.c b/libswscale/ops_backend.c index 248a591fd2..a503139016 100644 --- a/libswscale/ops_backend.c +++ b/libswscale/ops_backend.c @@ -53,18 +53,20 @@ static void process(const SwsOpExec *exec, const void *priv, { const SwsOpChain *chain = priv; const SwsOpImpl *impl = chain->impl; - SwsOpIter iter; + u32block_t x, y, z, w; /* allocate enough space for any intermediate */ - for (iter.y = y_start; iter.y < y_end; iter.y++) { + SwsOpIter iterdata; + SwsOpIter *iter = &iterdata; /* for CONTINUE() macro to work */ + + for (iter->y = y_start; iter->y < y_end; iter->y++) { for (int i = 0; i < 4; i++) { - iter.in[i] = exec->in[i] + (iter.y - y_start) * exec->in_stride[i]; - iter.out[i] = exec->out[i] + (iter.y - y_start) * exec->out_stride[i]; + iter->in[i] = exec->in[i] + (iter->y - y_start) * exec->in_stride[i]; + iter->out[i] = exec->out[i] + (iter->y - y_start) * exec->out_stride[i]; } for (int block = bx_start; block < bx_end; block++) { - iter.x = block * SWS_BLOCK_SIZE; - ((void (*)(SwsOpIter *, const SwsOpImpl *)) impl->cont) - (&iter, &impl[1]); + iter->x = block * SWS_BLOCK_SIZE; + CONTINUE(u32block_t, x, y, z, w); } } } diff --git a/libswscale/ops_backend.h b/libswscale/ops_backend.h index 4a1794af8a..b1616f6b02 100644 --- a/libswscale/ops_backend.h +++ b/libswscale/ops_backend.h @@ -78,13 +78,9 @@ typedef struct SwsOpIter { __VA_ARGS__) #define DECL_READ(NAME, ...) \ - static av_always_inline void fn(NAME)(SwsOpIter *restrict iter, \ - const SwsOpImpl *restrict impl, \ - const pixel_t *restrict in0, \ - const pixel_t *restrict in1, \ - const pixel_t *restrict in2, \ - const pixel_t *restrict in3, \ - __VA_ARGS__) + DECL_FUNC(NAME, const pixel_t *restrict in0, const pixel_t *restrict in1, \ + const pixel_t *restrict in2, const pixel_t *restrict in3, \ + __VA_ARGS__) #define DECL_WRITE(NAME, ...) \ DECL_FUNC(NAME, pixel_t *restrict out0, pixel_t *restrict out1, \ @@ -96,10 +92,9 @@ typedef struct SwsOpIter { fn(FUNC)(iter, impl, x, y, z, w, __VA_ARGS__) #define CALL_READ(FUNC, ...) \ - fn(FUNC)(iter, impl, (const pixel_t *) iter->in[0], \ - (const pixel_t *) iter->in[1], \ - (const pixel_t *) iter->in[2], \ - (const pixel_t *) iter->in[3], __VA_ARGS__) + CALL(FUNC, (const pixel_t *) iter->in[0], (const pixel_t *) iter->in[1], \ + (const pixel_t *) iter->in[2], (const pixel_t *) iter->in[3], \ + __VA_ARGS__) #define CALL_WRITE(FUNC, ...) \ CALL(FUNC, (pixel_t *) iter->out[0], (pixel_t *) iter->out[1], \ @@ -112,10 +107,6 @@ typedef struct SwsOpIter { block_t x, block_t y, \ block_t z, block_t w) -#define DECL_IMPL_READ(NAME) \ - static SWS_FUNC void fn(NAME)(SwsOpIter *restrict iter, \ - const SwsOpImpl *restrict impl) - /* Helper macro to call into the next continuation with a given type */ #define CONTINUE(TYPE, ...) \ ((void (*)(SwsOpIter *, const SwsOpImpl *, \ diff --git a/libswscale/ops_tmpl_int.c b/libswscale/ops_tmpl_int.c index 84596e2763..d9870faf34 100644 --- a/libswscale/ops_tmpl_int.c +++ b/libswscale/ops_tmpl_int.c @@ -58,8 +58,6 @@ DECL_READ(read_planar, const int elems) { - block_t x, y, z, w; - SWS_LOOP for (int i = 0; i < SWS_BLOCK_SIZE; i++) { x[i] = in0[i]; @@ -76,8 +74,6 @@ DECL_READ(read_planar, const int elems) DECL_READ(read_packed, const int elems) { - block_t x, y, z, w; - SWS_LOOP for (int i = 0; i < SWS_BLOCK_SIZE; i++) { x[i] = in0[elems * i + 0]; @@ -121,7 +117,7 @@ DECL_WRITE(write_packed, const int elems) } #define WRAP_READ(FUNC, ELEMS, FRAC, PACKED) \ -DECL_IMPL_READ(FUNC##ELEMS) \ +DECL_IMPL(FUNC##ELEMS) \ { \ CALL_READ(FUNC, ELEMS); \ for (int i = 0; i < (PACKED ? 1 : ELEMS); i++) \ @@ -173,8 +169,6 @@ WRAP_WRITE(write_packed, 4, 0, true) #if BIT_DEPTH == 8 DECL_READ(read_nibbles, const int elems) { - block_t x, y, z, w; - SWS_LOOP for (int i = 0; i < SWS_BLOCK_SIZE; i += 2) { const pixel_t val = ((const pixel_t *) in0)[i >> 1]; @@ -187,8 +181,6 @@ DECL_READ(read_nibbles, const int elems) DECL_READ(read_bits, const int elems) { - block_t x, y, z, w; - SWS_LOOP for (int i = 0; i < SWS_BLOCK_SIZE; i += 8) { const pixel_t val = ((const pixel_t *) in0)[i >> 3];