From 35174913ac992082bc08d2e7bd0d15b9306c01a9 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Tue, 14 Apr 2026 15:43:03 +0200 Subject: [PATCH] swscale/ops_dispatch: fix and generalize tail buffer size calculation This code had two issues: 1. It was over-allocating bytes for the input offset map case, and 2. It was hard-coding the assumption that there is only a single tail block We can fix both of these issues by rewriting the way the tail size is derived. In the non-offset case, and assuming only 1 tail block: aligned_w - safe_width = num_blocks * block_size - (num_blocks - 1) * block_size = block_size Additionally, the FFMAX(tail_size_in/out) is unnecessary, because: tail_size = pass->width - safe_width <= aligned_w - safe_width In the input offset case, we instead realize that the input kernel already never reads past the image boundaries due to the filter size adjustment/clamping, so the only thing we need to ensure is that we allocate extra bytes for the kernel's own over-read (comp->over_read). Signed-off-by: Niklas Haas --- libswscale/ops_dispatch.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/libswscale/ops_dispatch.c b/libswscale/ops_dispatch.c index 7d0de060f7..45bd194c77 100644 --- a/libswscale/ops_dispatch.c +++ b/libswscale/ops_dispatch.c @@ -235,20 +235,25 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in, p->tail_size_in = pixel_bytes(tail_size, p->pixel_bits_in, AV_ROUND_UP); } + const size_t alloc_width = aligned_w - safe_width; for (int i = 0; memcpy_in && i < p->planes_in; i++) { - size_t block_size = pixel_bytes(comp->block_size, p->pixel_bits_in, AV_ROUND_UP); - block_size += comp->over_read; - block_size = FFMAX(block_size, p->tail_size_in); - tail->in_stride[i] = FFALIGN(block_size, align); + size_t needed_size; + if (exec->in_offset_x) { + /* The input offset map is already padded to multiples of the block + * size, and clamps the input offsets to the image boundaries; so + * we just need to compensate for the
comp->over_read */ + needed_size = p->tail_size_in; + } else { + needed_size = pixel_bytes(alloc_width, p->pixel_bits_in, AV_ROUND_UP); + } + tail->in_stride[i] = FFALIGN(needed_size + comp->over_read, align); tail->in_bump[i] = tail->in_stride[i] - exec->block_size_in; alloc_size += tail->in_stride[i] * in->height; } for (int i = 0; p->memcpy_out && i < p->planes_out; i++) { - size_t block_size = pixel_bytes(comp->block_size, p->pixel_bits_out, AV_ROUND_UP); - block_size += comp->over_write; - block_size = FFMAX(block_size, p->tail_size_out); - tail->out_stride[i] = FFALIGN(block_size, align); + size_t needed_size = pixel_bytes(alloc_width, p->pixel_bits_out, AV_ROUND_UP); + tail->out_stride[i] = FFALIGN(needed_size + comp->over_write, align); tail->out_bump[i] = tail->out_stride[i] - exec->block_size_out; alloc_size += tail->out_stride[i] * out->height; }