mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 12:50:49 +08:00
swscale/ops_dispatch: fix and generalize tail buffer size calculation
This code had two issues:
1. It was over-allocating bytes for the input offset map case, and
2. It was hard-coding the assumption that there is only a single tail block.
We can fix both of these issues by rewriting the way the tail size is derived.
In the non-offset case, and assuming only 1 tail block:
aligned_w - safe_width
= num_blocks * block_size - (num_blocks - 1) * block_size
= block_size
Additionally, the FFMAX(tail_size_in/out) is unnecessary, because:
tail_size = pass->width - safe_width <= aligned_w - safe_width
In the input offset case, we instead realize that the filter size
adjustment/clamping already prevents the input kernel from reading past the
image boundaries, so the only thing we need to ensure is that we allocate
extra bytes for the kernel's own over-read (comp->over_read).
Signed-off-by: Niklas Haas <git@haasn.dev>
This commit is contained in:
@@ -235,20 +235,25 @@ static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
|
||||
p->tail_size_in = pixel_bytes(tail_size, p->pixel_bits_in, AV_ROUND_UP);
|
||||
}
|
||||
|
||||
const size_t alloc_width = aligned_w - safe_width;
|
||||
for (int i = 0; memcpy_in && i < p->planes_in; i++) {
|
||||
size_t block_size = pixel_bytes(comp->block_size, p->pixel_bits_in, AV_ROUND_UP);
|
||||
block_size += comp->over_read;
|
||||
block_size = FFMAX(block_size, p->tail_size_in);
|
||||
tail->in_stride[i] = FFALIGN(block_size, align);
|
||||
size_t needed_size;
|
||||
if (exec->in_offset_x) {
|
||||
/* The input offset map is already padded to multiples of the block
|
||||
* size, and clamps the input offsets to the image boundaries; so
|
||||
* we just need to compensate for the comp->over_read */
|
||||
needed_size = p->tail_size_in;
|
||||
} else {
|
||||
needed_size = pixel_bytes(alloc_width, p->pixel_bits_in, AV_ROUND_UP);
|
||||
}
|
||||
tail->in_stride[i] = FFALIGN(needed_size + comp->over_read, align);
|
||||
tail->in_bump[i] = tail->in_stride[i] - exec->block_size_in;
|
||||
alloc_size += tail->in_stride[i] * in->height;
|
||||
}
|
||||
|
||||
for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
|
||||
size_t block_size = pixel_bytes(comp->block_size, p->pixel_bits_out, AV_ROUND_UP);
|
||||
block_size += comp->over_write;
|
||||
block_size = FFMAX(block_size, p->tail_size_out);
|
||||
tail->out_stride[i] = FFALIGN(block_size, align);
|
||||
size_t needed_size = pixel_bytes(alloc_width, p->pixel_bits_out, AV_ROUND_UP);
|
||||
tail->out_stride[i] = FFALIGN(needed_size + comp->over_write, align);
|
||||
tail->out_bump[i] = tail->out_stride[i] - exec->block_size_out;
|
||||
alloc_size += tail->out_stride[i] * out->height;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user