From 0e983a060451bc46f320274b5a0c10f492b555ac Mon Sep 17 00:00:00 2001
From: Niklas Haas
Date: Wed, 25 Mar 2026 14:09:30 +0100
Subject: [PATCH] swscale: align allocated frame buffers to SwsPass hints

This avoids hitting the slow memcpy fallback paths altogether, whenever
swscale.c is handling plane allocation.

Signed-off-by: Niklas Haas
---
 libswscale/graph.c   | 11 +++++++++++
 libswscale/graph.h   |  5 +++++
 libswscale/swscale.c | 24 +++++++++++++++++-------
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/libswscale/graph.c b/libswscale/graph.c
index 6836f8cf60..2020fcf8c7 100644
--- a/libswscale/graph.c
+++ b/libswscale/graph.c
@@ -38,6 +38,17 @@
 #include "graph.h"
 #include "ops.h"
 
+int ff_sws_pass_aligned_width(const SwsPass *pass, int width)
+{
+    if (!pass) /* no pass to take hints from: width is returned as-is */
+        return width;
+
+    size_t aligned_w = width; /* unsigned math; range-checked before returning */
+    aligned_w = FFALIGN(aligned_w, pass->output->width_align);
+    aligned_w += pass->output->width_pad;
+    return aligned_w <= INT_MAX ? aligned_w : width; /* unaligned fallback */
+}
+
 /* Allocates one buffer per plane */
 static int frame_alloc_planes(AVFrame *dst)
 {
diff --git a/libswscale/graph.h b/libswscale/graph.h
index 886f0d2996..116f2a89bf 100644
--- a/libswscale/graph.h
+++ b/libswscale/graph.h
@@ -110,6 +110,11 @@ struct SwsPass {
     void *priv;
 };
 
+/** Align `width` to the optimal size for `pass`: FFALIGN() it to the pass
+ * output's width_align, then add its width_pad. Returns `width` unchanged
+ * when `pass` is NULL or the padded result would exceed INT_MAX. */
+int ff_sws_pass_aligned_width(const SwsPass *pass, int width);
+
 /**
  * Filter graph, which represents a 'baked' pixel format conversion.
  */
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index d5c91e541c..3ebbcd5d3f 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1479,14 +1479,9 @@ int sws_frame_setup(SwsContext *ctx, const AVFrame *dst, const AVFrame *src)
         if (ret < 0)
             return ret;
 #endif
-    } else {
-        /* Software frames */
-        ret = ff_frame_pool_video_reinit(&s->frame_pool, dst->width, dst->height,
-                                         dst->format, av_cpu_max_align());
-        if (ret < 0)
-            return ret;
     }
 
+    int dst_width = dst->width; /* grows to the widest aligned width of any field */
     for (int field = 0; field < 2; field++) {
         SwsFormat src_fmt = ff_fmt_from_frame(src, field);
         SwsFormat dst_fmt = ff_fmt_from_frame(dst, field);
@@ -1512,12 +1507,20 @@ int sws_frame_setup(SwsContext *ctx, const AVFrame *dst, const AVFrame *src)
             goto fail;
         }
 
-        if (s->graph[field]->incomplete && ctx->flags & SWS_STRICT) {
+        const SwsGraph *graph = s->graph[field];
+        if (graph->incomplete && ctx->flags & SWS_STRICT) {
             err_msg = "Incomplete scaling graph";
             ret = AVERROR(EINVAL);
             goto fail;
         }
 
+        if (!graph->noop) {
+            av_assert0(graph->num_passes); /* passes[num_passes - 1] is read next */
+            const SwsPass *last_pass = graph->passes[graph->num_passes - 1];
+            const int aligned_w = ff_sws_pass_aligned_width(last_pass, dst->width);
+            dst_width = FFMAX(dst_width, aligned_w);
+        }
+
         if (!src_fmt.interlaced) {
             ff_sws_graph_free(&s->graph[FIELD_BOTTOM]);
             break;
@@ -1540,6 +1543,13 @@ int sws_frame_setup(SwsContext *ctx, const AVFrame *dst, const AVFrame *src)
         return ret;
     }
 
+    if (!dst->hw_frames_ctx) { /* dst has no hw frames context: size the pool with dst_width */
+        ret = ff_frame_pool_video_reinit(&s->frame_pool, dst_width, dst->height,
+                                         dst->format, av_cpu_max_align());
+        if (ret < 0)
+            return ret;
+    }
+
     return 0;
 }
