avfilter/scale_cuda: Add support for 4:2:2 chroma subsampling

The supported YUV pixel formats are now grouped into planar and
semi-planar families. Sharing kernels within each family reduces the
number of CUDA kernels needed to cover all pixel formats.

This patch:
1. Adds support for YUV 4:2:2 planar and semi-planar formats:
        yuv422p, yuv422p10, nv16, p210, p216
2. Implements new conversion structures and kernel definitions
        for planar and semi-planar formats

Signed-off-by: Diego de Souza <ddesouza@nvidia.com>
This commit is contained in:
Diego de Souza
2025-11-18 17:16:43 +01:00
committed by Timo Rothenpieler
parent 04b5e25d35
commit 75b8567591
5 changed files with 672 additions and 590 deletions

View File

@@ -678,6 +678,7 @@ SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h stack_internal.h
SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h stack_internal.h
SKIPHEADERS-$(CONFIG_VULKAN) += vulkan_filter.h
SKIPHEADERS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.h
TOOLS = graph2dot
TESTPROGS = drawutils filtfmts formats integral

View File

@@ -32,7 +32,7 @@
#include "version_major.h"
#define LIBAVFILTER_VERSION_MINOR 10
#define LIBAVFILTER_VERSION_MICRO 100
#define LIBAVFILTER_VERSION_MICRO 101
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \

View File

@@ -39,17 +39,29 @@
#include "cuda/load_helper.h"
#include "vf_scale_cuda.h"
static const enum AVPixelFormat supported_formats[] = {
AV_PIX_FMT_YUV420P,
AV_PIX_FMT_NV12,
AV_PIX_FMT_YUV444P,
AV_PIX_FMT_P010,
AV_PIX_FMT_P016,
AV_PIX_FMT_YUV444P16,
AV_PIX_FMT_0RGB32,
AV_PIX_FMT_0BGR32,
AV_PIX_FMT_RGB32,
AV_PIX_FMT_BGR32,
/* One row of the supported_formats table: a pixel format paired with a short
 * text tag. get_format_name() returns the tag for a given format. */
struct format_entry {
/* Pixel format this row describes. */
enum AVPixelFormat format;
/* NUL-terminated tag for the format. The longest tags in supported_formats
 * ("semiplanar10", "semiplanar16") are 12 characters, so 13 bytes is exactly
 * enough; a longer tag requires growing this array. */
char name[13];
};
/* Table of pixel formats accepted by this filter, each with the tag that
 * get_format_name() returns for it. cudascale_load_functions() inserts these
 * tags into the kernel symbol names it looks up (e.g. the
 * "Subsample_%s_%s_%s_uv" pattern), so every tag must match a name exported
 * by the CUDA module.
 * Several pixel formats share one tag: the YUV tags encode only the plane
 * layout (planar / semiplanar) and a bit depth, not the chroma subsampling. */
static const struct format_entry supported_formats[] = {
{AV_PIX_FMT_YUV420P, "planar8"},
{AV_PIX_FMT_YUV422P, "planar8"},
{AV_PIX_FMT_YUV444P, "planar8"},
{AV_PIX_FMT_YUV420P10,"planar10"},
{AV_PIX_FMT_YUV422P10,"planar10"},
{AV_PIX_FMT_YUV444P10,"planar10"},
{AV_PIX_FMT_YUV444P16,"planar16"},
{AV_PIX_FMT_NV12, "semiplanar8"},
{AV_PIX_FMT_NV16, "semiplanar8"},
{AV_PIX_FMT_P010, "semiplanar10"},
{AV_PIX_FMT_P210, "semiplanar10"},
{AV_PIX_FMT_P016, "semiplanar16"},
{AV_PIX_FMT_P216, "semiplanar16"},
/* NOTE(review): the packed RGB tags name a different component order than
 * the AV_PIX_FMT_* identifier (e.g. 0RGB32 -> "bgr0"); presumably this
 * reflects the in-memory byte order of the native-endian 32-bit formats --
 * confirm against the kernel names. */
{AV_PIX_FMT_0RGB32, "bgr0"},
{AV_PIX_FMT_0BGR32, "rgb0"},
{AV_PIX_FMT_RGB32, "bgra"},
{AV_PIX_FMT_BGR32, "rgba"},
};
#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
@@ -184,14 +196,20 @@ fail:
/* Report whether fmt appears in the supported_formats table.
 * Returns 1 when a matching entry is found and 0 otherwise.
 *
 * NOTE(review): this excerpt comes from a rendered diff, so two versions of
 * the lookup loop appear back to back: one comparing table elements directly
 * against fmt, and one comparing the .format member of each format_entry.
 * Only the latter matches the struct-based supported_formats table. */
static int format_is_supported(enum AVPixelFormat fmt)
{
int i;
for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
if (supported_formats[i] == fmt)
for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
if (supported_formats[i].format == fmt)
return 1;
return 0;
}
static const char* get_format_name(enum AVPixelFormat fmt)
{
for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
if (supported_formats[i].format == fmt)
return supported_formats[i].name;
return NULL;
}
static av_cold void set_format_info(AVFilterContext *ctx, enum AVPixelFormat in_format, enum AVPixelFormat out_format)
{
CUDAScaleContext *s = ctx->priv;
@@ -284,8 +302,8 @@ static av_cold int cudascale_load_functions(AVFilterContext *ctx)
char buf[128];
int ret;
const char *in_fmt_name = av_get_pix_fmt_name(s->in_fmt);
const char *out_fmt_name = av_get_pix_fmt_name(s->out_fmt);
const char *in_fmt_name = get_format_name(s->in_fmt);
const char *out_fmt_name = get_format_name(s->out_fmt);
const char *function_infix = "";
@@ -335,11 +353,13 @@ static av_cold int cudascale_load_functions(AVFilterContext *ctx)
ret = AVERROR(ENOSYS);
goto fail;
}
av_log(ctx, AV_LOG_DEBUG, "Luma filter: %s (%s -> %s)\n", buf, av_get_pix_fmt_name(s->in_fmt), av_get_pix_fmt_name(s->out_fmt));
snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s_uv", function_infix, in_fmt_name, out_fmt_name);
ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uv, s->cu_module, buf));
if (ret < 0)
goto fail;
av_log(ctx, AV_LOG_DEBUG, "Chroma filter: %s (%s -> %s)\n", buf, av_get_pix_fmt_name(s->in_fmt), av_get_pix_fmt_name(s->out_fmt));
fail:
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
@@ -416,26 +436,35 @@ fail:
/* Launch one resize kernel on the filter's CUDA stream.
 *
 * func        kernel to launch
 * src_tex     four source texture objects
 * src_left, src_top, src_width, src_height
 *             source rectangle values forwarded to the kernel
 * out_frame   destination frame; its data[0..3] pointers become the kernel's
 *             destination pointers
 * dst_width, dst_height
 *             forwarded to the kernel and also used to size the launch grid
 * dst_pitch   forwarded to the kernel unchanged
 * mpeg_range  forwarded to the kernel unchanged
 *
 * Returns the CHECK_CU()-wrapped result of cuLaunchKernel().
 *
 * NOTE(review): this excerpt comes from a rendered diff, so superseded and
 * replacement lines appear together: two endings of the parameter list (with
 * and without mpeg_range), two ways of building the kernel arguments
 * (dst_devptr/args_uchar vs. params/args) and two final lines of the launch
 * call. */
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func,
CUtexObject src_tex[4], int src_left, int src_top, int src_width, int src_height,
AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch)
AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch, int mpeg_range)
{
CUDAScaleContext *s = ctx->priv;
CudaFunctions *cu = s->hwctx->internal->cuda_dl;
CUdeviceptr dst_devptr[4] = {
(CUdeviceptr)out_frame->data[0], (CUdeviceptr)out_frame->data[1],
(CUdeviceptr)out_frame->data[2], (CUdeviceptr)out_frame->data[3]
/* Struct form: every kernel argument is packed into a single
 * CUDAScaleKernelParams value. */
CUDAScaleKernelParams params = {
.src_tex = {src_tex[0], src_tex[1], src_tex[2], src_tex[3]},
.dst = {
(CUdeviceptr)out_frame->data[0],
(CUdeviceptr)out_frame->data[1],
(CUdeviceptr)out_frame->data[2],
(CUdeviceptr)out_frame->data[3]
},
.dst_width = dst_width,
.dst_height = dst_height,
.dst_pitch = dst_pitch,
.src_left = src_left,
.src_top = src_top,
.src_width = src_width,
.src_height = src_height,
.param = s->param,
.mpeg_range = mpeg_range
};
/* Per-argument form: one pointer per individual kernel parameter. */
void *args_uchar[] = {
&src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3],
&dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
&dst_width, &dst_height, &dst_pitch,
&src_left, &src_top, &src_width, &src_height, &s->param
};
/* Struct form: the kernel takes exactly one parameter, the params struct. */
void *args[] = { &params };
/* Grid: enough BLOCKX x BLOCKY blocks to cover dst_width x dst_height
 * (rounded up); no dynamic shared memory; runs on s->cu_stream. */
return CHECK_CU(cu->cuLaunchKernel(func,
DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL));
BLOCKX, BLOCKY, 1, 0, s->cu_stream, args, NULL));
}
static int scalecuda_resize(AVFilterContext *ctx,
@@ -445,6 +474,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
CudaFunctions *cu = s->hwctx->internal->cuda_dl;
CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
int i, ret;
int mpeg_range = in->color_range != AVCOL_RANGE_JPEG;
CUtexObject tex[4] = { 0, 0, 0, 0 };
@@ -489,7 +519,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
// scale primary plane(s). Usually Y (and A), or single plane of RGB frames.
ret = call_resize_kernel(ctx, s->cu_func,
tex, in->crop_left, in->crop_top, crop_width, crop_height,
out, out->width, out->height, out->linesize[0]);
out, out->width, out->height, out->linesize[0], mpeg_range);
if (ret < 0)
goto exit;
@@ -503,7 +533,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
out,
AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w),
AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h),
out->linesize[1]);
out->linesize[1], mpeg_range);
if (ret < 0)
goto exit;
}

File diff suppressed because it is too large Load Diff

View File

@@ -23,6 +23,28 @@
#ifndef AVFILTER_SCALE_CUDA_H
#define AVFILTER_SCALE_CUDA_H
/* Make CUtexObject and CUdeviceptr available in both compilation modes.
 * When __CUDACC__ or __CUDA__ is defined, the two names are declared locally
 * as aliases of cudaTextureObject_t and uint8_t*. Otherwise they come from
 * the ffnvcodec dynamic-loading header.
 * NOTE(review): any struct built from these types must have the same layout
 * under both branches -- confirm that CUdeviceptr from ffnvcodec has the same
 * size and alignment as uint8_t*. */
#if defined(__CUDACC__) || defined(__CUDA__)
#include <stdint.h>
typedef cudaTextureObject_t CUtexObject;
typedef uint8_t* CUdeviceptr;
#else
#include <ffnvcodec/dynlink_cuda.h>
#endif
#define SCALE_CUDA_PARAM_DEFAULT 999999.0f
/* Argument bundle for one resize kernel launch. call_resize_kernel() fills
 * one of these and passes its address as the only kernel argument.
 * NOTE(review): presumably the kernels declare a parameter of this exact
 * type, so field order and types must not change independently of them --
 * confirm against the kernel source. */
typedef struct {
/* Source texture objects (four slots, copied from the caller's array). */
CUtexObject src_tex[4];
/* Destination pointers, taken from the output frame's data[0..3]. */
CUdeviceptr dst[4];
/* Destination size; the host also uses these to size the launch grid. */
int dst_width;
int dst_height;
/* Destination pitch; callers pass the output frame's linesize. */
int dst_pitch;
/* Source rectangle; callers pass the input frame's crop offsets and the
 * cropped width/height. */
int src_left;
int src_top;
int src_width;
int src_height;
/* Filter parameter copied from the filter context's param field. */
float param;
/* Nonzero unless the input frame's color_range is AVCOL_RANGE_JPEG. */
int mpeg_range;
} CUDAScaleKernelParams;
#endif