mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 21:00:41 +08:00
avfilter/scale_cuda: Add support for 4:2:2 chroma subsampling
The supported YUV pixel formats are now grouped into planar
and semi-planar families. Grouping them this way reduces the number
of CUDA kernels needed to cover all pixel formats.
This patch:
1. Adds support for YUV 4:2:2 planar and semi-planar formats:
yuv422p, yuv422p10, nv16, p210, p216
2. Implements new conversion structures and kernel definitions
for planar and semi-planar formats
Signed-off-by: Diego de Souza <ddesouza@nvidia.com>
This commit is contained in:
committed by
Timo Rothenpieler
parent
04b5e25d35
commit
75b8567591
@@ -678,6 +678,7 @@ SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h stack_internal.h
|
||||
SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h
|
||||
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h stack_internal.h
|
||||
SKIPHEADERS-$(CONFIG_VULKAN) += vulkan_filter.h
|
||||
SKIPHEADERS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.h
|
||||
|
||||
TOOLS = graph2dot
|
||||
TESTPROGS = drawutils filtfmts formats integral
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
#include "version_major.h"
|
||||
|
||||
#define LIBAVFILTER_VERSION_MINOR 10
|
||||
#define LIBAVFILTER_VERSION_MICRO 100
|
||||
#define LIBAVFILTER_VERSION_MICRO 101
|
||||
|
||||
|
||||
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
|
||||
|
||||
@@ -39,17 +39,29 @@
|
||||
#include "cuda/load_helper.h"
|
||||
#include "vf_scale_cuda.h"
|
||||
|
||||
static const enum AVPixelFormat supported_formats[] = {
|
||||
AV_PIX_FMT_YUV420P,
|
||||
AV_PIX_FMT_NV12,
|
||||
AV_PIX_FMT_YUV444P,
|
||||
AV_PIX_FMT_P010,
|
||||
AV_PIX_FMT_P016,
|
||||
AV_PIX_FMT_YUV444P16,
|
||||
AV_PIX_FMT_0RGB32,
|
||||
AV_PIX_FMT_0BGR32,
|
||||
AV_PIX_FMT_RGB32,
|
||||
AV_PIX_FMT_BGR32,
|
||||
/* Pairs a pixel format with the short name that get_format_name() returns for it. */
struct format_entry {
|
||||
enum AVPixelFormat format;
|
||||
/* 13 bytes fit the longest names used in supported_formats[] ("semiplanar10", "semiplanar16": 12 characters plus the terminator). */
char name[13];
|
||||
};
|
||||
|
||||
/*
 * Table consulted by format_is_supported() and get_format_name().
 * Several pixel formats share one name (e.g. every 8-bit planar YUV format
 * maps to "planar8"); cudascale_load_functions() inserts that name into the
 * "Subsample_%s_%s_%s_uv" kernel symbol it looks up.
 */
static const struct format_entry supported_formats[] = {
|
||||
{AV_PIX_FMT_YUV420P, "planar8"},
|
||||
{AV_PIX_FMT_YUV422P, "planar8"},
|
||||
{AV_PIX_FMT_YUV444P, "planar8"},
|
||||
{AV_PIX_FMT_YUV420P10,"planar10"},
|
||||
{AV_PIX_FMT_YUV422P10,"planar10"},
|
||||
{AV_PIX_FMT_YUV444P10,"planar10"},
|
||||
{AV_PIX_FMT_YUV444P16,"planar16"},
|
||||
{AV_PIX_FMT_NV12, "semiplanar8"},
|
||||
{AV_PIX_FMT_NV16, "semiplanar8"},
|
||||
{AV_PIX_FMT_P010, "semiplanar10"},
|
||||
{AV_PIX_FMT_P210, "semiplanar10"},
|
||||
{AV_PIX_FMT_P016, "semiplanar16"},
|
||||
{AV_PIX_FMT_P216, "semiplanar16"},
|
||||
{AV_PIX_FMT_0RGB32, "bgr0"},
|
||||
{AV_PIX_FMT_0BGR32, "rgb0"},
|
||||
{AV_PIX_FMT_RGB32, "bgra"},
|
||||
{AV_PIX_FMT_BGR32, "rgba"},
|
||||
};
|
||||
|
||||
#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
|
||||
@@ -184,14 +196,20 @@ fail:
|
||||
|
||||
static int format_is_supported(enum AVPixelFormat fmt)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
|
||||
if (supported_formats[i] == fmt)
|
||||
for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
|
||||
if (supported_formats[i].format == fmt)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Look up the name stored for fmt in supported_formats[].
 * Returns the name of the first matching entry, or NULL when fmt is not in
 * the table. The returned pointer refers to the static table.
 */
static const char* get_format_name(enum AVPixelFormat fmt)
|
||||
{
|
||||
for (int i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
|
||||
if (supported_formats[i].format == fmt)
|
||||
return supported_formats[i].name;
|
||||
/* No entry matched. */
return NULL;
|
||||
}
|
||||
|
||||
static av_cold void set_format_info(AVFilterContext *ctx, enum AVPixelFormat in_format, enum AVPixelFormat out_format)
|
||||
{
|
||||
CUDAScaleContext *s = ctx->priv;
|
||||
@@ -284,8 +302,8 @@ static av_cold int cudascale_load_functions(AVFilterContext *ctx)
|
||||
char buf[128];
|
||||
int ret;
|
||||
|
||||
const char *in_fmt_name = av_get_pix_fmt_name(s->in_fmt);
|
||||
const char *out_fmt_name = av_get_pix_fmt_name(s->out_fmt);
|
||||
const char *in_fmt_name = get_format_name(s->in_fmt);
|
||||
const char *out_fmt_name = get_format_name(s->out_fmt);
|
||||
|
||||
const char *function_infix = "";
|
||||
|
||||
@@ -335,11 +353,13 @@ static av_cold int cudascale_load_functions(AVFilterContext *ctx)
|
||||
ret = AVERROR(ENOSYS);
|
||||
goto fail;
|
||||
}
|
||||
av_log(ctx, AV_LOG_DEBUG, "Luma filter: %s (%s -> %s)\n", buf, av_get_pix_fmt_name(s->in_fmt), av_get_pix_fmt_name(s->out_fmt));
|
||||
|
||||
snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s_uv", function_infix, in_fmt_name, out_fmt_name);
|
||||
ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uv, s->cu_module, buf));
|
||||
if (ret < 0)
|
||||
goto fail;
|
||||
av_log(ctx, AV_LOG_DEBUG, "Chroma filter: %s (%s -> %s)\n", buf, av_get_pix_fmt_name(s->in_fmt), av_get_pix_fmt_name(s->out_fmt));
|
||||
|
||||
fail:
|
||||
CHECK_CU(cu->cuCtxPopCurrent(&dummy));
|
||||
@@ -416,26 +436,35 @@ fail:
|
||||
|
||||
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func,
|
||||
CUtexObject src_tex[4], int src_left, int src_top, int src_width, int src_height,
|
||||
AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch)
|
||||
AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch, int mpeg_range)
|
||||
{
|
||||
CUDAScaleContext *s = ctx->priv;
|
||||
CudaFunctions *cu = s->hwctx->internal->cuda_dl;
|
||||
|
||||
CUdeviceptr dst_devptr[4] = {
|
||||
(CUdeviceptr)out_frame->data[0], (CUdeviceptr)out_frame->data[1],
|
||||
(CUdeviceptr)out_frame->data[2], (CUdeviceptr)out_frame->data[3]
|
||||
CUDAScaleKernelParams params = {
|
||||
.src_tex = {src_tex[0], src_tex[1], src_tex[2], src_tex[3]},
|
||||
.dst = {
|
||||
(CUdeviceptr)out_frame->data[0],
|
||||
(CUdeviceptr)out_frame->data[1],
|
||||
(CUdeviceptr)out_frame->data[2],
|
||||
(CUdeviceptr)out_frame->data[3]
|
||||
},
|
||||
.dst_width = dst_width,
|
||||
.dst_height = dst_height,
|
||||
.dst_pitch = dst_pitch,
|
||||
.src_left = src_left,
|
||||
.src_top = src_top,
|
||||
.src_width = src_width,
|
||||
.src_height = src_height,
|
||||
.param = s->param,
|
||||
.mpeg_range = mpeg_range
|
||||
};
|
||||
|
||||
void *args_uchar[] = {
|
||||
&src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3],
|
||||
&dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
|
||||
&dst_width, &dst_height, &dst_pitch,
|
||||
&src_left, &src_top, &src_width, &src_height, &s->param
|
||||
};
|
||||
void *args[] = { &params };
|
||||
|
||||
return CHECK_CU(cu->cuLaunchKernel(func,
|
||||
DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
|
||||
BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL));
|
||||
BLOCKX, BLOCKY, 1, 0, s->cu_stream, args, NULL));
|
||||
}
|
||||
|
||||
static int scalecuda_resize(AVFilterContext *ctx,
|
||||
@@ -445,6 +474,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
|
||||
CudaFunctions *cu = s->hwctx->internal->cuda_dl;
|
||||
CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
|
||||
int i, ret;
|
||||
int mpeg_range = in->color_range != AVCOL_RANGE_JPEG;
|
||||
|
||||
CUtexObject tex[4] = { 0, 0, 0, 0 };
|
||||
|
||||
@@ -489,7 +519,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
|
||||
// scale primary plane(s). Usually Y (and A), or single plane of RGB frames.
|
||||
ret = call_resize_kernel(ctx, s->cu_func,
|
||||
tex, in->crop_left, in->crop_top, crop_width, crop_height,
|
||||
out, out->width, out->height, out->linesize[0]);
|
||||
out, out->width, out->height, out->linesize[0], mpeg_range);
|
||||
if (ret < 0)
|
||||
goto exit;
|
||||
|
||||
@@ -503,7 +533,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
|
||||
out,
|
||||
AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w),
|
||||
AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h),
|
||||
out->linesize[1]);
|
||||
out->linesize[1], mpeg_range);
|
||||
if (ret < 0)
|
||||
goto exit;
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -23,6 +23,28 @@
|
||||
#ifndef AVFILTER_SCALE_CUDA_H
|
||||
#define AVFILTER_SCALE_CUDA_H
|
||||
|
||||
#if defined(__CUDACC__) || defined(__CUDA__)
|
||||
#include <stdint.h>
|
||||
typedef cudaTextureObject_t CUtexObject;
|
||||
typedef uint8_t* CUdeviceptr;
|
||||
#else
|
||||
#include <ffnvcodec/dynlink_cuda.h>
|
||||
#endif
|
||||
|
||||
#define SCALE_CUDA_PARAM_DEFAULT 999999.0f
|
||||
|
||||
/*
 * Parameter bundle that call_resize_kernel() fills in for each kernel launch.
 * CUtexObject and CUdeviceptr come from the typedefs above when this header
 * is built with __CUDACC__ or __CUDA__ defined, and from
 * ffnvcodec/dynlink_cuda.h otherwise.
 */
typedef struct {
|
||||
/* Source textures, copied from the src_tex[] argument of call_resize_kernel(). */
CUtexObject src_tex[4];
|
||||
/* Destination pointers, taken from data[0..3] of the output frame. */
CUdeviceptr dst[4];
|
||||
/* Destination width, height and pitch as passed to call_resize_kernel(). */
int dst_width;
|
||||
int dst_height;
|
||||
int dst_pitch;
|
||||
/* Source rectangle; scalecuda_resize() passes the input frame's crop_left/crop_top and the crop size. */
int src_left;
|
||||
int src_top;
|
||||
int src_width;
|
||||
int src_height;
|
||||
/* Copied from the filter context's param field. */
float param;
|
||||
/* Non-zero when the input frame's color_range is not AVCOL_RANGE_JPEG. */
int mpeg_range;
|
||||
} CUDAScaleKernelParams;
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user