vulkan_dpx: switch to compile-time SPIR-V generation

This commit is contained in:
Lynne
2026-01-09 11:16:53 +01:00
parent 0f4667fc11
commit f2a55af9a4
6 changed files with 209 additions and 157 deletions

2
configure vendored
View File

@@ -3316,7 +3316,7 @@ av1_videotoolbox_hwaccel_deps="videotoolbox"
av1_videotoolbox_hwaccel_select="av1_decoder"
av1_vulkan_hwaccel_deps="vulkan"
av1_vulkan_hwaccel_select="av1_decoder"
dpx_vulkan_hwaccel_deps="vulkan spirv_library"
dpx_vulkan_hwaccel_deps="vulkan"
dpx_vulkan_hwaccel_select="dpx_decoder"
ffv1_vulkan_hwaccel_deps="vulkan spirv_library"
ffv1_vulkan_hwaccel_select="ffv1_decoder"

View File

@@ -18,9 +18,8 @@ OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/prores_raw_decode.comp.spv.o
OBJS-$(CONFIG_PRORES_VULKAN_HWACCEL) += vulkan/prores_vld.comp.spv.o \
vulkan/prores_idct.comp.spv.o
OBJS-$(CONFIG_DPX_VULKAN_HWACCEL) += vulkan/common.o \
vulkan/dpx_unpack.o \
vulkan/dpx_copy.o
OBJS-$(CONFIG_DPX_VULKAN_HWACCEL) += vulkan/dpx_unpack.comp.spv.o \
vulkan/dpx_copy.comp.spv.o
VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
.SECONDARY: $(VULKAN:.comp=.c)

View File

@@ -1,55 +0,0 @@
/*
* Copyright (c) 2025 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Loads element `off` of the input buffer. When the shader is built with
 * BIG_ENDIAN defined, the loaded value is passed through TYPE_REVERSE()
 * before being returned. */
TYPE read_data(uint off)
{
    TYPE raw = data[off];
#ifdef BIG_ENDIAN
    raw = TYPE_REVERSE(raw);
#endif
    return raw;
}
/* Entry point: each invocation copies the components of one pixel from the
 * input buffer into the destination image(s).
 * COMPONENTS, BITS_PER_COMP, BITS_LOG2, NB_IMAGES, SHIFT, TYPE and TYPE_VEC
 * are macros supplied by the host when it assembles the shader source. */
void main(void)
{
ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
/* Invocations that fall outside the first destination image do nothing */
if (!IS_WITHIN(pos, imageSize(dst[0])))
return;
uint linesize;
/* Size of one row in bits, passed through align(..., 32) */
linesize = align(imageSize(dst[0]).x*BITS_PER_COMP*COMPONENTS, 32);
/* NOTE(review): presumably converts the bit count into a count of buffer
 * elements; BITS_LOG2 is av_log2(bits) on the host side -- confirm for
 * bit depths that are not a power of two */
linesize >>= BITS_LOG2;
/* Index of this pixel's first component within the input buffer */
uint offs = pos.y*linesize + pos.x*COMPONENTS;
#if NB_IMAGES == 1
/* Single image: gather all components into one vector and store it once */
TYPE_VEC val;
for (int i = 0; i < COMPONENTS; i++)
val[i] = read_data(offs + i);
val >>= SHIFT;
imageStore(dst[0], pos, val);
#else
/* Multiple images: component i is written to image fmt_lut[i]
 * (0 -> 2, 1 -> 0, 2 -> 1, 3 -> 3) */
const ivec4 fmt_lut = ivec4(2, 0, 1, 3);
for (int i = 0; i < COMPONENTS; i++) {
TYPE val = read_data(offs + i);
val >>= SHIFT;
imageStore(dst[fmt_lut[i]], pos, TYPE_VEC(val));
}
#endif
}

View File

@@ -0,0 +1,95 @@
/*
* Copyright (c) 2025 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#version 460
#pragma shader_stage(compute)
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_nontemporal_keyword : require
#include "common.comp"
/* Specialization constant 0: when true, 16- and 32-bit elements are
 * byte-swapped after being loaded (see READ_FN) */
layout (constant_id = 0) const bool big_endian = false;
/* Specialization constant 1: selects which buffer view read_data() uses;
 * 8 and 16 are handled explicitly, any other value takes the 32-bit path */
layout (constant_id = 1) const int type_bits = 0;
/* Destination image(s), written with imageStore() */
layout (set = 0, binding = 0) uniform writeonly uimage2D dst[];
/* Input data, declared three times with 8-, 16- and 32-bit element types.
 * NOTE(review): the host code appears to bind the same slice buffer to
 * every one of these bindings -- confirm against vk_dpx_end_frame() */
layout (set = 0, binding = 1, scalar) nontemporal readonly buffer data_buf8 {
uint8_t data8[];
};
layout (set = 0, binding = 2, scalar) nontemporal readonly buffer data_buf16 {
uint16_t data16[];
};
layout (set = 0, binding = 3, scalar) nontemporal readonly buffer data_buf32 {
uint32_t data32[];
};
/* Push constants; the member order has to match the host-side
 * DecodePushData structure. Only bits_per_comp, nb_comp, nb_images and
 * shift are read by this shader. */
layout (push_constant, scalar) uniform pushConstants {
int bits_per_comp;
int nb_comp;
int nb_images;
int stride;
int need_align;
int padded_10bit;
int shift;
};
/* Generates read_val<width>(off): loads element `off` from data<width>[] and
 * widens it to uint. When the big_endian specialization constant is set, the
 * element goes through reverse<nbytes>() first. */
#define READ_FN(width, nbytes)                                          \
    uint read_val##width(uint off)                                      \
    {                                                                   \
        return big_endian ? uint(reverse##nbytes(data##width[off]))     \
                          : uint(data##width[off]);                     \
    }

READ_FN(16, 2)
READ_FN(32, 4)
/* Loads element `off` from the buffer view selected by the type_bits
 * specialization constant and returns it as a uint. 8-bit elements are
 * returned as loaded; 16-bit and all remaining widths go through the
 * READ_FN-generated helpers. */
uint read_data(uint off)
{
    uint v;

    switch (type_bits) {
    case 8:
        v = uint(data8[off]);
        break;
    case 16:
        v = read_val16(off);
        break;
    default:
        v = read_val32(off);
        break;
    }

    return v;
}
/* Entry point: each invocation copies the components of one pixel from the
 * input buffer into the destination image(s), shifting every component right
 * by the `shift` push constant. */
void main(void)
{
    ivec2 pos = ivec2(gl_GlobalInvocationID.xy);

    /* The dispatch grid may extend past the image edge. Such invocations
     * must exit before computing an offset, otherwise they read the input
     * buffer at positions that do not belong to the picture. */
    if (!IS_WITHIN(pos, imageSize(dst[0])))
        return;

    /* Size of one row in bits, passed through align(..., 32) */
    uint linesize;
    linesize = align(imageSize(dst[0]).x*bits_per_comp*nb_comp, 32);

    /* Bit offset of this pixel, turned into an element index */
    uint offs = pos.y*linesize + pos.x*nb_comp*bits_per_comp;
    offs /= bits_per_comp;

    if (nb_images == 1) {
        /* Single image: gather all components and store them at once */
        uvec4 val;
        for (int i = 0; i < nb_comp; i++)
            val[i] = read_data(offs + i);
        val >>= shift;
        imageStore(dst[0], pos, val);
    } else {
        /* Multiple images: component i is written to image fmt_lut[i]
         * (0 -> 2, 1 -> 0, 2 -> 1, 3 -> 3) */
        const ivec4 fmt_lut = ivec4(2, 0, 1, 3);
        for (int i = 0; i < nb_comp; i++) {
            uint32_t val = read_data(offs + i);
            val >>= shift;
            imageStore(dst[fmt_lut[i]], pos, uvec4(val));
        }
    }
}

View File

@@ -18,17 +18,39 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#version 460
#pragma shader_stage(compute)
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_nontemporal_keyword : require
#include "common.comp"
layout (constant_id = 0) const bool big_endian = false;
layout (constant_id = 1) const bool packed_10bit = false;
layout (set = 0, binding = 0) uniform writeonly uimage2D dst[];
layout (set = 0, binding = 1, scalar) nontemporal readonly buffer data_buf {
uint32_t data[];
};
layout (push_constant, scalar) uniform pushConstants {
int bits_per_comp;
int nb_comp;
int nb_images;
int stride;
int need_align;
int padded_10bit;
int shift;
};
uint32_t read_data(uint off)
{
#ifdef BIG_ENDIAN
return reverse4(data[off]);
#else
if (big_endian)
return reverse4(data[off]);
return data[off];
#endif
}
#ifdef PACKED_10BIT
i16vec4 parse_packed_in_32(ivec2 pos, int stride)
i16vec4 parse_packed10_in_32(ivec2 pos, int stride)
{
uint32_t d = read_data(pos.y*stride + pos.x);
i16vec4 v;
@@ -41,15 +63,15 @@ i16vec4 parse_packed_in_32(ivec2 pos, int stride)
v[3] = int16_t(0);
return v;
}
#else
i16vec4 parse_packed_in_32(ivec2 pos, int stride)
{
uint line_size = stride*BITS_PER_COMP*COMPONENTS;
uint line_size = stride*bits_per_comp*nb_comp;
line_size += line_size & 31;
line_size += need_align << 3;
uint line_off = pos.y*line_size;
uint pix_off = pos.x*BITS_PER_COMP*COMPONENTS;
uint pix_off = pos.x*bits_per_comp*nb_comp;
uint off = (line_off + pix_off) >> 5;
uint bit = pix_off & 0x1f;
@@ -61,12 +83,11 @@ i16vec4 parse_packed_in_32(ivec2 pos, int stride)
combined >>= bit;
return i16vec4(combined,
combined >> (BITS_PER_COMP*1),
combined >> (BITS_PER_COMP*2),
combined >> (BITS_PER_COMP*3)) &
int16_t((1 << BITS_PER_COMP) - 1);
combined >> (bits_per_comp*1),
combined >> (bits_per_comp*2),
combined >> (bits_per_comp*3)) &
int16_t((1 << bits_per_comp) - 1);
}
#endif
void main(void)
{
@@ -74,13 +95,17 @@ void main(void)
if (!IS_WITHIN(pos, imageSize(dst[0])))
return;
i16vec4 p = parse_packed_in_32(pos, imageSize(dst[0]).x);
i16vec4 p;
if (packed_10bit)
p = parse_packed10_in_32(pos, imageSize(dst[0]).x);
else
p = parse_packed_in_32(pos, imageSize(dst[0]).x);
#if NB_IMAGES == 1
imageStore(dst[0], pos, p);
#else
const ivec4 fmt_lut = COMPONENTS == 1 ? ivec4(0) : ivec4(2, 0, 1, 3);
for (uint i = 0; i < COMPONENTS; i++)
imageStore(dst[fmt_lut[i]], pos, i16vec4(p[i]));
#endif
if (nb_images == 1) {
imageStore(dst[0], pos, p);
} else {
const ivec4 fmt_lut = ivec4(2, 0, 1, 3);
for (uint i = 0; i < nb_comp; i++)
imageStore(dst[fmt_lut[i]], pos, i16vec4(p[i]));
}
}

View File

@@ -22,12 +22,13 @@
#include "hwaccel_internal.h"
#include "dpx.h"
#include "libavutil/vulkan_spirv.h"
#include "libavutil/mem.h"
extern const char *ff_source_common_comp;
extern const char *ff_source_dpx_unpack_comp;
extern const char *ff_source_dpx_copy_comp;
extern const unsigned char ff_dpx_unpack_comp_spv_data[];
extern const unsigned int ff_dpx_unpack_comp_spv_len;
extern const unsigned char ff_dpx_copy_comp_spv_data[];
extern const unsigned int ff_dpx_copy_comp_spv_len;
const FFVulkanDecodeDescriptor ff_vk_dec_dpx_desc = {
.codec_id = AV_CODEC_ID_DPX,
@@ -44,9 +45,13 @@ typedef struct DPXVulkanDecodeContext {
} DPXVulkanDecodeContext;
/* Push constants passed to the DPX compute shaders. The member order has to
 * stay identical to the pushConstants block declared in the shaders. */
typedef struct DecodePushData {
int bits_per_comp; /* set from avctx->bits_per_raw_sample */
int nb_comp; /* set from dpx->components */
int nb_images; /* set from ff_vk_count_images() on the output frame */
int stride; /* set from dpx->stride */
int need_align; /* set from dpx->need_align */
int padded_10bit; /* set to !dpx->unpadded_10bit */
int shift; /* FFALIGN(bits_per_raw_sample, 8) - bits_per_raw_sample */
} DecodePushData;
static int host_upload_image(AVCodecContext *avctx,
@@ -214,6 +219,9 @@ static int vk_dpx_end_frame(AVCodecContext *avctx)
DPXVulkanDecodePicture *pp = dpx->hwaccel_picture_private;
FFVulkanDecodePicture *vp = &pp->vp;
int unpack = (avctx->bits_per_raw_sample == 12 && !dpx->packing) ||
avctx->bits_per_raw_sample == 10;
FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data;
VkImageMemoryBarrier2 img_bar[8];
@@ -266,14 +274,31 @@ static int vk_dpx_end_frame(AVCodecContext *avctx)
slices_buf,
0, slices_buf->size,
VK_FORMAT_UNDEFINED);
if (!unpack) {
ff_vk_shader_update_desc_buffer(&ctx->s, exec, shd,
0, 2, 0,
slices_buf,
0, slices_buf->size,
VK_FORMAT_UNDEFINED);
ff_vk_shader_update_desc_buffer(&ctx->s, exec, shd,
0, 3, 0,
slices_buf,
0, slices_buf->size,
VK_FORMAT_UNDEFINED);
}
ff_vk_exec_bind_shader(&ctx->s, exec, shd);
/* Update push data */
DecodePushData pd = (DecodePushData) {
.bits_per_comp = avctx->bits_per_raw_sample,
.nb_comp = dpx->components,
.nb_images = ff_vk_count_images(vkf),
.stride = dpx->stride,
.need_align = dpx->need_align,
.padded_10bit = !dpx->unpadded_10bit,
.shift = FFALIGN(avctx->bits_per_raw_sample, 8) -
avctx->bits_per_raw_sample,
};
ff_vk_shader_update_push_const(&ctx->s, exec, shd,
@@ -294,92 +319,65 @@ fail:
}
static int init_shader(AVCodecContext *avctx, FFVulkanContext *s,
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
FFVulkanShader *shd, int bits)
FFVkExecPool *pool, FFVulkanShader *shd, int bits)
{
int err;
DPXDecContext *dpx = avctx->priv_data;
FFVulkanDescriptorSetBinding *desc_set;
AVHWFramesContext *dec_frames_ctx;
dec_frames_ctx = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
int planes = av_pix_fmt_count_planes(dec_frames_ctx->sw_format);
uint8_t *spv_data;
size_t spv_len;
void *spv_opaque = NULL;
RET(ff_vk_shader_init(s, shd, "dpx",
VK_SHADER_STAGE_COMPUTE_BIT,
(const char *[]) { "GL_EXT_buffer_reference",
"GL_EXT_buffer_reference2" }, 2,
512, 1, 1,
0));
/* Common codec header */
GLSLD(ff_source_common_comp);
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
GLSLC(1, int stride; );
GLSLC(1, int need_align; );
GLSLC(1, int padded_10bit; );
GLSLC(0, }; );
GLSLC(0, );
ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData),
VK_SHADER_STAGE_COMPUTE_BIT);
int unpack = (avctx->bits_per_raw_sample == 12 && !dpx->packing) ||
avctx->bits_per_raw_sample == 10;
desc_set = (FFVulkanDescriptorSetBinding []) {
SPEC_LIST_CREATE(sl, 2, 2*sizeof(uint32_t))
SPEC_LIST_ADD(sl, 0, 32, dpx->endian && bits > 8); /* big endian */
if (unpack)
SPEC_LIST_ADD(sl, 1, 32, bits == 10); /* packed_10bit */
else
SPEC_LIST_ADD(sl, 1, 32, FFALIGN(bits, 8)); /* type_bits */
ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
(uint32_t []) { 512, 1, 1 }, 0);
ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData),
VK_SHADER_STAGE_COMPUTE_BIT);
const FFVulkanDescriptorSetBinding desc_set[] = {
{
.name = "dst",
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.dimensions = 2,
.mem_quali = "writeonly",
.mem_layout = ff_vk_shader_rep_fmt(dec_frames_ctx->sw_format,
FF_VK_REP_NATIVE),
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format),
},
{
.name = "data_buf",
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.mem_quali = "readonly",
.buf_content = (unpack || bits == 32) ? "uint32_t data[];" :
bits == 8 ? "uint8_t data[];" : "uint16_t data[];",
},
{
.name = "data_buf16",
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
{
.name = "data_buf32",
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
};
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0));
ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2 + (2*!unpack), 0, 0);
if (dpx->endian && bits > 8)
GLSLC(0, #define BIG_ENDIAN );
GLSLF(0, #define COMPONENTS (%i) ,dpx->components);
GLSLF(0, #define BITS_PER_COMP (%i) ,bits);
GLSLF(0, #define BITS_LOG2 (%i) ,av_log2(bits));
GLSLF(0, #define NB_IMAGES (%i) ,planes);
const unsigned char *src = ff_dpx_copy_comp_spv_data;
size_t src_len = ff_dpx_copy_comp_spv_len;
if (unpack) {
if (bits == 10)
GLSLC(0, #define PACKED_10BIT );
GLSLD(ff_source_dpx_unpack_comp);
} else {
GLSLF(0, #define SHIFT (%i) ,FFALIGN(bits, 8) - bits);
GLSLF(0, #define TYPE uint%i_t ,FFALIGN(bits, 8));
GLSLF(0, #define TYPE_VEC u%ivec4 ,FFALIGN(bits, 8));
GLSLF(0, #define TYPE_REVERSE(x) (reverse%i(x)), FFALIGN(bits, 8)/8);
GLSLD(ff_source_dpx_copy_comp);
src = ff_dpx_unpack_comp_spv_data;
src_len = ff_dpx_unpack_comp_spv_len;
}
RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
&spv_opaque));
RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
RET(ff_vk_shader_link(s, shd, src, src_len, "main"));
RET(ff_vk_shader_register_exec(s, pool, shd));
fail:
if (spv_opaque)
spv->free_shader(spv, &spv_opaque);
return err;
}
@@ -415,31 +413,21 @@ static int vk_decode_dpx_init(AVCodecContext *avctx)
break;
}
FFVkSPIRVCompiler *spv = ff_vk_spirv_init();
if (!spv) {
av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
return AVERROR_EXTERNAL;
}
err = ff_vk_decode_init(avctx);
if (err < 0)
return err;
FFVulkanDecodeShared *ctx = dec->shared_ctx;
DPXVulkanDecodeContext *dxv = ctx->sd_ctx = av_mallocz(sizeof(*dxv));
if (!dxv) {
err = AVERROR(ENOMEM);
goto fail;
}
if (!dxv)
return AVERROR(ENOMEM);
ctx->sd_ctx_free = &vk_decode_dpx_uninit;
RET(init_shader(avctx, &ctx->s, &ctx->exec_pool,
spv, &dxv->shader, avctx->bits_per_raw_sample));
&dxv->shader, avctx->bits_per_raw_sample));
fail:
spv->uninit(&spv);
return err;
}