vulkan_prores_raw: clean up and optimize

This commit is contained in:
Lynne
2026-01-10 02:12:41 +01:00
parent 771b58c0b6
commit 0f4667fc11
3 changed files with 20 additions and 23 deletions

View File

@@ -39,12 +39,10 @@ layout (set = 0, binding = 1, scalar) readonly buffer frame_data_buf {
/* Push-constant block read by this shader's main().
 * NOTE(review): this text comes from a diff view whose +/- markers were lost,
 * so some members listed here may be removed by the change -- confirm against
 * the resulting file. */
layout (push_constant, scalar) uniform pushConstants {
u8buf pkt_data; /* base address; main() adds td.offset to it to form pkt_offset */
ivec2 frame_size; /* .x is compared against td.pos.x for the early return in main() */
ivec2 tile_size; /* .x caps the width processed for a tile in main() */
uint8_t qmat[64];
};
#define COMP_ID (gl_LocalInvocationID.x)
#define COMP_ID (gl_LocalInvocationID.y)
GetBitContext gb;
@@ -223,7 +221,8 @@ void main(void)
const uint tile_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
TileData td = tile_data[tile_idx];
if (expectEXT(td.pos.x >= frame_size.x, false))
int width = imageSize(dst).x;
if (expectEXT(td.pos.x >= width, false))
return;
uint64_t pkt_offset = uint64_t(pkt_data) + td.offset;
@@ -239,8 +238,8 @@ void main(void)
return;
const ivec2 offs = td.pos + ivec2(COMP_ID & 1, COMP_ID >> 1);
const int w = min(tile_size.x, frame_size.x - td.pos.x) / 2;
const int nb_blocks = w / 8;
const int w = min(tile_size.x, width - td.pos.x) >> 1;
const int nb_blocks = w >> 3;
const ivec4 comp_offset = ivec4(size[2] + size[1] + size[3],
size[2],

View File

@@ -40,7 +40,6 @@ layout (set = 0, binding = 1, scalar) readonly buffer frame_data_buf {
/* Push-constant block read by this shader's main().
 * NOTE(review): this text comes from a diff view whose +/- markers were lost,
 * so some members listed here may be removed by the change -- confirm against
 * the resulting file. */
layout (push_constant, scalar) uniform pushConstants {
u8buf pkt_data; /* base address; main() adds td.offset to it to form pkt_offset */
ivec2 frame_size; /* .x is compared against td.pos.x for the early return in main() */
ivec2 tile_size; /* .x caps the width processed for a tile in main() */
uint8_t qmat[64]; /* copied into a local array (qmat_buf) in main() before use */
};
@@ -73,7 +72,8 @@ void main(void)
const uint tile_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
TileData td = tile_data[tile_idx];
if (expectEXT(td.pos.x >= frame_size.x, false))
int width = imageSize(dst).x;
if (expectEXT(td.pos.x >= width, false))
return;
uint64_t pkt_offset = uint64_t(pkt_data) + td.offset;
@@ -81,8 +81,8 @@ void main(void)
int qscale = pack16(hdr_data[0].v.yx);
const ivec2 offs = td.pos + ivec2(COMP_ID & 1, COMP_ID >> 1);
const uint w = min(tile_size.x, frame_size.x - td.pos.x) / 2;
const uint nb_blocks = w / 8;
const uint w = min(tile_size.x, width - td.pos.x) >> 1;
const uint nb_blocks = w >> 3;
/* We have to do non-uniform access, so copy it */
uint8_t qmat_buf[64] = qmat;

View File

@@ -51,7 +51,6 @@ typedef struct ProResRAWVulkanDecodeContext {
/* Push-constant payload filled in vk_prores_raw_end_frame() and handed to
 * ff_vk_shader_update_push_const(). Member order matches the `pushConstants`
 * block declared in the shaders.
 * NOTE(review): this text comes from a diff view whose +/- markers were lost;
 * the hunk header (7 -> 6 lines) indicates one member is removed by the
 * change -- confirm against the resulting file. */
typedef struct DecodePushData {
VkDeviceAddress pkt_data; /* set from slices_buf->address */
int32_t frame_size[2]; /* { avctx->width, avctx->height } */
int32_t tile_size[2]; /* { prr->tw, prr->th } */
uint8_t qmat[64]; /* memcpy'd from prr->qmat; presumably must stay the last
                   * member, since callers size the push range as
                   * `sizeof(DecodePushData) - 64` -- verify */
} DecodePushData;
@@ -231,15 +230,13 @@ static int vk_prores_raw_end_frame(AVCodecContext *avctx)
/* Update push data */
DecodePushData pd_decode = (DecodePushData) {
.pkt_data = slices_buf->address,
.frame_size[0] = avctx->width,
.frame_size[1] = avctx->height,
.tile_size[0] = prr->tw,
.tile_size[1] = prr->th,
};
memcpy(pd_decode.qmat, prr->qmat, 64);
ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd_decode), &pd_decode);
0, sizeof(pd_decode) - 64, &pd_decode);
vk->CmdDispatch(exec->buf, prr->nb_tw, prr->nb_th, 1);
@@ -284,12 +281,9 @@ fail:
return 0;
}
static int add_common_data(AVCodecContext *avctx, FFVulkanContext *s,
FFVulkanShader *shd, int writeonly)
static int add_desc(AVCodecContext *avctx, FFVulkanContext *s,
FFVulkanShader *shd)
{
ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData),
VK_SHADER_STAGE_COMPUTE_BIT);
FFVulkanDescriptorSetBinding desc_set[] = {
{
.name = "dst",
@@ -312,10 +306,12 @@ static int init_decode_shader(AVCodecContext *avctx, FFVulkanContext *s,
{
int err;
ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL,
(uint32_t []) { 4, 1, 1 }, 0);
ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData) - 64,
VK_SHADER_STAGE_COMPUTE_BIT);
ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL,
(uint32_t []) { 1, 4, 1 }, 0);
add_common_data(avctx, s, shd, 1);
add_desc(avctx, s, shd);
RET(ff_vk_shader_link(s, shd,
ff_prores_raw_decode_comp_spv_data,
@@ -351,7 +347,9 @@ static int init_idct_shader(AVCodecContext *avctx, FFVulkanContext *s,
ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
(uint32_t []) { 8, nb_blocks, 4 }, 0);
add_common_data(avctx, s, shd, 0);
ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData),
VK_SHADER_STAGE_COMPUTE_BIT);
add_desc(avctx, s, shd);
RET(ff_vk_shader_link(s, shd,
ff_prores_raw_idct_comp_spv_data,