mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 21:00:41 +08:00
vulkan_prores_raw: clean up and optimize
This commit is contained in:
@@ -39,12 +39,10 @@ layout (set = 0, binding = 1, scalar) readonly buffer frame_data_buf {
|
||||
|
||||
layout (push_constant, scalar) uniform pushConstants {
|
||||
u8buf pkt_data;
|
||||
ivec2 frame_size;
|
||||
ivec2 tile_size;
|
||||
uint8_t qmat[64];
|
||||
};
|
||||
|
||||
#define COMP_ID (gl_LocalInvocationID.x)
|
||||
#define COMP_ID (gl_LocalInvocationID.y)
|
||||
|
||||
GetBitContext gb;
|
||||
|
||||
@@ -223,7 +221,8 @@ void main(void)
|
||||
const uint tile_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
|
||||
TileData td = tile_data[tile_idx];
|
||||
|
||||
if (expectEXT(td.pos.x >= frame_size.x, false))
|
||||
int width = imageSize(dst).x;
|
||||
if (expectEXT(td.pos.x >= width, false))
|
||||
return;
|
||||
|
||||
uint64_t pkt_offset = uint64_t(pkt_data) + td.offset;
|
||||
@@ -239,8 +238,8 @@ void main(void)
|
||||
return;
|
||||
|
||||
const ivec2 offs = td.pos + ivec2(COMP_ID & 1, COMP_ID >> 1);
|
||||
const int w = min(tile_size.x, frame_size.x - td.pos.x) / 2;
|
||||
const int nb_blocks = w / 8;
|
||||
const int w = min(tile_size.x, width - td.pos.x) >> 1;
|
||||
const int nb_blocks = w >> 3;
|
||||
|
||||
const ivec4 comp_offset = ivec4(size[2] + size[1] + size[3],
|
||||
size[2],
|
||||
|
||||
@@ -40,7 +40,6 @@ layout (set = 0, binding = 1, scalar) readonly buffer frame_data_buf {
|
||||
|
||||
layout (push_constant, scalar) uniform pushConstants {
|
||||
u8buf pkt_data;
|
||||
ivec2 frame_size;
|
||||
ivec2 tile_size;
|
||||
uint8_t qmat[64];
|
||||
};
|
||||
@@ -73,7 +72,8 @@ void main(void)
|
||||
const uint tile_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
|
||||
TileData td = tile_data[tile_idx];
|
||||
|
||||
if (expectEXT(td.pos.x >= frame_size.x, false))
|
||||
int width = imageSize(dst).x;
|
||||
if (expectEXT(td.pos.x >= width, false))
|
||||
return;
|
||||
|
||||
uint64_t pkt_offset = uint64_t(pkt_data) + td.offset;
|
||||
@@ -81,8 +81,8 @@ void main(void)
|
||||
int qscale = pack16(hdr_data[0].v.yx);
|
||||
|
||||
const ivec2 offs = td.pos + ivec2(COMP_ID & 1, COMP_ID >> 1);
|
||||
const uint w = min(tile_size.x, frame_size.x - td.pos.x) / 2;
|
||||
const uint nb_blocks = w / 8;
|
||||
const uint w = min(tile_size.x, width - td.pos.x) >> 1;
|
||||
const uint nb_blocks = w >> 3;
|
||||
|
||||
/* We have to do non-uniform access, so copy it */
|
||||
uint8_t qmat_buf[64] = qmat;
|
||||
|
||||
@@ -51,7 +51,6 @@ typedef struct ProResRAWVulkanDecodeContext {
|
||||
|
||||
typedef struct DecodePushData {
|
||||
VkDeviceAddress pkt_data;
|
||||
int32_t frame_size[2];
|
||||
int32_t tile_size[2];
|
||||
uint8_t qmat[64];
|
||||
} DecodePushData;
|
||||
@@ -231,15 +230,13 @@ static int vk_prores_raw_end_frame(AVCodecContext *avctx)
|
||||
/* Update push data */
|
||||
DecodePushData pd_decode = (DecodePushData) {
|
||||
.pkt_data = slices_buf->address,
|
||||
.frame_size[0] = avctx->width,
|
||||
.frame_size[1] = avctx->height,
|
||||
.tile_size[0] = prr->tw,
|
||||
.tile_size[1] = prr->th,
|
||||
};
|
||||
memcpy(pd_decode.qmat, prr->qmat, 64);
|
||||
ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
0, sizeof(pd_decode), &pd_decode);
|
||||
0, sizeof(pd_decode) - 64, &pd_decode);
|
||||
|
||||
vk->CmdDispatch(exec->buf, prr->nb_tw, prr->nb_th, 1);
|
||||
|
||||
@@ -284,12 +281,9 @@ fail:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int add_common_data(AVCodecContext *avctx, FFVulkanContext *s,
|
||||
FFVulkanShader *shd, int writeonly)
|
||||
static int add_desc(AVCodecContext *avctx, FFVulkanContext *s,
|
||||
FFVulkanShader *shd)
|
||||
{
|
||||
ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData),
|
||||
VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
|
||||
FFVulkanDescriptorSetBinding desc_set[] = {
|
||||
{
|
||||
.name = "dst",
|
||||
@@ -312,10 +306,12 @@ static int init_decode_shader(AVCodecContext *avctx, FFVulkanContext *s,
|
||||
{
|
||||
int err;
|
||||
|
||||
ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL,
|
||||
(uint32_t []) { 4, 1, 1 }, 0);
|
||||
ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData) - 64,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, NULL,
|
||||
(uint32_t []) { 1, 4, 1 }, 0);
|
||||
|
||||
add_common_data(avctx, s, shd, 1);
|
||||
add_desc(avctx, s, shd);
|
||||
|
||||
RET(ff_vk_shader_link(s, shd,
|
||||
ff_prores_raw_decode_comp_spv_data,
|
||||
@@ -351,7 +347,9 @@ static int init_idct_shader(AVCodecContext *avctx, FFVulkanContext *s,
|
||||
ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
|
||||
(uint32_t []) { 8, nb_blocks, 4 }, 0);
|
||||
|
||||
add_common_data(avctx, s, shd, 0);
|
||||
ff_vk_shader_add_push_const(shd, 0, sizeof(DecodePushData),
|
||||
VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
add_desc(avctx, s, shd);
|
||||
|
||||
RET(ff_vk_shader_link(s, shd,
|
||||
ff_prores_raw_idct_comp_spv_data,
|
||||
|
||||
Reference in New Issue
Block a user