mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 21:00:41 +08:00
ffv1enc_vulkan: use regular descriptors for slice state
This commit is contained in:
@@ -42,7 +42,6 @@ int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s,
|
||||
|
||||
typedef struct FFv1ShaderParams {
|
||||
VkDeviceAddress slice_data;
|
||||
VkDeviceAddress slice_state;
|
||||
|
||||
uint32_t extend_lookup[8];
|
||||
uint16_t context_count[8];
|
||||
|
||||
@@ -286,7 +286,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
||||
/* With everything allocated, setup push data */
|
||||
FFv1ShaderParams pd = {
|
||||
.slice_data = out_data_buf->address,
|
||||
.slice_state = slice_data_buf->address + f->slice_count*256,
|
||||
|
||||
.img_size[0] = fv->s.frames->width,
|
||||
.img_size[1] = fv->s.frames->height,
|
||||
@@ -422,6 +421,12 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
||||
slice_data_buf,
|
||||
0, slice_data_size*f->slice_count,
|
||||
VK_FORMAT_UNDEFINED);
|
||||
ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->reset,
|
||||
1, 1, 0,
|
||||
slice_data_buf,
|
||||
f->slice_count*256,
|
||||
VK_WHOLE_SIZE,
|
||||
VK_FORMAT_UNDEFINED);
|
||||
|
||||
ff_vk_exec_bind_shader(&fv->s, exec, &fv->reset);
|
||||
ff_vk_shader_update_push_const(&fv->s, exec, &fv->reset,
|
||||
@@ -485,15 +490,21 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
|
||||
results_data_buf,
|
||||
0, results_data_buf->size,
|
||||
VK_FORMAT_UNDEFINED);
|
||||
ff_vk_shader_update_desc_buffer(&fv->s, exec, &fv->enc,
|
||||
1, 2, 0,
|
||||
slice_data_buf,
|
||||
f->slice_count*256,
|
||||
VK_WHOLE_SIZE,
|
||||
VK_FORMAT_UNDEFINED);
|
||||
ff_vk_shader_update_img_array(&fv->s, exec, &fv->enc,
|
||||
src, src_views,
|
||||
1, 2,
|
||||
1, 3,
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_NULL_HANDLE);
|
||||
if (fv->is_rgb)
|
||||
ff_vk_shader_update_img_array(&fv->s, exec, &fv->enc,
|
||||
tmp, tmp_views,
|
||||
1, 3,
|
||||
1, 4,
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_NULL_HANDLE);
|
||||
|
||||
@@ -878,8 +889,12 @@ static int init_reset_shader(AVCodecContext *avctx, VkSpecializationInfo *sl)
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
{ /* slice_state_buf */
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
};
|
||||
ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 1, 0, 0);
|
||||
ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0);
|
||||
|
||||
if (fv->ctx.ac == AC_GOLOMB_RICE)
|
||||
RET(ff_vk_shader_link(&fv->s, shd,
|
||||
@@ -933,6 +948,10 @@ static int init_encode_shader(AVCodecContext *avctx, VkSpecializationInfo *sl)
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
{ /* slice_state_buf */
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
{ /* src */
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
@@ -943,7 +962,7 @@ static int init_encode_shader(AVCodecContext *avctx, VkSpecializationInfo *sl)
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
};
|
||||
ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3 + fv->is_rgb, 0, 0);
|
||||
ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 4 + fv->is_rgb, 0, 0);
|
||||
|
||||
if (fv->ctx.ac == AC_GOLOMB_RICE) {
|
||||
if (fv->is_rgb)
|
||||
|
||||
@@ -59,7 +59,6 @@ layout (constant_id = 17) const uint context_model = 0;
|
||||
|
||||
layout (push_constant, scalar) uniform pushConstants {
|
||||
u8buf slice_data;
|
||||
u8buf slice_state;
|
||||
|
||||
bool extend_lookup[MAX_QUANT_TABLES];
|
||||
uint16_t context_count[MAX_QUANT_TABLES];
|
||||
|
||||
@@ -34,12 +34,15 @@ layout (set = 0, binding = 2, scalar) uniform crc_ieee_buf {
|
||||
layout (set = 1, binding = 1, scalar) writeonly buffer slice_results_buf {
|
||||
uint64_t slice_results[];
|
||||
};
|
||||
layout (set = 1, binding = 2) uniform uimage2D src[];
|
||||
layout (set = 1, binding = 3) uniform uimage2D src[];
|
||||
|
||||
#ifndef GOLOMB
|
||||
#define WRITE(c, off, val) put_rac(c, uint64_t(slice_state) + (state_off + off), val)
|
||||
|
||||
/* Note - only handles signed values */
|
||||
layout (set = 1, binding = 2, scalar) buffer slice_state_buf {
|
||||
uint8_t slice_rc_state[];
|
||||
};
|
||||
|
||||
#define WRITE(c, off, val) put_rac_direct(c, slice_rc_state[state_off + off], val)
|
||||
void put_symbol(inout RangeCoder c, uint state_off, int v)
|
||||
{
|
||||
bool is_nil = (v == 0);
|
||||
@@ -112,6 +115,10 @@ void encode_line(inout SliceContext sc, readonly uimage2D img, uint state_off,
|
||||
|
||||
#else /* GOLOMB */
|
||||
|
||||
layout (set = 1, binding = 2, scalar) buffer slice_state_buf {
|
||||
VlcState slice_vlc_state[];
|
||||
};
|
||||
|
||||
uint hdr_len = 0;
|
||||
PutBitContext pb;
|
||||
|
||||
@@ -174,9 +181,8 @@ void encode_line(inout SliceContext sc, readonly uimage2D img, uint state_off,
|
||||
}
|
||||
|
||||
if (!run_mode) {
|
||||
VlcState sb = VlcState(uint64_t(slice_state) +
|
||||
state_off + VLC_STATE_SIZE*d[0]);
|
||||
Symbol sym = get_vlc_symbol(sb, d[1], bits);
|
||||
Symbol sym = get_vlc_symbol(slice_vlc_state[state_off + d[0]],
|
||||
d[1], bits);
|
||||
put_bits(pb, sym.bits, sym.val);
|
||||
}
|
||||
}
|
||||
@@ -276,6 +282,7 @@ void encode_slice(inout SliceContext sc, const uint slice_idx)
|
||||
uvec4(0, 1, 1, 2))*plane_state_size;
|
||||
|
||||
#ifdef GOLOMB
|
||||
slice_state_off >>= 3;
|
||||
init_golomb(slice_ctx[slice_idx]);
|
||||
#endif
|
||||
|
||||
|
||||
@@ -23,6 +23,5 @@
|
||||
#pragma shader_stage(compute)
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
|
||||
#define VLC_BUFFER
|
||||
#define GOLOMB
|
||||
#include "ffv1_enc.comp.glsl"
|
||||
|
||||
@@ -26,34 +26,41 @@
|
||||
#include "common.glsl"
|
||||
#include "ffv1_common.glsl"
|
||||
|
||||
#ifdef GOLOMB
|
||||
#define PS_SHIFT 3
|
||||
layout (set = 1, binding = 1, scalar) writeonly buffer slice_state_buf {
|
||||
VlcState slice_vlc_state[];
|
||||
};
|
||||
#else
|
||||
#define PS_SHIFT 2
|
||||
layout (set = 1, binding = 1, scalar) writeonly buffer slice_state_buf {
|
||||
uint32_t slice_rc_state[];
|
||||
};
|
||||
#endif
|
||||
|
||||
void main(void)
|
||||
{
|
||||
const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
|
||||
|
||||
uint contexts = context_count[context_model];
|
||||
uint64_t slice_state_off = uint64_t(slice_state) +
|
||||
slice_idx*plane_state_size*codec_planes;
|
||||
uint plane_state_len = plane_state_size >> PS_SHIFT;
|
||||
uint offs = slice_idx*plane_state_len*codec_planes +
|
||||
gl_WorkGroupID.z*plane_state_len +
|
||||
gl_LocalInvocationID.x;
|
||||
|
||||
#ifdef GOLOMB
|
||||
uint64_t start = slice_state_off +
|
||||
(gl_WorkGroupID.z*(plane_state_size/VLC_STATE_SIZE) +
|
||||
gl_LocalInvocationID.x)*VLC_STATE_SIZE;
|
||||
for (uint x = gl_LocalInvocationID.x; x < contexts; x += gl_WorkGroupSize.x) {
|
||||
VlcState sb = VlcState(start);
|
||||
sb.drift = int16_t(0);
|
||||
sb.error_sum = uint16_t(4);
|
||||
sb.bias = int8_t(0);
|
||||
sb.count = uint8_t(1);
|
||||
start += gl_WorkGroupSize.x*VLC_STATE_SIZE;
|
||||
slice_vlc_state[offs].drift = int16_t(0);
|
||||
slice_vlc_state[offs].error_sum = uint16_t(4);
|
||||
slice_vlc_state[offs].bias = int8_t(0);
|
||||
slice_vlc_state[offs].count = uint8_t(1);
|
||||
offs += gl_WorkGroupSize.x;
|
||||
}
|
||||
#else
|
||||
uint64_t start = slice_state_off +
|
||||
gl_WorkGroupID.z*plane_state_size +
|
||||
(gl_LocalInvocationID.x << 2 /* dwords */); /* Bytes */
|
||||
uint count_total = contexts*(CONTEXT_SIZE /* bytes */ >> 2 /* dwords */);
|
||||
for (uint x = gl_LocalInvocationID.x; x < count_total; x += gl_WorkGroupSize.x) {
|
||||
u32buf(start).v = 0x80808080;
|
||||
start += gl_WorkGroupSize.x*(CONTEXT_SIZE >> 3 /* 1/8th of context */);
|
||||
slice_rc_state[offs] = 0x80808080;
|
||||
offs += gl_WorkGroupSize.x;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -23,6 +23,5 @@
|
||||
#pragma shader_stage(compute)
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
|
||||
#define VLC_BUFFER
|
||||
#define GOLOMB
|
||||
#include "ffv1_enc_reset.comp.glsl"
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_shader_image_load_formatted : require
|
||||
|
||||
layout (set = 1, binding = 3) uniform uimage2D tmp;
|
||||
layout (set = 1, binding = 4) uniform uimage2D tmp;
|
||||
|
||||
#define RGB
|
||||
#include "ffv1_enc.comp.glsl"
|
||||
|
||||
@@ -23,6 +23,5 @@
|
||||
#pragma shader_stage(compute)
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
|
||||
#define VLC_BUFFER
|
||||
#define GOLOMB
|
||||
#include "ffv1_enc_rgb.comp.glsl"
|
||||
|
||||
@@ -23,14 +23,7 @@
|
||||
#ifndef VULKAN_FFV1_VLC_H
|
||||
#define VULKAN_FFV1_VLC_H
|
||||
|
||||
#define VLC_STATE_SIZE 8
|
||||
#ifdef VLC_BUFFER
|
||||
layout(buffer_reference, buffer_reference_align = VLC_STATE_SIZE) buffer
|
||||
#else
|
||||
struct
|
||||
#endif
|
||||
|
||||
VlcState {
|
||||
struct VlcState {
|
||||
uint32_t error_sum;
|
||||
int16_t drift;
|
||||
int8_t bias;
|
||||
|
||||
@@ -355,7 +355,6 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
|
||||
FFv1ShaderParams pd = {
|
||||
.slice_data = slices_buf->address,
|
||||
.slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
|
||||
|
||||
.img_size[0] = f->picture.f->width,
|
||||
.img_size[1] = f->picture.f->height,
|
||||
|
||||
Reference in New Issue
Block a user