mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 21:00:41 +08:00
swscale/vulkan: use uniform buffers for dither matrix
Uniform buffers are much simpler to index, and require no work from the driver compiler to optimize. In SPIR-V, large 2D shader constants can be spilled into scratch memory, since you need to create a function variable to index them during runtime. Sponsored-by: Sovereign Tech Fund
This commit is contained in:
@@ -78,10 +78,14 @@ int ff_sws_vk_init(SwsContext *sws, AVBufferRef *dev_ref)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define MAX_DITHER_BUFS 4
|
||||
|
||||
typedef struct VulkanPriv {
|
||||
FFVulkanOpsCtx *s;
|
||||
FFVkExecPool e;
|
||||
FFVulkanShader shd;
|
||||
FFVkBuffer dither_buf[MAX_DITHER_BUFS];
|
||||
int nb_dither_buf;
|
||||
enum FFVkShaderRepFormat src_rep;
|
||||
enum FFVkShaderRepFormat dst_rep;
|
||||
} VulkanPriv;
|
||||
@@ -149,10 +153,51 @@ static void free_fn(void *priv)
|
||||
VulkanPriv *p = priv;
|
||||
ff_vk_exec_pool_free(&p->s->vkctx, &p->e);
|
||||
ff_vk_shader_free(&p->s->vkctx, &p->shd);
|
||||
for (int i = 0; i < p->nb_dither_buf; i++)
|
||||
ff_vk_free_buf(&p->s->vkctx, &p->dither_buf[i]);
|
||||
av_refstruct_unref(&p->s);
|
||||
av_free(priv);
|
||||
}
|
||||
|
||||
static int create_dither_bufs(FFVulkanOpsCtx *s, VulkanPriv *p, SwsOpList *ops)
|
||||
{
|
||||
int err;
|
||||
p->nb_dither_buf = 0;
|
||||
for (int n = 0; n < ops->num_ops; n++) {
|
||||
const SwsOp *op = &ops->ops[n];
|
||||
if (op->op != SWS_OP_DITHER)
|
||||
continue;
|
||||
av_assert0(p->nb_dither_buf + 1 <= MAX_DITHER_BUFS);
|
||||
|
||||
int size = (1 << op->dither.size_log2);
|
||||
err = ff_vk_create_buf(&s->vkctx, &p->dither_buf[p->nb_dither_buf],
|
||||
size*size*sizeof(float), NULL, NULL,
|
||||
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
float *dither_data;
|
||||
err = ff_vk_map_buffer(&s->vkctx, &p->dither_buf[p->nb_dither_buf],
|
||||
(uint8_t **)&dither_data, 0);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
for (int j = 0; j < size; j++) {
|
||||
const AVRational r = op->dither.matrix[i*size + j];
|
||||
dither_data[i*size + j] = r.num/(float)r.den;
|
||||
}
|
||||
}
|
||||
|
||||
ff_vk_unmap_buffer(&s->vkctx, &p->dither_buf[p->nb_dither_buf], 1);
|
||||
p->nb_dither_buf++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
|
||||
static void add_desc_read_write(FFVulkanDescriptorSetBinding *out_desc,
|
||||
enum FFVkShaderRepFormat *out_rep,
|
||||
@@ -207,6 +252,35 @@ static int add_ops_glsl(VulkanPriv *p, FFVulkanOpsCtx *s,
|
||||
add_desc_read_write(&buf_desc[nb_desc++], &p->dst_rep, write);
|
||||
ff_vk_shader_add_descriptor_set(&s->vkctx, shd, buf_desc, nb_desc, 0, 0);
|
||||
|
||||
err = create_dither_bufs(s, p, ops);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
nb_desc = 0;
|
||||
char dither_buf_name[MAX_DITHER_BUFS][64];
|
||||
char dither_mat_name[MAX_DITHER_BUFS][64];
|
||||
for (int n = 0; n < ops->num_ops; n++) {
|
||||
const SwsOp *op = &ops->ops[n];
|
||||
if (op->op != SWS_OP_DITHER)
|
||||
continue;
|
||||
int size = (1 << op->dither.size_log2);
|
||||
av_assert0(size < 8192);
|
||||
snprintf(dither_buf_name[nb_desc], 64, "dither_buf%i", n);
|
||||
snprintf(dither_mat_name[nb_desc], 64, "float dither_mat%i[%i][%i];",
|
||||
n, size, size);
|
||||
buf_desc[nb_desc] = (FFVulkanDescriptorSetBinding) {
|
||||
.name = dither_buf_name[nb_desc],
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.mem_layout = "scalar",
|
||||
.buf_content = dither_mat_name[nb_desc],
|
||||
};
|
||||
nb_desc++;
|
||||
}
|
||||
if (nb_desc)
|
||||
ff_vk_shader_add_descriptor_set(&s->vkctx, shd, buf_desc,
|
||||
nb_desc, 1, 0);
|
||||
|
||||
GLSLC(0, void main() );
|
||||
GLSLC(0, { );
|
||||
GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
|
||||
@@ -308,27 +382,19 @@ static int add_ops_glsl(VulkanPriv *p, FFVulkanOpsCtx *s,
|
||||
type_name, type_v, ff_sws_pixel_type_name(op->type));
|
||||
}
|
||||
break;
|
||||
case SWS_OP_DITHER:
|
||||
av_bprintf(&shd->src, " precise const float dm%i[%i][%i] = {\n",
|
||||
n, 1 << op->dither.size_log2, 1 << op->dither.size_log2);
|
||||
case SWS_OP_DITHER: {
|
||||
int size = (1 << op->dither.size_log2);
|
||||
for (int i = 0; i < size; i++) {
|
||||
av_bprintf(&shd->src, " { ");
|
||||
for (int j = 0; j < size; j++)
|
||||
av_bprintf(&shd->src, QSTR", ",
|
||||
QTYPE(op->dither.matrix[i*size + j]));
|
||||
av_bprintf(&shd->src, "}, %s\n", i == (size - 1) ? "\n };" : "");
|
||||
}
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (op->dither.y_offset[i] < 0)
|
||||
continue;
|
||||
av_bprintf(&shd->src, " %s.%c += dm%i[(pos.y + %i) & %i]"
|
||||
"[pos.x & %i];\n",
|
||||
av_bprintf(&shd->src, " %s.%c += dither_mat%i[(pos.y + %i) & %i]"
|
||||
"[pos.x & %i];\n",
|
||||
type_name, "xyzw"[i], n,
|
||||
op->dither.y_offset[i], size - 1,
|
||||
size - 1);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case SWS_OP_LINEAR:
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (op->lin.m[i][4].num)
|
||||
@@ -420,6 +486,11 @@ static int compile(SwsContext *sws, SwsOpList *ops, SwsCompiledOp *out)
|
||||
if (err < 0)
|
||||
goto fail;
|
||||
|
||||
for (int i = 0; i < p->nb_dither_buf; i++)
|
||||
ff_vk_shader_update_desc_buffer(&s->vkctx, &p->e.contexts[0], &p->shd,
|
||||
1, i, 0, &p->dither_buf[i],
|
||||
0, VK_WHOLE_SIZE, VK_FORMAT_UNDEFINED);
|
||||
|
||||
*out = (SwsCompiledOp) {
|
||||
.opaque = true,
|
||||
.func_opaque = process,
|
||||
|
||||
Reference in New Issue
Block a user