swscale/vulkan: use uniform buffers for dither matrix

Uniform buffers are much simpler to index, and do not rely on the
driver's shader compiler to optimize the access.
In SPIR-V, large 2D shader constants can be spilled into scratch memory,
since indexing them at runtime requires creating a function variable.

Sponsored-by: Sovereign Tech Fund
This commit is contained in:
Lynne
2026-03-26 08:45:59 +01:00
parent d4bcd3340e
commit 5fa4085774

View File

@@ -78,10 +78,14 @@ int ff_sws_vk_init(SwsContext *sws, AVBufferRef *dev_ref)
return 0;
}
/* Capacity of VulkanPriv.dither_buf[]; create_dither_bufs() asserts that the
 * number of SWS_OP_DITHER operations in an op list does not exceed it. */
#define MAX_DITHER_BUFS 4
/* Private state of one compiled Vulkan operation list; released by free_fn(). */
typedef struct VulkanPriv {
/* Shared ops context; free_fn() drops this reference with av_refstruct_unref(). */
FFVulkanOpsCtx *s;
/* Execution pool; its first context receives the dither buffer bindings. */
FFVkExecPool e;
/* Shader generated for the operation list. */
FFVulkanShader shd;
/* One uniform buffer per SWS_OP_DITHER operation, in op-list order, holding
 * that operation's dither matrix as floats. */
FFVkBuffer dither_buf[MAX_DITHER_BUFS];
/* Number of entries of dither_buf[] that are in use and must be freed. */
int nb_dither_buf;
/* Shader representation formats; dst_rep is filled in by add_desc_read_write()
 * for the write side — presumably src_rep likewise for the read side. */
enum FFVkShaderRepFormat src_rep;
enum FFVkShaderRepFormat dst_rep;
} VulkanPriv;
@@ -149,10 +153,51 @@ static void free_fn(void *priv)
VulkanPriv *p = priv;
ff_vk_exec_pool_free(&p->s->vkctx, &p->e);
ff_vk_shader_free(&p->s->vkctx, &p->shd);
for (int i = 0; i < p->nb_dither_buf; i++)
ff_vk_free_buf(&p->s->vkctx, &p->dither_buf[i]);
av_refstruct_unref(&p->s);
av_free(priv);
}
/**
 * Create and fill one uniform buffer per SWS_OP_DITHER operation in ops.
 *
 * Each buffer holds the operation's (1 << size_log2) x (1 << size_log2)
 * dither matrix, converted from AVRational to float, in row-major order.
 * Buffers are stored in p->dither_buf[] in the order the dither operations
 * appear in the list, and p->nb_dither_buf is reset on entry.
 *
 * On failure nothing is freed here: every buffer that was successfully
 * created, including the one being filled when the error occurred, stays
 * recorded in p->dither_buf[0 .. p->nb_dither_buf - 1], so that cleanup
 * bounded by nb_dither_buf (as done in free_fn()) releases all of them.
 *
 * @param s   Vulkan ops context providing the FFVulkanContext
 * @param p   private state receiving the buffers
 * @param ops operation list to scan for dither operations
 * @return 0 on success, a negative error code from the Vulkan helpers otherwise
 */
static int create_dither_bufs(FFVulkanOpsCtx *s, VulkanPriv *p, SwsOpList *ops)
{
    int err;

    p->nb_dither_buf = 0;

    for (int n = 0; n < ops->num_ops; n++) {
        const SwsOp *op = &ops->ops[n];
        if (op->op != SWS_OP_DITHER)
            continue;

        av_assert0(p->nb_dither_buf < MAX_DITHER_BUFS);

        FFVkBuffer *buf = &p->dither_buf[p->nb_dither_buf];
        const int size = 1 << op->dither.size_log2;
        /* Widen before multiplying so the element count is not computed
         * in int. */
        const size_t nb_coeffs = (size_t)size * size;

        err = ff_vk_create_buf(&s->vkctx, buf,
                               nb_coeffs * sizeof(float), NULL, NULL,
                               VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
                               VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                               VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
        if (err < 0)
            return err;

        /* Account for the buffer as soon as it exists; otherwise a failure
         * below would leave it outside the range that gets freed. */
        p->nb_dither_buf++;

        float *dither_data;
        err = ff_vk_map_buffer(&s->vkctx, buf, (uint8_t **)&dither_data, 0);
        if (err < 0)
            return err;

        /* The matrix is stored row-major, so a flat copy preserves layout. */
        for (size_t k = 0; k < nb_coeffs; k++) {
            const AVRational r = op->dither.matrix[k];
            dither_data[k] = r.num / (float)r.den;
        }

        /* Unmap with flush requested; propagate a failed flush instead of
         * silently binding a buffer with undefined contents. */
        err = ff_vk_unmap_buffer(&s->vkctx, buf, 1);
        if (err < 0)
            return err;
    }

    return 0;
}
#if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG
static void add_desc_read_write(FFVulkanDescriptorSetBinding *out_desc,
enum FFVkShaderRepFormat *out_rep,
@@ -207,6 +252,35 @@ static int add_ops_glsl(VulkanPriv *p, FFVulkanOpsCtx *s,
add_desc_read_write(&buf_desc[nb_desc++], &p->dst_rep, write);
ff_vk_shader_add_descriptor_set(&s->vkctx, shd, buf_desc, nb_desc, 0, 0);
err = create_dither_bufs(s, p, ops);
if (err < 0)
return err;
nb_desc = 0;
char dither_buf_name[MAX_DITHER_BUFS][64];
char dither_mat_name[MAX_DITHER_BUFS][64];
for (int n = 0; n < ops->num_ops; n++) {
const SwsOp *op = &ops->ops[n];
if (op->op != SWS_OP_DITHER)
continue;
int size = (1 << op->dither.size_log2);
av_assert0(size < 8192);
snprintf(dither_buf_name[nb_desc], 64, "dither_buf%i", n);
snprintf(dither_mat_name[nb_desc], 64, "float dither_mat%i[%i][%i];",
n, size, size);
buf_desc[nb_desc] = (FFVulkanDescriptorSetBinding) {
.name = dither_buf_name[nb_desc],
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.mem_layout = "scalar",
.buf_content = dither_mat_name[nb_desc],
};
nb_desc++;
}
if (nb_desc)
ff_vk_shader_add_descriptor_set(&s->vkctx, shd, buf_desc,
nb_desc, 1, 0);
GLSLC(0, void main() );
GLSLC(0, { );
GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
@@ -308,27 +382,19 @@ static int add_ops_glsl(VulkanPriv *p, FFVulkanOpsCtx *s,
type_name, type_v, ff_sws_pixel_type_name(op->type));
}
break;
case SWS_OP_DITHER:
av_bprintf(&shd->src, " precise const float dm%i[%i][%i] = {\n",
n, 1 << op->dither.size_log2, 1 << op->dither.size_log2);
case SWS_OP_DITHER: {
int size = (1 << op->dither.size_log2);
for (int i = 0; i < size; i++) {
av_bprintf(&shd->src, " { ");
for (int j = 0; j < size; j++)
av_bprintf(&shd->src, QSTR", ",
QTYPE(op->dither.matrix[i*size + j]));
av_bprintf(&shd->src, "}, %s\n", i == (size - 1) ? "\n };" : "");
}
for (int i = 0; i < 4; i++) {
if (op->dither.y_offset[i] < 0)
continue;
av_bprintf(&shd->src, " %s.%c += dm%i[(pos.y + %i) & %i]"
"[pos.x & %i];\n",
av_bprintf(&shd->src, " %s.%c += dither_mat%i[(pos.y + %i) & %i]"
"[pos.x & %i];\n",
type_name, "xyzw"[i], n,
op->dither.y_offset[i], size - 1,
size - 1);
}
break;
}
case SWS_OP_LINEAR:
for (int i = 0; i < 4; i++) {
if (op->lin.m[i][4].num)
@@ -420,6 +486,11 @@ static int compile(SwsContext *sws, SwsOpList *ops, SwsCompiledOp *out)
if (err < 0)
goto fail;
for (int i = 0; i < p->nb_dither_buf; i++)
ff_vk_shader_update_desc_buffer(&s->vkctx, &p->e.contexts[0], &p->shd,
1, i, 0, &p->dither_buf[i],
0, VK_WHOLE_SIZE, VK_FORMAT_UNDEFINED);
*out = (SwsCompiledOp) {
.opaque = true,
.func_opaque = process,