diff --git a/libswscale/vulkan/ops.c b/libswscale/vulkan/ops.c index 1788c720f6..3dd6fed6da 100644 --- a/libswscale/vulkan/ops.c +++ b/libswscale/vulkan/ops.c @@ -78,10 +78,14 @@ int ff_sws_vk_init(SwsContext *sws, AVBufferRef *dev_ref) return 0; } +#define MAX_DITHER_BUFS 4 + typedef struct VulkanPriv { FFVulkanOpsCtx *s; FFVkExecPool e; FFVulkanShader shd; + FFVkBuffer dither_buf[MAX_DITHER_BUFS]; + int nb_dither_buf; enum FFVkShaderRepFormat src_rep; enum FFVkShaderRepFormat dst_rep; } VulkanPriv; @@ -149,10 +153,51 @@ static void free_fn(void *priv) VulkanPriv *p = priv; ff_vk_exec_pool_free(&p->s->vkctx, &p->e); ff_vk_shader_free(&p->s->vkctx, &p->shd); + for (int i = 0; i < p->nb_dither_buf; i++) + ff_vk_free_buf(&p->s->vkctx, &p->dither_buf[i]); av_refstruct_unref(&p->s); av_free(priv); } +static int create_dither_bufs(FFVulkanOpsCtx *s, VulkanPriv *p, SwsOpList *ops) +{ + int err; + p->nb_dither_buf = 0; + for (int n = 0; n < ops->num_ops; n++) { + const SwsOp *op = &ops->ops[n]; + if (op->op != SWS_OP_DITHER) + continue; + av_assert0(p->nb_dither_buf + 1 <= MAX_DITHER_BUFS); + + int size = (1 << op->dither.size_log2); + err = ff_vk_create_buf(&s->vkctx, &p->dither_buf[p->nb_dither_buf], + size*size*sizeof(float), NULL, NULL, + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + if (err < 0) + return err; + + float *dither_data; + err = ff_vk_map_buffer(&s->vkctx, &p->dither_buf[p->nb_dither_buf], + (uint8_t **)&dither_data, 0); + if (err < 0) + return err; + + for (int i = 0; i < size; i++) { + for (int j = 0; j < size; j++) { + const AVRational r = op->dither.matrix[i*size + j]; + dither_data[i*size + j] = r.num/(float)r.den; + } + } + + ff_vk_unmap_buffer(&s->vkctx, &p->dither_buf[p->nb_dither_buf], 1); + p->nb_dither_buf++; + } + + return 0; +} + #if CONFIG_LIBSHADERC || CONFIG_LIBGLSLANG static void add_desc_read_write(FFVulkanDescriptorSetBinding *out_desc, enum FFVkShaderRepFormat *out_rep, @@ -207,6 +252,35 @@ static int add_ops_glsl(VulkanPriv *p, FFVulkanOpsCtx *s, add_desc_read_write(&buf_desc[nb_desc++], &p->dst_rep, write); ff_vk_shader_add_descriptor_set(&s->vkctx, shd, buf_desc, nb_desc, 0, 0); + err = create_dither_bufs(s, p, ops); + if (err < 0) + return err; + + nb_desc = 0; + char dither_buf_name[MAX_DITHER_BUFS][64]; + char dither_mat_name[MAX_DITHER_BUFS][64]; + for (int n = 0; n < ops->num_ops; n++) { + const SwsOp *op = &ops->ops[n]; + if (op->op != SWS_OP_DITHER) + continue; + int size = (1 << op->dither.size_log2); + av_assert0(size < 8192); + snprintf(dither_buf_name[nb_desc], 64, "dither_buf%i", n); + snprintf(dither_mat_name[nb_desc], 64, "float dither_mat%i[%i][%i];", + n, size, size); + buf_desc[nb_desc] = (FFVulkanDescriptorSetBinding) { + .name = dither_buf_name[nb_desc], + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stages = VK_SHADER_STAGE_COMPUTE_BIT, + .mem_layout = "scalar", + .buf_content = dither_mat_name[nb_desc], + }; + nb_desc++; + } + if (nb_desc) + ff_vk_shader_add_descriptor_set(&s->vkctx, shd, buf_desc, + nb_desc, 1, 0); + GLSLC(0, void main() ); GLSLC(0, { ); GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); ); @@ -308,27 +382,19 @@ static int add_ops_glsl(VulkanPriv *p, FFVulkanOpsCtx *s, type_name, type_v, ff_sws_pixel_type_name(op->type)); } break; - case SWS_OP_DITHER: - av_bprintf(&shd->src, " precise const float dm%i[%i][%i] = {\n", - n, 1 << op->dither.size_log2, 1 << op->dither.size_log2); + case SWS_OP_DITHER: { int size = (1 << op->dither.size_log2); - for (int i = 0; i < size; i++) { - av_bprintf(&shd->src, " { "); - for (int j = 0; j < size; j++) - av_bprintf(&shd->src, QSTR", ", - QTYPE(op->dither.matrix[i*size + j])); - av_bprintf(&shd->src, "}, %s\n", i == (size - 1) ? "\n };" : ""); - } for (int i = 0; i < 4; i++) { if (op->dither.y_offset[i] < 0) continue; - av_bprintf(&shd->src, " %s.%c += dm%i[(pos.y + %i) & %i]" - "[pos.x & %i];\n", + av_bprintf(&shd->src, " %s.%c += dither_mat%i[(pos.y + %i) & %i]" + "[pos.x & %i];\n", type_name, "xyzw"[i], n, op->dither.y_offset[i], size - 1, size - 1); } break; + } case SWS_OP_LINEAR: for (int i = 0; i < 4; i++) { if (op->lin.m[i][4].num) @@ -420,6 +486,11 @@ static int compile(SwsContext *sws, SwsOpList *ops, SwsCompiledOp *out) if (err < 0) goto fail; + for (int i = 0; i < p->nb_dither_buf; i++) + ff_vk_shader_update_desc_buffer(&s->vkctx, &p->e.contexts[0], &p->shd, + 1, i, 0, &p->dither_buf[i], + 0, VK_WHOLE_SIZE, VK_FORMAT_UNDEFINED); + *out = (SwsCompiledOp) { .opaque = true, .func_opaque = process,