/*
 * Copyright (c) 2024 Lynne
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "vulkan_decode.h"
#include "hwaccel_internal.h"

#include "ffv1.h"
#include "ffv1_vulkan.h"
#include "libavutil/mem.h"

/* Intermediate RCT frame height is num_v_slices * RGB_LINECACHE (see
 * init_indirect()); also forwarded as specialization constant 0 when it
 * differs from 2. */
#define RGB_LINECACHE 2

/* Precompiled SPIR-V blobs, defined outside this file. */
extern const unsigned char ff_ffv1_dec_setup_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_setup_comp_spv_len;

extern const unsigned char ff_ffv1_dec_reset_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_reset_comp_spv_len;

extern const unsigned char ff_ffv1_dec_reset_golomb_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_reset_golomb_comp_spv_len;

extern const unsigned char ff_ffv1_dec_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_comp_spv_len;

extern const unsigned char ff_ffv1_dec_rgb_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_rgb_comp_spv_len;

extern const unsigned char ff_ffv1_dec_golomb_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_golomb_comp_spv_len;

extern const unsigned char ff_ffv1_dec_rgb_golomb_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_rgb_golomb_comp_spv_len;

extern const unsigned char ff_ffv1_dec_rgb_float_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_rgb_float_comp_spv_len;

extern const unsigned char ff_ffv1_dec_rgb_float_golomb_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_rgb_float_golomb_comp_spv_len;

const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = {
    .codec_id         = AV_CODEC_ID_FFV1,
    .queue_flags      = VK_QUEUE_COMPUTE_BIT,
};

/* Per-frame private data of the hwaccel. */
typedef struct FFv1VulkanDecodePicture {
    FFVulkanDecodePicture vp;

    /* Per-slice state; freshly pooled on keyframes, otherwise a new
     * reference to the previous picture's buffer. */
    AVBufferRef *slice_state;
    uint32_t plane_state_size;
    uint32_t slice_state_size;
    uint32_t slice_data_size;

    /* Only allocated for version >= 4 with micro_version >= 9. */
    AVBufferRef *slice_fltmap_buf;
    /* First half: (offset, size) pair per slice, written by the host.
     * Second half: two status words per slice, read back on frame free. */
    AVBufferRef *slice_feedback_buf;

    uint32_t *slice_offset;
    int       slice_num;
    int       crc_checked;
} FFv1VulkanDecodePicture;

/* Decoder-wide state, stored in FFVulkanDecodeShared.sd_ctx. */
typedef struct FFv1VulkanDecodeContext {
    AVBufferRef *intermediate_frames_ref;

    FFVulkanShader setup;
    FFVulkanShader reset;
    FFVulkanShader decode;

    FFVkBuffer consts_buf;

    AVBufferPool *slice_state_pool;
    AVBufferPool *slice_fltmap_pool;
    AVBufferPool *slice_feedback_pool;
} FFv1VulkanDecodeContext;

/**
 * Begin decoding a frame: compute per-slice buffer sizes, allocate the
 * slice state/feedback (and optional float map) buffers, prepare the output
 * frame and, for RGB content, fetch an intermediate frame.
 *
 * Buffers allocated before a failure are released later by
 * vk_ffv1_free_frame_priv().
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int vk_ffv1_start_frame(AVCodecContext          *avctx,
                               const AVBufferRef       *buffer_ref,
                               av_unused const uint8_t *buffer,
                               av_unused uint32_t       size)
{
    int err;
    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
    FFVulkanDecodeShared *ctx = dec->shared_ctx;
    FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
    FFV1Context *f = avctx->priv_data;

    FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
    FFVulkanDecodePicture *vp = &fp->vp;

    AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
    enum AVPixelFormat sw_format = hwfc->sw_format;

    int max_contexts;
    int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
                 !(sw_format == AV_PIX_FMT_YA8);

    fp->slice_num = 0;

    max_contexts = 0;
    for (int i = 0; i < f->quant_table_count; i++)
        max_contexts = FFMAX(f->context_count[i], max_contexts);

    /* Allocate slice buffer data */
    if (f->ac == AC_GOLOMB_RICE)
        fp->plane_state_size = 8;
    else
        fp->plane_state_size = CONTEXT_SIZE;

    fp->plane_state_size *= max_contexts;
    fp->slice_state_size = fp->plane_state_size*f->plane_count;

    fp->slice_data_size = 256; /* Overestimation for the SliceContext struct */
    fp->slice_state_size += fp->slice_data_size;
    fp->slice_state_size = FFALIGN(fp->slice_state_size, 8);

    fp->crc_checked = f->ec && (avctx->err_recognition & AV_EF_CRCCHECK);

    /* Host map the input slices data if supported.
     * The result is deliberately not checked: when vp->slices_buf stays
     * unset, vk_ffv1_decode_slice() takes its copying path instead. */
    if (ctx->s.extensions & FF_VK_EXT_EXTERNAL_HOST_MEMORY)
        ff_vk_host_map_buffer(&ctx->s, &vp->slices_buf, buffer_ref->data,
                              buffer_ref,
                              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                              VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT);

    /* Allocate slice state data */
    if (f->picture.f->flags & AV_FRAME_FLAG_KEY) {
        err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_state_pool,
                                      &fp->slice_state,
                                      VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
                                      NULL, f->slice_count*fp->slice_state_size,
                                      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
        if (err < 0)
            return err;
    } else {
        FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private;

        /* A non-keyframe continues from the previous picture's state;
         * without one there is nothing to reference. */
        if (!fpl || !fpl->slice_state)
            return AVERROR_INVALIDDATA;

        fp->slice_state = av_buffer_ref(fpl->slice_state);
        if (!fp->slice_state)
            return AVERROR(ENOMEM);
    }

    /* Allocate slice offsets/status buffer */
    err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_feedback_pool,
                                  &fp->slice_feedback_buf,
                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
                                  NULL, 2*(2*f->slice_count*sizeof(uint32_t)),
                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
    if (err < 0)
        return err;

    /* Allocate the per-slice float map buffer (only for streams with
     * version >= 4 and micro_version >= 9) */
    if (f->version >= 4 && f->micro_version >= 9) {
        err = ff_vk_get_pooled_buffer(&ctx->s, &fv->slice_fltmap_pool,
                                      &fp->slice_fltmap_buf,
                                      VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
                                      NULL, 65536*4*f->slice_count*sizeof(uint32_t),
                                      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                                      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
        if (err < 0)
            return err;
    }

    /* Prepare frame to be used */
    err = ff_vk_decode_prepare_frame_sdr(dec, f->picture.f, vp, 1,
                                         FF_VK_REP_NATIVE, 0);
    if (err < 0)
        return err;

    /* Create a temporary frame for RGB */
    if (is_rgb) {
        vp->dpb_frame = av_frame_alloc();
        if (!vp->dpb_frame)
            return AVERROR(ENOMEM);

        err = av_hwframe_get_buffer(fv->intermediate_frames_ref,
                                    vp->dpb_frame, 0);
        if (err < 0)
            return err;
    }

    return 0;
}

/**
 * Register one slice: record its (offset, size) pair in the first half of
 * the feedback buffer.
 *
 * With a host-mapped slices buffer the offset is the distance of @p data
 * from the mapped base; otherwise the slice is appended through
 * ff_vk_decode_add_slice() and the offset it reports is stored.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int vk_ffv1_decode_slice(AVCodecContext *avctx,
                                const uint8_t  *data,
                                uint32_t        size)
{
    FFV1Context *f = avctx->priv_data;

    FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
    FFVulkanDecodePicture *vp = &fp->vp;

    FFVkBuffer *slice_offset = (FFVkBuffer *)fp->slice_feedback_buf->data;
    FFVkBuffer *slices_buf = vp->slices_buf ? (FFVkBuffer *)vp->slices_buf->data : NULL;

    if (slices_buf && slices_buf->host_ref) {
        AV_WN32(slice_offset->mapped_mem + (2*fp->slice_num + 0)*sizeof(uint32_t),
                data - slices_buf->mapped_mem);
        AV_WN32(slice_offset->mapped_mem + (2*fp->slice_num + 1)*sizeof(uint32_t),
                size);

        fp->slice_num++;
    } else {
        /* ff_vk_decode_add_slice() advances fp->slice_num itself, hence the
         * "- 1" when indexing below. */
        int err = ff_vk_decode_add_slice(avctx, vp, data, size, 0,
                                         &fp->slice_num,
                                         (const uint32_t **)&fp->slice_offset);
        if (err < 0)
            return err;

        AV_WN32(slice_offset->mapped_mem + (2*(fp->slice_num - 1) + 0)*sizeof(uint32_t),
                fp->slice_offset[fp->slice_num - 1]);
        AV_WN32(slice_offset->mapped_mem + (2*(fp->slice_num - 1) + 1)*sizeof(uint32_t),
                size);
    }

    return 0;
}

/**
 * Record and submit the GPU work for the frame: the setup, reset and decode
 * compute dispatches with the barriers between them.
 *
 * RET() is defined outside this file; it is used here on the assumption that
 * it stores the call's result in err and jumps to fail when it is negative.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
static int vk_ffv1_end_frame(AVCodecContext *avctx)
{
    int err;
    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
    FFVulkanDecodeShared *ctx = dec->shared_ctx;
    FFVulkanFunctions *vk = &ctx->s.vkfn;

    FFV1Context *f = avctx->priv_data;
    FFv1VulkanDecodeContext *fv = ctx->sd_ctx;

    AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
    enum AVPixelFormat sw_format = hwfc->sw_format;

    int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
                 !(sw_format == AV_PIX_FMT_YA8);
    int color_planes = av_pix_fmt_desc_get(avctx->sw_pix_fmt)->nb_components;

    FFv1VulkanDecodePicture *fp = f->hwaccel_picture_private;
    FFVulkanDecodePicture *vp = &fp->vp;

    FFVkBuffer *slices_buf = (FFVkBuffer *)vp->slices_buf->data;
    FFVkBuffer *slice_state = (FFVkBuffer *)fp->slice_state->data;
    FFVkBuffer *slice_feedback = (FFVkBuffer *)fp->slice_feedback_buf->data;
    FFVkBuffer *fltmap_buf = NULL;
    if (fp->slice_fltmap_buf)
        fltmap_buf = (FFVkBuffer *)fp->slice_fltmap_buf->data;

    VkImageView rct_image_views[AV_NUM_DATA_POINTERS];

    VkImageMemoryBarrier2 img_bar[37];
    int nb_img_bar = 0;
    VkBufferMemoryBarrier2 buf_bar[8];
    int nb_buf_bar = 0;

    FFVkExecContext *exec = ff_vk_exec_get(&ctx->s, &ctx->exec_pool);
    RET(ff_vk_exec_start(&ctx->s, exec));

    /* Prepare deps */
    RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, f->picture.f,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));

    RET(ff_vk_exec_mirror_sem_value(&ctx->s, exec, &vp->sem, &vp->sem_value,
                                    f->picture.f));

    if (is_rgb) {
        RET(ff_vk_create_imageviews(&ctx->s, exec, rct_image_views,
                                    vp->dpb_frame, FF_VK_REP_NATIVE));
        RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame,
                                     VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                     VK_PIPELINE_STAGE_2_CLEAR_BIT));
    }

    if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY)) {
        FFv1VulkanDecodePicture *fpl = f->hwaccel_last_picture_private;
        FFVulkanDecodePicture *vpl = &fpl->vp;

        /* Wait on the previous frame */
        RET(ff_vk_exec_add_dep_wait_sem(&ctx->s, exec, vpl->sem, vpl->sem_value,
                                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT));
    }

    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_state, 1, 1));
    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_feedback_buf, 1, 1));

    /* Added with the last argument 0 and then dropped locally: the picture
     * no longer holds these two references afterwards. */
    RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->slices_buf, 1, 0));
    vp->slices_buf = NULL;
    if (fp->slice_fltmap_buf) {
        RET(ff_vk_exec_add_dep_buf(&ctx->s, exec, &fp->slice_fltmap_buf, 1, 0));
        fp->slice_fltmap_buf = NULL;
    }

    AVVkFrame *vkf = (AVVkFrame *)f->picture.f->data[0];
    for (int i = 0; i < ff_vk_count_images(vkf); i++) {
        vkf->layout[i] = VK_IMAGE_LAYOUT_UNDEFINED;
        vkf->access[i] = VK_ACCESS_2_NONE;
    }

    /* Setup shader */
    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
                                    1, 0, 0,
                                    slice_state,
                                    0, fp->slice_data_size*f->slice_count,
                                    VK_FORMAT_UNDEFINED);
    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
                                    1, 1, 0,
                                    slice_feedback,
                                    0, 2*f->slice_count*sizeof(uint32_t),
                                    VK_FORMAT_UNDEFINED);
    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
                                    1, 2, 0,
                                    slice_feedback,
                                    2*f->slice_count*sizeof(uint32_t),
                                    VK_WHOLE_SIZE,
                                    VK_FORMAT_UNDEFINED);
    if (fltmap_buf)
        ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->setup,
                                        1, 3, 0,
                                        fltmap_buf,
                                        0, VK_WHOLE_SIZE,
                                        VK_FORMAT_UNDEFINED);

    ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup);

    /* The same push constants are reused for all three shaders */
    FFv1ShaderParams pd = {
        .slice_data = slices_buf->address,

        .img_size[0] = f->picture.f->width,
        .img_size[1] = f->picture.f->height,

        .plane_state_size = fp->plane_state_size,
        .key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
        .crcref = f->crcref,
        .micro_version = f->micro_version,
    };

    for (int i = 0; i < f->quant_table_count; i++) {
        pd.context_count[i] = f->context_count[i];
        pd.extend_lookup[i] = f->quant_tables[i][3][127] ||
                              f->quant_tables[i][4][127];
    }

    /* For some reason the C FFv1 encoder/decoder treats these differently */
    if (sw_format == AV_PIX_FMT_GBRP10 || sw_format == AV_PIX_FMT_GBRP12 ||
        sw_format == AV_PIX_FMT_GBRP14)
        memcpy(pd.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int));
    else
        ff_vk_set_perm(sw_format, pd.fmt_lut, 0);

    ff_vk_shader_update_push_const(&ctx->s, exec, &fv->setup,
                                   VK_SHADER_STAGE_COMPUTE_BIT,
                                   0, sizeof(FFv1ShaderParams), &pd);

    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);

    if (is_rgb) {
        vkf = (AVVkFrame *)vp->dpb_frame->data[0];
        for (int i = 0; i < 4; i++) {
            vkf->layout[i] = VK_IMAGE_LAYOUT_UNDEFINED;
            vkf->access[i] = VK_ACCESS_2_NONE;
        }

        ff_vk_frame_barrier(&ctx->s, exec, vp->dpb_frame, img_bar, &nb_img_bar,
                            VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                            VK_PIPELINE_STAGE_2_CLEAR_BIT,
                            VK_ACCESS_2_TRANSFER_WRITE_BIT,
                            VK_IMAGE_LAYOUT_GENERAL,
                            VK_QUEUE_FAMILY_IGNORED);

        vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pImageMemoryBarriers = img_bar,
            .imageMemoryBarrierCount = nb_img_bar,
            .pBufferMemoryBarriers = buf_bar,
            .bufferMemoryBarrierCount = nb_buf_bar,
        });
        nb_img_bar = 0;
        nb_buf_bar = 0;

        /* Zero the intermediate frame before the decode shader uses it */
        for (int i = 0; i < color_planes; i++)
            vk->CmdClearColorImage(exec->buf, vkf->img[i], VK_IMAGE_LAYOUT_GENERAL,
                                   &((VkClearColorValue) { 0 }),
                                   1, &((VkImageSubresourceRange) {
                                       .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                       .levelCount = 1,
                                       .layerCount = 1,
                                   }));
    }

    /* Sync between setup and reset shaders */
    ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_state,
                      COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
                                          SHADER_STORAGE_WRITE_BIT,
                      COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT, NONE_KHR,
                      0, fp->slice_data_size*f->slice_count);

    /* Probability data barrier for P-frames */
    if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY))
        ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_state,
                          COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
                                              SHADER_STORAGE_WRITE_BIT,
                          COMPUTE_SHADER_BIT, SHADER_STORAGE_WRITE_BIT, NONE_KHR,
                          fp->slice_data_size*f->slice_count, VK_WHOLE_SIZE);

    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pImageMemoryBarriers = img_bar,
        .imageMemoryBarrierCount = nb_img_bar,
        .pBufferMemoryBarriers = buf_bar,
        .bufferMemoryBarrierCount = nb_buf_bar,
    });
    nb_buf_bar = 0;
    nb_img_bar = 0;

    /* Reset shader */
    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->reset,
                                    1, 0, 0,
                                    slice_state,
                                    0, fp->slice_data_size*f->slice_count,
                                    VK_FORMAT_UNDEFINED);
    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->reset,
                                    1, 1, 0,
                                    slice_state,
                                    f->slice_count*fp->slice_data_size,
                                    VK_WHOLE_SIZE,
                                    VK_FORMAT_UNDEFINED);

    ff_vk_exec_bind_shader(&ctx->s, exec, &fv->reset);
    ff_vk_shader_update_push_const(&ctx->s, exec, &fv->reset,
                                   VK_SHADER_STAGE_COMPUTE_BIT,
                                   0, sizeof(FFv1ShaderParams), &pd);

    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices,
                    f->plane_count);

    /* Sync probabilities between reset and decode shaders */
    ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_state,
                      COMPUTE_SHADER_BIT, SHADER_STORAGE_WRITE_BIT, NONE_KHR,
                      COMPUTE_SHADER_BIT, SHADER_STORAGE_READ_BIT,
                                          SHADER_STORAGE_WRITE_BIT,
                      fp->slice_data_size*f->slice_count, VK_WHOLE_SIZE);

    /* Input frame barrier */
    ff_vk_frame_barrier(&ctx->s, exec, f->picture.f, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT |
                        (!is_rgb ? VK_ACCESS_SHADER_READ_BIT : 0),
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);
    if (is_rgb)
        ff_vk_frame_barrier(&ctx->s, exec, vp->dpb_frame, img_bar, &nb_img_bar,
                            VK_PIPELINE_STAGE_2_CLEAR_BIT,
                            VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                            VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
                            VK_IMAGE_LAYOUT_GENERAL,
                            VK_QUEUE_FAMILY_IGNORED);

    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pImageMemoryBarriers = img_bar,
        .imageMemoryBarrierCount = nb_img_bar,
        .pBufferMemoryBarriers = buf_bar,
        .bufferMemoryBarrierCount = nb_buf_bar,
    });
    nb_img_bar = 0;
    nb_buf_bar = 0;

    /* Decode */
    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->decode,
                                    1, 0, 0,
                                    slice_state,
                                    0, fp->slice_data_size*f->slice_count,
                                    VK_FORMAT_UNDEFINED);
    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->decode,
                                    1, 1, 0,
                                    slice_feedback,
                                    0, 2*f->slice_count*sizeof(uint32_t),
                                    VK_FORMAT_UNDEFINED);
    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->decode,
                                    1, 2, 0,
                                    slice_feedback,
                                    2*f->slice_count*sizeof(uint32_t),
                                    VK_WHOLE_SIZE,
                                    VK_FORMAT_UNDEFINED);
    ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->decode,
                                    1, 3, 0,
                                    slice_state,
                                    f->slice_count*fp->slice_data_size,
                                    VK_WHOLE_SIZE,
                                    VK_FORMAT_UNDEFINED);

    /* RGB content is decoded into the intermediate frame, with the output
     * frame bound as a second image array */
    AVFrame *decode_dst = is_rgb ? vp->dpb_frame : f->picture.f;
    VkImageView *decode_dst_view = is_rgb ? rct_image_views : vp->view.out;
    ff_vk_shader_update_img_array(&ctx->s, exec, &fv->decode,
                                  decode_dst, decode_dst_view,
                                  1, 4,
                                  VK_IMAGE_LAYOUT_GENERAL,
                                  VK_NULL_HANDLE);
    if (is_rgb)
        ff_vk_shader_update_img_array(&ctx->s, exec, &fv->decode,
                                      f->picture.f, vp->view.out,
                                      1, 5,
                                      VK_IMAGE_LAYOUT_GENERAL,
                                      VK_NULL_HANDLE);
    if (fltmap_buf)
        ff_vk_shader_update_desc_buffer(&ctx->s, exec, &fv->decode,
                                        1, 6, 0,
                                        fltmap_buf,
                                        0, VK_WHOLE_SIZE,
                                        VK_FORMAT_UNDEFINED);

    ff_vk_exec_bind_shader(&ctx->s, exec, &fv->decode);
    ff_vk_shader_update_push_const(&ctx->s, exec, &fv->decode,
                                   VK_SHADER_STAGE_COMPUTE_BIT,
                                   0, sizeof(FFv1ShaderParams), &pd);

    vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);

    err = ff_vk_exec_submit(&ctx->s, exec);
    if (err < 0)
        return err;

    /* We don't need the temporary frame after decoding */
    av_frame_free(&vp->dpb_frame);

    return 0;

fail:
    /* Report the failure instead of returning success, and drop whatever
     * dependencies were already attached to the exec context. */
    ff_vk_exec_discard_deps(&ctx->s, exec);
    return err;
}

/**
 * Build the setup shader: push constants, a constant descriptor set
 * (rangecoder and CRC tables) and a four-binding storage buffer set.
 *
 * @return a negative AVERROR code if linking or registration fails,
 *         otherwise the result of the last RET() call
 */
static int init_setup_shader(FFV1Context *f, FFVulkanContext *s,
                             FFVkExecPool *pool, FFVulkanShader *shd,
                             VkSpecializationInfo *sl)
{
    int err;

    ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
                      (uint32_t []) { 1, 1, 1 }, 0);

    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1ShaderParams),
                                VK_SHADER_STAGE_COMPUTE_BIT);

    const FFVulkanDescriptorSetBinding desc_set_const[] = {
        { /* rangecoder_buf */
            .type   = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
        },
        { /* crc_ieee_buf */
            .type   = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
        },
    };
    ff_vk_shader_add_descriptor_set(s, shd, desc_set_const, 2, 1, 0);

    const FFVulkanDescriptorSetBinding desc_set[] = {
        { /* slice_data_buf */
            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
        },
        { /* slice_offsets_buf */
            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
        },
        { /* slice_status_buf */
            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
        },
        { /* fltmap_buf */
            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
        },
    };
    ff_vk_shader_add_descriptor_set(s, shd, desc_set, 4, 0, 0);

    RET(ff_vk_shader_link(s, shd,
                          ff_ffv1_dec_setup_comp_spv_data,
                          ff_ffv1_dec_setup_comp_spv_len, "main"));

    RET(ff_vk_shader_register_exec(s, pool, shd));

fail:
    return err;
}

/**
 * Build the reset shader, picking the Golomb-Rice or range coder SPIR-V
 * according to @p ac. The workgroup width is the device's
 * maxComputeWorkGroupSize[0], capped at 1024.
 *
 * @return a negative AVERROR code if linking or registration fails,
 *         otherwise the result of the last RET() call
 */
static int init_reset_shader(FFV1Context *f, FFVulkanContext *s,
                             FFVkExecPool *pool, FFVulkanShader *shd,
                             VkSpecializationInfo *sl, int ac)
{
    int err;
    int wg_dim = FFMIN(s->props.properties.limits.maxComputeWorkGroupSize[0], 1024);

    ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
                      (uint32_t []) { wg_dim, 1, 1 }, 0);

    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1ShaderParams),
                                VK_SHADER_STAGE_COMPUTE_BIT);

    const FFVulkanDescriptorSetBinding desc_set_const[] = {
        { /* rangecoder_buf */
            .type   = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
        },
    };
    ff_vk_shader_add_descriptor_set(s, shd, desc_set_const, 1, 1, 0);

    const FFVulkanDescriptorSetBinding desc_set[] = {
        { /* slice_data_buf */
            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
        },
        { /* slice_state_buf */
            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
        },
    };
    ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0);

    if (ac == AC_GOLOMB_RICE)
        RET(ff_vk_shader_link(s, shd,
                              ff_ffv1_dec_reset_golomb_comp_spv_data,
                              ff_ffv1_dec_reset_golomb_comp_spv_len, "main"));
    else
        RET(ff_vk_shader_link(s, shd,
                              ff_ffv1_dec_reset_comp_spv_data,
                              ff_ffv1_dec_reset_comp_spv_len, "main"));

    RET(ff_vk_shader_register_exec(s, pool, shd));

fail:
    return err;
}

/**
 * Build the decode shader. The SPIR-V variant is chosen from the stream
 * version (float map variants for version >= 4, micro_version >= 9), the
 * coder type @p ac and whether the content is RGB.
 *
 * @param dec_frames_ctx frames context of the image the shader decodes into
 * @param out_frames_ctx frames context of the output image
 * @return a negative AVERROR code if linking or registration fails,
 *         otherwise the result of the last RET() call
 */
static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
                              FFVkExecPool *pool, FFVulkanShader *shd,
                              AVHWFramesContext *dec_frames_ctx,
                              AVHWFramesContext *out_frames_ctx,
                              VkSpecializationInfo *sl, int ac, int rgb)
{
    int err;
    /* Same condition that decides whether a float map buffer is allocated
     * in vk_ffv1_start_frame() */
    int has_fltmap = f->version >= 4 && f->micro_version >= 9;
    uint32_t wg_x = ac != AC_GOLOMB_RICE ? CONTEXT_SIZE : 1;

    ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
                      (uint32_t []) { wg_x, 1, 1 }, 0);

    ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1ShaderParams),
                                VK_SHADER_STAGE_COMPUTE_BIT);

    const FFVulkanDescriptorSetBinding desc_set_const[] = {
        { /* rangecoder_buf */
            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
        },
        { /* quant_buf */
            .type   = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
        },
    };
    ff_vk_shader_add_descriptor_set(s, shd, desc_set_const, 2, 1, 0);

    const FFVulkanDescriptorSetBinding desc_set[] = {
        { /* slice_data_buf */
            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
        },
        { /* slice_offsets_buf */
            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
        },
        { /* slice_status_buf */
            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
        },
        { /* slice_state_buf */
            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
        },
        { /* dec */
            .type   = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
            .elems  = av_pix_fmt_count_planes(dec_frames_ctx->sw_format),
        },
        { /* dst */
            .type   = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
            .elems  = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
        },
        { /* fltmap_buf */
            .type   = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .stages = VK_SHADER_STAGE_COMPUTE_BIT,
        },
    };
    /* NOTE(review): the count only reaches the fltmap_buf entry when rgb is
     * also set; presumably float map streams are always RGB - confirm. */
    ff_vk_shader_add_descriptor_set(s, shd, desc_set,
                                    5 + rgb + has_fltmap, 0, 0);

    /* Link failures are propagated, as in the setup and reset shaders */
    if (has_fltmap) {
        if (ac == AC_GOLOMB_RICE)
            RET(ff_vk_shader_link(s, shd,
                                  ff_ffv1_dec_rgb_float_golomb_comp_spv_data,
                                  ff_ffv1_dec_rgb_float_golomb_comp_spv_len, "main"));
        else
            RET(ff_vk_shader_link(s, shd,
                                  ff_ffv1_dec_rgb_float_comp_spv_data,
                                  ff_ffv1_dec_rgb_float_comp_spv_len, "main"));
    } else if (ac == AC_GOLOMB_RICE) {
        if (rgb)
            RET(ff_vk_shader_link(s, shd,
                                  ff_ffv1_dec_rgb_golomb_comp_spv_data,
                                  ff_ffv1_dec_rgb_golomb_comp_spv_len, "main"));
        else
            RET(ff_vk_shader_link(s, shd,
                                  ff_ffv1_dec_golomb_comp_spv_data,
                                  ff_ffv1_dec_golomb_comp_spv_len, "main"));
    } else {
        if (rgb)
            RET(ff_vk_shader_link(s, shd,
                                  ff_ffv1_dec_rgb_comp_spv_data,
                                  ff_ffv1_dec_rgb_comp_spv_len, "main"));
        else
            RET(ff_vk_shader_link(s, shd,
                                  ff_ffv1_dec_comp_spv_data,
                                  ff_ffv1_dec_comp_spv_len, "main"));
    }

    RET(ff_vk_shader_register_exec(s, pool, shd));

fail:
    return err;
}

/**
 * Create the frames context for the intermediate images used with RGB
 * content. Frames are as wide as the output frames and
 * num_v_slices * RGB_LINECACHE rows tall.
 *
 * @param dst receives the new frames context; reset to NULL on failure
 * @return 0 on success, a negative AVERROR code on failure
 */
static int init_indirect(AVCodecContext *avctx, FFVulkanContext *s,
                         AVBufferRef **dst, enum AVPixelFormat sw_format)
{
    int err;
    AVHWFramesContext *frames_ctx;
    AVVulkanFramesContext *vk_frames;
    FFV1Context *f = avctx->priv_data;

    *dst = av_hwframe_ctx_alloc(s->device_ref);
    if (!(*dst))
        return AVERROR(ENOMEM);

    frames_ctx = (AVHWFramesContext *)((*dst)->data);
    frames_ctx->format    = AV_PIX_FMT_VULKAN;
    frames_ctx->sw_format = sw_format;
    frames_ctx->width     = s->frames->width;
    frames_ctx->height    = f->num_v_slices*RGB_LINECACHE;

    vk_frames = frames_ctx->hwctx;
    vk_frames->tiling    = VK_IMAGE_TILING_OPTIMAL;
    vk_frames->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
    vk_frames->usage     = VK_IMAGE_USAGE_STORAGE_BIT |
                           VK_IMAGE_USAGE_TRANSFER_DST_BIT;

    err = av_hwframe_ctx_init(*dst);
    if (err < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Unable to initialize frame pool with format %s: %s\n",
               av_get_pix_fmt_name(sw_format), av_err2str(err));
        av_buffer_unref(dst);
        return err;
    }

    return 0;
}

/**
 * Release everything owned by the FFv1 part of the shared decode context.
 * Installed as ctx->sd_ctx_free by vk_decode_ffv1_init().
 */
static void vk_decode_ffv1_uninit(FFVulkanDecodeShared *ctx)
{
    FFv1VulkanDecodeContext *fv = ctx->sd_ctx;

    av_buffer_unref(&fv->intermediate_frames_ref);

    ff_vk_shader_free(&ctx->s, &fv->setup);
    ff_vk_shader_free(&ctx->s, &fv->reset);
    ff_vk_shader_free(&ctx->s, &fv->decode);

    ff_vk_free_buf(&ctx->s, &fv->consts_buf);

    av_buffer_pool_uninit(&fv->slice_state_pool);
    av_buffer_pool_uninit(&fv->slice_fltmap_pool);
    av_buffer_pool_uninit(&fv->slice_feedback_pool);

    /* Free through the owning field so it does not keep a stale pointer */
    av_freep(&ctx->sd_ctx);
}

/**
 * Initialize the hwaccel: create the shared Vulkan decode context, the
 * optional intermediate frame pool, the three shaders and the constant
 * tables, then bind the constant descriptors.
 *
 * @return AVERROR(ENOTSUP) for unsupported stream versions, another negative
 *         AVERROR code on failure, otherwise the result of the last RET()
 *         call
 */
static int vk_decode_ffv1_init(AVCodecContext *avctx)
{
    int err;
    FFV1Context *f = avctx->priv_data;
    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
    FFVulkanDecodeShared *ctx = NULL;
    FFv1VulkanDecodeContext *fv;

    if (f->version < 3 ||
        (f->version == 4 && f->micro_version >= 10))
        return AVERROR(ENOTSUP);

    /* Streams with a low amount of slices will usually be much slower
     * to decode, so warn the user. */
    if (f->slice_count < 16)
        av_log(avctx, AV_LOG_WARNING, "Stream has a low number of slices (%i), "
               "decoding may be very slow\n", f->slice_count);

    err = ff_vk_decode_init(avctx);
    if (err < 0)
        return err;
    ctx = dec->shared_ctx;

    fv = ctx->sd_ctx = av_mallocz(sizeof(*fv));
    if (!fv) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    ctx->sd_ctx_free = &vk_decode_ffv1_uninit;

    AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
    AVHWFramesContext *dctx = hwfc;
    enum AVPixelFormat sw_format = hwfc->sw_format;
    int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
                 !(sw_format == AV_PIX_FMT_YA8);

    /* Intermediate frame pool for RCT */
    if (is_rgb) {
        RET(init_indirect(avctx, &ctx->s, &fv->intermediate_frames_ref,
                          f->use32bit ? AV_PIX_FMT_GBRAP32 : AV_PIX_FMT_GBRAP16));
        dctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data;
    }

    SPEC_LIST_CREATE(sl, 15, 15*sizeof(uint32_t))
    ff_ffv1_vk_set_common_sl(avctx, f, sl, sw_format);

    /* SPEC_LIST_ADD is a macro whose expansion is not visible here; the
     * braces keep its whole expansion under the condition even if it
     * expands to more than one statement. */
    if (RGB_LINECACHE != 2) {
        SPEC_LIST_ADD(sl, 0, 32, RGB_LINECACHE);
    }

    if (f->ec && !!(avctx->err_recognition & AV_EF_CRCCHECK)) {
        SPEC_LIST_ADD(sl, 1, 32, 1);
    }

    /* Setup shader */
    RET(init_setup_shader(f, &ctx->s, &ctx->exec_pool, &fv->setup, sl));

    /* Reset shader */
    RET(init_reset_shader(f, &ctx->s, &ctx->exec_pool, &fv->reset, sl, f->ac));

    /* Decode shaders */
    RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool, &fv->decode,
                           dctx, hwfc, sl, f->ac, is_rgb));

    /* Init static data */
    RET(ff_ffv1_vk_init_consts(&ctx->s, &fv->consts_buf, f));

    /* Update setup global descriptors */
    RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
                                        &fv->setup, 0, 0, 0,
                                        &fv->consts_buf,
                                        256*sizeof(uint32_t), 512*sizeof(uint8_t),
                                        VK_FORMAT_UNDEFINED));
    RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
                                        &fv->setup, 0, 1, 0,
                                        &fv->consts_buf,
                                        0, 256*sizeof(uint32_t),
                                        VK_FORMAT_UNDEFINED));

    /* Update decode global descriptors */
    RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
                                        &fv->decode, 0, 0, 0,
                                        &fv->consts_buf,
                                        256*sizeof(uint32_t), 512*sizeof(uint8_t),
                                        VK_FORMAT_UNDEFINED));
    RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
                                        &fv->decode, 0, 1, 0,
                                        &fv->consts_buf,
                                        256*sizeof(uint32_t) + 512*sizeof(uint8_t),
                                        VK_WHOLE_SIZE,
                                        VK_FORMAT_UNDEFINED));

fail:
    return err;
}

/**
 * Free a frame's private data. After the generic per-frame cleanup, the
 * per-slice status words are read back from the feedback buffer and
 * overreads/CRC mismatches are logged; then the buffers are released.
 */
static void vk_ffv1_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
{
    AVHWDeviceContext *dev_ctx = _hwctx.nc;
    AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;

    FFv1VulkanDecodePicture *fp = data;
    FFVulkanDecodePicture *vp = &fp->vp;

    ff_vk_decode_free_frame(dev_ctx, vp);

    /* The feedback buffer is absent when vk_ffv1_start_frame() failed before
     * allocating it, and holds nothing to report without any slices. */
    if (fp->slice_feedback_buf && fp->slice_num > 0) {
        FFVkBuffer *slice_feedback = (FFVkBuffer *)fp->slice_feedback_buf->data;
        uint8_t *ssp = slice_feedback->mapped_mem + 2*fp->slice_num*sizeof(uint32_t);

        int slice_error_cnt = 0;
        int crc_mismatch_cnt = 0;
        uint32_t max_overread = 0;

        /* Invalidate slice/output data if needed.
         * The status words read below sit after the offsets region, so the
         * whole mapping is invalidated rather than only its first half. */
        if (vp->invalidate_memory_ranges &&
            !(slice_feedback->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
            VkMappedMemoryRange invalidate_data = {
                .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
                .memory = slice_feedback->mem,
                .offset = 0,
                .size = VK_WHOLE_SIZE,
            };
            vp->invalidate_memory_ranges(hwctx->act_dev,
                                         1, &invalidate_data);
        }

        /* Two 32-bit words per slice: CRC result, then overread amount */
        for (int i = 0; i < fp->slice_num; i++) {
            uint32_t crc_res = 0;
            if (fp->crc_checked)
                crc_res = AV_RN32(ssp + 2*i*sizeof(uint32_t) + 0);
            uint32_t overread = AV_RN32(ssp + 2*i*sizeof(uint32_t) + 4);
            max_overread = FFMAX(overread, max_overread);
            slice_error_cnt += !!overread;
            crc_mismatch_cnt += !!crc_res;
        }
        if (slice_error_cnt || crc_mismatch_cnt)
            av_log(dev_ctx, AV_LOG_ERROR, "Decode status: %i slices overread (%i bytes max), "
                                          "%i CRCs mismatched\n",
                   slice_error_cnt, max_overread, crc_mismatch_cnt);
    }

    av_buffer_unref(&fp->slice_state);
    av_buffer_unref(&fp->slice_feedback_buf);
    /* Still set only if vk_ffv1_end_frame() never attached it to an exec
     * context */
    av_buffer_unref(&fp->slice_fltmap_buf);
}

const FFHWAccel ff_ffv1_vulkan_hwaccel = {
    .p.name                = "ffv1_vulkan",
    .p.type                = AVMEDIA_TYPE_VIDEO,
    .p.id                  = AV_CODEC_ID_FFV1,
    .p.pix_fmt             = AV_PIX_FMT_VULKAN,
    .start_frame           = &vk_ffv1_start_frame,
    .decode_slice          = &vk_ffv1_decode_slice,
    .end_frame             = &vk_ffv1_end_frame,
    .free_frame_priv       = &vk_ffv1_free_frame_priv,
    .frame_priv_data_size  = sizeof(FFv1VulkanDecodePicture),
    .init                  = &vk_decode_ffv1_init,
    .update_thread_context = &ff_vk_update_thread_context,
    .uninit                = &ff_vk_decode_uninit,
    .frame_params          = &ff_vk_frame_params,
    .priv_data_size        = sizeof(FFVulkanDecodeContext),
    .caps_internal         = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
};