mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-30 22:00:51 +08:00
avfilter: d3d12 motion estimation filter support
This commit introduces a video filter `mestimate_d3d12` that provides hardware-accelerated motion estimation using the DirectX 12 Video Encoding APIs. The filter leverages GPU hardware motion estimation capabilities to achieve significant performance improvements over the existing software-based mestimate filter.

Sample command lines:

1. Basic mestimate_d3d12 functionality
```
ffmpeg_g.exe -hwaccel d3d12va -i test.mp4 -vf mestimate_d3d12=mb_size=16 -f null -
```
2. Motion vector visualization
```
ffmpeg -hwaccel d3d12va -i input.mp4 -vf "mestimate_d3d12,hwdownload,format=nv12,codecview=mv=pf" -c:v libx264 output.mp4
```
This commit is contained in:
@@ -19,6 +19,7 @@ version <next>:
|
||||
- JPEG-XS decoder and encoder through libsvtjpegxs
|
||||
- JPEG-XS raw bitstream muxer and demuxer
|
||||
- IAMF Projection mode Ambisonic Audio Elements muxing and demuxing
|
||||
- Add vf_mestimate_d3d12 filter
|
||||
|
||||
|
||||
version 8.0:
|
||||
|
||||
4
configure
vendored
4
configure
vendored
@@ -2663,6 +2663,7 @@ CONFIG_EXTRA="
|
||||
cbs_vp9
|
||||
celp_math
|
||||
d3d12_intra_refresh
|
||||
d3d12_motion_estimator
|
||||
d3d12va_encode
|
||||
d3d12va_me_precision_eighth_pixel
|
||||
deflate_wrapper
|
||||
@@ -3484,6 +3485,7 @@ gfxcapture_filter_deps="cxx17 threads d3d11va IGraphicsCaptureItemInterop __x_AB
|
||||
gfxcapture_filter_extralibs="-lstdc++"
|
||||
scale_d3d11_filter_deps="d3d11va"
|
||||
scale_d3d12_filter_deps="d3d12va ID3D12VideoProcessor"
|
||||
mestimate_d3d12_filter_deps="d3d12va ID3D12VideoMotionEstimator d3d12_motion_estimator"
|
||||
|
||||
amf_deps_any="libdl LoadLibrary"
|
||||
nvenc_deps="ffnvcodec"
|
||||
@@ -7025,11 +7027,13 @@ check_type "windows.h d3d12.h" "ID3D12Device"
|
||||
check_type "windows.h d3d12video.h" "ID3D12VideoDecoder"
|
||||
check_type "windows.h d3d12video.h" "ID3D12VideoEncoder"
|
||||
check_type "windows.h d3d12video.h" "ID3D12VideoProcessor"
|
||||
check_type "windows.h d3d12video.h" "ID3D12VideoMotionEstimator"
|
||||
test_code cc "windows.h d3d12video.h" "D3D12_FEATURE_VIDEO feature = D3D12_FEATURE_VIDEO_ENCODER_CODEC" && \
|
||||
test_code cc "windows.h d3d12video.h" "D3D12_FEATURE_DATA_VIDEO_ENCODER_RESOURCE_REQUIREMENTS req" && enable d3d12_encoder_feature
|
||||
test_code cc "windows.h d3d12video.h" "D3D12_VIDEO_ENCODER_CODEC c = D3D12_VIDEO_ENCODER_CODEC_AV1; (void)c;" && enable d3d12va_av1_headers
|
||||
test_code cc "windows.h d3d12video.h" "D3D12_FEATURE_DATA_VIDEO_ENCODER_INTRA_REFRESH_MODE check = { 0 };" && enable d3d12_intra_refresh
|
||||
test_code cc "windows.h d3d12video.h" "D3D12_VIDEO_ENCODER_MOTION_ESTIMATION_PRECISION_MODE m = D3D12_VIDEO_ENCODER_MOTION_ESTIMATION_PRECISION_MODE_EIGHTH_PIXEL; (void)m;" && enable d3d12va_me_precision_eighth_pixel
|
||||
test_code cc "windows.h d3d12video.h" "D3D12_FEATURE_DATA_VIDEO_MOTION_ESTIMATOR check = { 0 };" && enable d3d12_motion_estimator
|
||||
check_type "windows.h" "DPI_AWARENESS_CONTEXT" -D_WIN32_WINNT=0x0A00
|
||||
check_type "windows.h security.h schnlsp.h" SecPkgContext_KeyingMaterialInfo -DSECURITY_WIN32
|
||||
check_type "d3d9.h dxva2api.h" DXVA2_ConfigPictureDecode -D_WIN32_WINNT=0x0602
|
||||
|
||||
@@ -395,6 +395,7 @@ OBJS-$(CONFIG_MCDEINT_FILTER) += vf_mcdeint.o
|
||||
OBJS-$(CONFIG_MEDIAN_FILTER) += vf_median.o
|
||||
OBJS-$(CONFIG_MERGEPLANES_FILTER) += vf_mergeplanes.o framesync.o
|
||||
OBJS-$(CONFIG_MESTIMATE_FILTER) += vf_mestimate.o motion_estimation.o
|
||||
OBJS-$(CONFIG_MESTIMATE_D3D12_FILTER) += vf_mestimate_d3d12.o
|
||||
OBJS-$(CONFIG_METADATA_FILTER) += f_metadata.o
|
||||
OBJS-$(CONFIG_MIDEQUALIZER_FILTER) += vf_midequalizer.o framesync.o
|
||||
OBJS-$(CONFIG_MINTERPOLATE_FILTER) += vf_minterpolate.o motion_estimation.o
|
||||
|
||||
@@ -369,6 +369,7 @@ extern const FFFilter ff_vf_mcdeint;
|
||||
extern const FFFilter ff_vf_median;
|
||||
extern const FFFilter ff_vf_mergeplanes;
|
||||
extern const FFFilter ff_vf_mestimate;
|
||||
extern const FFFilter ff_vf_mestimate_d3d12;
|
||||
extern const FFFilter ff_vf_metadata;
|
||||
extern const FFFilter ff_vf_midequalizer;
|
||||
extern const FFFilter ff_vf_minterpolate;
|
||||
|
||||
985
libavfilter/vf_mestimate_d3d12.c
Normal file
985
libavfilter/vf_mestimate_d3d12.c
Normal file
@@ -0,0 +1,985 @@
|
||||
/*
|
||||
* D3D12 Hardware-Accelerated Motion Estimation Filter
|
||||
*
|
||||
* Copyright (c) 2025 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "libavutil/avassert.h"
|
||||
#include "libavutil/buffer.h"
|
||||
#include "libavutil/hwcontext.h"
|
||||
#include "libavutil/hwcontext_d3d12va_internal.h"
|
||||
#include "libavutil/hwcontext_d3d12va.h"
|
||||
#include "libavutil/internal.h"
|
||||
#include "libavutil/opt.h"
|
||||
#include "libavutil/motion_vector.h"
|
||||
#include "libavutil/mem.h"
|
||||
#include "avfilter.h"
|
||||
#include "filters.h"
|
||||
#include "video.h"
|
||||
|
||||
|
||||
typedef struct MEstimateD3D12Context {
|
||||
const AVClass *class;
|
||||
|
||||
AVBufferRef *hw_device_ref;
|
||||
AVBufferRef *hw_frames_ref;
|
||||
|
||||
AVD3D12VADeviceContext *device_ctx;
|
||||
AVD3D12VAFramesContext *frames_ctx;
|
||||
|
||||
ID3D12Device *device;
|
||||
ID3D12VideoDevice1 *video_device;
|
||||
ID3D12VideoMotionEstimator *motion_estimator;
|
||||
ID3D12VideoMotionVectorHeap *motion_vector_heap;
|
||||
ID3D12VideoEncodeCommandList *command_list;
|
||||
ID3D12CommandQueue *command_queue;
|
||||
ID3D12CommandAllocator *command_allocator;
|
||||
|
||||
// Graphics command list and queue for copy operations
|
||||
ID3D12GraphicsCommandList *copy_command_list;
|
||||
ID3D12CommandAllocator *copy_command_allocator;
|
||||
ID3D12CommandQueue *copy_command_queue;
|
||||
|
||||
// Synchronization
|
||||
ID3D12Fence *fence;
|
||||
HANDLE fence_event;
|
||||
uint64_t fence_value;
|
||||
|
||||
// Motion estimation parameters
|
||||
int block_size; // 8 or 16
|
||||
D3D12_VIDEO_MOTION_ESTIMATOR_SEARCH_BLOCK_SIZE d3d12_block_size;
|
||||
D3D12_VIDEO_MOTION_ESTIMATOR_VECTOR_PRECISION precision;
|
||||
|
||||
// Frame buffer
|
||||
AVFrame *prev_frame;
|
||||
AVFrame *cur_frame;
|
||||
AVFrame *next_frame;
|
||||
|
||||
// Output textures for resolved motion vectors (GPU-side, DEFAULT heap)
|
||||
ID3D12Resource *resolved_mv_texture_back;
|
||||
ID3D12Resource *resolved_mv_texture_fwd;
|
||||
|
||||
// Readback buffers for CPU access (READBACK heap)
|
||||
ID3D12Resource *readback_buffer_back;
|
||||
ID3D12Resource *readback_buffer_fwd;
|
||||
size_t readback_buffer_size;
|
||||
|
||||
int initialized;
|
||||
} MEstimateD3D12Context;
|
||||
|
||||
/**
 * Reset the runtime state and translate the user options into D3D12 enums.
 *
 * @param ctx filter context whose priv data is a MEstimateD3D12Context
 * @return 0 on success, AVERROR(EINVAL) if block_size is neither 8 nor 16
 */
static int mestimate_d3d12_init(AVFilterContext *ctx)
{
    MEstimateD3D12Context *s = ctx->priv;

    s->initialized = 0;
    s->fence_value = 0;

    /* Map the user-facing block size onto the D3D12 search block size. */
    switch (s->block_size) {
    case 8:
        s->d3d12_block_size = D3D12_VIDEO_MOTION_ESTIMATOR_SEARCH_BLOCK_SIZE_8X8;
        break;
    case 16:
        s->d3d12_block_size = D3D12_VIDEO_MOTION_ESTIMATOR_SEARCH_BLOCK_SIZE_16X16;
        break;
    default:
        av_log(ctx, AV_LOG_ERROR, "Invalid block_size %d. Only 8 and 16 are supported.\n", s->block_size);
        return AVERROR(EINVAL);
    }

    /* Quarter-pel is the only precision this filter requests. */
    s->precision = D3D12_VIDEO_MOTION_ESTIMATOR_VECTOR_PRECISION_QUARTER_PEL;

    return 0;
}
|
||||
|
||||
/**
 * Create the synchronization fence, its wait event, and the video-encode
 * command queue, allocator and list.
 *
 * s->device must already be set. The command list is closed before
 * returning. No cleanup is done here on failure: objects created before
 * the failing call remain stored in the context.
 *
 * @return 0 on success, AVERROR(EINVAL) if any object cannot be created
 */
static int mestimate_d3d12_create_objects(AVFilterContext *ctx)
{
    MEstimateD3D12Context *s = ctx->priv;
    D3D12_COMMAND_QUEUE_DESC encode_queue_desc = {
        .Type     = D3D12_COMMAND_LIST_TYPE_VIDEO_ENCODE,
        .Priority = 0,
        .Flags    = D3D12_COMMAND_QUEUE_FLAG_NONE,
        .NodeMask = 0,
    };

    /* Fence plus an auto-reset event used to block the CPU on GPU completion. */
    if (FAILED(ID3D12Device_CreateFence(s->device, 0, D3D12_FENCE_FLAG_NONE,
                                        &IID_ID3D12Fence, (void **)&s->fence))) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create fence\n");
        return AVERROR(EINVAL);
    }

    s->fence_event = CreateEvent(NULL, FALSE, FALSE, NULL);
    if (!s->fence_event) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create fence event\n");
        return AVERROR(EINVAL);
    }

    /* Video-encode queue, then the allocator and list that feed it. */
    if (FAILED(ID3D12Device_CreateCommandQueue(s->device, &encode_queue_desc,
                                               &IID_ID3D12CommandQueue,
                                               (void **)&s->command_queue))) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create command queue\n");
        return AVERROR(EINVAL);
    }

    if (FAILED(ID3D12Device_CreateCommandAllocator(s->device, D3D12_COMMAND_LIST_TYPE_VIDEO_ENCODE,
                                                   &IID_ID3D12CommandAllocator,
                                                   (void **)&s->command_allocator))) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create command allocator\n");
        return AVERROR(EINVAL);
    }

    if (FAILED(ID3D12Device_CreateCommandList(s->device, 0, D3D12_COMMAND_LIST_TYPE_VIDEO_ENCODE,
                                              s->command_allocator, NULL,
                                              &IID_ID3D12VideoEncodeCommandList,
                                              (void **)&s->command_list))) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create command list\n");
        return AVERROR(EINVAL);
    }

    /* Close the list right away; the per-frame code resets it before recording. */
    if (FAILED(ID3D12VideoEncodeCommandList_Close(s->command_list))) {
        av_log(ctx, AV_LOG_ERROR, "Failed to close command list\n");
        return AVERROR(EINVAL);
    }

    return 0;
}
|
||||
|
||||
/**
 * Create the motion estimator, its motion vector heap, the resolved motion
 * vector textures, the readback buffers and the copy command objects.
 *
 * The device is first queried for motion estimation support with the input
 * format of the frames context. The requested block size, quarter-pel
 * precision and the frame size must all be reported as supported.
 *
 * No cleanup is done here on failure: objects created before the failing
 * call remain stored in the context.
 *
 * @param ctx    filter context
 * @param width  input frame width in pixels
 * @param height input frame height in pixels
 * @return 0 on success, AVERROR(ENOSYS) if the device lacks a required
 *         capability, AVERROR(EINVAL) if a D3D12 call fails
 */
static int mestimate_d3d12_create_motion_estimator(AVFilterContext *ctx, int width, int height)
{
    MEstimateD3D12Context *s = ctx->priv;
    HRESULT hr;
    D3D12_FEATURE_DATA_VIDEO_MOTION_ESTIMATOR feature_data = {0};
    D3D12_VIDEO_MOTION_ESTIMATOR_DESC me_desc = {0};
    D3D12_VIDEO_MOTION_VECTOR_HEAP_DESC heap_desc = {0};

    // Check if motion estimation is supported
    // Set the input parameters for what we want to query
    feature_data.NodeIndex = 0;
    feature_data.InputFormat = s->frames_ctx->format;
    feature_data.BlockSizeFlags = 0; // Will be filled by CheckFeatureSupport with supported flags
    feature_data.PrecisionFlags = 0; // Will be filled by CheckFeatureSupport with supported flags
    feature_data.SizeRange.MaxWidth = width;
    feature_data.SizeRange.MaxHeight = height;
    feature_data.SizeRange.MinWidth = width;
    feature_data.SizeRange.MinHeight = height;

    hr = ID3D12VideoDevice1_CheckFeatureSupport(s->video_device,
                                                D3D12_FEATURE_VIDEO_MOTION_ESTIMATOR,
                                                &feature_data, sizeof(feature_data));
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to check motion estimator support (hr=0x%lx)\n", (long)hr);
        return AVERROR(EINVAL);
    }

    // Verify the requested features are actually supported (check returned flags)
    D3D12_VIDEO_MOTION_ESTIMATOR_SEARCH_BLOCK_SIZE_FLAGS requested_block_flag =
        (s->d3d12_block_size == D3D12_VIDEO_MOTION_ESTIMATOR_SEARCH_BLOCK_SIZE_8X8) ?
        D3D12_VIDEO_MOTION_ESTIMATOR_SEARCH_BLOCK_SIZE_FLAG_8X8 :
        D3D12_VIDEO_MOTION_ESTIMATOR_SEARCH_BLOCK_SIZE_FLAG_16X16;

    if (!(feature_data.BlockSizeFlags & requested_block_flag)) {
        av_log(ctx, AV_LOG_ERROR, "Requested block size (%dx%d) not supported by device (supported flags: 0x%x)\n",
               s->block_size, s->block_size, feature_data.BlockSizeFlags);
        return AVERROR(ENOSYS);
    }

    if (!(feature_data.PrecisionFlags & D3D12_VIDEO_MOTION_ESTIMATOR_VECTOR_PRECISION_FLAG_QUARTER_PEL)) {
        av_log(ctx, AV_LOG_ERROR, "Quarter-pel precision not supported by device (supported flags: 0x%x)\n",
               feature_data.PrecisionFlags);
        return AVERROR(ENOSYS);
    }

    // SizeRange is reported back by the driver (per the D3D12 documentation it
    // holds the minimum and maximum supported input size), so make sure the
    // actual frame size lies inside it before creating any object with it.
    if ((UINT)width  < feature_data.SizeRange.MinWidth  ||
        (UINT)width  > feature_data.SizeRange.MaxWidth  ||
        (UINT)height < feature_data.SizeRange.MinHeight ||
        (UINT)height > feature_data.SizeRange.MaxHeight) {
        av_log(ctx, AV_LOG_ERROR, "Frame size %dx%d is outside the size range supported by the device (%ux%u - %ux%u)\n",
               width, height,
               feature_data.SizeRange.MinWidth, feature_data.SizeRange.MinHeight,
               feature_data.SizeRange.MaxWidth, feature_data.SizeRange.MaxHeight);
        return AVERROR(ENOSYS);
    }

    av_log(ctx, AV_LOG_VERBOSE, "Motion estimator support confirmed: block_size=%dx%d, precision=quarter-pel\n",
           s->block_size, s->block_size);

    // Create motion estimator
    me_desc.NodeMask = 0;
    me_desc.InputFormat = s->frames_ctx->format;
    me_desc.BlockSize = s->d3d12_block_size;
    me_desc.Precision = s->precision;
    me_desc.SizeRange = feature_data.SizeRange;

    hr = ID3D12VideoDevice1_CreateVideoMotionEstimator(s->video_device, &me_desc, NULL,
                                                       &IID_ID3D12VideoMotionEstimator,
                                                       (void **)&s->motion_estimator);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create motion estimator\n");
        return AVERROR(EINVAL);
    }

    // Create motion vector heap (must describe the same configuration as the estimator)
    heap_desc.NodeMask = 0;
    heap_desc.InputFormat = s->frames_ctx->format;
    heap_desc.BlockSize = s->d3d12_block_size;
    heap_desc.Precision = s->precision;
    heap_desc.SizeRange = feature_data.SizeRange;

    hr = ID3D12VideoDevice1_CreateVideoMotionVectorHeap(s->video_device, &heap_desc, NULL,
                                                        &IID_ID3D12VideoMotionVectorHeap,
                                                        (void **)&s->motion_vector_heap);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create motion vector heap\n");
        return AVERROR(EINVAL);
    }

    // Create resolved motion vector textures in DEFAULT heap (GPU writable)
    // ResolveMotionVectorHeap outputs to TEXTURE2D with DXGI_FORMAT_R16G16_SINT
    // One texel per block, so the texture size is the frame size in blocks (rounded up).
    int mb_width = (width + s->block_size - 1) / s->block_size;
    int mb_height = (height + s->block_size - 1) / s->block_size;

    D3D12_HEAP_PROPERTIES heap_props_default = {.Type = D3D12_HEAP_TYPE_DEFAULT};
    D3D12_RESOURCE_DESC texture_desc = {
        .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
        .Alignment = 0,
        .Width = mb_width,
        .Height = mb_height,
        .DepthOrArraySize = 1,
        .MipLevels = 1,
        .Format = DXGI_FORMAT_R16G16_SINT, // Motion vector format: signed 16-bit X,Y
        .SampleDesc = {.Count = 1, .Quality = 0},
        .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
        .Flags = D3D12_RESOURCE_FLAG_NONE,
    };

    hr = ID3D12Device_CreateCommittedResource(s->device, &heap_props_default, D3D12_HEAP_FLAG_NONE,
                                              &texture_desc, D3D12_RESOURCE_STATE_COMMON, NULL,
                                              &IID_ID3D12Resource, (void **)&s->resolved_mv_texture_back);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create backward motion vector texture (hr=0x%lx)\n", (long)hr);
        return AVERROR(EINVAL);
    }

    hr = ID3D12Device_CreateCommittedResource(s->device, &heap_props_default, D3D12_HEAP_FLAG_NONE,
                                              &texture_desc, D3D12_RESOURCE_STATE_COMMON, NULL,
                                              &IID_ID3D12Resource, (void **)&s->resolved_mv_texture_fwd);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create forward motion vector texture (hr=0x%lx)\n", (long)hr);
        return AVERROR(EINVAL);
    }

    // Create READBACK buffers for CPU access
    // Need to calculate proper size accounting for D3D12 row pitch alignment
    // Get the footprint to determine the actual required buffer size
    D3D12_PLACED_SUBRESOURCE_FOOTPRINT temp_layout;
    UINT64 temp_total_size;

    ID3D12Device_GetCopyableFootprints(s->device, &texture_desc, 0, 1, 0,
                                       &temp_layout, NULL, NULL, &temp_total_size);

    s->readback_buffer_size = temp_total_size;

    av_log(ctx, AV_LOG_DEBUG, "Readback buffer size: %llu bytes (texture: %dx%d, pitch: %u)\n",
           (unsigned long long)s->readback_buffer_size, mb_width, mb_height, temp_layout.Footprint.RowPitch);

    D3D12_HEAP_PROPERTIES heap_props_readback = {.Type = D3D12_HEAP_TYPE_READBACK};
    D3D12_RESOURCE_DESC buffer_desc = {
        .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
        .Alignment = 0,
        .Width = s->readback_buffer_size,
        .Height = 1,
        .DepthOrArraySize = 1,
        .MipLevels = 1,
        .Format = DXGI_FORMAT_UNKNOWN,
        .SampleDesc = {.Count = 1, .Quality = 0},
        .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
        .Flags = D3D12_RESOURCE_FLAG_NONE,
    };

    hr = ID3D12Device_CreateCommittedResource(s->device, &heap_props_readback, D3D12_HEAP_FLAG_NONE,
                                              &buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, NULL,
                                              &IID_ID3D12Resource, (void **)&s->readback_buffer_back);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create backward readback buffer (hr=0x%lx)\n", (long)hr);
        return AVERROR(EINVAL);
    }

    hr = ID3D12Device_CreateCommittedResource(s->device, &heap_props_readback, D3D12_HEAP_FLAG_NONE,
                                              &buffer_desc, D3D12_RESOURCE_STATE_COPY_DEST, NULL,
                                              &IID_ID3D12Resource, (void **)&s->readback_buffer_fwd);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create forward readback buffer (hr=0x%lx)\n", (long)hr);
        return AVERROR(EINVAL);
    }

    // Create graphics command queue, allocator and list for copy operations
    D3D12_COMMAND_QUEUE_DESC copy_queue_desc = {
        .Type = D3D12_COMMAND_LIST_TYPE_DIRECT,
        .Priority = 0,
        .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE,
        .NodeMask = 0,
    };

    hr = ID3D12Device_CreateCommandQueue(s->device, &copy_queue_desc,
                                         &IID_ID3D12CommandQueue, (void **)&s->copy_command_queue);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create copy command queue\n");
        return AVERROR(EINVAL);
    }

    hr = ID3D12Device_CreateCommandAllocator(s->device, D3D12_COMMAND_LIST_TYPE_DIRECT,
                                             &IID_ID3D12CommandAllocator, (void **)&s->copy_command_allocator);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create copy command allocator\n");
        return AVERROR(EINVAL);
    }

    hr = ID3D12Device_CreateCommandList(s->device, 0, D3D12_COMMAND_LIST_TYPE_DIRECT,
                                        s->copy_command_allocator, NULL, &IID_ID3D12GraphicsCommandList,
                                        (void **)&s->copy_command_list);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create copy command list\n");
        return AVERROR(EINVAL);
    }

    // Command lists are created in the recording state; close it until it is needed
    hr = ID3D12GraphicsCommandList_Close(s->copy_command_list);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to close copy command list\n");
        return AVERROR(EINVAL);
    }

    return 0;
}
|
||||
|
||||
/**
 * Configure the output link and create every D3D12 object the filter needs.
 *
 * Takes references on the input link's hardware frames context and its
 * device, forwards the frames context to the output link, queries the
 * ID3D12VideoDevice1 interface and then builds the command objects and the
 * motion estimator for the input dimensions.
 *
 * @return 0 on success; AVERROR(EINVAL) if the input has no D3D12 frames
 *         context, AVERROR(ENOMEM) if a reference cannot be created,
 *         AVERROR(ENOSYS) if ID3D12VideoDevice1 is unavailable, or the
 *         error returned by one of the creation helpers
 */
static int mestimate_d3d12_config_props(AVFilterLink *outlink)
{
    AVFilterContext       *avctx    = outlink->src;
    MEstimateD3D12Context *s        = avctx->priv;
    AVFilterLink          *inlink   = avctx->inputs[0];
    FilterLink            *in_link  = ff_filter_link(inlink);
    FilterLink            *out_link = ff_filter_link(outlink);
    AVHWFramesContext     *in_frames;
    int ret;

    if (!in_link->hw_frames_ctx) {
        av_log(avctx, AV_LOG_ERROR, "D3D12 hardware frames context required\n");
        return AVERROR(EINVAL);
    }

    in_frames = (AVHWFramesContext *)in_link->hw_frames_ctx->data;
    if (in_frames->format != AV_PIX_FMT_D3D12) {
        av_log(avctx, AV_LOG_ERROR, "Input must be D3D12 frames\n");
        return AVERROR(EINVAL);
    }

    /* Keep the frames context alive for as long as the filter uses it. */
    s->hw_frames_ref = av_buffer_ref(in_link->hw_frames_ctx);
    if (!s->hw_frames_ref)
        return AVERROR(ENOMEM);
    s->frames_ctx = in_frames->hwctx;

    /* Same for the device the frames belong to. */
    s->hw_device_ref = av_buffer_ref(in_frames->device_ref);
    if (!s->hw_device_ref)
        return AVERROR(ENOMEM);
    s->device_ctx = ((AVHWDeviceContext *)s->hw_device_ref->data)->hwctx;
    s->device     = s->device_ctx->device;

    /* The output link carries the same hardware frames context as the input. */
    out_link->hw_frames_ctx = av_buffer_ref(in_link->hw_frames_ctx);
    if (!out_link->hw_frames_ctx)
        return AVERROR(ENOMEM);

    /* The motion estimation entry points are reached through ID3D12VideoDevice1. */
    if (FAILED(ID3D12VideoDevice_QueryInterface(s->device_ctx->video_device,
                                                &IID_ID3D12VideoDevice1,
                                                (void **)&s->video_device))) {
        av_log(avctx, AV_LOG_ERROR, "ID3D12VideoDevice1 interface not supported\n");
        return AVERROR(ENOSYS);
    }

    if ((ret = mestimate_d3d12_create_objects(avctx)) < 0 ||
        (ret = mestimate_d3d12_create_motion_estimator(avctx, inlink->w, inlink->h)) < 0)
        return ret;

    s->initialized = 1;

    return 0;
}
|
||||
|
||||
/**
 * Block until the fence has reached s->fence_value.
 *
 * @return 0 once the fence value is reached (or was already reached),
 *         AVERROR(EINVAL) if the completion event cannot be armed
 */
static int mestimate_d3d12_sync_gpu(MEstimateD3D12Context *s)
{
    /* Fast path: the fence has already reached the target value. */
    if (ID3D12Fence_GetCompletedValue(s->fence) >= s->fence_value)
        return 0;

    if (FAILED(ID3D12Fence_SetEventOnCompletion(s->fence, s->fence_value, s->fence_event)))
        return AVERROR(EINVAL);

    WaitForSingleObjectEx(s->fence_event, INFINITE, FALSE);

    return 0;
}
|
||||
|
||||
static inline void d3d12_barrier_transition(D3D12_RESOURCE_BARRIER *barrier,
|
||||
ID3D12Resource *resource,
|
||||
D3D12_RESOURCE_STATES state_before,
|
||||
D3D12_RESOURCE_STATES state_after)
|
||||
{
|
||||
barrier->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
||||
barrier->Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
|
||||
barrier->Transition.pResource = resource;
|
||||
barrier->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
|
||||
barrier->Transition.StateBefore = state_before;
|
||||
barrier->Transition.StateAfter = state_after;
|
||||
}
|
||||
|
||||
/**
 * Fill one AVMotionVector entry for a square block.
 *
 * @param mv      entry to fill
 * @param mb_size block width and height in pixels
 * @param x, y    top-left corner of the block in the current frame
 * @param x_mv    x coordinate of the matching block in the reference frame
 * @param y_mv    y coordinate of the matching block in the reference frame
 * @param dir     zero stores source = -1, non-zero stores source = 1
 */
static void add_mv_data(AVMotionVector *mv, int mb_size,
                        int x, int y, int x_mv, int y_mv, int dir)
{
    /* dst/src are stored as block centres, hence the half-block offset. */
    const int half_block = mb_size >> 1;

    mv->source = dir ? 1 : -1;
    mv->flags  = 0;

    mv->w = mb_size;
    mv->h = mb_size;

    mv->dst_x = x + half_block;
    mv->dst_y = y + half_block;
    mv->src_x = x_mv + half_block;
    mv->src_y = y_mv + half_block;

    /* Displacement in whole pixels, so the scale is 1. */
    mv->motion_scale = 1;
    mv->motion_x     = x_mv - x;
    mv->motion_y     = y_mv - y;
}
|
||||
|
||||
/**
 * Convert the motion vectors held in a readback buffer into the
 * AV_FRAME_DATA_MOTION_VECTORS side data of the output frame.
 *
 * The side data must already be attached to the frame and hold room for two
 * directions; entries for direction 0 occupy the first half, entries for
 * direction 1 the second half.
 *
 * Each source element is a pair of signed 16-bit values (X, Y) in
 * quarter-pel units (DXGI_FORMAT_R16G16_SINT). They are truncated to whole
 * pixels before being stored.
 *
 * @param ctx       filter context
 * @param out       output frame carrying the motion vector side data
 * @param direction 0 reads the backward buffer, 1 reads the forward buffer
 * @return 0 on success, AVERROR(EINVAL) if the side data is missing or too
 *         small, the readback buffer is too small, or mapping fails
 */
static int mestimate_d3d12_read_motion_vectors(AVFilterContext *ctx, AVFrame *out, int direction)
{
    MEstimateD3D12Context *s = ctx->priv;
    ID3D12Resource *buffer = (direction == 0) ? s->readback_buffer_back : s->readback_buffer_fwd;
    const int mb_width  = (out->width  + s->block_size - 1) / s->block_size;
    const int mb_height = (out->height + s->block_size - 1) / s->block_size;
    const int mb_count  = mb_width * mb_height;
    // Bytes of payload per row: one (X, Y) int16_t pair per block
    const size_t row_bytes = (size_t)mb_width * 2 * sizeof(int16_t);
    // Rows of a texture copied into a buffer are padded: D3D12 requires the
    // row pitch of a placed footprint to be a multiple of
    // D3D12_TEXTURE_DATA_PITCH_ALIGNMENT, and the readback buffer was sized
    // from GetCopyableFootprints accordingly.
    // NOTE(review): the texture-to-buffer copy is not visible from this
    // function; confirm it uses the GetCopyableFootprints layout at offset 0.
    const size_t row_pitch = FFALIGN(row_bytes, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
    uint8_t *mapped_data = NULL;
    AVFrameSideData *sd;
    AVMotionVector *mvs;
    HRESULT hr;
    int mv_idx;

    // Get the motion vector side data
    sd = av_frame_get_side_data(out, AV_FRAME_DATA_MOTION_VECTORS);
    if (!sd) {
        av_log(ctx, AV_LOG_ERROR, "No motion vector side data found\n");
        return AVERROR(EINVAL);
    }

    // Entries for this direction end at (direction + 1) * mb_count
    if (direction < 0 ||
        sd->size < (size_t)(direction + 1) * mb_count * sizeof(*mvs)) {
        av_log(ctx, AV_LOG_ERROR, "Motion vector side data too small for direction %d\n", direction);
        return AVERROR(EINVAL);
    }

    // The last row only needs its payload, not the full pitch
    if (mb_height > 0 &&
        (size_t)(mb_height - 1) * row_pitch + row_bytes > s->readback_buffer_size) {
        av_log(ctx, AV_LOG_ERROR, "Readback buffer too small for %dx%d motion vectors\n",
               mb_width, mb_height);
        return AVERROR(EINVAL);
    }

    // Map the readback buffer
    hr = ID3D12Resource_Map(buffer, 0, NULL, (void **)&mapped_data);
    if (FAILED(hr)) {
        av_log(ctx, AV_LOG_ERROR, "Failed to map readback buffer (dir=%d, hr=0x%lx)\n", direction, (long)hr);
        return AVERROR(EINVAL);
    }

    mvs = (AVMotionVector *)sd->data;

    // Calculate offset for this direction (0 = backward, 1 = forward)
    mv_idx = direction * mb_count;

    for (int mb_y = 0; mb_y < mb_height; mb_y++) {
        // Step by the padded row pitch, not by the tightly packed row size
        const int16_t *row = (const int16_t *)(mapped_data + (size_t)mb_y * row_pitch);
        const int y_mb = mb_y * s->block_size;

        for (int mb_x = 0; mb_x < mb_width; mb_x++) {
            const int x_mb = mb_x * s->block_size;

            // Read motion vector components in quarter-pel units
            // R component (index 0) = X motion
            // G component (index 1) = Y motion
            const int16_t mv_x_qpel = row[2 * mb_x + 0];
            const int16_t mv_y_qpel = row[2 * mb_x + 1];

            // Convert from quarter-pel to full pixel coordinates
            // Quarter-pel means the value is 4x the actual pixel displacement
            // So divide by 4 to get pixel displacement
            const int src_x = x_mb + (mv_x_qpel / 4);
            const int src_y = y_mb + (mv_y_qpel / 4);

            // Store the motion vector data
            // This will set dst (current position) and src (where it came from)
            add_mv_data(&mvs[mv_idx++], s->block_size, x_mb, y_mb, src_x, src_y, direction);

            av_log(ctx, AV_LOG_TRACE, "Block[%d,%d] dir=%d: MV=(%d,%d) qpel -> (%d,%d) pixels\n",
                   mb_x, mb_y, direction, mv_x_qpel, mv_y_qpel,
                   mv_x_qpel / 4, mv_y_qpel / 4);
        }
    }

    ID3D12Resource_Unmap(buffer, 0, NULL);

    av_log(ctx, AV_LOG_DEBUG, "Parsed %d motion vectors for direction %d\n",
           mb_count, direction);

    return 0;
}
|
||||
|
||||
static int mestimate_d3d12_filter_frame(AVFilterLink *inlink, AVFrame *frame)
|
||||
{
|
||||
AVFilterContext *ctx = inlink->dst;
|
||||
MEstimateD3D12Context *s = ctx->priv;
|
||||
AVFrame *out;
|
||||
AVFrameSideData *sd;
|
||||
AVD3D12VAFrame *cur_hwframe, *prev_hwframe, *next_hwframe = NULL;
|
||||
HRESULT hr;
|
||||
int err;
|
||||
int mb_width, mb_height, mb_count;
|
||||
|
||||
if (!s->initialized) {
|
||||
err = mestimate_d3d12_config_props(ctx->outputs[0]);
|
||||
if (err < 0) {
|
||||
av_frame_free(&frame);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
// Manage frame buffer
|
||||
av_frame_free(&s->prev_frame);
|
||||
s->prev_frame = s->cur_frame;
|
||||
s->cur_frame = s->next_frame;
|
||||
s->next_frame = frame;
|
||||
|
||||
if (!s->cur_frame) {
|
||||
s->cur_frame = av_frame_clone(frame);
|
||||
if (!s->cur_frame)
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
if (!s->prev_frame)
|
||||
return 0;
|
||||
|
||||
// Clone current frame for output
|
||||
out = av_frame_clone(s->cur_frame);
|
||||
if (!out)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
mb_width = (frame->width + s->block_size - 1) / s->block_size;
|
||||
mb_height = (frame->height + s->block_size - 1) / s->block_size;
|
||||
mb_count = mb_width * mb_height;
|
||||
|
||||
// Allocate side data for motion vectors (2 directions)
|
||||
sd = av_frame_new_side_data(out, AV_FRAME_DATA_MOTION_VECTORS,
|
||||
2 * mb_count * sizeof(AVMotionVector));
|
||||
if (!sd) {
|
||||
av_frame_free(&out);
|
||||
return AVERROR(ENOMEM);
|
||||
}
|
||||
|
||||
// Get hardware frame pointers
|
||||
cur_hwframe = (AVD3D12VAFrame *)s->cur_frame->data[0];
|
||||
prev_hwframe = (AVD3D12VAFrame *)s->prev_frame->data[0];
|
||||
if (s->next_frame)
|
||||
next_hwframe = (AVD3D12VAFrame *)s->next_frame->data[0];
|
||||
|
||||
// Reset command allocator and list ONCE for both estimations
|
||||
hr = ID3D12CommandAllocator_Reset(s->command_allocator);
|
||||
if (FAILED(hr)) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to reset command allocator\n");
|
||||
av_frame_free(&out);
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
|
||||
hr = ID3D12VideoEncodeCommandList_Reset(s->command_list, s->command_allocator);
|
||||
if (FAILED(hr)) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to reset command list\n");
|
||||
av_frame_free(&out);
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
|
||||
// Transition current and previous frames to VIDEO_ENCODE_READ
|
||||
D3D12_RESOURCE_BARRIER barriers[3];
|
||||
int barrier_count = 2;
|
||||
|
||||
d3d12_barrier_transition(&barriers[0], cur_hwframe->texture,
|
||||
D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
|
||||
d3d12_barrier_transition(&barriers[1], prev_hwframe->texture,
|
||||
D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
|
||||
|
||||
if (next_hwframe) {
|
||||
d3d12_barrier_transition(&barriers[2], next_hwframe->texture,
|
||||
D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_ENCODE_READ);
|
||||
barrier_count = 3;
|
||||
}
|
||||
|
||||
ID3D12VideoEncodeCommandList_ResourceBarrier(s->command_list, barrier_count, barriers);
|
||||
|
||||
// Backward motion estimation (cur -> prev)
|
||||
D3D12_VIDEO_MOTION_ESTIMATOR_INPUT input_back = {
|
||||
.pInputTexture2D = cur_hwframe->texture,
|
||||
.InputSubresourceIndex = 0,
|
||||
.pReferenceTexture2D = prev_hwframe->texture,
|
||||
.ReferenceSubresourceIndex = 0,
|
||||
.pHintMotionVectorHeap = NULL,
|
||||
};
|
||||
|
||||
D3D12_VIDEO_MOTION_ESTIMATOR_OUTPUT output = {
|
||||
.pMotionVectorHeap = s->motion_vector_heap,
|
||||
};
|
||||
|
||||
ID3D12VideoEncodeCommandList_EstimateMotion(s->command_list, s->motion_estimator,
|
||||
&output, &input_back);
|
||||
|
||||
D3D12_RESOLVE_VIDEO_MOTION_VECTOR_HEAP_INPUT resolve_input = {
|
||||
.pMotionVectorHeap = s->motion_vector_heap,
|
||||
.PixelWidth = s->cur_frame->width,
|
||||
.PixelHeight = s->cur_frame->height,
|
||||
};
|
||||
|
||||
D3D12_RESOLVE_VIDEO_MOTION_VECTOR_HEAP_OUTPUT resolve_output_back = {
|
||||
.pMotionVectorTexture2D = s->resolved_mv_texture_back,
|
||||
.MotionVectorCoordinate = {.X = 0, .Y = 0, .Z = 0, .SubresourceIndex = 0},
|
||||
};
|
||||
|
||||
ID3D12VideoEncodeCommandList_ResolveMotionVectorHeap(s->command_list,
|
||||
&resolve_output_back, &resolve_input);
|
||||
|
||||
// Copy resolved texture to readback buffer for CPU access
|
||||
// CopyTextureRegion is not available on video encode command list
|
||||
// We'll need to read directly from the resolved texture after GPU sync
|
||||
|
||||
// Forward motion estimation (cur -> next) if next frame exists
|
||||
if (next_hwframe) {
|
||||
D3D12_VIDEO_MOTION_ESTIMATOR_INPUT input_fwd = {
|
||||
.pInputTexture2D = cur_hwframe->texture,
|
||||
.InputSubresourceIndex = 0,
|
||||
.pReferenceTexture2D = next_hwframe->texture,
|
||||
.ReferenceSubresourceIndex = 0,
|
||||
.pHintMotionVectorHeap = NULL,
|
||||
};
|
||||
|
||||
ID3D12VideoEncodeCommandList_EstimateMotion(s->command_list, s->motion_estimator,
|
||||
&output, &input_fwd);
|
||||
|
||||
D3D12_RESOLVE_VIDEO_MOTION_VECTOR_HEAP_OUTPUT resolve_output_fwd = {
|
||||
.pMotionVectorTexture2D = s->resolved_mv_texture_fwd,
|
||||
.MotionVectorCoordinate = {.X = 0, .Y = 0, .Z = 0, .SubresourceIndex = 0},
|
||||
};
|
||||
|
||||
ID3D12VideoEncodeCommandList_ResolveMotionVectorHeap(s->command_list,
|
||||
&resolve_output_fwd, &resolve_input);
|
||||
|
||||
// Copy will be done after command list execution
|
||||
}
|
||||
|
||||
// Transition resources back to COMMON (reuse barriers by swapping states)
|
||||
for (int i = 0; i < barrier_count; i++)
|
||||
FFSWAP(D3D12_RESOURCE_STATES, barriers[i].Transition.StateBefore, barriers[i].Transition.StateAfter);
|
||||
|
||||
ID3D12VideoEncodeCommandList_ResourceBarrier(s->command_list, barrier_count, barriers);
|
||||
|
||||
// Close command list ONCE
|
||||
hr = ID3D12VideoEncodeCommandList_Close(s->command_list);
|
||||
if (FAILED(hr)) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to close command list (hr=0x%lx)\n", (long)hr);
|
||||
av_frame_free(&out);
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
|
||||
// Wait for input frame sync
|
||||
hr = ID3D12CommandQueue_Wait(s->command_queue, cur_hwframe->sync_ctx.fence,
|
||||
cur_hwframe->sync_ctx.fence_value);
|
||||
if (FAILED(hr)) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to wait for current frame\n");
|
||||
av_frame_free(&out);
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
|
||||
hr = ID3D12CommandQueue_Wait(s->command_queue, prev_hwframe->sync_ctx.fence,
|
||||
prev_hwframe->sync_ctx.fence_value);
|
||||
if (FAILED(hr)) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to wait for previous frame\n");
|
||||
av_frame_free(&out);
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
|
||||
if (next_hwframe) {
|
||||
hr = ID3D12CommandQueue_Wait(s->command_queue, next_hwframe->sync_ctx.fence,
|
||||
next_hwframe->sync_ctx.fence_value);
|
||||
if (FAILED(hr)) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to wait for next frame\n");
|
||||
av_frame_free(&out);
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
}
|
||||
|
||||
// Execute command list ONCE
|
||||
ID3D12CommandQueue_ExecuteCommandLists(s->command_queue, 1, (ID3D12CommandList **)&s->command_list);
|
||||
|
||||
// Signal completion
|
||||
hr = ID3D12CommandQueue_Signal(s->command_queue, s->fence, ++s->fence_value);
|
||||
if (FAILED(hr)) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to signal fence\n");
|
||||
av_frame_free(&out);
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
|
||||
// Wait for GPU to complete
|
||||
err = mestimate_d3d12_sync_gpu(s);
|
||||
if (err < 0) {
|
||||
av_frame_free(&out);
|
||||
return err;
|
||||
}
|
||||
|
||||
// Now copy the resolved textures to readback buffers using graphics command list
|
||||
hr = ID3D12CommandAllocator_Reset(s->copy_command_allocator);
|
||||
if (FAILED(hr)) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to reset copy command allocator\n");
|
||||
av_frame_free(&out);
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
|
||||
hr = ID3D12GraphicsCommandList_Reset(s->copy_command_list, s->copy_command_allocator, NULL);
|
||||
if (FAILED(hr)) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to reset copy command list\n");
|
||||
av_frame_free(&out);
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
|
||||
// Transition resolved textures to COPY_SOURCE state
|
||||
D3D12_RESOURCE_BARRIER copy_barriers[2];
|
||||
int copy_barrier_count = 1;
|
||||
|
||||
d3d12_barrier_transition(&copy_barriers[0], s->resolved_mv_texture_back,
|
||||
D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
if (s->next_frame) {
|
||||
d3d12_barrier_transition(&copy_barriers[1], s->resolved_mv_texture_fwd,
|
||||
D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
copy_barrier_count = 2;
|
||||
}
|
||||
|
||||
ID3D12GraphicsCommandList_ResourceBarrier(s->copy_command_list, copy_barrier_count, copy_barriers);
|
||||
|
||||
// Get texture layout for backward copy
|
||||
D3D12_RESOURCE_DESC texture_desc_back;
|
||||
D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout_back;
|
||||
UINT64 row_size_back, total_size_back;
|
||||
UINT num_rows_back;
|
||||
|
||||
// Get the resource description for backward texture
|
||||
s->resolved_mv_texture_back->lpVtbl->GetDesc(s->resolved_mv_texture_back, &texture_desc_back);
|
||||
|
||||
av_log(ctx, AV_LOG_DEBUG, "Back texture desc: Width=%llu, Height=%u, Format=%d\n",
|
||||
(unsigned long long)texture_desc_back.Width, texture_desc_back.Height, texture_desc_back.Format);
|
||||
|
||||
// Get the copyable footprints for the backward texture
|
||||
ID3D12Device_GetCopyableFootprints(s->device, &texture_desc_back, 0, 1, 0,
|
||||
&layout_back, &num_rows_back, &row_size_back, &total_size_back);
|
||||
|
||||
av_log(ctx, AV_LOG_DEBUG, "Back layout: Offset=%llu, Width=%u, Height=%u, Depth=%u, RowPitch=%u\n",
|
||||
(unsigned long long)layout_back.Offset, layout_back.Footprint.Width, layout_back.Footprint.Height,
|
||||
layout_back.Footprint.Depth, layout_back.Footprint.RowPitch);
|
||||
|
||||
// Copy backward motion vectors
|
||||
D3D12_TEXTURE_COPY_LOCATION src_back = {
|
||||
.pResource = s->resolved_mv_texture_back,
|
||||
.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
|
||||
.SubresourceIndex = 0
|
||||
};
|
||||
|
||||
D3D12_TEXTURE_COPY_LOCATION dst_back = {
|
||||
.pResource = s->readback_buffer_back,
|
||||
.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
|
||||
.PlacedFootprint = {
|
||||
.Offset = 0,
|
||||
.Footprint = layout_back.Footprint
|
||||
}
|
||||
};
|
||||
|
||||
av_log(ctx, AV_LOG_DEBUG, "Copying backward MVs...\n");
|
||||
ID3D12GraphicsCommandList_CopyTextureRegion(s->copy_command_list, &dst_back, 0, 0, 0, &src_back, NULL);
|
||||
|
||||
// Copy forward motion vectors if available
|
||||
if (s->next_frame) {
|
||||
// Get texture layout for forward copy
|
||||
D3D12_RESOURCE_DESC texture_desc_fwd;
|
||||
D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout_fwd;
|
||||
UINT64 row_size_fwd, total_size_fwd;
|
||||
UINT num_rows_fwd;
|
||||
|
||||
// Get the resource description for forward texture
|
||||
s->resolved_mv_texture_fwd->lpVtbl->GetDesc(s->resolved_mv_texture_fwd, &texture_desc_fwd);
|
||||
|
||||
av_log(ctx, AV_LOG_DEBUG, "Fwd texture desc: Width=%llu, Height=%u, Format=%d\n",
|
||||
(unsigned long long)texture_desc_fwd.Width, texture_desc_fwd.Height, texture_desc_fwd.Format);
|
||||
|
||||
// Get the copyable footprints for the forward texture
|
||||
ID3D12Device_GetCopyableFootprints(s->device, &texture_desc_fwd, 0, 1, 0,
|
||||
&layout_fwd, &num_rows_fwd, &row_size_fwd, &total_size_fwd);
|
||||
|
||||
av_log(ctx, AV_LOG_DEBUG, "Fwd layout: Offset=%llu, Width=%u, Height=%u, Depth=%u, RowPitch=%u\n",
|
||||
(unsigned long long)layout_fwd.Offset, layout_fwd.Footprint.Width, layout_fwd.Footprint.Height,
|
||||
layout_fwd.Footprint.Depth, layout_fwd.Footprint.RowPitch);
|
||||
|
||||
D3D12_TEXTURE_COPY_LOCATION src_fwd = {
|
||||
.pResource = s->resolved_mv_texture_fwd,
|
||||
.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
|
||||
.SubresourceIndex = 0
|
||||
};
|
||||
|
||||
D3D12_TEXTURE_COPY_LOCATION dst_fwd = {
|
||||
.pResource = s->readback_buffer_fwd,
|
||||
.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
|
||||
.PlacedFootprint = {
|
||||
.Offset = 0,
|
||||
.Footprint = layout_fwd.Footprint
|
||||
}
|
||||
};
|
||||
|
||||
av_log(ctx, AV_LOG_DEBUG, "Copying forward MVs...\n");
|
||||
ID3D12GraphicsCommandList_CopyTextureRegion(s->copy_command_list, &dst_fwd, 0, 0, 0, &src_fwd, NULL);
|
||||
}
|
||||
|
||||
// Transition back to COMMON state (reuse barriers by swapping states)
|
||||
for (int i = 0; i < copy_barrier_count; i++)
|
||||
FFSWAP(D3D12_RESOURCE_STATES, copy_barriers[i].Transition.StateBefore, copy_barriers[i].Transition.StateAfter);
|
||||
|
||||
ID3D12GraphicsCommandList_ResourceBarrier(s->copy_command_list, copy_barrier_count, copy_barriers);
|
||||
|
||||
hr = ID3D12GraphicsCommandList_Close(s->copy_command_list);
|
||||
if (FAILED(hr)) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to close copy command list (hr=0x%lx)\n", (long)hr);
|
||||
av_frame_free(&out);
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
|
||||
// Execute copy command list on the copy queue
|
||||
ID3D12CommandQueue_ExecuteCommandLists(s->copy_command_queue, 1, (ID3D12CommandList **)&s->copy_command_list);
|
||||
|
||||
// Signal and wait for copy completion
|
||||
hr = ID3D12CommandQueue_Signal(s->copy_command_queue, s->fence, ++s->fence_value);
|
||||
if (FAILED(hr)) {
|
||||
av_log(ctx, AV_LOG_ERROR, "Failed to signal fence for copy\n");
|
||||
av_frame_free(&out);
|
||||
return AVERROR(EINVAL);
|
||||
}
|
||||
|
||||
err = mestimate_d3d12_sync_gpu(s);
|
||||
if (err < 0) {
|
||||
av_frame_free(&out);
|
||||
return err;
|
||||
}
|
||||
|
||||
// Read motion vectors for both directions
|
||||
err = mestimate_d3d12_read_motion_vectors(ctx, out, 0);
|
||||
if (err < 0) {
|
||||
av_frame_free(&out);
|
||||
return err;
|
||||
}
|
||||
|
||||
if (s->next_frame) {
|
||||
err = mestimate_d3d12_read_motion_vectors(ctx, out, 1);
|
||||
if (err < 0) {
|
||||
av_frame_free(&out);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
return ff_filter_frame(ctx->outputs[0], out);
|
||||
}
|
||||
|
||||
/**
 * Filter teardown callback.
 *
 * Frees the cached previous/current/next frames, releases every D3D12
 * object stored in the private context, closes the fence event handle
 * when one is set, and drops the hardware frames/device references.
 *
 * @param ctx filter context whose private data is a MEstimateD3D12Context
 */
static av_cold void mestimate_d3d12_uninit(AVFilterContext *ctx)
{
    MEstimateD3D12Context *priv = ctx->priv;

    /* Frames retained between filter_frame calls. */
    av_frame_free(&priv->prev_frame);
    av_frame_free(&priv->cur_frame);
    av_frame_free(&priv->next_frame);

    /* Objects used for the texture -> readback buffer copy. */
    D3D12_OBJECT_RELEASE(priv->copy_command_list);
    D3D12_OBJECT_RELEASE(priv->copy_command_allocator);
    D3D12_OBJECT_RELEASE(priv->copy_command_queue);
    D3D12_OBJECT_RELEASE(priv->readback_buffer_back);
    D3D12_OBJECT_RELEASE(priv->readback_buffer_fwd);

    /* Motion estimation outputs and the estimator itself. */
    D3D12_OBJECT_RELEASE(priv->resolved_mv_texture_back);
    D3D12_OBJECT_RELEASE(priv->resolved_mv_texture_fwd);
    D3D12_OBJECT_RELEASE(priv->motion_vector_heap);
    D3D12_OBJECT_RELEASE(priv->motion_estimator);

    /* Command submission objects and the fence. */
    D3D12_OBJECT_RELEASE(priv->command_list);
    D3D12_OBJECT_RELEASE(priv->command_allocator);
    D3D12_OBJECT_RELEASE(priv->command_queue);
    D3D12_OBJECT_RELEASE(priv->fence);

    if (priv->fence_event) {
        CloseHandle(priv->fence_event);
    }

    av_buffer_unref(&priv->hw_frames_ref);
    av_buffer_unref(&priv->hw_device_ref);
}
|
||||
|
||||
static const AVFilterPad mestimate_d3d12_inputs[] = {
|
||||
{
|
||||
.name = "default",
|
||||
.type = AVMEDIA_TYPE_VIDEO,
|
||||
.filter_frame = mestimate_d3d12_filter_frame,
|
||||
},
|
||||
};
|
||||
|
||||
static const AVFilterPad mestimate_d3d12_outputs[] = {
|
||||
{
|
||||
.name = "default",
|
||||
.type = AVMEDIA_TYPE_VIDEO,
|
||||
.config_props = mestimate_d3d12_config_props,
|
||||
},
|
||||
};
|
||||
|
||||
#define OFFSET(x) offsetof(MEstimateD3D12Context, x)
|
||||
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
|
||||
|
||||
static const AVOption mestimate_d3d12_options[] = {
|
||||
{ "mb_size", "macroblock size", OFFSET(block_size), AV_OPT_TYPE_INT, {.i64 = 16}, 8, 16, FLAGS, .unit = "mb_size" },
|
||||
{ "8", "8x8 blocks", 0, AV_OPT_TYPE_CONST, {.i64 = 8}, 0, 0, FLAGS, .unit = "mb_size" },
|
||||
{ "16", "16x16 blocks", 0, AV_OPT_TYPE_CONST, {.i64 = 16}, 0, 0, FLAGS, .unit = "mb_size" },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
AVFILTER_DEFINE_CLASS(mestimate_d3d12);
|
||||
|
||||
const FFFilter ff_vf_mestimate_d3d12 = {
|
||||
.p.name = "mestimate_d3d12",
|
||||
.p.description = NULL_IF_CONFIG_SMALL("Generate motion vectors using D3D12 hardware acceleration."),
|
||||
.p.priv_class = &mestimate_d3d12_class,
|
||||
.p.flags = AVFILTER_FLAG_METADATA_ONLY | AVFILTER_FLAG_HWDEVICE,
|
||||
.priv_size = sizeof(MEstimateD3D12Context),
|
||||
.init = mestimate_d3d12_init,
|
||||
.uninit = mestimate_d3d12_uninit,
|
||||
.flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
|
||||
FILTER_INPUTS(mestimate_d3d12_inputs),
|
||||
FILTER_SINGLE_PIXFMT(AV_PIX_FMT_D3D12),
|
||||
FILTER_OUTPUTS(mestimate_d3d12_outputs),
|
||||
};
|
||||
Reference in New Issue
Block a user