mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 21:00:41 +08:00
avfilter/scale_cuda: fix color bleeding in lanczos scaling
Prior to this, the results were not saturated into the uchar/ushort range before being written, and the characteristics of the Lanczos filter exposed this issue. In addition, the results were truncated rather than rounded, which resulted in checkerboard artifacts in solid color areas that were noticeable when using Lanczos with 8-bit input. Example: ffmpeg -init_hw_device cuda -f lavfi -i testsrc2=s=960x540,format=yuv420p \ -vf hwupload,scale_cuda=format=yuv420p:w=-2:h=720:interp_algo=lanczos \ -c:v h264_nvenc -qp:v 20 -t 1 <OUTPUT> Fix #20784 Signed-off-by: nyanmisaka <nst799610810@gmail.com>
This commit is contained in:
committed by
Timo Rothenpieler
parent
2dff0156ba
commit
69fc910777
@@ -182,6 +182,7 @@ static inline __device__ float fabsf(float a) { return __builtin_fabsf(a); }
|
|||||||
static inline __device__ float fabs(float a) { return __builtin_fabsf(a); }
|
static inline __device__ float fabs(float a) { return __builtin_fabsf(a); }
|
||||||
static inline __device__ double fabs(double a) { return __builtin_fabs(a); }
|
static inline __device__ double fabs(double a) { return __builtin_fabs(a); }
|
||||||
static inline __device__ float sqrtf(float a) { return __builtin_sqrtf(a); }
|
static inline __device__ float sqrtf(float a) { return __builtin_sqrtf(a); }
|
||||||
|
// Device-side rintf(): forwards directly to the compiler builtin
// __builtin_rintf, in the same way as the neighbouring math wrappers.
static inline __device__ float rintf(float a) { return __builtin_rintf(a); }
|
||||||
|
|
||||||
static inline __device__ float __saturatef(float a) { return __nvvm_saturate_f(a); }
|
static inline __device__ float __saturatef(float a) { return __nvvm_saturate_f(a); }
|
||||||
static inline __device__ float __sinf(float a) { return __nvvm_sin_approx_f(a); }
|
static inline __device__ float __sinf(float a) { return __nvvm_sin_approx_f(a); }
|
||||||
|
|||||||
@@ -109,4 +109,27 @@ inline __device__ float4 lerp_scalar<float4>(float4 v0, float4 v1, float t) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Saturate a value, scale it, and round the product to an integral value.
 *
 * Computes rintf(__saturatef(a) * factor). The CUDA math API documents
 * __saturatef as a clamp to [0.0, 1.0], so the saturation happens before
 * the multiplication by factor.
 *
 * This generic version passes a straight to __saturatef(float) and returns
 * the float result as T, so T has to be convertible to and from float.
 *
 * @param a      value to saturate
 * @param factor scale applied after saturation (presumably the full-scale
 *               value of the output sample type -- confirm against callers)
 * @return       the saturated, scaled, and rounded value, converted to T
 */
template<typename T>
|
||||||
|
inline __device__ T saturate_rintf(T a, float factor) {
|
||||||
|
return rintf(__saturatef(a) * factor);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * float2 specialization of saturate_rintf.
 *
 * Applies saturate_rintf independently to a.x and a.y with the same factor
 * and packs the two results with make_float2.
 *
 * @param a      two-component value; each component is processed separately
 * @param factor scale forwarded unchanged to every per-component call
 * @return       float2 holding the processed x and y components
 */
template<>
|
||||||
|
inline __device__ float2 saturate_rintf<float2>(float2 a, float factor) {
|
||||||
|
return make_float2(
|
||||||
|
saturate_rintf(a.x, factor),
|
||||||
|
saturate_rintf(a.y, factor)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * float4 specialization of saturate_rintf.
 *
 * Applies saturate_rintf independently to a.x, a.y, a.z and a.w with the
 * same factor and packs the four results with make_float4.
 *
 * @param a      four-component value; each component is processed separately
 * @param factor scale forwarded unchanged to every per-component call
 * @return       float4 holding the processed x, y, z and w components
 */
template<>
|
||||||
|
inline __device__ float4 saturate_rintf<float4>(float4 a, float factor) {
|
||||||
|
return make_float4(
|
||||||
|
saturate_rintf(a.x, factor),
|
||||||
|
saturate_rintf(a.y, factor),
|
||||||
|
saturate_rintf(a.z, factor),
|
||||||
|
saturate_rintf(a.w, factor)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1162,12 +1162,15 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex,
|
|||||||
#define PIX(x, y) tex2D<floatT>(tex, (x), (y))
|
#define PIX(x, y) tex2D<floatT>(tex, (x), (y))
|
||||||
|
|
||||||
return from_floatN<T, floatT>(
|
return from_floatN<T, floatT>(
|
||||||
apply_coeffs<floatT>(coeffsY,
|
saturate_rintf(
|
||||||
apply_coeffs<floatT>(coeffsX, PIX(px - 1, py - 1), PIX(px, py - 1), PIX(px + 1, py - 1), PIX(px + 2, py - 1)),
|
apply_coeffs<floatT>(coeffsY,
|
||||||
apply_coeffs<floatT>(coeffsX, PIX(px - 1, py ), PIX(px, py ), PIX(px + 1, py ), PIX(px + 2, py )),
|
apply_coeffs<floatT>(coeffsX, PIX(px - 1, py - 1), PIX(px, py - 1), PIX(px + 1, py - 1), PIX(px + 2, py - 1)),
|
||||||
apply_coeffs<floatT>(coeffsX, PIX(px - 1, py + 1), PIX(px, py + 1), PIX(px + 1, py + 1), PIX(px + 2, py + 1)),
|
apply_coeffs<floatT>(coeffsX, PIX(px - 1, py ), PIX(px, py ), PIX(px + 1, py ), PIX(px + 2, py )),
|
||||||
apply_coeffs<floatT>(coeffsX, PIX(px - 1, py + 2), PIX(px, py + 2), PIX(px + 1, py + 2), PIX(px + 2, py + 2))
|
apply_coeffs<floatT>(coeffsX, PIX(px - 1, py + 1), PIX(px, py + 1), PIX(px + 1, py + 1), PIX(px + 2, py + 1)),
|
||||||
) * factor
|
apply_coeffs<floatT>(coeffsX, PIX(px - 1, py + 2), PIX(px, py + 2), PIX(px + 1, py + 2), PIX(px + 2, py + 2))
|
||||||
|
),
|
||||||
|
factor
|
||||||
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
#undef PIX
|
#undef PIX
|
||||||
|
|||||||
Reference in New Issue
Block a user