mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 12:50:49 +08:00
This is a complete rewrite of the math in swscale/utils.c initFilter(), using floating point math and with a bit more polished UI and internals. I have also included a substantial number of improvements, including a method to numerically compute the true filter support size from the parameters, and a more robust logic for the edge conditions. The upshot of these changes is that the filter weight computation is now much simpler and faster, and with fewer edge cases. I copy/pasted the actual underlying kernel functions from libplacebo, so this math is already quite battle-tested. I made some adjustments to the defaults to align with the existing defaults in libswscale, for backwards compatibility. Note that this commit introduces a lot more filter kernels than what we actually expose; but they are cheap to carry around, don't take up binary space, and will probably save some poor soul from incorrectly reimplementing them in the future. Plus, I have plans to expand the list of functions down the line, so it makes sense to just define them all, even if we don't necessarily use them yet. Sponsored-by: Sovereign Tech Fund Signed-off-by: Niklas Haas <git@haasn.dev>
446 lines
14 KiB
C
446 lines
14 KiB
C
/*
|
|
* Copyright (C) 2026 Niklas Haas
|
|
*
|
|
* This file is part of FFmpeg.
|
|
*
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#include <math.h>
|
|
#include <stdbool.h>
|
|
|
|
#include <libavutil/attributes.h>
|
|
#include <libavutil/avassert.h>
|
|
#include <libavutil/mem.h>
|
|
|
|
#include "filters.h"
|
|
|
|
#ifdef _WIN32
|
|
# define j1 _j1
|
|
#endif
|
|
|
|
/* Maximum (pre-stretching) radius (for tunable filters) */
|
|
#define RADIUS_MAX 10.0
|
|
|
|
/* Defined only on [0, radius]. */
|
|
typedef double (*SwsFilterKernel)(double x, const double *params);
|
|
|
|
typedef struct SwsFilterFunction {
|
|
char name[16];
|
|
double radius; /* negative means resizable */
|
|
SwsFilterKernel kernel;
|
|
SwsFilterKernel window; /* optional */
|
|
double params[SWS_NUM_SCALER_PARAMS]; /* default params */
|
|
} SwsFilterFunction;
|
|
|
|
static const SwsFilterFunction filter_functions[SWS_SCALE_NB];
|
|
|
|
static double scaler_sample(const SwsFilterFunction *f, double x)
|
|
{
|
|
x = fabs(x);
|
|
if (x > f->radius)
|
|
return 0.0;
|
|
|
|
double w = f->kernel(x, f->params);
|
|
if (f->window)
|
|
w *= f->window(x / f->radius, f->params);
|
|
return w;
|
|
}
|
|
|
|
static void compute_row(SwsFilterWeights *f, const SwsFilterFunction *fun,
|
|
double radius, double ratio_inv, double stretch_inv,
|
|
int dst_pos, double *tmp)
|
|
{
|
|
int *out = &f->weights[dst_pos * f->filter_size];
|
|
int *pos = &f->offsets[dst_pos];
|
|
|
|
/**
|
|
* Explanation of the 0.5 offsets: Normally, pixel samples are assumed
|
|
* to be representative of the center of their containing area; e.g. for
|
|
* a 2x2 image, the samples are located at {0.5, 1.5}^2. However, with
|
|
* integer indexing, we round sample positions down (0-based indexing).
|
|
* So the (0, 0) sample is actually located at (0.5, 0.5) and represents
|
|
* the entire square from (0,0) to (1,1). When normalizing between different
|
|
* image sizes, we therefore need to add/subtract off these 0.5 offsets.
|
|
*/
|
|
const double src_pos = (dst_pos + 0.5) * ratio_inv - 0.5;
|
|
if (f->filter_size == 1) {
|
|
*pos = fmin(fmax(round(src_pos), 0.0), f->src_size - 1);
|
|
*out = SWS_FILTER_SCALE;
|
|
return;
|
|
}
|
|
|
|
/* First pixel that is actually within the filter envelope */
|
|
const double start_pos = src_pos - radius;
|
|
int64_t start_idx = ceil(start_pos);
|
|
start_idx = FFMAX(start_idx, 0); /* edge clamping */
|
|
start_idx = FFMIN(start_idx, f->src_size - f->filter_size);
|
|
const double offset = start_idx - src_pos;
|
|
*pos = start_idx;
|
|
|
|
/**
|
|
* Generate raw filter weights with maximum precision. Sum the positive
|
|
* and negative weights separately to avoid catastrophic cancellation. This
|
|
* summation order should already give the best precision because abs(w)
|
|
* is monotonically decreasing
|
|
*/
|
|
const double base = stretch_inv * offset;
|
|
double wsum_pos = 0.0, wsum_neg = 0.0;
|
|
for (int i = 0; i < f->filter_size; i++) {
|
|
tmp[i] = scaler_sample(fun, base + stretch_inv * i);
|
|
if (tmp[i] >= 0)
|
|
wsum_pos += tmp[i];
|
|
else
|
|
wsum_neg += tmp[i];
|
|
}
|
|
|
|
const double wsum = wsum_pos + wsum_neg;
|
|
av_assert0(wsum > 0);
|
|
|
|
/* Generate correctly rounded filter weights with error diffusion */
|
|
double error = 0.0;
|
|
int sum_pos = 0, sum_neg = 0;
|
|
for (int i = 0; i < f->filter_size; i++) {
|
|
if (i == f->filter_size - 1) {
|
|
/* Ensure weights sum to exactly SWS_FILTER_SCALE */
|
|
out[i] = SWS_FILTER_SCALE - sum_pos - sum_neg;
|
|
} else {
|
|
const double w = tmp[i] / wsum + error;
|
|
out[i] = round(w * SWS_FILTER_SCALE);
|
|
error = w - (double) out[i] / SWS_FILTER_SCALE;
|
|
}
|
|
if (out[i] >= 0)
|
|
sum_pos += out[i];
|
|
else
|
|
sum_neg += out[i];
|
|
}
|
|
|
|
if (sum_pos > f->sum_positive)
|
|
f->sum_positive = sum_pos;
|
|
if (sum_neg < f->sum_negative)
|
|
f->sum_negative = sum_neg;
|
|
}
|
|
|
|
static void sws_filter_free(AVRefStructOpaque opaque, void *obj)
|
|
{
|
|
SwsFilterWeights *filter = obj;
|
|
av_refstruct_unref(&filter->weights);
|
|
av_refstruct_unref(&filter->offsets);
|
|
}
|
|
|
|
static bool validate_params(const SwsFilterFunction *fun, SwsScaler scaler)
|
|
{
|
|
switch (scaler) {
|
|
case SWS_SCALE_GAUSSIAN:
|
|
return fun->params[0] >= 0.0; /* sigma */
|
|
case SWS_SCALE_LANCZOS:
|
|
return fun->params[0] >= 1.0 && fun->params[0] <= RADIUS_MAX; /* radius */
|
|
case SWS_SCALE_BICUBIC:
|
|
return fun->params[0] < 3.0; /* B param (division by zero) */
|
|
default:
|
|
return true;
|
|
}
|
|
}
|
|
|
|
static double filter_radius(const SwsFilterFunction *fun)
|
|
{
|
|
const double bound = fun->radius;
|
|
const double step = 1e-2;
|
|
|
|
double radius = bound;
|
|
double prev = 0.0, fprev = 1.0; /* f(0) is always 1.0 */
|
|
double integral = 0.0;
|
|
for (double x = step; x < bound + step; x += step) {
|
|
const double fx = scaler_sample(fun, x);
|
|
integral += (fprev + fx) * step; /* trapezoidal rule (mirrored) */
|
|
double cutoff = SWS_MAX_REDUCE_CUTOFF * integral;
|
|
if ((fprev > cutoff && fx <= cutoff) || (fprev < -cutoff && fx >= -cutoff)) {
|
|
/* estimate crossing with secant method; note that we have to
|
|
* bias by the cutoff to find the actual cutoff radius */
|
|
double estimate = fx + (fx > fprev ? cutoff : -cutoff);
|
|
double root = x - estimate * (x - prev) / (fx - fprev);
|
|
radius = fmin(root, bound);
|
|
}
|
|
prev = x;
|
|
fprev = fx;
|
|
}
|
|
|
|
return radius;
|
|
}
|
|
|
|
int ff_sws_filter_generate(void *log, const SwsFilterParams *params,
|
|
SwsFilterWeights **out)
|
|
{
|
|
SwsScaler scaler = params->scaler;
|
|
if (scaler >= SWS_SCALE_NB)
|
|
return AVERROR(EINVAL);
|
|
|
|
if (scaler == SWS_SCALE_AUTO)
|
|
scaler = SWS_SCALE_BICUBIC;
|
|
|
|
const double ratio = (double) params->dst_size / params->src_size;
|
|
double stretch = 1.0;
|
|
if (ratio < 1.0 && scaler != SWS_SCALE_POINT) {
|
|
/* Widen filter for downscaling (anti-aliasing) */
|
|
stretch = 1.0 / ratio;
|
|
}
|
|
|
|
if (scaler == SWS_SCALE_AREA) {
|
|
/**
|
|
* SWS_SCALE_AREA is a pseudo-filter that is equivalent to bilinear
|
|
* filtering for upscaling (since bilinear just evenly mixes samples
|
|
* according to the relative distance), and equivalent to (anti-aliased)
|
|
* point sampling for downscaling.
|
|
*/
|
|
scaler = ratio >= 1.0 ? SWS_SCALE_BILINEAR : SWS_SCALE_POINT;
|
|
}
|
|
|
|
SwsFilterFunction fun = filter_functions[scaler];
|
|
if (!fun.kernel)
|
|
return AVERROR(EINVAL);
|
|
|
|
for (int i = 0; i < SWS_NUM_SCALER_PARAMS; i++) {
|
|
if (params->scaler_params[i] != SWS_PARAM_DEFAULT)
|
|
fun.params[i] = params->scaler_params[i];
|
|
}
|
|
|
|
if (!validate_params(&fun, scaler)) {
|
|
av_log(log, AV_LOG_ERROR, "Invalid parameters for scaler %s: {%f, %f}\n",
|
|
fun.name, fun.params[0], fun.params[1]);
|
|
return AVERROR(EINVAL);
|
|
}
|
|
|
|
if (fun.radius < 0.0) /* tunable width kernels like lanczos */
|
|
fun.radius = fun.params[0];
|
|
|
|
const double radius = filter_radius(&fun) * stretch;
|
|
int filter_size = ceil(radius * 2.0);
|
|
filter_size = FFMIN(filter_size, params->src_size);
|
|
av_assert0(filter_size >= 1);
|
|
if (filter_size > SWS_FILTER_SIZE_MAX)
|
|
return AVERROR(ENOTSUP);
|
|
|
|
SwsFilterWeights *filter;
|
|
filter = av_refstruct_alloc_ext(sizeof(*filter), 0, NULL, sws_filter_free);
|
|
if (!filter)
|
|
return AVERROR(ENOMEM);
|
|
memcpy(filter->name, fun.name, sizeof(filter->name));
|
|
filter->src_size = params->src_size;
|
|
filter->dst_size = params->dst_size;
|
|
filter->filter_size = filter_size;
|
|
if (filter->filter_size == 1)
|
|
filter->sum_positive = SWS_FILTER_SCALE;
|
|
|
|
av_log(log, AV_LOG_DEBUG, "Generating %s filter with %d taps (radius = %f)\n",
|
|
filter->name, filter->filter_size, radius);
|
|
|
|
filter->num_weights = (size_t) params->dst_size * filter->filter_size;
|
|
filter->weights = av_refstruct_allocz(filter->num_weights * sizeof(*filter->weights));
|
|
if (!filter->weights) {
|
|
av_refstruct_unref(&filter);
|
|
return AVERROR(ENOMEM);
|
|
}
|
|
|
|
filter->offsets = av_refstruct_allocz(params->dst_size * sizeof(*filter->offsets));
|
|
if (!filter->offsets) {
|
|
av_refstruct_unref(&filter);
|
|
return AVERROR(ENOMEM);
|
|
}
|
|
|
|
double *tmp = av_malloc(filter->filter_size * sizeof(*tmp));
|
|
if (!tmp) {
|
|
av_refstruct_unref(&filter);
|
|
return AVERROR(ENOMEM);
|
|
}
|
|
|
|
const double ratio_inv = 1.0 / ratio, stretch_inv = 1.0 / stretch;
|
|
for (int i = 0; i < params->dst_size; i++)
|
|
compute_row(filter, &fun, radius, ratio_inv, stretch_inv, i, tmp);
|
|
av_free(tmp);
|
|
|
|
*out = filter;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Some of the filter code originally derives (via libplacebo/mpv) from Glumpy:
|
|
* # Copyright (c) 2009-2016 Nicolas P. Rougier. All rights reserved.
|
|
* # Distributed under the (new) BSD License.
|
|
* (https://github.com/glumpy/glumpy/blob/master/glumpy/library/build-spatial-filters.py)
|
|
*
|
|
* The math underlying each filter function was written from scratch, with
|
|
* some algorithms coming from a number of different sources, including:
|
|
* - https://en.wikipedia.org/wiki/Window_function
|
|
* - https://en.wikipedia.org/wiki/Jinc
|
|
* - http://vector-agg.cvs.sourceforge.net/viewvc/vector-agg/agg-2.5/include/agg_image_filters.h
|
|
* - Vapoursynth plugin fmtconv (WTFPL Licensed), which is based on
|
|
* dither plugin for avisynth from the same author:
|
|
* https://github.com/vapoursynth/fmtconv/tree/master/src/fmtc
|
|
* - Paul Heckbert's "zoom"
|
|
* - XBMC: ConvolutionKernels.cpp etc.
|
|
* - https://github.com/AviSynth/jinc-resize (only used to verify the math)
|
|
*/
|
|
|
|
av_unused static double box(double x, const double *params)
|
|
{
|
|
return 1.0;
|
|
}
|
|
|
|
av_unused static double triangle(double x, const double *params)
|
|
{
|
|
return 1.0 - x;
|
|
}
|
|
|
|
av_unused static double cosine(double x, const double *params)
|
|
{
|
|
return cos(x);
|
|
}
|
|
|
|
av_unused static double hann(double x, const double *params)
|
|
{
|
|
return 0.5 + 0.5 * cos(M_PI * x);
|
|
}
|
|
|
|
av_unused static double hamming(double x, const double *params)
|
|
{
|
|
return 0.54 + 0.46 * cos(M_PI * x);
|
|
}
|
|
|
|
av_unused static double welch(double x, const double *params)
|
|
{
|
|
return 1.0 - x * x;
|
|
}
|
|
|
|
av_unused static double bessel_i0(double x)
|
|
{
|
|
double s = 1.0;
|
|
double y = x * x / 4.0;
|
|
double t = y;
|
|
int i = 2;
|
|
while (t > 1e-12) {
|
|
s += t;
|
|
t *= y / (i * i);
|
|
i += 1;
|
|
}
|
|
return s;
|
|
}
|
|
|
|
av_unused static double kaiser(double x, const double *params)
|
|
{
|
|
double alpha = fmax(params[0], 0.0);
|
|
double scale = bessel_i0(alpha);
|
|
return bessel_i0(alpha * sqrt(1.0 - x * x)) / scale;
|
|
}
|
|
|
|
av_unused static double blackman(double x, const double *params)
|
|
{
|
|
double a = params[0];
|
|
double a0 = (1 - a) / 2.0, a1 = 1 / 2.0, a2 = a / 2.0;
|
|
x *= M_PI;
|
|
return a0 + a1 * cos(x) + a2 * cos(2 * x);
|
|
}
|
|
|
|
av_unused static double bohman(double x, const double *params)
|
|
{
|
|
double pix = M_PI * x;
|
|
return (1.0 - x) * cos(pix) + sin(pix) / M_PI;
|
|
}
|
|
|
|
av_unused static double gaussian(double x, const double *params)
|
|
{
|
|
return exp(-params[0] * x * x);
|
|
}
|
|
|
|
av_unused static double quadratic(double x, const double *params)
|
|
{
|
|
if (x < 0.5) {
|
|
return 1.0 - 4.0/3.0 * (x * x);
|
|
} else {
|
|
return 2.0 / 3.0 * (x - 1.5) * (x - 1.5);
|
|
}
|
|
}
|
|
|
|
av_unused static double sinc(double x, const double *params)
|
|
{
|
|
if (x < 1e-8)
|
|
return 1.0;
|
|
x *= M_PI;
|
|
return sin(x) / x;
|
|
}
|
|
|
|
av_unused static double jinc(double x, const double *params)
|
|
{
|
|
if (x < 1e-8)
|
|
return 1.0;
|
|
x *= M_PI;
|
|
return 2.0 * j1(x) / x;
|
|
}
|
|
|
|
av_unused static double sphinx(double x, const double *params)
|
|
{
|
|
if (x < 1e-8)
|
|
return 1.0;
|
|
x *= M_PI;
|
|
return 3.0 * (sin(x) - x * cos(x)) / (x * x * x);
|
|
}
|
|
|
|
av_unused static double cubic(double x, const double *params)
|
|
{
|
|
const double b = params[0], c = params[1];
|
|
double p0 = 6.0 - 2.0 * b,
|
|
p2 = -18.0 + 12.0 * b + 6.0 * c,
|
|
p3 = 12.0 - 9.0 * b - 6.0 * c,
|
|
q0 = 8.0 * b + 24.0 * c,
|
|
q1 = -12.0 * b - 48.0 * c,
|
|
q2 = 6.0 * b + 30.0 * c,
|
|
q3 = -b - 6.0 * c;
|
|
|
|
if (x < 1.0) {
|
|
return (p0 + x * x * (p2 + x * p3)) / p0;
|
|
} else {
|
|
return (q0 + x * (q1 + x * (q2 + x * q3))) / p0;
|
|
}
|
|
}
|
|
|
|
static double spline_coeff(double a, double b, double c, double d, double x)
|
|
{
|
|
if (x <= 1.0) {
|
|
return ((d * x + c) * x + b) * x + a;
|
|
} else {
|
|
return spline_coeff(0.0,
|
|
b + 2.0 * c + 3.0 * d,
|
|
c + 3.0 * d,
|
|
-b - 3.0 * c - 6.0 * d,
|
|
x - 1.0);
|
|
}
|
|
}
|
|
|
|
av_unused static double spline(double x, const double *params)
|
|
{
|
|
const double p = -2.196152422706632;
|
|
return spline_coeff(1.0, 0.0, p, -p - 1.0, x);
|
|
}
|
|
|
|
static const SwsFilterFunction filter_functions[SWS_SCALE_NB] = {
|
|
[SWS_SCALE_BILINEAR] = { "bilinear", 1.0, triangle },
|
|
[SWS_SCALE_BICUBIC] = { "bicubic", 2.0, cubic, .params = { 0.0, 0.6 } },
|
|
[SWS_SCALE_POINT] = { "point", 0.5, box },
|
|
[SWS_SCALE_GAUSSIAN] = { "gaussian", 4.0, gaussian, .params = { 3.0 } },
|
|
[SWS_SCALE_SINC] = { "sinc", RADIUS_MAX, sinc },
|
|
[SWS_SCALE_LANCZOS] = { "lanczos", -1.0, sinc, sinc, .params = { 3.0 } },
|
|
[SWS_SCALE_SPLINE] = { "spline", RADIUS_MAX, spline },
|
|
/* SWS_SCALE_AREA is a pseudo-filter, see code above */
|
|
};
|