Files
FFmpeg/libswscale/filters.c
Niklas Haas 475b11b2e0 swscale/filters: write new filter LUT generation code
This is a complete rewrite of the math in swscale/utils.c initFilter(), using
floating point math and with a bit more polished UI and internals. I have
also included a substantial number of improvements, including a method to
numerically compute the true filter support size from the parameters, and a
more robust logic for the edge conditions. The upshot of these changes is
that the filter weight computation is now much simpler and faster, and with
fewer edge cases.

I copy/pasted the actual underlying kernel functions from libplacebo, so this
math is already quite battle-tested. I made some adjustments to the defaults
to align with the existing defaults in libswscale, for backwards compatibility.

Note that this commit introduces a lot more filter kernels than what we
actually expose; but they are cheap to carry around, don't take up binary
space, and will probably save some poor soul from incorrectly reimplementing
them in the future. Plus, I have plans to expand the list of functions down
the line, so it makes sense to just define them all, even if we don't
necessarily use them yet.

Sponsored-by: Sovereign Tech Fund
Signed-off-by: Niklas Haas <git@haasn.dev>
2026-03-28 18:50:13 +01:00

446 lines
14 KiB
C

/*
* Copyright (C) 2026 Niklas Haas
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <math.h>
#include <stdbool.h>
#include <libavutil/attributes.h>
#include <libavutil/avassert.h>
#include <libavutil/mem.h>
#include "filters.h"
#ifdef _WIN32
# define j1 _j1
#endif
/* Maximum (pre-stretching) radius (for tunable filters) */
#define RADIUS_MAX 10.0
/* Defined only on [0, radius]. */
typedef double (*SwsFilterKernel)(double x, const double *params);
typedef struct SwsFilterFunction {
char name[16];
double radius; /* negative means resizable */
SwsFilterKernel kernel;
SwsFilterKernel window; /* optional */
double params[SWS_NUM_SCALER_PARAMS]; /* default params */
} SwsFilterFunction;
static const SwsFilterFunction filter_functions[SWS_SCALE_NB];
static double scaler_sample(const SwsFilterFunction *f, double x)
{
x = fabs(x);
if (x > f->radius)
return 0.0;
double w = f->kernel(x, f->params);
if (f->window)
w *= f->window(x / f->radius, f->params);
return w;
}
static void compute_row(SwsFilterWeights *f, const SwsFilterFunction *fun,
double radius, double ratio_inv, double stretch_inv,
int dst_pos, double *tmp)
{
int *out = &f->weights[dst_pos * f->filter_size];
int *pos = &f->offsets[dst_pos];
/**
* Explanation of the 0.5 offsets: Normally, pixel samples are assumed
* to be representative of the center of their containing area; e.g. for
* a 2x2 image, the samples are located at {0.5, 1.5}^2. However, with
* integer indexing, we round sample positions down (0-based indexing).
* So the (0, 0) sample is actually located at (0.5, 0.5) and represents
* the entire square from (0,0) to (1,1). When normalizing between different
* image sizes, we therefore need to add/subtract off these 0.5 offsets.
*/
const double src_pos = (dst_pos + 0.5) * ratio_inv - 0.5;
if (f->filter_size == 1) {
*pos = fmin(fmax(round(src_pos), 0.0), f->src_size - 1);
*out = SWS_FILTER_SCALE;
return;
}
/* First pixel that is actually within the filter envelope */
const double start_pos = src_pos - radius;
int64_t start_idx = ceil(start_pos);
start_idx = FFMAX(start_idx, 0); /* edge clamping */
start_idx = FFMIN(start_idx, f->src_size - f->filter_size);
const double offset = start_idx - src_pos;
*pos = start_idx;
/**
* Generate raw filter weights with maximum precision. Sum the positive
* and negative weights separately to avoid catastrophic cancellation. This
* summation order should already give the best precision because abs(w)
* is monotonically decreasing
*/
const double base = stretch_inv * offset;
double wsum_pos = 0.0, wsum_neg = 0.0;
for (int i = 0; i < f->filter_size; i++) {
tmp[i] = scaler_sample(fun, base + stretch_inv * i);
if (tmp[i] >= 0)
wsum_pos += tmp[i];
else
wsum_neg += tmp[i];
}
const double wsum = wsum_pos + wsum_neg;
av_assert0(wsum > 0);
/* Generate correctly rounded filter weights with error diffusion */
double error = 0.0;
int sum_pos = 0, sum_neg = 0;
for (int i = 0; i < f->filter_size; i++) {
if (i == f->filter_size - 1) {
/* Ensure weights sum to exactly SWS_FILTER_SCALE */
out[i] = SWS_FILTER_SCALE - sum_pos - sum_neg;
} else {
const double w = tmp[i] / wsum + error;
out[i] = round(w * SWS_FILTER_SCALE);
error = w - (double) out[i] / SWS_FILTER_SCALE;
}
if (out[i] >= 0)
sum_pos += out[i];
else
sum_neg += out[i];
}
if (sum_pos > f->sum_positive)
f->sum_positive = sum_pos;
if (sum_neg < f->sum_negative)
f->sum_negative = sum_neg;
}
static void sws_filter_free(AVRefStructOpaque opaque, void *obj)
{
SwsFilterWeights *filter = obj;
av_refstruct_unref(&filter->weights);
av_refstruct_unref(&filter->offsets);
}
static bool validate_params(const SwsFilterFunction *fun, SwsScaler scaler)
{
switch (scaler) {
case SWS_SCALE_GAUSSIAN:
return fun->params[0] >= 0.0; /* sigma */
case SWS_SCALE_LANCZOS:
return fun->params[0] >= 1.0 && fun->params[0] <= RADIUS_MAX; /* radius */
case SWS_SCALE_BICUBIC:
return fun->params[0] < 3.0; /* B param (division by zero) */
default:
return true;
}
}
static double filter_radius(const SwsFilterFunction *fun)
{
const double bound = fun->radius;
const double step = 1e-2;
double radius = bound;
double prev = 0.0, fprev = 1.0; /* f(0) is always 1.0 */
double integral = 0.0;
for (double x = step; x < bound + step; x += step) {
const double fx = scaler_sample(fun, x);
integral += (fprev + fx) * step; /* trapezoidal rule (mirrored) */
double cutoff = SWS_MAX_REDUCE_CUTOFF * integral;
if ((fprev > cutoff && fx <= cutoff) || (fprev < -cutoff && fx >= -cutoff)) {
/* estimate crossing with secant method; note that we have to
* bias by the cutoff to find the actual cutoff radius */
double estimate = fx + (fx > fprev ? cutoff : -cutoff);
double root = x - estimate * (x - prev) / (fx - fprev);
radius = fmin(root, bound);
}
prev = x;
fprev = fx;
}
return radius;
}
int ff_sws_filter_generate(void *log, const SwsFilterParams *params,
SwsFilterWeights **out)
{
SwsScaler scaler = params->scaler;
if (scaler >= SWS_SCALE_NB)
return AVERROR(EINVAL);
if (scaler == SWS_SCALE_AUTO)
scaler = SWS_SCALE_BICUBIC;
const double ratio = (double) params->dst_size / params->src_size;
double stretch = 1.0;
if (ratio < 1.0 && scaler != SWS_SCALE_POINT) {
/* Widen filter for downscaling (anti-aliasing) */
stretch = 1.0 / ratio;
}
if (scaler == SWS_SCALE_AREA) {
/**
* SWS_SCALE_AREA is a pseudo-filter that is equivalent to bilinear
* filtering for upscaling (since bilinear just evenly mixes samples
* according to the relative distance), and equivalent to (anti-aliased)
* point sampling for downscaling.
*/
scaler = ratio >= 1.0 ? SWS_SCALE_BILINEAR : SWS_SCALE_POINT;
}
SwsFilterFunction fun = filter_functions[scaler];
if (!fun.kernel)
return AVERROR(EINVAL);
for (int i = 0; i < SWS_NUM_SCALER_PARAMS; i++) {
if (params->scaler_params[i] != SWS_PARAM_DEFAULT)
fun.params[i] = params->scaler_params[i];
}
if (!validate_params(&fun, scaler)) {
av_log(log, AV_LOG_ERROR, "Invalid parameters for scaler %s: {%f, %f}\n",
fun.name, fun.params[0], fun.params[1]);
return AVERROR(EINVAL);
}
if (fun.radius < 0.0) /* tunable width kernels like lanczos */
fun.radius = fun.params[0];
const double radius = filter_radius(&fun) * stretch;
int filter_size = ceil(radius * 2.0);
filter_size = FFMIN(filter_size, params->src_size);
av_assert0(filter_size >= 1);
if (filter_size > SWS_FILTER_SIZE_MAX)
return AVERROR(ENOTSUP);
SwsFilterWeights *filter;
filter = av_refstruct_alloc_ext(sizeof(*filter), 0, NULL, sws_filter_free);
if (!filter)
return AVERROR(ENOMEM);
memcpy(filter->name, fun.name, sizeof(filter->name));
filter->src_size = params->src_size;
filter->dst_size = params->dst_size;
filter->filter_size = filter_size;
if (filter->filter_size == 1)
filter->sum_positive = SWS_FILTER_SCALE;
av_log(log, AV_LOG_DEBUG, "Generating %s filter with %d taps (radius = %f)\n",
filter->name, filter->filter_size, radius);
filter->num_weights = (size_t) params->dst_size * filter->filter_size;
filter->weights = av_refstruct_allocz(filter->num_weights * sizeof(*filter->weights));
if (!filter->weights) {
av_refstruct_unref(&filter);
return AVERROR(ENOMEM);
}
filter->offsets = av_refstruct_allocz(params->dst_size * sizeof(*filter->offsets));
if (!filter->offsets) {
av_refstruct_unref(&filter);
return AVERROR(ENOMEM);
}
double *tmp = av_malloc(filter->filter_size * sizeof(*tmp));
if (!tmp) {
av_refstruct_unref(&filter);
return AVERROR(ENOMEM);
}
const double ratio_inv = 1.0 / ratio, stretch_inv = 1.0 / stretch;
for (int i = 0; i < params->dst_size; i++)
compute_row(filter, &fun, radius, ratio_inv, stretch_inv, i, tmp);
av_free(tmp);
*out = filter;
return 0;
}
/*
* Some of the filter code originally derives (via libplacebo/mpv) from Glumpy:
* # Copyright (c) 2009-2016 Nicolas P. Rougier. All rights reserved.
* # Distributed under the (new) BSD License.
* (https://github.com/glumpy/glumpy/blob/master/glumpy/library/build-spatial-filters.py)
*
* The math underlying each filter function was written from scratch, with
* some algorithms coming from a number of different sources, including:
* - https://en.wikipedia.org/wiki/Window_function
* - https://en.wikipedia.org/wiki/Jinc
* - http://vector-agg.cvs.sourceforge.net/viewvc/vector-agg/agg-2.5/include/agg_image_filters.h
* - Vapoursynth plugin fmtconv (WTFPL Licensed), which is based on
* dither plugin for avisynth from the same author:
* https://github.com/vapoursynth/fmtconv/tree/master/src/fmtc
* - Paul Heckbert's "zoom"
* - XBMC: ConvolutionKernels.cpp etc.
* - https://github.com/AviSynth/jinc-resize (only used to verify the math)
*/
av_unused static double box(double x, const double *params)
{
return 1.0;
}
av_unused static double triangle(double x, const double *params)
{
return 1.0 - x;
}
av_unused static double cosine(double x, const double *params)
{
return cos(x);
}
av_unused static double hann(double x, const double *params)
{
return 0.5 + 0.5 * cos(M_PI * x);
}
av_unused static double hamming(double x, const double *params)
{
return 0.54 + 0.46 * cos(M_PI * x);
}
av_unused static double welch(double x, const double *params)
{
return 1.0 - x * x;
}
av_unused static double bessel_i0(double x)
{
double s = 1.0;
double y = x * x / 4.0;
double t = y;
int i = 2;
while (t > 1e-12) {
s += t;
t *= y / (i * i);
i += 1;
}
return s;
}
av_unused static double kaiser(double x, const double *params)
{
double alpha = fmax(params[0], 0.0);
double scale = bessel_i0(alpha);
return bessel_i0(alpha * sqrt(1.0 - x * x)) / scale;
}
av_unused static double blackman(double x, const double *params)
{
double a = params[0];
double a0 = (1 - a) / 2.0, a1 = 1 / 2.0, a2 = a / 2.0;
x *= M_PI;
return a0 + a1 * cos(x) + a2 * cos(2 * x);
}
av_unused static double bohman(double x, const double *params)
{
double pix = M_PI * x;
return (1.0 - x) * cos(pix) + sin(pix) / M_PI;
}
av_unused static double gaussian(double x, const double *params)
{
return exp(-params[0] * x * x);
}
av_unused static double quadratic(double x, const double *params)
{
if (x < 0.5) {
return 1.0 - 4.0/3.0 * (x * x);
} else {
return 2.0 / 3.0 * (x - 1.5) * (x - 1.5);
}
}
av_unused static double sinc(double x, const double *params)
{
if (x < 1e-8)
return 1.0;
x *= M_PI;
return sin(x) / x;
}
av_unused static double jinc(double x, const double *params)
{
if (x < 1e-8)
return 1.0;
x *= M_PI;
return 2.0 * j1(x) / x;
}
av_unused static double sphinx(double x, const double *params)
{
if (x < 1e-8)
return 1.0;
x *= M_PI;
return 3.0 * (sin(x) - x * cos(x)) / (x * x * x);
}
av_unused static double cubic(double x, const double *params)
{
const double b = params[0], c = params[1];
double p0 = 6.0 - 2.0 * b,
p2 = -18.0 + 12.0 * b + 6.0 * c,
p3 = 12.0 - 9.0 * b - 6.0 * c,
q0 = 8.0 * b + 24.0 * c,
q1 = -12.0 * b - 48.0 * c,
q2 = 6.0 * b + 30.0 * c,
q3 = -b - 6.0 * c;
if (x < 1.0) {
return (p0 + x * x * (p2 + x * p3)) / p0;
} else {
return (q0 + x * (q1 + x * (q2 + x * q3))) / p0;
}
}
static double spline_coeff(double a, double b, double c, double d, double x)
{
if (x <= 1.0) {
return ((d * x + c) * x + b) * x + a;
} else {
return spline_coeff(0.0,
b + 2.0 * c + 3.0 * d,
c + 3.0 * d,
-b - 3.0 * c - 6.0 * d,
x - 1.0);
}
}
av_unused static double spline(double x, const double *params)
{
const double p = -2.196152422706632;
return spline_coeff(1.0, 0.0, p, -p - 1.0, x);
}
static const SwsFilterFunction filter_functions[SWS_SCALE_NB] = {
[SWS_SCALE_BILINEAR] = { "bilinear", 1.0, triangle },
[SWS_SCALE_BICUBIC] = { "bicubic", 2.0, cubic, .params = { 0.0, 0.6 } },
[SWS_SCALE_POINT] = { "point", 0.5, box },
[SWS_SCALE_GAUSSIAN] = { "gaussian", 4.0, gaussian, .params = { 3.0 } },
[SWS_SCALE_SINC] = { "sinc", RADIUS_MAX, sinc },
[SWS_SCALE_LANCZOS] = { "lanczos", -1.0, sinc, sinc, .params = { 3.0 } },
[SWS_SCALE_SPLINE] = { "spline", RADIUS_MAX, spline },
/* SWS_SCALE_AREA is a pseudo-filter, see code above */
};