mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 21:00:41 +08:00
This adds a test for the mpegvideo unquantize functions. It has been written in order to be able to easily bench these functions. It should be noted that the random input fed to the tested functions is not necessarily representative of the stuff actually occurring in the wild. So benchmarks should be taken with a grain of salt; but comparisons between two functions that do not depend on branch prediction are valid (the use case for this is to port the x86 mmx functions to use xmm registers). During testing I have found a bug in the arm/aarch64 neon optimizations when using the LIBMPEG2 permutation (used by FF_IDCT_INT): The code seems to be based on the presumption that the remainder of the number of coefficients to process is always <= 4 mod 16. The test therefore sometimes fails for these arches. Hint: I am not certain that 16 bits are enough for the intermediate values of all the computations involved; e.g. both FLV and MPEG-4 escape values can go beyond that after the corresponding multiplications. The input in this test is nevertheless designed to fit into 16 bits. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
274 lines
10 KiB
C
274 lines
10 KiB
C
/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
|
|
|
#include <assert.h>
|
|
#include <stddef.h>
|
|
|
|
#include "config.h"
|
|
|
|
#include "checkasm.h"
|
|
|
|
#include "libavcodec/idctdsp.h"
|
|
#include "libavcodec/mathops.h"
|
|
#include "libavcodec/mpegvideo.h"
|
|
#include "libavcodec/mpegvideodata.h"
|
|
#include "libavcodec/mpegvideo_unquantize.h"
|
|
|
|
#include "libavutil/intreadwrite.h"
|
|
#include "libavutil/mem_internal.h"
|
|
|
|
/**
 * Fill the object pointed to by s with values obtained from rnd().
 *
 * TYPE is the type of *s; it is only used for a compile-time check that
 * the type's alignment is a multiple of 4, which the aligned 32-bit stores
 * (AV_WN32A) used for the bulk of the object rely on. Whatever is left
 * over after the last full 32-bit word is filled byte by byte.
 *
 * Note on the first loop bound: '<' binds tighter than '&', so the
 * rounded-down size has to be parenthesized explicitly. Without the
 * parentheses the condition is "(i < size) & ~3", which is always 0,
 * and the object would be left unrandomized.
 */
#define randomize_struct(TYPE, s) do {                                    \
    static_assert(!(_Alignof(TYPE) % 4),                                  \
                  "can't use aligned stores");                            \
    unsigned char *ptr = (unsigned char*)(s);                             \
    for (size_t i = 0; i < (sizeof(*(s)) & ~(size_t)3); i += 4)           \
        AV_WN32A(ptr + i, rnd());                                         \
    for (size_t i = sizeof(*(s)) & ~(size_t)3; i < sizeof(*(s)); ++i)     \
        ptr[i] = rnd();                                                   \
} while (0)
|
|
|
|
/**
 * Kind of unquantize function a test entry exercises; it decides how the
 * test input for that entry is generated.
 */
enum TestType {
    H263  = 0,
    MPEG1 = 1,
    MPEG2 = 2,
};
|
|
|
|
/**
 * Initialize the IDCT permutation of s and one of its scantables.
 *
 * On the first call a scan order and an IDCT permutation type are picked
 * at random (via rnd()) from the candidate lists below; both choices are
 * kept in static storage and reused by every later call.
 *
 * @param s               context whose idsp.idct_permutation and scantable
 *                        get initialized
 * @param intra_scantable nonzero to initialize s->intra_scantable,
 *                        zero to initialize s->inter_scantable
 */
static void init_idct_scantable(MPVContext *const s, int intra_scantable)
{
    // Permutation types to choose from; some are only offered on
    // certain architectures.
    static const enum idct_permutation_type perm_candidates[] = {
        FF_IDCT_PERM_NONE,
        FF_IDCT_PERM_LIBMPEG2,
#if ARCH_X86_32 && HAVE_X86ASM
        FF_IDCT_PERM_SIMPLE,
#endif
#if ARCH_PPC || ARCH_X86
        FF_IDCT_PERM_TRANSPOSE,
#endif
#if ARCH_ARM || ARCH_AARCH64
        FF_IDCT_PERM_PARTTRANS,
#endif
#if ARCH_X86 && HAVE_X86ASM
        FF_IDCT_PERM_SSE2,
#endif
    };
    // WMV1 scantables, copied here to avoid #ifs.
    static const uint8_t wmv1_scantables[][64] = {
        { 0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11,
          0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28,
          0x30, 0x38, 0x29, 0x21, 0x1A, 0x13, 0x0C, 0x05,
          0x06, 0x0D, 0x14, 0x1B, 0x22, 0x31, 0x39, 0x3A,
          0x32, 0x2A, 0x23, 0x1C, 0x15, 0x0E, 0x07, 0x0F,
          0x16, 0x1D, 0x24, 0x2B, 0x33, 0x3B, 0x3C, 0x34,
          0x2C, 0x25, 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x35,
          0x3D, 0x3E, 0x36, 0x2E, 0x27, 0x2F, 0x37, 0x3F, },
        { 0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11,
          0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28,
          0x21, 0x30, 0x1A, 0x13, 0x0C, 0x05, 0x06, 0x0D,
          0x14, 0x1B, 0x22, 0x29, 0x38, 0x31, 0x39, 0x2A,
          0x23, 0x1C, 0x15, 0x0E, 0x07, 0x0F, 0x16, 0x1D,
          0x24, 0x2B, 0x32, 0x3A, 0x33, 0x3B, 0x2C, 0x25,
          0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3C, 0x35,
          0x3D, 0x2E, 0x27, 0x2F, 0x36, 0x3E, 0x37, 0x3F, },
        { 0x00, 0x01, 0x08, 0x02, 0x03, 0x09, 0x10, 0x18,
          0x11, 0x0A, 0x04, 0x05, 0x0B, 0x12, 0x19, 0x20,
          0x28, 0x30, 0x21, 0x1A, 0x13, 0x0C, 0x06, 0x07,
          0x0D, 0x14, 0x1B, 0x22, 0x29, 0x38, 0x31, 0x39,
          0x2A, 0x23, 0x1C, 0x15, 0x0E, 0x0F, 0x16, 0x1D,
          0x24, 0x2B, 0x32, 0x3A, 0x33, 0x2C, 0x25, 0x1E,
          0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3B, 0x3C, 0x35,
          0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F, },
        { 0x00, 0x08, 0x10, 0x01, 0x18, 0x20, 0x28, 0x09,
          0x02, 0x03, 0x0A, 0x11, 0x19, 0x30, 0x38, 0x29,
          0x21, 0x1A, 0x12, 0x0B, 0x04, 0x05, 0x0C, 0x13,
          0x1B, 0x22, 0x31, 0x39, 0x32, 0x2A, 0x23, 0x1C,
          0x14, 0x0D, 0x06, 0x07, 0x0E, 0x15, 0x1D, 0x24,
          0x2B, 0x33, 0x3A, 0x3B, 0x34, 0x2C, 0x25, 0x1E,
          0x16, 0x0F, 0x17, 0x1F, 0x26, 0x2D, 0x3C, 0x35,
          0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F, }
    };

    // Scan orders to choose from.
    static const uint8_t *const scan_candidates[] = {
        ff_alternate_vertical_scan,
        ff_alternate_horizontal_scan,
        ff_zigzag_direct,
        wmv1_scantables[0],
        wmv1_scantables[1],
        wmv1_scantables[2],
        wmv1_scantables[3],
    };
    // Choices made on the first call; NULL means "not chosen yet".
    static const uint8_t *chosen_scan = NULL;
    static enum idct_permutation_type chosen_perm;

    if (chosen_scan == NULL) {
        chosen_scan = scan_candidates[rnd() % FF_ARRAY_ELEMS(scan_candidates)];
        chosen_perm = perm_candidates[rnd() % FF_ARRAY_ELEMS(perm_candidates)];
    }

    ff_init_scantable_permutation(s->idsp.idct_permutation, chosen_perm);

    if (intra_scantable)
        ff_init_scantable(s->idsp.idct_permutation, &s->intra_scantable,
                          chosen_scan);
    else
        ff_init_scantable(s->idsp.idct_permutation, &s->inter_scantable,
                          chosen_scan);
}
|
|
|
|
/**
 * Set up context fields and coefficients for an H.263 unquantize test.
 *
 * For intra tests the DC coefficient gets a value in 0..511 and the
 * h263_aic/ac_pred fields of s are set; their values are chosen randomly
 * on the first intra call and reused afterwards. If ac_pred is set, all
 * 64 coefficient positions are candidates regardless of
 * last_nonzero_coeff.
 *
 * Each candidate coefficient is left untouched with probability 1/2 and
 * otherwise set to a sign-extended 10-bit random value.
 *
 * @param s                  context to read the scantables from and to
 *                           write h263_aic/ac_pred to (intra only)
 * @param block              coefficient block; the caller is expected to
 *                           have cleared it, as skipped positions are
 *                           not written
 * @param last_nonzero_coeff index (in scan order) of the last candidate
 *                           coefficient
 * @param qscale             unused by this function
 * @param intra              nonzero for an intra test; also used as the
 *                           first scan index of the AC loop, so callers
 *                           pass 0 or 1
 */
static void init_h263_test(MPVContext *const s, int16_t block[64],
                           int last_nonzero_coeff, int qscale, int intra)
{
    const uint8_t *permutation = s->inter_scantable.permutated;

    if (intra) {
        // Chosen once, then kept for all later intra calls.
        static int h263_aic = -1, ac_pred;

        permutation = s->intra_scantable.permutated;
        block[0] = rnd() & 511;
        if (h263_aic < 0) {
            h263_aic = rnd() & 1;
            ac_pred  = rnd() & 1;
        }
        s->h263_aic = h263_aic;
        s->ac_pred  = ac_pred;
        if (s->ac_pred)
            last_nonzero_coeff = 63;
    }

    for (int i = intra; i <= last_nonzero_coeff; ++i) {
        // Unsigned so that the shift below is well-defined for every
        // value rnd() can return (a right shift of a negative signed
        // value would be implementation-defined).
        unsigned random = rnd();

        if (random & 1)
            continue;
        random >>= 1;
        // Select level so that the multiplication fits into 16 bits.
        // FIXME: The FLV and MPEG-4 decoders can have escape values exceeding this.
        block[permutation[i]] = sign_extend(random, 10);
    }
}
|
|
|
|
/**
 * Set up a quantization matrix and coefficients for an MPEG-1/2
 * unquantize test.
 *
 * The intra or inter matrix of s (depending on intra) is filled with
 * random values in 1..254. For intra tests block[0] gets a random
 * int8_t value and the remaining coefficients start at scan index 1;
 * for inter tests they start at scan index 0. Each candidate coefficient
 * is skipped with probability 1/2 and otherwise set to a random value in
 * [-limit, limit], where limit is derived from qscale and the matrix
 * entry and capped at 2047.
 *
 * @param s                  context providing the matrices, q_scale_type
 *                           and the intra scantable (which is used for
 *                           both intra and inter tests)
 * @param block              coefficient block; skipped positions are not
 *                           written
 * @param last_nonzero_coeff index (in scan order) of the last candidate
 *                           coefficient
 * @param qscale             quantizer scale; for MPEG2 it is first mapped
 *                           through ff_mpeg2_non_linear_qscale or doubled,
 *                           depending on s->q_scale_type
 * @param intra              nonzero for an intra test
 * @param type               MPEG1 or MPEG2
 */
static void init_mpeg12_test(MPVContext *const s, int16_t block[64],
                             int last_nonzero_coeff, int qscale, int intra,
                             enum TestType type)
{
    uint16_t *const quant_matrix = intra ? s->intra_matrix : s->inter_matrix;
    const uint8_t *const scan = s->intra_scantable.permutated;
    int first = 0;

    if (type == MPEG2) {
        if (s->q_scale_type)
            qscale = ff_mpeg2_non_linear_qscale[qscale];
        else
            qscale <<= 1;
    }

    for (int k = 0; k < 64; ++k)
        quant_matrix[k] = 1 + rnd() % 254;

    if (intra) {
        block[0] = (int8_t)rnd();
        first = 1;
    }

    for (int pos = first; pos <= last_nonzero_coeff; ++pos) {
        const int idx = scan[pos];
        unsigned bits = rnd();
        int quot, limit;

        if (bits & 1)
            continue;
        bits >>= 1;

        // Select level so that the multiplication does not overflow
        // an int16_t and so that it is within the possible range
        // (-2048..2047). FIXME: It seems that this need not be fulfilled
        // in practice for the MPEG-4 decoder at least.
        quot = INT16_MAX / (qscale * quant_matrix[idx]);
        if (intra)
            limit = FFMIN(quot, 2047);
        else
            limit = FFMIN((quot - 1) / 2, 2047);

        block[idx] = bits % (2 * limit + 1) - limit;
    }
}
|
|
|
|
void checkasm_check_mpegvideo_unquantize(void)
|
|
{
|
|
static const struct {
|
|
const char *name;
|
|
size_t offset;
|
|
int intra, intra_scantable;
|
|
enum TestType type;
|
|
} tests[] = {
|
|
#define TEST(NAME, INTRA, INTRA_SCANTABLE, TYPE) \
|
|
{ .name = #NAME, .offset = offsetof(MPVUnquantDSPContext, NAME), \
|
|
.intra = INTRA, .intra_scantable = INTRA_SCANTABLE, .type = TYPE }
|
|
TEST(dct_unquantize_mpeg1_intra, 1, 1, MPEG1),
|
|
TEST(dct_unquantize_mpeg1_inter, 0, 1, MPEG1),
|
|
TEST(dct_unquantize_mpeg2_intra, 1, 1, MPEG2),
|
|
TEST(dct_unquantize_mpeg2_inter, 0, 1, MPEG2),
|
|
TEST(dct_unquantize_h263_intra, 1, 1, H263),
|
|
TEST(dct_unquantize_h263_inter, 0, 0, H263),
|
|
};
|
|
MPVUnquantDSPContext unquant_dsp_ctx;
|
|
int q_scale_type = rnd() & 1;
|
|
|
|
ff_mpv_unquantize_init(&unquant_dsp_ctx, 1 /* bitexact */, q_scale_type);
|
|
declare_func_emms(AV_CPU_FLAG_MMX, void, MPVContext *s, int16_t *block, int n, int qscale);
|
|
|
|
for (size_t i = 0; i < FF_ARRAY_ELEMS(tests); ++i) {
|
|
void (*func)(MPVContext *s, int16_t *block, int n, int qscale) =
|
|
*(void (**)(MPVContext *, int16_t *, int, int))((char*)&unquant_dsp_ctx + tests[i].offset);
|
|
if (check_func(func, "%s", tests[i].name)) {
|
|
MPVContext new, ref;
|
|
DECLARE_ALIGNED(16, int16_t, block_new)[64];
|
|
DECLARE_ALIGNED(16, int16_t, block_ref)[64];
|
|
static int block_last_index = -1;
|
|
|
|
randomize_struct(MPVContext, &ref);
|
|
|
|
ref.q_scale_type = q_scale_type;
|
|
|
|
init_idct_scantable(&ref, tests[i].intra_scantable);
|
|
|
|
if (block_last_index < 0)
|
|
block_last_index = rnd() % 64;
|
|
|
|
memset(block_ref, 0, sizeof(block_ref));
|
|
|
|
if (tests[i].intra) {
|
|
// Less restricted than real dc_scale values
|
|
ref.y_dc_scale = 1 + rnd() % 64;
|
|
ref.c_dc_scale = 1 + rnd() % 64;
|
|
}
|
|
|
|
static int qscale = 0;
|
|
|
|
if (qscale == 0)
|
|
qscale = 1 + rnd() % 31;
|
|
|
|
if (tests[i].type == H263)
|
|
init_h263_test(&ref, block_ref, block_last_index, qscale,
|
|
tests[i].intra);
|
|
else
|
|
init_mpeg12_test(&ref, block_ref, block_last_index, qscale,
|
|
tests[i].intra, tests[i].type);
|
|
|
|
int n = rnd() % 6;
|
|
ref.block_last_index[n] = block_last_index;
|
|
|
|
memcpy(&new, &ref, sizeof(new));
|
|
memcpy(block_new, block_ref, sizeof(block_new));
|
|
|
|
call_ref(&ref, block_ref, n, qscale);
|
|
call_new(&new, block_new, n, qscale);
|
|
|
|
if (memcmp(&ref, &new, sizeof(new)) || memcmp(block_new, block_ref, sizeof(block_new)))
|
|
fail();
|
|
|
|
bench_new(&new, block_new, n, qscale);
|
|
}
|
|
}
|
|
}
|