mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 12:50:49 +08:00
tests/checkasm/vp3dsp: Add test for idct_add, idct_put, idct_dc_add
Due to a discrepancy between the SSE2 and the C versions, coefficients for idct_put and idct_add are restricted to a range not causing overflows. Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -16,8 +16,8 @@
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "checkasm.h"
|
||||
#include "libavutil/intreadwrite.h"
|
||||
@@ -27,15 +27,14 @@
|
||||
|
||||
#define randomize_buffers(buf0, buf1, size) \
|
||||
do { \
|
||||
static_assert(sizeof(buf0[0]) == 1 && sizeof(buf1[0]) == 1, \
|
||||
"Pointer arithmetic needs to be adapted"); \
|
||||
char *b0 = (char*)buf0, *b1 = (char*)buf1; \
|
||||
for (size_t k = 0; k < (size & ~3); k += 4) { \
|
||||
uint32_t r = rnd(); \
|
||||
AV_WN32A(buf0 + k, r); \
|
||||
AV_WN32A(buf1 + k, r); \
|
||||
AV_WN32A(b0 + k, r); \
|
||||
AV_WN32A(b1 + k, r); \
|
||||
} \
|
||||
for (size_t k = size & ~3; k < size; ++k) \
|
||||
buf0[k] = buf1[k] = rnd(); \
|
||||
b0[k] = b1[k] = rnd(); \
|
||||
} while (0)
|
||||
|
||||
static void vp3_check_put_no_rnd_pixels_l2(const VP3DSPContext *const vp3dsp)
|
||||
@@ -83,6 +82,46 @@ static void vp3_check_put_no_rnd_pixels_l2(const VP3DSPContext *const vp3dsp)
|
||||
bench_new(dst_new, src1, src1, stride, h);
|
||||
}
|
||||
|
||||
static void vp3_check_idct(int nb_bits)
|
||||
{
|
||||
enum {
|
||||
MAX_STRIDE = 64,
|
||||
MIN_STRIDE = 16,
|
||||
NB_LINES = 8,
|
||||
WIDTH = 8,
|
||||
BUF_SIZE = MAX_STRIDE * (NB_LINES - 1) + WIDTH,
|
||||
};
|
||||
|
||||
declare_func_emms(AV_CPU_FLAG_MMXEXT, void, uint8_t *dest, ptrdiff_t stride, int16_t *block);
|
||||
|
||||
DECLARE_ALIGNED(16, int16_t, block_new)[64];
|
||||
DECLARE_ALIGNED(16, int16_t, block_ref)[64];
|
||||
DECLARE_ALIGNED(8, uint8_t, dstbuf_new)[BUF_SIZE];
|
||||
DECLARE_ALIGNED(8, uint8_t, dstbuf_ref)[BUF_SIZE];
|
||||
|
||||
ptrdiff_t stride = (rnd() % (MAX_STRIDE / MIN_STRIDE) + 1) * MIN_STRIDE;
|
||||
uint8_t *dst_new = dstbuf_new, *dst_ref = dstbuf_ref;
|
||||
|
||||
if (rnd() & 1) {
|
||||
// Flip stride.
|
||||
dst_new += (NB_LINES - 1) * stride;
|
||||
dst_ref += (NB_LINES - 1) * stride;
|
||||
stride = -stride;
|
||||
}
|
||||
|
||||
randomize_buffers(dstbuf_new, dstbuf_ref, sizeof(dstbuf_ref));
|
||||
for (size_t k = 0; k < FF_ARRAY_ELEMS(block_new); ++k) {
|
||||
int32_t r = (int32_t)rnd() >> (32 - nb_bits);
|
||||
block_new[k] = block_ref[k] = r;
|
||||
}
|
||||
|
||||
call_ref(dst_ref, stride, block_ref);
|
||||
call_new(dst_new, stride, block_new);
|
||||
if (memcmp(dstbuf_new, dstbuf_ref, sizeof(dstbuf_new)) ||
|
||||
memcmp(block_new, block_ref, sizeof(block_new)))
|
||||
fail();
|
||||
bench_new(dst_new, stride, block_new);
|
||||
}
|
||||
|
||||
static void vp3_check_loop_filter(const VP3DSPContext *const vp3dsp)
|
||||
{
|
||||
@@ -160,6 +199,19 @@ void checkasm_check_vp3dsp(void)
|
||||
vp3_check_put_no_rnd_pixels_l2(&vp3dsp);
|
||||
report("put_no_rnd_pixels_l2");
|
||||
|
||||
#define IDCT_TEST(func, mask) \
|
||||
if (check_func(vp3dsp.func, #func)) \
|
||||
vp3_check_idct(mask); \
|
||||
report(#func)
|
||||
IDCT_TEST(idct_dc_add, 16);
|
||||
// FIXME: The Theora specification actually requires using unsaturated
|
||||
// 16-bit arithmetic for its idct. Yet the SSE2 version uses saturated
|
||||
// arithmetic and even the C version seems to forget truncating
|
||||
// intermediate values to 16 bit. For the time being, use a range
|
||||
// that does not trigger overflow.
|
||||
IDCT_TEST(idct_put, 8);
|
||||
IDCT_TEST(idct_add, 8);
|
||||
|
||||
vp3_check_loop_filter(&vp3dsp);
|
||||
report("loop_filter");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user