mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 21:00:41 +08:00
swscale/aarch64: add NEON sws_ops backend
This commit pieces together the previous few commits to implement the NEON backend for sws_ops. In essence, a tool which runs on the target (sws_ops_aarch64) is used to enumerate all the functions that the backend needs to implement. The list it generates is stored in the repository (ops_entries.c). The list from above is used at build time by a code generator tool (ops_asmgen) to implement all the sws_ops functions the NEON backend supports, and generate a lookup function in C to retrieve the assembly function pointers. At runtime, the NEON backend fetches the function pointers to the assembly functions and chains them together in a continuation-passing style design, similar to the x86 backend. The following speedup is observed from legacy swscale to NEON: A520: Overall speedup=3.780x faster, min=0.137x max=91.928x A720: Overall speedup=4.129x faster, min=0.234x max=92.424x And the following from the C sws_ops implementation to NEON: A520: Overall speedup=5.513x faster, min=0.927x max=14.169x A720: Overall speedup=4.786x faster, min=0.585x max=20.157x The slowdowns from legacy to NEON are the same for C/x86. Mostly low bit-depth conversions that did not perform dithering in legacy. The 0.585x outlier from C to NEON is gbrpf32le -> gbrapf32le, which is mostly memcpy with the C implementation. All other conversions are better. Sponsored-by: Sovereign Tech Fund Signed-off-by: Ramiro Polla <ramiro.polla@gmail.com>
This commit is contained in:
@@ -197,7 +197,7 @@ endif
|
||||
clean::
|
||||
$(RM) $(BIN2CEXE) $(CLEANSUFFIXES:%=ffbuild/%)
|
||||
|
||||
%.c %.h %.pc %.ver %.version: TAG = GEN
|
||||
%.c %.h %.S %.pc %.ver %.version: TAG = GEN
|
||||
|
||||
# Dummy rule to stop make trying to rebuild removed or renamed headers
|
||||
%.h %_template.c:
|
||||
@@ -266,7 +266,7 @@ $(TOOLOBJS): | tools
|
||||
|
||||
OUTDIRS := $(OUTDIRS) $(dir $(OBJS) $(HOBJS) $(HOSTOBJS) $(SHLIBOBJS) $(STLIBOBJS) $(TESTOBJS))
|
||||
|
||||
CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.objs *.pc *.ptx *.ptx.gz *.ptx.c *.spv *.spv.gz *.spv.c *.ver *.version *.html.gz *.html.c *.css.min.gz *.css.min *.css.c *$(DEFAULT_X86ASMD).asm *~ *.ilk *.pdb
|
||||
CLEANSUFFIXES = *.d *.gcda *.gcno *.h.c *.ho *.map *.o *.objs *.pc *.ptx *.ptx.gz *.ptx.c *.spv *.spv.gz *.spv.c *.gen.c *.gen.S *.ver *.version *.html.gz *.html.c *.css.min.gz *.css.min *.css.c *$(DEFAULT_X86ASMD).asm *~ *.ilk *.pdb
|
||||
LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a
|
||||
|
||||
define RULES
|
||||
|
||||
Reference in New Issue
Block a user