mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-23 13:30:06 +00:00
c5d241f06b
The CORE-MATH implementation is correctly rounded (for any rounding mode) and shows better performance than the generic cbrtf. The code was adapted to glibc style and to use the definitions from math_config.h. Benchtest on x86_64 (Ryzen 9 5900X, gcc 14.2.1), aarch64 (M1, gcc 13.2.1), and powerpc (POWER10, gcc 13.2.1). Latency (master / patched / improvement): x86_64 68.6348 / 36.8908 / 46.25%; x86_64v2 67.3418 / 36.6968 / 45.51%; x86_64v3 63.4981 / 32.7859 / 48.37%; aarch64 29.3172 / 12.1496 / 58.56%; power10 18.0845 / 8.8893 / 50.85%; powerpc 18.0859 / 8.79527 / 51.37%. Reciprocal-throughput (master / patched / improvement): x86_64 36.4369 / 13.3565 / 63.34%; x86_64v2 37.3611 / 13.1149 / 64.90%; x86_64v3 31.6024 / 11.2102 / 64.53%; aarch64 18.6866 / 7.3474 / 60.68%; power10 9.4758 / 3.6329 / 61.66%; powerpc 9.58896 / 3.90439 / 59.28%. Signed-off-by: Alexei Sibidanov <sibid@uvic.ca> Signed-off-by: Paul Zimmermann <Paul.Zimmermann@inria.fr> Signed-off-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> |
||
---|---|---|
.. | ||
bits | ||
fpu | ||
nofpu | ||
nptl | ||
__longjmp.S | ||
abort-instr.h | ||
atomic-machine.h | ||
bsd-_setjmp.S | ||
bsd-setjmp.S | ||
configure | ||
configure.ac | ||
dl-machine.h | ||
dl-runtime.h | ||
dl-tls.h | ||
dl-trampoline.S | ||
entry.h | ||
fpu_control.h | ||
gccframe.h | ||
get-rounding-mode.h | ||
Implies | ||
jmpbuf-offsets.h | ||
jmpbuf-unwind.h | ||
ldsodefs.h | ||
libc-tls.c | ||
machine-gmon.h | ||
Makefile | ||
math-tests-trap.h | ||
math-use-builtins-ffs.h | ||
preconfigure | ||
setjmp.S | ||
sfp-machine.h | ||
sotruss-lib.c | ||
stackinfo.h | ||
start.S | ||
sysdep.h | ||
tininess.h | ||
tst-audit.h | ||
utmp-size.h | ||
Versions |