mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-26 12:41:05 +00:00
cfee7d9cf4
This patch removes the powerpc assembly implementations of fmax/fmin. Based on benchtests, the assembly versions show: $ ./testrun.sh benchtests/bench-fmax "fmax": { "": { "duration": 5.07586e+09, "iterations": 2.01676e+09, "max": 1350.39, "min": 2.073, "mean": 2.51684 }, "qNaN": { "duration": 5.09315e+09, "iterations": 8.4568e+08, "max": 2788, "min": 5.806, "mean": 6.02255 }, "sNaN": { "duration": 5.09073e+09, "iterations": 8.42316e+08, "max": 4215.84, "min": 5.737, "mean": 6.04373 } } And $ ./testrun.sh benchtests/bench-fmin "fmin": { "": { "duration": 5.07711e+09, "iterations": 2.02982e+09, "max": 497.094, "min": 2.073, "mean": 2.50126 }, "qNaN": { "duration": 5.09134e+09, "iterations": 8.46968e+08, "max": 2255.14, "min": 5.807, "mean": 6.01125 }, "sNaN": { "duration": 5.09122e+09, "iterations": 8.4746e+08, "max": 1969.38, "min": 5.729, "mean": 6.00763 } } The default implementation (math/s_f{max,min}_template.c) shows slightly better latency for all cases: $ ./testrun.sh benchtests/bench-fmax "fmax": { "": { "duration": 5.07044e+09, "iterations": 2.38695e+09, "max": 2048.58, "min": 2.073, "mean": 2.12423 }, "qNaN": { "duration": 5.09004e+09, "iterations": 9.45428e+08, "max": 3306.93, "min": 5.138, "mean": 5.38385 }, "sNaN": { "duration": 5.08458e+09, "iterations": 1.15959e+09, "max": 972.008, "min": 3.321, "mean": 4.3848 } } And: $ ./testrun.sh benchtests/bench-fmin "fmin": { "": { "duration": 5.06817e+09, "iterations": 2.3913e+09, "max": 1177.9, "min": 2.073, "mean": 2.11942 }, "qNaN": { "duration": 5.08857e+09, "iterations": 9.45656e+08, "max": 2658.83, "min": 5.09, "mean": 5.38099 }, "sNaN": { "duration": 5.08093e+09, "iterations": 1.16725e+09, "max": 1030.74, "min": 3.323, "mean": 4.3529 } } Both were run with GCC 5.4 (Ubuntu 16 default installation) using default compiler flags on POWER8E 3.4GHz (powerpc64le-linux-gnu). |
||
---|---|---|
.. | ||
405 | ||
440 | ||
464 | ||
476 | ||
970 | ||
a2 | ||
bits | ||
cell | ||
e500/nofpu | ||
fpu | ||
power4 | ||
power5 | ||
power5+ | ||
power6 | ||
power6x | ||
power7 | ||
power8 | ||
power9 | ||
__longjmp-common.S | ||
__longjmp.S | ||
add_n.S | ||
addmul_1.S | ||
atomic-machine.h | ||
backtrace.c | ||
bsd-_setjmp.S | ||
bsd-setjmp.S | ||
bzero.S | ||
compat-ppc-mcount.S | ||
configure | ||
configure.ac | ||
crti.S | ||
crtn.S | ||
dl-dtprocnum.h | ||
dl-irel.h | ||
dl-machine.c | ||
dl-machine.h | ||
dl-start.S | ||
dl-trampoline.S | ||
gprrest0.S | ||
gprrest1.S | ||
gprsave0.S | ||
gprsave1.S | ||
Implies | ||
libgcc-compat.S | ||
lshift.S | ||
Makefile | ||
mcount.c | ||
memset.S | ||
mul_1.S | ||
ppc-mcount.S | ||
register-dump.h | ||
rshift.S | ||
rtld-memset.c | ||
setjmp-common.S | ||
setjmp.S | ||
stackguard-macros.h | ||
start.S | ||
stpcpy.S | ||
strchr.S | ||
strcmp.S | ||
strcpy.S | ||
strlen.S | ||
strncmp.S | ||
sub_n.S | ||
submul_1.S | ||
sysdep.h | ||
tls-macros.h | ||
tst-audit.h | ||
Versions |