aarch64: revert memcpy optimze for kunpeng to avoid performance degradation

In commit 863d775c48, kunpeng920 is added to default memcpy version,
however, there is performance degradation when the copy size is some large bytes, eg: 100k.
This is the result, tested in glibc-2.28:
             before backport  after backport	 Performance improvement
memcpy_1k      0.005              0.005                 0.00%
memcpy_10k     0.032              0.029                 10.34%
memcpy_100k    0.356              0.429                 -17.02%
memcpy_1m      7.470              11.153                -33.02%

This is the demo
#include "stdio.h"
#include "string.h"
#include "stdlib.h"

char a[1024*1024] = {12};
char b[1024*1024] = {13};
int main(int argc, char *argv[])
{
    int i = atoi(argv[1]);
    int j;
    int size = atoi(argv[2]);

    for (j = 0; j < i; j++)
        memcpy(b, a, size*1024);
    return 0;
}

# gcc -g -O0 memcpy.c -o memcpy
# time taskset -c 10 ./memcpy 100000 1024

Co-authored-by: liqingqing <liqingqing3@huawei.com>
This commit is contained in:
Shuo Wang 2021-01-20 15:20:44 +08:00 committed by Szabolcs Nagy
parent 2682695e5c
commit 28f2ce2772

View File

@ -37,7 +37,7 @@ extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden;
libc_ifunc (__libc_memcpy,
(IS_THUNDERX (midr)
? __memcpy_thunderx
: (IS_FALKOR (midr) || IS_PHECDA (midr) || IS_KUNPENG920 (midr)
: (IS_FALKOR (midr) || IS_PHECDA (midr)
? __memcpy_falkor
: (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
? __memcpy_thunderx2