2020-07-04 13:35:49 +00:00
|
|
|
/* x86 cache info.
|
2021-01-02 19:32:25 +00:00
|
|
|
Copyright (C) 2020-2021 Free Software Foundation, Inc.
|
2020-07-04 13:35:49 +00:00
|
|
|
This file is part of the GNU C Library.
|
|
|
|
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with the GNU C Library; if not, see
|
|
|
|
<https://www.gnu.org/licenses/>. */
|
|
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
#include <unistd.h>
|
2020-09-18 14:55:14 +00:00
|
|
|
#include <cpuid.h>
|
|
|
|
#include <cpu-features.h>
|
2020-07-04 13:35:49 +00:00
|
|
|
|
2020-09-18 14:55:14 +00:00
|
|
|
#if HAVE_TUNABLES
|
|
|
|
# define TUNABLE_NAMESPACE cpu
|
|
|
|
# include <unistd.h> /* Get STDOUT_FILENO for _dl_printf. */
|
|
|
|
# include <elf/dl-tunables.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if IS_IN (libc)
|
2020-07-04 13:35:49 +00:00
|
|
|
/* Data cache size for use in memory and string routines, typically
|
|
|
|
L1 size, rounded to multiple of 256 bytes. */
|
|
|
|
long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
|
|
|
|
long int __x86_data_cache_size attribute_hidden = 32 * 1024;
|
|
|
|
/* Shared cache size for use in memory and string routines, typically
|
|
|
|
L2 or L3 size, rounded to multiple of 256 bytes. */
|
|
|
|
long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
|
|
|
|
long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
|
|
|
|
|
|
|
|
/* Threshold to use non-temporal stores.  */
|
|
|
|
long int __x86_shared_non_temporal_threshold attribute_hidden;
|
|
|
|
|
|
|
|
/* Threshold to use Enhanced REP MOVSB. */
|
|
|
|
long int __x86_rep_movsb_threshold attribute_hidden = 2048;
|
|
|
|
|
|
|
|
/* Threshold to use Enhanced REP STOSB. */
|
|
|
|
long int __x86_rep_stosb_threshold attribute_hidden = 2048;
|
|
|
|
|
2021-02-02 11:42:14 +00:00
|
|
|
/* Threshold to stop using Enhanced REP MOVSB. */
|
|
|
|
long int __x86_rep_movsb_stop_threshold attribute_hidden;
|
|
|
|
|
x86-64: Add Avoid_Short_Distance_REP_MOVSB
commit 3ec5d83d2a237d39e7fd6ef7a0bc8ac4c171a4a5
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sat Jan 25 14:19:40 2020 -0800
x86-64: Avoid rep movsb with short distance [BZ #27130]
introduced some regressions on Intel processors without Fast Short REP
MOV (FSRM). Add Avoid_Short_Distance_REP_MOVSB to avoid rep movsb with
short distance only on Intel processors with FSRM. bench-memmove-large
on Skylake server shows that cycles of __memmove_evex_unaligned_erms
improves for the following data size:
before after Improvement
length=4127, align1=3, align2=0: 479.38 349.25 27%
length=4223, align1=9, align2=5: 405.62 333.25 18%
length=8223, align1=3, align2=0: 786.12 496.38 37%
length=8319, align1=9, align2=5: 727.50 501.38 31%
length=16415, align1=3, align2=0: 1436.88 840.00 41%
length=16511, align1=9, align2=5: 1375.50 836.38 39%
length=32799, align1=3, align2=0: 2890.00 1860.12 36%
length=32895, align1=9, align2=5: 2891.38 1931.88 33%
2021-07-23 03:26:25 +00:00
|
|
|
/* A bit-wise OR of string/memory requirements for optimal performance,
   e.g. X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB.  These bits
   are used at runtime to tune implementation behavior.  */
|
|
|
|
int __x86_string_control attribute_hidden;
|
|
|
|
|
2020-07-04 13:35:49 +00:00
|
|
|
static void
|
|
|
|
init_cacheinfo (void)
|
|
|
|
{
|
|
|
|
const struct cpu_features *cpu_features = __get_cpu_features ();
|
2020-09-18 14:55:14 +00:00
|
|
|
long int data = cpu_features->data_cache_size;
|
|
|
|
/* Round data cache size to multiple of 256 bytes. */
|
|
|
|
data = data & ~255L;
|
|
|
|
__x86_data_cache_size_half = data / 2;
|
|
|
|
__x86_data_cache_size = data;
|
|
|
|
|
|
|
|
long int shared = cpu_features->shared_cache_size;
|
|
|
|
/* Round shared cache size to multiple of 256 bytes. */
|
|
|
|
shared = shared & ~255L;
|
|
|
|
__x86_shared_cache_size_half = shared / 2;
|
|
|
|
__x86_shared_cache_size = shared;
|
2020-07-04 13:35:49 +00:00
|
|
|
|
|
|
|
__x86_shared_non_temporal_threshold
|
2020-09-18 14:55:14 +00:00
|
|
|
= cpu_features->non_temporal_threshold;
|
2020-07-04 13:35:49 +00:00
|
|
|
|
2020-09-18 14:55:14 +00:00
|
|
|
__x86_rep_movsb_threshold = cpu_features->rep_movsb_threshold;
|
2020-07-04 13:35:49 +00:00
|
|
|
__x86_rep_stosb_threshold = cpu_features->rep_stosb_threshold;
|
2021-02-02 11:42:14 +00:00
|
|
|
__x86_rep_movsb_stop_threshold = cpu_features->rep_movsb_stop_threshold;
|
x86-64: Add Avoid_Short_Distance_REP_MOVSB
commit 3ec5d83d2a237d39e7fd6ef7a0bc8ac4c171a4a5
Author: H.J. Lu <hjl.tools@gmail.com>
Date: Sat Jan 25 14:19:40 2020 -0800
x86-64: Avoid rep movsb with short distance [BZ #27130]
introduced some regressions on Intel processors without Fast Short REP
MOV (FSRM). Add Avoid_Short_Distance_REP_MOVSB to avoid rep movsb with
short distance only on Intel processors with FSRM. bench-memmove-large
on Skylake server shows that cycles of __memmove_evex_unaligned_erms
improves for the following data size:
before after Improvement
length=4127, align1=3, align2=0: 479.38 349.25 27%
length=4223, align1=9, align2=5: 405.62 333.25 18%
length=8223, align1=3, align2=0: 786.12 496.38 37%
length=8319, align1=9, align2=5: 727.50 501.38 31%
length=16415, align1=3, align2=0: 1436.88 840.00 41%
length=16511, align1=9, align2=5: 1375.50 836.38 39%
length=32799, align1=3, align2=0: 2890.00 1860.12 36%
length=32895, align1=9, align2=5: 2891.38 1931.88 33%
2021-07-23 03:26:25 +00:00
|
|
|
|
|
|
|
if (CPU_FEATURES_ARCH_P (cpu_features, Avoid_Short_Distance_REP_MOVSB))
|
|
|
|
__x86_string_control
|
|
|
|
|= X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB;
|
2020-07-04 13:35:49 +00:00
|
|
|
}
|
2020-09-18 14:55:14 +00:00
|
|
|
#endif
|