diff --git a/ChangeLog b/ChangeLog index 60b0364037..5f3df320bc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,259 @@ +2019-09-13 Wilco Dijkstra + + * string/memmem.c (__memmem): Rewrite to improve performance. + +2019-06-12 Wilco Dijkstra + + * string/str-two-way.h (two_way_short_needle): Add inline to avoid + warning. + (two_way_long_needle): Block inlining. + * string/strstr.c (strstr2): Add new function. + (strstr3): Likewise. + (STRSTR): Completely rewrite strstr to improve performance. + +2019-09-13 Wilco Dijkstra + + [BZ #23637] + * string/test-strstr.c (pr23637): New function. + (test_main): Add tests with longer needles. + * string/strcasestr.c (AVAILABLE): Fix readahead distance. + * string/strstr.c (AVAILABLE): Likewise. + +2019-09-13 Rajalakshmi Srinivasaraghavan + + * string/memmem.c: Use memcmp for first match. + +2019-09-13 Wilco Dijkstra + + * string/strcasestr.c (STRCASESTR): Simplify and speedup first match. + * string/strstr.c (AVAILABLE): Likewise. + +2019-09-13 Wilco Dijkstra + + * benchtests/bench-strcasestr.c: Rename __strnlen to strnlen. + * benchtests/bench-strstr.c: Likewise. + * string/memmem.c (FASTSEARCH): Define. + * string/str-two-way.h (two_way_short_needle): Minor cleanups. + Add support for FASTSEARCH. + * string/strcasestr.c (AVAILABLE): Use read-ahead __strnlen. + * string/strstr.c (AVAILABLE): Use read-ahead __strnlen. + (FASTSEARCH): Define. + * string/test-strcasestr.c: Rename __strnlen to strnlen. + * string/test-strstr.c: Likewise. + +2019-09-06 Wilco Dijkstra + + * manual/tunables.texi (glibc.cpu.name): Add ares tunable. + * sysdeps/aarch64/multiarch/memcpy.c (__libc_memcpy): Use + __memcpy_falkor for ares. + * sysdeps/unix/sysv/linux/aarch64/cpu-features.h (IS_ARES): + Add new define. + * sysdeps/unix/sysv/linux/aarch64/cpu-features.c (cpu_list): + Add ares cpu. + +2019-09-06 Siddhesh Poyarekar + + * sysdeps/aarch64/multiarch/memcpy_falkor.S (__memcpy_falkor): + Use vector registers. + +2019-09-06 Siddhesh Poyarekar + + * sysdeps/aarch64/multiarch/memcpy_falkor.S (__memcpy_falkor): + Use multiple registers to copy data in loop tail. + +2019-09-06 Siddhesh Poyarekar + + * sysdeps/aarch64/strncmp.S (strncmp): Use lsr instead of + mov + lsr. + +2019-09-06 Siddhesh Poyarekar + + * sysdeps/aarch64/strncmp.S (strncmp): Use a separate shift + instruction to unbreak builds with binutils 2.26 and older. + +2019-09-06 Siddhesh Poyarekar + + * sysdeps/aarch64/strncmp.S (count): New macro. + (strncmp): Store misaligned length in SRC1 in COUNT. + (mutual_align): Adjust. + (misaligned8): Load dword at a time when it is safe. + +2019-09-06 Siddhesh Poyarekar + + * sysdeps/aarch64/strcmp.S (do_misaligned): Jump back to + do_misaligned, not misaligned8. + +2019-09-06 Siddhesh Poyarekar + + * sysdeps/aarch64/strcmp.S (misaligned8): Compare dword at a + time whenever possible. + +2019-09-06 Siddhesh Poyarekar + + * sysdeps/aarch64/memcmp.S (more16): Fix loop16 branch target. + + * sysdeps/aarch64/memcmp.S: Widen comparison to 16 bytes at a + time. + +2019-09-06 Siddhesh Poyarekar + + * sysdeps/aarch64/memcmp.S: Use L() macro for labels. + +2019-09-06 Wilco Dijkstra + + * sysdeps/aarch64/memcmp.S (memcmp): + Rewrite of optimized memcmp. + +2019-07-12 Adhemerval Zanella + + [BZ #24699] + * posix/tst-mmap-offset.c: Mention BZ #24699. + (do_test_bz21270): Rename to do_test_large_offset and use + mmap64_maximum_offset to check for maximum expected offset value. + * sysdeps/generic/mmap_info.h: New file. + * sysdeps/unix/sysv/linux/mips/mmap_info.h: Likewise. 
+ * sysdeps/unix/sysv/linux/mmap64.c (MMAP_OFF_HIGH_MASK): Define iff + __NR_mmap2 is used. + +2019-07-12 Szabolcs Nagy + + * sysdeps/aarch64/dl-machine.h (elf_machine_lazy_rel): Check + STO_AARCH64_VARIANT_PCS and bind such symbols at load time. + +2019-06-13 Szabolcs Nagy + + * elf/elf.h (STO_AARCH64_VARIANT_PCS): Define. + (DT_AARCH64_VARIANT_PCS): Define. + +2019-05-22 Wilco Dijkstra + + [BZ #24531] + * malloc/malloc.c (MAX_TCACHE_COUNT): New define. + (do_set_tcache_count): Only update if count is small enough. + * manual/tunables.texi (glibc.malloc.tcache_count): Document max value. + +2019-05-15 Andreas Schwab + + [BZ #20568] + * libio/wfileops.c (_IO_wfile_sync): Correct last argument to + __codecvt_do_length. + * libio/Makefile (tests): Add tst-wfile-sync. + ($(objpfx)tst-wfile-sync.out): Depend on $(gen-locales). + * libio/tst-wfile-sync.c: New file. + * libio/tst-wfile-sync.input: New file. + +2019-02-07 Stefan Liebler + + [BZ #24180] + * nptl/pthread_mutex_trylock.c (__pthread_mutex_trylock): + Add compiler barriers and comments. + +2019-02-04 H.J. Lu + + [BZ #24155] + CVE-2019-7309 + * NEWS: Updated for CVE-2019-7309. + * sysdeps/x86_64/memcmp.S: Use RDX_LP for size. Clear the + upper 32 bits of RDX register for x32. Use unsigned Jcc + instructions, instead of signed. + * sysdeps/x86_64/x32/Makefile (tests): Add tst-size_t-memcmp-2. + * sysdeps/x86_64/x32/tst-size_t-memcmp-2.c: New test. + +2019-02-01 H.J. Lu + + [BZ #24097] + CVE-2019-6488 + * sysdeps/x86_64/multiarch/strlen-avx2.S: Use RSI_LP for length. + Clear the upper 32 bits of RSI register. + * sysdeps/x86_64/strlen.S: Use RSI_LP for length. + * sysdeps/x86_64/x32/Makefile (tests): Add tst-size_t-strnlen + and tst-size_t-wcsnlen. + * sysdeps/x86_64/x32/tst-size_t-strnlen.c: New file. + * sysdeps/x86_64/x32/tst-size_t-wcsnlen.c: Likewise. + +2019-02-01 H.J. Lu + + [BZ #24097] + CVE-2019-6488 + * sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S: Use RDX_LP + for length. + * sysdeps/x86_64/multiarch/strcpy-ssse3.S: Likewise. + * sysdeps/x86_64/x32/Makefile (tests): Add tst-size_t-strncpy. + * sysdeps/x86_64/x32/tst-size_t-strncpy.c: New file. + +2019-02-01 H.J. Lu + + [BZ #24097] + CVE-2019-6488 + * sysdeps/x86_64/multiarch/strcmp-sse42.S: Use RDX_LP for length. + * sysdeps/x86_64/strcmp.S: Likewise. + * sysdeps/x86_64/x32/Makefile (tests): Add tst-size_t-strncasecmp, + tst-size_t-strncmp and tst-size_t-wcsncmp. + * sysdeps/x86_64/x32/tst-size_t-strncasecmp.c: New file. + * sysdeps/x86_64/x32/tst-size_t-strncmp.c: Likewise. + * sysdeps/x86_64/x32/tst-size_t-wcsncmp.c: Likewise. + +2019-02-01 H.J. Lu + + [BZ #24097] + CVE-2019-6488 + * sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S: Use + RDX_LP for length. Clear the upper 32 bits of RDX register. + * sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S: Likewise. + * sysdeps/x86_64/x32/Makefile (tests): Add tst-size_t-wmemset. + * sysdeps/x86_64/x32/tst-size_t-memset.c: New file. + * sysdeps/x86_64/x32/tst-size_t-wmemset.c: Likewise. + +2019-02-01 H.J. Lu + + [BZ #24097] + CVE-2019-6488 + * sysdeps/x86_64/memrchr.S: Use RDX_LP for length. + * sysdeps/x86_64/multiarch/memrchr-avx2.S: Likewise. + * sysdeps/x86_64/x32/Makefile (tests): Add tst-size_t-memrchr. + * sysdeps/x86_64/x32/tst-size_t-memrchr.c: New file. + +2019-02-01 H.J. Lu + + [BZ #24097] + CVE-2019-6488 + * sysdeps/x86_64/multiarch/memcpy-ssse3-back.S: Use RDX_LP for + length. Clear the upper 32 bits of RDX register. + * sysdeps/x86_64/multiarch/memcpy-ssse3.S: Likewise. 
+ * sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S: + Likewise. + * sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S: + Likewise. + * sysdeps/x86_64/x32/Makefile (tests): Add tst-size_t-memcpy. + tst-size_t-wmemchr. + * sysdeps/x86_64/x32/tst-size_t-memcpy.c: New file. + +2019-02-01 H.J. Lu + + [BZ #24097] + CVE-2019-6488 + * sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S: Use RDX_LP for + length. Clear the upper 32 bits of RDX register. + * sysdeps/x86_64/multiarch/memcmp-sse4.S: Likewise. + * sysdeps/x86_64/multiarch/memcmp-ssse3.S: Likewise. + * sysdeps/x86_64/x32/Makefile (tests): Add tst-size_t-memcmp and + tst-size_t-wmemcmp. + * sysdeps/x86_64/x32/tst-size_t-memcmp.c: New file. + * sysdeps/x86_64/x32/tst-size_t-wmemcmp.c: Likewise. + +2019-02-01 H.J. Lu + + [BZ #24097] + CVE-2019-6488 + * sysdeps/x86_64/memchr.S: Use RDX_LP for length. Clear the + upper 32 bits of RDX register. + * sysdeps/x86_64/multiarch/memchr-avx2.S: Likewise. + * sysdeps/x86_64/x32/Makefile (tests): Add tst-size_t-memchr and + tst-size_t-wmemchr. + * sysdeps/x86_64/x32/test-size_t.h: New file. + * sysdeps/x86_64/x32/tst-size_t-memchr.c: Likewise. + * sysdeps/x86_64/x32/tst-size_t-wmemchr.c: Likewise. + 2019-01-11 Gabriel F. T. Gomes * sysdeps/powerpc/fpu/libm-test-ulps: Regenerate. diff --git a/NEWS b/NEWS index 49895f81bd..3ccaae3968 100644 --- a/NEWS +++ b/NEWS @@ -86,6 +86,26 @@ Security related changes: denial of service due to resource exhaustion when processing getaddrinfo calls with crafted host names. Reported by Guido Vranken. + CVE-2019-6488: On x32, the size_t parameter may be passed in the lower + 32 bits of a 64-bit register with with non-zero upper 32 bit. When it + happened, accessing the 32-bit size_t value as the full 64-bit register + in the assembly string/memory functions would cause a buffer overflow. + Reported by H.J. Lu. + + CVE-2019-7309: x86-64 memcmp used signed Jcc instructions to check + size. For x86-64, memcmp on an object size larger than SSIZE_MAX + has undefined behavior. On x32, the size_t argument may be passed + in the lower 32 bits of the 64-bit RDX register with non-zero upper + 32 bits. When it happened with the sign bit of RDX register set, + memcmp gave the wrong result since it treated the size argument as + zero. Reported by H.J. Lu. + + CVE-2019-19126: ld.so failed to ignore the LD_PREFER_MAP_32BIT_EXEC + environment variable during program execution after a security + transition, allowing local attackers to restrict the possible mapping + addresses for loaded libraries and thus bypass ASLR for a setuid + program. Reported by Marcin Koƛcielnicki. + The following bugs are resolved with this release: [16750] ldd: Never run file directly. 
@@ -93,6 +113,7 @@ The following bugs are resolved with this release: [17956] crypt: Use NSPR header files in addition to NSS header files [20419] elf: Fix stack overflow with huge PT_NOTE segment [20532] getaddrinfo: More robust handling of dlopen failures + [20568] Fix crash in _IO_wfile_sync [21242] assert: Suppress pedantic warning caused by statement expression [21265] x86-64: Use fxsave/xsave/xsavec in _dl_runtime_resolve [21269] i386 sigaction sa_restorer handling is wrong @@ -165,6 +186,11 @@ The following bugs are resolved with this release: [23927] Linux if_nametoindex() does not close descriptor (CVE-2018-19591) [24018] gettext may return NULL [24027] malloc: Integer overflow in realloc + [24097] Can't use 64-bit register for size_t in assembly codes for x32 (CVE-2019-6488) + [24155] x32 memcmp can treat positive length as 0 (if sign bit in RDX is set) (CVE-2019-7309) + [25203] libio: Disable vtable validation for pre-2.1 interposed handles + [25204] Ignore LD_PREFER_MAP_32BIT_EXEC for SUID programs + Version 2.26 diff --git a/benchtests/bench-strcasestr.c b/benchtests/bench-strcasestr.c index 4e6f480c84..9a031b3064 100644 --- a/benchtests/bench-strcasestr.c +++ b/benchtests/bench-strcasestr.c @@ -24,6 +24,7 @@ #define STRCASESTR simple_strcasestr #define NO_ALIAS #define __strncasecmp strncasecmp +#define __strnlen strnlen #include "../string/strcasestr.c" diff --git a/benchtests/bench-strstr.c b/benchtests/bench-strstr.c index e63659f136..2fa64118f4 100644 --- a/benchtests/bench-strstr.c +++ b/benchtests/bench-strstr.c @@ -22,6 +22,9 @@ #define STRSTR simple_strstr +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(X) +#define __strnlen strnlen #include "../string/strstr.c" diff --git a/elf/elf.h b/elf/elf.h index 3900b4c9f0..f80506c562 100644 --- a/elf/elf.h +++ b/elf/elf.h @@ -2759,6 +2759,13 @@ enum #define R_AARCH64_TLSDESC 1031 /* TLS Descriptor. */ #define R_AARCH64_IRELATIVE 1032 /* STT_GNU_IFUNC relocation. */ +/* AArch64 specific values for the Dyn d_tag field. */ +#define DT_AARCH64_VARIANT_PCS (DT_LOPROC + 5) +#define DT_AARCH64_NUM 6 + +/* AArch64 specific values for the st_other field. */ +#define STO_AARCH64_VARIANT_PCS 0x80 + /* ARM relocs. 
*/ #define R_ARM_NONE 0 /* No reloc */ diff --git a/libio/Makefile b/libio/Makefile index 74bf5279f1..79158f3ebd 100644 --- a/libio/Makefile +++ b/libio/Makefile @@ -62,7 +62,7 @@ tests = tst_swprintf tst_wprintf tst_swscanf tst_wscanf tst_getwc tst_putwc \ bug-memstream1 bug-wmemstream1 \ tst-setvbuf1 tst-popen1 tst-fgetwc bug-wsetpos tst-fseek \ tst-fwrite-error tst-ftell-partial-wide tst-ftell-active-handler \ - tst-ftell-append tst-fputws + tst-ftell-append tst-fputws tst-wfile-sync tests-internal = tst-vtables tst-vtables-interposed @@ -202,6 +202,7 @@ $(objpfx)tst-ungetwc1.out: $(gen-locales) $(objpfx)tst-ungetwc2.out: $(gen-locales) $(objpfx)tst-widetext.out: $(gen-locales) $(objpfx)tst_wprintf2.out: $(gen-locales) +$(objpfx)tst-wfile-sync.out: $(gen-locales) endif $(objpfx)test-freopen.out: test-freopen.sh $(objpfx)test-freopen diff --git a/libio/oldstdfiles.c b/libio/oldstdfiles.c index bed7bceca2..82ba986367 100644 --- a/libio/oldstdfiles.c +++ b/libio/oldstdfiles.c @@ -87,6 +87,11 @@ _IO_check_libio (void) stdout->_vtable_offset = stderr->_vtable_offset = ((int) sizeof (struct _IO_FILE) - (int) sizeof (struct _IO_FILE_complete)); + + if (_IO_stdin_.vtable != &_IO_old_file_jumps + || _IO_stdout_.vtable != &_IO_old_file_jumps + || _IO_stderr_.vtable != &_IO_old_file_jumps) + IO_set_accept_foreign_vtables (&_IO_vtable_check); } } diff --git a/libio/tst-wfile-sync.c b/libio/tst-wfile-sync.c new file mode 100644 index 0000000000..618682064d --- /dev/null +++ b/libio/tst-wfile-sync.c @@ -0,0 +1,39 @@ +/* Test that _IO_wfile_sync does not crash (bug 20568). + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include + +static int +do_test (void) +{ + TEST_VERIFY_EXIT (setlocale (LC_ALL, "de_DE.UTF-8") != NULL); + /* Fill the stdio buffer and advance the read pointer. */ + TEST_VERIFY_EXIT (fgetwc (stdin) != WEOF); + /* This calls _IO_wfile_sync, it should not crash. */ + TEST_VERIFY_EXIT (setvbuf (stdin, NULL, _IONBF, 0) == 0); + /* Verify that the external file offset has been synchronized. */ + TEST_COMPARE (xlseek (0, 0, SEEK_CUR), 1); + + return 0; +} + +#include diff --git a/libio/tst-wfile-sync.input b/libio/tst-wfile-sync.input new file mode 100644 index 0000000000..12d0958f7a --- /dev/null +++ b/libio/tst-wfile-sync.input @@ -0,0 +1 @@ +This is a test of _IO_wfile_sync. diff --git a/libio/wfileops.c b/libio/wfileops.c index fb94f45040..727e1b23b9 100644 --- a/libio/wfileops.c +++ b/libio/wfileops.c @@ -526,11 +526,12 @@ _IO_wfile_sync (_IO_FILE *fp) generate the wide characters up to the current reading position. 
*/ int nread; - + size_t wnread = (fp->_wide_data->_IO_read_ptr + - fp->_wide_data->_IO_read_base); fp->_wide_data->_IO_state = fp->_wide_data->_IO_last_state; nread = (*cv->__codecvt_do_length) (cv, &fp->_wide_data->_IO_state, fp->_IO_read_base, - fp->_IO_read_end, delta); + fp->_IO_read_end, wnread); fp->_IO_read_ptr = fp->_IO_read_base + nread; delta = -(fp->_IO_read_end - fp->_IO_read_base - nread); } diff --git a/malloc/malloc.c b/malloc/malloc.c index 49e8ed69c2..9896230b21 100644 --- a/malloc/malloc.c +++ b/malloc/malloc.c @@ -2924,6 +2924,8 @@ typedef struct tcache_perthread_struct tcache_entry *entries[TCACHE_MAX_BINS]; } tcache_perthread_struct; +#define MAX_TCACHE_COUNT 127 /* Maximum value of counts[] entries. */ + static __thread bool tcache_shutting_down = false; static __thread tcache_perthread_struct *tcache = NULL; @@ -5097,8 +5099,11 @@ static inline int __always_inline do_set_tcache_count (size_t value) { - LIBC_PROBE (memory_tunable_tcache_count, 2, value, mp_.tcache_count); - mp_.tcache_count = value; + if (value <= MAX_TCACHE_COUNT) + { + LIBC_PROBE (memory_tunable_tcache_count, 2, value, mp_.tcache_count); + mp_.tcache_count = value; + } return 1; } diff --git a/manual/tunables.texi b/manual/tunables.texi index b09e3fe791..b230cde556 100644 --- a/manual/tunables.texi +++ b/manual/tunables.texi @@ -187,8 +187,8 @@ per-thread cache. The default (and maximum) value is 1032 bytes on @deftp Tunable glibc.malloc.tcache_count The maximum number of chunks of each size to cache. The default is 7. -There is no upper limit, other than available system memory. If set -to zero, the per-thread cache is effectively disabled. +The upper limit is 127. If set to zero, the per-thread cache is effectively +disabled. The approximate maximum overhead of the per-thread cache is thus equal to the number of bins times the chunk count in each bin times the size @@ -253,7 +253,7 @@ This tunable is specific to i386 and x86-64. @deftp Tunable glibc.tune.cpu The @code{glibc.tune.cpu=xxx} tunable allows the user to tell @theglibc{} to assume that the CPU is @code{xxx} where xxx may have one of these values: -@code{generic}, @code{falkor}, @code{thunderxt88}. +@code{generic}, @code{falkor}, @code{thunderxt88}, @code{ares}. This tunable is specific to aarch64. @end deftp diff --git a/nptl/pthread_mutex_trylock.c b/nptl/pthread_mutex_trylock.c index ec7da61c73..d478eca21b 100644 --- a/nptl/pthread_mutex_trylock.c +++ b/nptl/pthread_mutex_trylock.c @@ -92,6 +92,9 @@ __pthread_mutex_trylock (pthread_mutex_t *mutex) case PTHREAD_MUTEX_ROBUST_ADAPTIVE_NP: THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, &mutex->__data.__list.__next); + /* We need to set op_pending before starting the operation. Also + see comments at ENQUEUE_MUTEX. */ + __asm ("" ::: "memory"); oldval = mutex->__data.__lock; do @@ -117,7 +120,12 @@ __pthread_mutex_trylock (pthread_mutex_t *mutex) /* But it is inconsistent unless marked otherwise. */ mutex->__data.__owner = PTHREAD_MUTEX_INCONSISTENT; + /* We must not enqueue the mutex before we have acquired it. + Also see comments at ENQUEUE_MUTEX. */ + __asm ("" ::: "memory"); ENQUEUE_MUTEX (mutex); + /* We need to clear op_pending after we enqueue the mutex. */ + __asm ("" ::: "memory"); THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL); /* Note that we deliberately exist here. 
If we fall @@ -133,6 +141,8 @@ __pthread_mutex_trylock (pthread_mutex_t *mutex) int kind = PTHREAD_MUTEX_TYPE (mutex); if (kind == PTHREAD_MUTEX_ROBUST_ERRORCHECK_NP) { + /* We do not need to ensure ordering wrt another memory + access. Also see comments at ENQUEUE_MUTEX. */ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL); return EDEADLK; @@ -140,6 +150,8 @@ __pthread_mutex_trylock (pthread_mutex_t *mutex) if (kind == PTHREAD_MUTEX_ROBUST_RECURSIVE_NP) { + /* We do not need to ensure ordering wrt another memory + access. */ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL); @@ -158,6 +170,9 @@ __pthread_mutex_trylock (pthread_mutex_t *mutex) id, 0); if (oldval != 0 && (oldval & FUTEX_OWNER_DIED) == 0) { + /* We haven't acquired the lock as it is already acquired by + another owner. We do not need to ensure ordering wrt another + memory access. */ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL); return EBUSY; @@ -171,13 +186,20 @@ __pthread_mutex_trylock (pthread_mutex_t *mutex) if (oldval == id) lll_unlock (mutex->__data.__lock, PTHREAD_ROBUST_MUTEX_PSHARED (mutex)); + /* FIXME This violates the mutex destruction requirements. See + __pthread_mutex_unlock_full. */ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL); return ENOTRECOVERABLE; } } while ((oldval & FUTEX_OWNER_DIED) != 0); + /* We must not enqueue the mutex before we have acquired it. + Also see comments at ENQUEUE_MUTEX. */ + __asm ("" ::: "memory"); ENQUEUE_MUTEX (mutex); + /* We need to clear op_pending after we enqueue the mutex. */ + __asm ("" ::: "memory"); THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL); mutex->__data.__owner = id; @@ -203,10 +225,15 @@ __pthread_mutex_trylock (pthread_mutex_t *mutex) int robust = mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP; if (robust) - /* Note: robust PI futexes are signaled by setting bit 0. */ - THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, - (void *) (((uintptr_t) &mutex->__data.__list.__next) - | 1)); + { + /* Note: robust PI futexes are signaled by setting bit 0. */ + THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, + (void *) (((uintptr_t) &mutex->__data.__list.__next) + | 1)); + /* We need to set op_pending before starting the operation. Also + see comments at ENQUEUE_MUTEX. */ + __asm ("" ::: "memory"); + } oldval = mutex->__data.__lock; @@ -215,12 +242,16 @@ __pthread_mutex_trylock (pthread_mutex_t *mutex) { if (kind == PTHREAD_MUTEX_ERRORCHECK_NP) { + /* We do not need to ensure ordering wrt another memory + access. */ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL); return EDEADLK; } if (kind == PTHREAD_MUTEX_RECURSIVE_NP) { + /* We do not need to ensure ordering wrt another memory + access. */ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL); /* Just bump the counter. */ @@ -242,6 +273,9 @@ __pthread_mutex_trylock (pthread_mutex_t *mutex) { if ((oldval & FUTEX_OWNER_DIED) == 0) { + /* We haven't acquired the lock as it is already acquired by + another owner. We do not need to ensure ordering wrt another + memory access. */ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL); return EBUSY; @@ -262,6 +296,9 @@ __pthread_mutex_trylock (pthread_mutex_t *mutex) if (INTERNAL_SYSCALL_ERROR_P (e, __err) && INTERNAL_SYSCALL_ERRNO (e, __err) == EWOULDBLOCK) { + /* The kernel has not yet finished the mutex owner death. + We do not need to ensure ordering wrt another memory + access. 
*/ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL); return EBUSY; @@ -279,7 +316,12 @@ __pthread_mutex_trylock (pthread_mutex_t *mutex) /* But it is inconsistent unless marked otherwise. */ mutex->__data.__owner = PTHREAD_MUTEX_INCONSISTENT; + /* We must not enqueue the mutex before we have acquired it. + Also see comments at ENQUEUE_MUTEX. */ + __asm ("" ::: "memory"); ENQUEUE_MUTEX (mutex); + /* We need to clear op_pending after we enqueue the mutex. */ + __asm ("" ::: "memory"); THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL); /* Note that we deliberately exit here. If we fall @@ -302,13 +344,20 @@ __pthread_mutex_trylock (pthread_mutex_t *mutex) PTHREAD_ROBUST_MUTEX_PSHARED (mutex)), 0, 0); + /* To the kernel, this will be visible after the kernel has + acquired the mutex in the syscall. */ THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL); return ENOTRECOVERABLE; } if (robust) { + /* We must not enqueue the mutex before we have acquired it. + Also see comments at ENQUEUE_MUTEX. */ + __asm ("" ::: "memory"); ENQUEUE_MUTEX_PI (mutex); + /* We need to clear op_pending after we enqueue the mutex. */ + __asm ("" ::: "memory"); THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL); } diff --git a/posix/tst-mmap-offset.c b/posix/tst-mmap-offset.c index 5bb88aab10..cfd82484f6 100644 --- a/posix/tst-mmap-offset.c +++ b/posix/tst-mmap-offset.c @@ -1,4 +1,4 @@ -/* BZ #18877 and #21270 mmap offset test. +/* BZ #18877, BZ #21270, and BZ #24699 mmap offset test. Copyright (C) 2015-2017 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -76,7 +77,7 @@ do_test_bz18877 (void) /* Check if invalid offset are handled correctly by mmap. */ static int -do_test_bz21270 (void) +do_test_large_offset (void) { /* For architectures with sizeof (off_t) < sizeof (off64_t) mmap is implemented with __SYS_mmap2 syscall and the offset is represented in @@ -90,7 +91,7 @@ do_test_bz21270 (void) const size_t length = 4096; void *addr = mmap64 (NULL, length, prot, flags, fd, offset); - if (sizeof (off_t) < sizeof (off64_t)) + if (mmap64_maximum_offset (page_shift) < UINT64_MAX) { if ((addr != MAP_FAILED) && (errno != EINVAL)) FAIL_RET ("mmap succeed"); @@ -110,7 +111,7 @@ do_test (void) int ret = 0; ret += do_test_bz18877 (); - ret += do_test_bz21270 (); + ret += do_test_large_offset (); return ret; } diff --git a/string/memmem.c b/string/memmem.c index 54fca4966d..fba7fe33f7 100644 --- a/string/memmem.c +++ b/string/memmem.c @@ -15,67 +15,115 @@ License along with the GNU C Library; if not, see . */ -/* This particular implementation was written by Eric Blake, 2008. */ - #ifndef _LIBC # include #endif -/* Specification of memmem. */ #include #ifndef _LIBC -# define __builtin_expect(expr, val) (expr) # define __memmem memmem #endif #define RETURN_TYPE void * #define AVAILABLE(h, h_l, j, n_l) ((j) <= (h_l) - (n_l)) +#define FASTSEARCH(S,C,N) (void*) memchr ((void *)(S), (C), (N)) #include "str-two-way.h" #undef memmem -/* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK - if NEEDLE_LEN is 0, otherwise NULL if NEEDLE is not found in - HAYSTACK. */ +/* Hash character pairs so a small shift table can be used. All bits of + p[0] are included, but not all bits from p[-1]. So if two equal hashes + match on p[-1], p[0] matches too. Hash collisions are harmless and result + in smaller shifts. 
*/ +#define hash2(p) (((size_t)(p)[0] - ((size_t)(p)[-1] << 3)) % sizeof (shift)) + +/* Fast memmem algorithm with guaranteed linear-time performance. + Small needles up to size 2 use a dedicated linear search. Longer needles + up to size 256 use a novel modified Horspool algorithm. It hashes pairs + of characters to quickly skip past mismatches. The main search loop only + exits if the last 2 characters match, avoiding unnecessary calls to memcmp + and allowing for a larger skip if there is no match. A self-adapting + filtering check is used to quickly detect mismatches in long needles. + By limiting the needle length to 256, the shift table can be reduced to 8 + bits per entry, lowering preprocessing overhead and minimizing cache effects. + The limit also implies worst-case performance is linear. + Needles larger than 256 characters use the linear-time Two-Way algorithm. */ void * -__memmem (const void *haystack_start, size_t haystack_len, - const void *needle_start, size_t needle_len) +__memmem (const void *haystack, size_t hs_len, + const void *needle, size_t ne_len) { - /* Abstract memory is considered to be an array of 'unsigned char' values, - not an array of 'char' values. See ISO C 99 section 6.2.6.1. */ - const unsigned char *haystack = (const unsigned char *) haystack_start; - const unsigned char *needle = (const unsigned char *) needle_start; + const unsigned char *hs = (const unsigned char *) haystack; + const unsigned char *ne = (const unsigned char *) needle; - if (needle_len == 0) - /* The first occurrence of the empty string is deemed to occur at - the beginning of the string. */ - return (void *) haystack; + if (ne_len == 0) + return (void *) hs; + if (ne_len == 1) + return (void *) memchr (hs, ne[0], hs_len); - /* Sanity check, otherwise the loop might search through the whole - memory. */ - if (__glibc_unlikely (haystack_len < needle_len)) + /* Ensure haystack length is >= needle length. */ + if (hs_len < ne_len) return NULL; - /* Use optimizations in memchr when possible, to reduce the search - size of haystack using a linear algorithm with a smaller - coefficient. However, avoid memchr for long needles, since we - can often achieve sublinear performance. */ - if (needle_len < LONG_NEEDLE_THRESHOLD) + const unsigned char *end = hs + hs_len - ne_len; + + if (ne_len == 2) { - haystack = memchr (haystack, *needle, haystack_len); - if (!haystack || __builtin_expect (needle_len == 1, 0)) - return (void *) haystack; - haystack_len -= haystack - (const unsigned char *) haystack_start; - if (haystack_len < needle_len) - return NULL; - return two_way_short_needle (haystack, haystack_len, needle, needle_len); + uint32_t nw = ne[0] << 16 | ne[1], hw = hs[0] << 16 | hs[1]; + for (hs++; hs <= end && hw != nw; ) + hw = hw << 16 | *++hs; + return hw == nw ? (void *)hs - 1 : NULL; } - else - return two_way_long_needle (haystack, haystack_len, needle, needle_len); + + /* Use Two-Way algorithm for very long needles. */ + if (__builtin_expect (ne_len > 256, 0)) + return two_way_long_needle (hs, hs_len, ne, ne_len); + + uint8_t shift[256]; + size_t tmp, shift1; + size_t m1 = ne_len - 1; + size_t offset = 0; + + memset (shift, 0, sizeof (shift)); + for (int i = 1; i < m1; i++) + shift[hash2 (ne + i)] = i; + /* Shift1 is the amount we can skip after matching the hash of the + needle end but not the full needle. */ + shift1 = m1 - shift[hash2 (ne + m1)]; + shift[hash2 (ne + m1)] = m1; + + for ( ; hs <= end; ) + { + /* Skip past character pairs not in the needle. 
*/ + do + { + hs += m1; + tmp = shift[hash2 (hs)]; + } + while (tmp == 0 && hs <= end); + + /* If the match is not at the end of the needle, shift to the end + and continue until we match the hash of the needle end. */ + hs -= tmp; + if (tmp < m1) + continue; + + /* Hash of the last 2 characters matches. If the needle is long, + try to quickly filter out mismatches. */ + if (m1 < 15 || memcmp (hs + offset, ne + offset, 8) == 0) + { + if (memcmp (hs, ne, m1) == 0) + return (void *) hs; + + /* Adjust filter offset when it doesn't find the mismatch. */ + offset = (offset >= 8 ? offset : m1) - 8; + } + + /* Skip based on matching the hash of the needle end. */ + hs += shift1; + } + return NULL; } libc_hidden_def (__memmem) weak_alias (__memmem, memmem) libc_hidden_weak (memmem) - -#undef LONG_NEEDLE_THRESHOLD diff --git a/string/str-two-way.h b/string/str-two-way.h index 599c867ffd..30aca30c40 100644 --- a/string/str-two-way.h +++ b/string/str-two-way.h @@ -221,7 +221,7 @@ critical_factorization (const unsigned char *needle, size_t needle_len, most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching. If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching. */ -static RETURN_TYPE +static inline RETURN_TYPE two_way_short_needle (const unsigned char *haystack, size_t haystack_len, const unsigned char *needle, size_t needle_len) { @@ -281,50 +281,50 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len, } else { - const unsigned char *phaystack = &haystack[suffix]; + const unsigned char *phaystack; /* The comparison always starts from needle[suffix], so cache it and use an optimized first-character loop. */ unsigned char needle_suffix = CANON_ELEMENT (needle[suffix]); -#if CHECK_EOL - /* We start matching from the SUFFIX'th element, so make sure we - don't hit '\0' before that. */ - if (haystack_len < suffix + 1 - && !AVAILABLE (haystack, haystack_len, 0, suffix + 1)) - return NULL; -#endif - /* The two halves of needle are distinct; no extra memory is required, and any mismatch results in a maximal shift. */ period = MAX (suffix, needle_len - suffix) + 1; j = 0; - while (1 -#if !CHECK_EOL - && AVAILABLE (haystack, haystack_len, j, needle_len) -#endif - ) + while (AVAILABLE (haystack, haystack_len, j, needle_len)) { unsigned char haystack_char; const unsigned char *pneedle; - /* TODO: The first-character loop can be sped up by adapting - longword-at-a-time implementation of memchr/strchr. */ - if (needle_suffix + phaystack = &haystack[suffix + j]; + +#ifdef FASTSEARCH + if (*phaystack++ != needle_suffix) + { + phaystack = FASTSEARCH (phaystack, needle_suffix, + haystack_len - needle_len - j); + if (phaystack == NULL) + goto ret0; + j = phaystack - &haystack[suffix]; + phaystack++; + } +#else + while (needle_suffix != (haystack_char = CANON_ELEMENT (*phaystack++))) { RET0_IF_0 (haystack_char); -#if !CHECK_EOL +# if !CHECK_EOL ++j; -#endif - continue; + if (!AVAILABLE (haystack, haystack_len, j, needle_len)) + goto ret0; +# endif } -#if CHECK_EOL +# if CHECK_EOL /* Calculate J if it wasn't kept up-to-date in the first-character loop. */ j = phaystack - &haystack[suffix] - 1; +# endif #endif - /* Scan for matches in right half. */ i = suffix + 1; pneedle = &needle[i]; @@ -338,6 +338,11 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len, } ++i; } +#if CHECK_EOL + /* Update minimal length of haystack. 
*/ + if (phaystack > haystack + haystack_len) + haystack_len = phaystack - haystack; +#endif if (needle_len <= i) { /* Scan for matches in left half. */ @@ -360,13 +365,6 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len, } else j += i - suffix + 1; - -#if CHECK_EOL - if (!AVAILABLE (haystack, haystack_len, j, needle_len)) - break; -#endif - - phaystack = &haystack[suffix + j]; } } ret0: __attribute__ ((unused)) @@ -384,8 +382,11 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len, and sublinear performance O(HAYSTACK_LEN / NEEDLE_LEN) is possible. If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching, and - sublinear performance is not possible. */ -static RETURN_TYPE + sublinear performance is not possible. + + Since this function is large and complex, block inlining to avoid + slowing down the common case of small needles. */ +__attribute__((noinline)) static RETURN_TYPE two_way_long_needle (const unsigned char *haystack, size_t haystack_len, const unsigned char *needle, size_t needle_len) { diff --git a/string/strcasestr.c b/string/strcasestr.c index 2acf003155..19ea1d4bbf 100644 --- a/string/strcasestr.c +++ b/string/strcasestr.c @@ -37,8 +37,9 @@ /* Two-Way algorithm. */ #define RETURN_TYPE char * #define AVAILABLE(h, h_l, j, n_l) \ - (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \ - && ((h_l) = (j) + (n_l))) + (((j) + (n_l) <= (h_l)) \ + || ((h_l) += __strnlen ((void*)((h) + (h_l)), (n_l) + 512), \ + (j) + (n_l) <= (h_l))) #define CHECK_EOL (1) #define RET0_IF_0(a) if (!a) goto ret0 #define CANON_ELEMENT(c) TOLOWER (c) @@ -58,31 +59,22 @@ case-insensitive comparison. This function gives unspecified results in multibyte locales. */ char * -STRCASESTR (const char *haystack_start, const char *needle_start) +STRCASESTR (const char *haystack, const char *needle) { - const char *haystack = haystack_start; - const char *needle = needle_start; size_t needle_len; /* Length of NEEDLE. */ size_t haystack_len; /* Known minimum length of HAYSTACK. */ - bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */ - /* Determine length of NEEDLE, and in the process, make sure - HAYSTACK is at least as long (no point processing all of a long - NEEDLE if HAYSTACK is too short). */ - while (*haystack && *needle) - { - ok &= (TOLOWER ((unsigned char) *haystack) - == TOLOWER ((unsigned char) *needle)); - haystack++; - needle++; - } - if (*needle) + /* Handle empty NEEDLE special case. */ + if (needle[0] == '\0') + return (char *) haystack; + + /* Ensure HAYSTACK length is at least as long as NEEDLE length. + Since a match may occur early on in a huge HAYSTACK, use strnlen + and read ahead a few cachelines for improved performance. */ + needle_len = strlen (needle); + haystack_len = __strnlen (haystack, needle_len + 256); + if (haystack_len < needle_len) return NULL; - if (ok) - return (char *) haystack_start; - needle_len = needle - needle_start; - haystack = haystack_start + 1; - haystack_len = needle_len - 1; /* Perform the search. Abstract memory is considered to be an array of 'unsigned char' values, not an array of 'char' values. 
See @@ -90,10 +82,10 @@ STRCASESTR (const char *haystack_start, const char *needle_start) if (needle_len < LONG_NEEDLE_THRESHOLD) return two_way_short_needle ((const unsigned char *) haystack, haystack_len, - (const unsigned char *) needle_start, + (const unsigned char *) needle, needle_len); return two_way_long_needle ((const unsigned char *) haystack, haystack_len, - (const unsigned char *) needle_start, + (const unsigned char *) needle, needle_len); } diff --git a/string/strstr.c b/string/strstr.c index 88f1d5de36..4d72ffbfc9 100644 --- a/string/strstr.c +++ b/string/strstr.c @@ -16,27 +16,17 @@ License along with the GNU C Library; if not, see . */ -/* This particular implementation was written by Eric Blake, 2008. */ - #ifndef _LIBC # include #endif -/* Specification of strstr. */ #include -#include - -#ifndef _LIBC -# define __builtin_expect(expr, val) (expr) -#endif - #define RETURN_TYPE char * #define AVAILABLE(h, h_l, j, n_l) \ - (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \ - && ((h_l) = (j) + (n_l))) -#define CHECK_EOL (1) -#define RET0_IF_0(a) if (!a) goto ret0 + (((j) + (n_l) <= (h_l)) \ + || ((h_l) += __strnlen ((void*)((h) + (h_l)), (n_l) + 512), \ + (j) + (n_l) <= (h_l))) #include "str-two-way.h" #undef strstr @@ -45,48 +35,128 @@ #define STRSTR strstr #endif -/* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK - if NEEDLE is empty, otherwise NULL if NEEDLE is not found in - HAYSTACK. */ -char * -STRSTR (const char *haystack_start, const char *needle_start) +static inline char * +strstr2 (const unsigned char *hs, const unsigned char *ne) { - const char *haystack = haystack_start; - const char *needle = needle_start; - size_t needle_len; /* Length of NEEDLE. */ - size_t haystack_len; /* Known minimum length of HAYSTACK. */ - bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */ + uint32_t h1 = (ne[0] << 16) | ne[1]; + uint32_t h2 = 0; + for (int c = hs[0]; h1 != h2 && c != 0; c = *++hs) + h2 = (h2 << 16) | c; + return h1 == h2 ? (char *)hs - 2 : NULL; +} - /* Determine length of NEEDLE, and in the process, make sure - HAYSTACK is at least as long (no point processing all of a long - NEEDLE if HAYSTACK is too short). */ - while (*haystack && *needle) - ok &= *haystack++ == *needle++; - if (*needle) +static inline char * +strstr3 (const unsigned char *hs, const unsigned char *ne) +{ + uint32_t h1 = ((uint32_t)ne[0] << 24) | (ne[1] << 16) | (ne[2] << 8); + uint32_t h2 = 0; + for (int c = hs[0]; h1 != h2 && c != 0; c = *++hs) + h2 = (h2 | c) << 8; + return h1 == h2 ? (char *)hs - 3 : NULL; +} + +/* Hash character pairs so a small shift table can be used. All bits of + p[0] are included, but not all bits from p[-1]. So if two equal hashes + match on p[-1], p[0] matches too. Hash collisions are harmless and result + in smaller shifts. */ +#define hash2(p) (((size_t)(p)[0] - ((size_t)(p)[-1] << 3)) % sizeof (shift)) + +/* Fast strstr algorithm with guaranteed linear-time performance. + Small needles up to size 3 use a dedicated linear search. Longer needles + up to size 256 use a novel modified Horspool algorithm. It hashes pairs + of characters to quickly skip past mismatches. The main search loop only + exits if the last 2 characters match, avoiding unnecessary calls to memcmp + and allowing for a larger skip if there is no match. A self-adapting + filtering check is used to quickly detect mismatches in long needles. 
+ By limiting the needle length to 256, the shift table can be reduced to 8 + bits per entry, lowering preprocessing overhead and minimizing cache effects. + The limit also implies worst-case performance is linear. + Needles larger than 256 characters use the linear-time Two-Way algorithm. */ +char * +STRSTR (const char *haystack, const char *needle) +{ + const unsigned char *hs = (const unsigned char *) haystack; + const unsigned char *ne = (const unsigned char *) needle; + + /* Handle short needle special cases first. */ + if (ne[0] == '\0') + return (char *)hs; + hs = (const unsigned char *)strchr ((const char*)hs, ne[0]); + if (hs == NULL || ne[1] == '\0') + return (char*)hs; + if (ne[2] == '\0') + return strstr2 (hs, ne); + if (ne[3] == '\0') + return strstr3 (hs, ne); + + /* Ensure haystack length is at least as long as needle length. + Since a match may occur early on in a huge haystack, use strnlen + and read ahead a few cachelines for improved performance. */ + size_t ne_len = strlen ((const char*)ne); + size_t hs_len = __strnlen ((const char*)hs, ne_len | 512); + if (hs_len < ne_len) return NULL; - if (ok) - return (char *) haystack_start; - /* Reduce the size of haystack using strchr, since it has a smaller - linear coefficient than the Two-Way algorithm. */ - needle_len = needle - needle_start; - haystack = strchr (haystack_start + 1, *needle_start); - if (!haystack || __builtin_expect (needle_len == 1, 0)) - return (char *) haystack; - needle -= needle_len; - haystack_len = (haystack > haystack_start + needle_len ? 1 - : needle_len + haystack_start - haystack); + /* Check whether we have a match. This improves performance since we + avoid initialization overheads. */ + if (memcmp (hs, ne, ne_len) == 0) + return (char *) hs; - /* Perform the search. Abstract memory is considered to be an array - of 'unsigned char' values, not an array of 'char' values. See - ISO C 99 section 6.2.6.1. */ - if (needle_len < LONG_NEEDLE_THRESHOLD) - return two_way_short_needle ((const unsigned char *) haystack, - haystack_len, - (const unsigned char *) needle, needle_len); - return two_way_long_needle ((const unsigned char *) haystack, haystack_len, - (const unsigned char *) needle, needle_len); + /* Use Two-Way algorithm for very long needles. */ + if (__glibc_unlikely (ne_len > 256)) + return two_way_long_needle (hs, hs_len, ne, ne_len); + + const unsigned char *end = hs + hs_len - ne_len; + uint8_t shift[256]; + size_t tmp, shift1; + size_t m1 = ne_len - 1; + size_t offset = 0; + + /* Initialize bad character shift hash table. */ + memset (shift, 0, sizeof (shift)); + for (int i = 1; i < m1; i++) + shift[hash2 (ne + i)] = i; + /* Shift1 is the amount we can skip after matching the hash of the + needle end but not the full needle. */ + shift1 = m1 - shift[hash2 (ne + m1)]; + shift[hash2 (ne + m1)] = m1; + + while (1) + { + if (__glibc_unlikely (hs > end)) + { + end += __strnlen ((const char*)end + m1 + 1, 2048); + if (hs > end) + return NULL; + } + + /* Skip past character pairs not in the needle. */ + do + { + hs += m1; + tmp = shift[hash2 (hs)]; + } + while (tmp == 0 && hs <= end); + + /* If the match is not at the end of the needle, shift to the end + and continue until we match the hash of the needle end. */ + hs -= tmp; + if (tmp < m1) + continue; + + /* Hash of the last 2 characters matches. If the needle is long, + try to quickly filter out mismatches. 
*/ + if (m1 < 15 || memcmp (hs + offset, ne + offset, 8) == 0) + { + if (memcmp (hs, ne, m1) == 0) + return (void *) hs; + + /* Adjust filter offset when it doesn't find the mismatch. */ + offset = (offset >= 8 ? offset : m1) - 8; + } + + /* Skip based on matching the hash of the needle end. */ + hs += shift1; + } } libc_hidden_builtin_def (strstr) - -#undef LONG_NEEDLE_THRESHOLD diff --git a/string/test-strcasestr.c b/string/test-strcasestr.c index abb3916732..78e03da7c4 100644 --- a/string/test-strcasestr.c +++ b/string/test-strcasestr.c @@ -25,6 +25,7 @@ #define STRCASESTR simple_strcasestr #define NO_ALIAS #define __strncasecmp strncasecmp +#define __strnlen strnlen #include "strcasestr.c" diff --git a/string/test-strstr.c b/string/test-strstr.c index 33f221149a..5bce73b0bd 100644 --- a/string/test-strstr.c +++ b/string/test-strstr.c @@ -24,6 +24,7 @@ #define STRSTR simple_strstr #define libc_hidden_builtin_def(arg) /* nothing */ +#define __strnlen strnlen #include "strstr.c" @@ -150,6 +151,32 @@ check2 (void) } } +#define N 1024 + +static void +pr23637 (void) +{ + char *h = (char*) buf1; + char *n = (char*) buf2; + + for (int i = 0; i < N; i++) + { + n[i] = 'x'; + h[i] = ' '; + h[i + N] = 'x'; + } + + n[N] = '\0'; + h[N * 2] = '\0'; + + /* Ensure we don't match at the first 'x'. */ + h[0] = 'x'; + + char *exp_result = stupid_strstr (h, n); + FOR_EACH_IMPL (impl, 0) + check_result (impl, h, n, exp_result); +} + static int test_main (void) { @@ -157,6 +184,7 @@ test_main (void) check1 (); check2 (); + pr23637 (); printf ("%23s", ""); FOR_EACH_IMPL (impl, 0) @@ -201,6 +229,9 @@ test_main (void) do_test (15, 9, hlen, klen, 1); do_test (15, 15, hlen, klen, 0); do_test (15, 15, hlen, klen, 1); + + do_test (15, 15, hlen + klen * 4, klen * 4, 0); + do_test (15, 15, hlen + klen * 4, klen * 4, 1); } do_test (0, 0, page_size - 1, 16, 0); diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h index 3fb00e6e2d..9ffc2e4c9d 100644 --- a/sysdeps/aarch64/dl-machine.h +++ b/sysdeps/aarch64/dl-machine.h @@ -391,10 +391,37 @@ elf_machine_lazy_rel (struct link_map *map, /* Check for unexpected PLT reloc type. */ if (__builtin_expect (r_type == AARCH64_R(JUMP_SLOT), 1)) { - if (__builtin_expect (map->l_mach.plt, 0) == 0) - *reloc_addr += l_addr; - else - *reloc_addr = map->l_mach.plt; + if (map->l_mach.plt == 0) + { + /* Prelinking. */ + *reloc_addr += l_addr; + return; + } + + if (1) /* DT_AARCH64_VARIANT_PCS is not available, so always check. */ + { + /* Check the symbol table for variant PCS symbols. */ + const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info); + const ElfW (Sym) *symtab = + (const void *)D_PTR (map, l_info[DT_SYMTAB]); + const ElfW (Sym) *sym = &symtab[symndx]; + if (__glibc_unlikely (sym->st_other & STO_AARCH64_VARIANT_PCS)) + { + /* Avoid lazy resolution of variant PCS symbols. 
*/ + const struct r_found_version *version = NULL; + if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL) + { + const ElfW (Half) *vernum = + (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]); + version = &map->l_versions[vernum[symndx] & 0x7fff]; + } + elf_machine_rela (map, reloc, sym, version, reloc_addr, + skip_ifunc); + return; + } + } + + *reloc_addr = map->l_mach.plt; } else if (__builtin_expect (r_type == AARCH64_R(TLSDESC), 1)) { diff --git a/sysdeps/aarch64/memcmp.S b/sysdeps/aarch64/memcmp.S index 4cfcb89297..a741e7b17f 100644 --- a/sysdeps/aarch64/memcmp.S +++ b/sysdeps/aarch64/memcmp.S @@ -22,132 +22,132 @@ /* Assumptions: * - * ARMv8-a, AArch64 + * ARMv8-a, AArch64, unaligned accesses. */ /* Parameters and result. */ #define src1 x0 #define src2 x1 #define limit x2 -#define result x0 +#define result w0 /* Internal variables. */ #define data1 x3 #define data1w w3 -#define data2 x4 -#define data2w w4 -#define has_nul x5 -#define diff x6 -#define endloop x7 -#define tmp1 x8 -#define tmp2 x9 -#define tmp3 x10 -#define pos x11 -#define limit_wd x12 -#define mask x13 +#define data1h x4 +#define data2 x5 +#define data2w w5 +#define data2h x6 +#define tmp1 x7 +#define tmp2 x8 ENTRY_ALIGN (memcmp, 6) DELOUSE (0) DELOUSE (1) DELOUSE (2) - cbz limit, L(ret0) - eor tmp1, src1, src2 - tst tmp1, #7 - b.ne L(misaligned8) - ands tmp1, src1, #7 - b.ne L(mutual_align) - add limit_wd, limit, #7 - lsr limit_wd, limit_wd, #3 - /* Start of performance-critical section -- one 64B cache line. */ -L(loop_aligned): - ldr data1, [src1], #8 - ldr data2, [src2], #8 -L(start_realigned): - subs limit_wd, limit_wd, #1 - eor diff, data1, data2 /* Non-zero if differences found. */ - csinv endloop, diff, xzr, ne /* Last Dword or differences. */ - cbz endloop, L(loop_aligned) - /* End of performance-critical section -- one 64B cache line. */ - /* Not reached the limit, must have found a diff. */ - cbnz limit_wd, L(not_limit) + subs limit, limit, 8 + b.lo L(less8) - /* Limit % 8 == 0 => all bytes significant. */ - ands limit, limit, #7 - b.eq L(not_limit) + ldr data1, [src1], 8 + ldr data2, [src2], 8 + cmp data1, data2 + b.ne L(return) - lsl limit, limit, #3 /* Bits -> bytes. */ - mov mask, #~0 -#ifdef __AARCH64EB__ - lsr mask, mask, limit -#else - lsl mask, mask, limit -#endif - bic data1, data1, mask - bic data2, data2, mask + subs limit, limit, 8 + b.gt L(more16) - orr diff, diff, mask -L(not_limit): + ldr data1, [src1, limit] + ldr data2, [src2, limit] + b L(return) -#ifndef __AARCH64EB__ - rev diff, diff +L(more16): + ldr data1, [src1], 8 + ldr data2, [src2], 8 + cmp data1, data2 + bne L(return) + + /* Jump directly to comparing the last 16 bytes for 32 byte (or less) + strings. */ + subs limit, limit, 16 + b.ls L(last_bytes) + + /* We overlap loads between 0-32 bytes at either side of SRC1 when we + try to align, so limit it only to strings larger than 128 bytes. */ + cmp limit, 96 + b.ls L(loop16) + + /* Align src1 and adjust src2 with bytes not yet done. */ + and tmp1, src1, 15 + add limit, limit, tmp1 + sub src1, src1, tmp1 + sub src2, src2, tmp1 + + /* Loop performing 16 bytes per iteration using aligned src1. + Limit is pre-decremented by 16 and must be larger than zero. + Exit if <= 16 bytes left to do or if the data is not equal. 
*/ + .p2align 4 +L(loop16): + ldp data1, data1h, [src1], 16 + ldp data2, data2h, [src2], 16 + subs limit, limit, 16 + ccmp data1, data2, 0, hi + ccmp data1h, data2h, 0, eq + b.eq L(loop16) + + cmp data1, data2 + bne L(return) + mov data1, data1h + mov data2, data2h + cmp data1, data2 + bne L(return) + + /* Compare last 1-16 bytes using unaligned access. */ +L(last_bytes): + add src1, src1, limit + add src2, src2, limit + ldp data1, data1h, [src1] + ldp data2, data2h, [src2] + cmp data1, data2 + bne L(return) + mov data1, data1h + mov data2, data2h + cmp data1, data2 + + /* Compare data bytes and set return value to 0, -1 or 1. */ +L(return): +#ifndef __AARCH64EB__ rev data1, data1 rev data2, data2 #endif - /* The MS-non-zero bit of DIFF marks either the first bit - that is different, or the end of the significant data. - Shifting left now will bring the critical information into the - top bits. */ - clz pos, diff - lsl data1, data1, pos - lsl data2, data2, pos - /* But we need to zero-extend (char is unsigned) the value and then - perform a signed 32-bit subtraction. */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 - RET + cmp data1, data2 +L(ret_eq): + cset result, ne + cneg result, result, lo + ret -L(mutual_align): - /* Sources are mutually aligned, but are not currently at an - alignment boundary. Round down the addresses and then mask off - the bytes that precede the start point. */ - bic src1, src1, #7 - bic src2, src2, #7 - add limit, limit, tmp1 /* Adjust the limit for the extra. */ - lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ - ldr data1, [src1], #8 - neg tmp1, tmp1 /* Bits to alignment -64. */ - ldr data2, [src2], #8 - mov tmp2, #~0 -#ifdef __AARCH64EB__ - /* Big-endian. Early bytes are at MSB. */ - lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ -#else - /* Little-endian. Early bytes are at LSB. */ - lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ -#endif - add limit_wd, limit, #7 - orr data1, data1, tmp2 - orr data2, data2, tmp2 - lsr limit_wd, limit_wd, #3 - b L(start_realigned) + .p2align 4 + /* Compare up to 8 bytes. Limit is [-8..-1]. */ +L(less8): + adds limit, limit, 4 + b.lo L(less4) + ldr data1w, [src1], 4 + ldr data2w, [src2], 4 + cmp data1w, data2w + b.ne L(return) + sub limit, limit, 4 +L(less4): + adds limit, limit, 4 + beq L(ret_eq) +L(byte_loop): + ldrb data1w, [src1], 1 + ldrb data2w, [src2], 1 + subs limit, limit, 1 + ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */ + b.eq L(byte_loop) + sub result, data1w, data2w + ret -L(ret0): - mov result, #0 - RET - - .p2align 6 -L(misaligned8): - sub limit, limit, #1 -1: - /* Perhaps we can do better than this. */ - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - subs limit, limit, #1 - ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ - b.eq 1b - sub result, data1, data2 - RET END (memcmp) #undef bcmp weak_alias (memcmp, bcmp) diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c index b395df1c63..ee4d78ea1d 100644 --- a/sysdeps/aarch64/multiarch/memcpy.c +++ b/sysdeps/aarch64/multiarch/memcpy.c @@ -35,7 +35,7 @@ extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden; libc_ifunc (__libc_memcpy, (IS_THUNDERX (midr) ? __memcpy_thunderx - : (IS_FALKOR (midr) + : (IS_FALKOR (midr) || IS_ARES (midr) ? 
__memcpy_falkor : __memcpy_generic))); diff --git a/sysdeps/aarch64/multiarch/memcpy_falkor.S b/sysdeps/aarch64/multiarch/memcpy_falkor.S index dea4f225ee..9cde8dcbd6 100644 --- a/sysdeps/aarch64/multiarch/memcpy_falkor.S +++ b/sysdeps/aarch64/multiarch/memcpy_falkor.S @@ -29,11 +29,19 @@ #define dst x3 #define srcend x4 #define dstend x5 -#define A_l x6 -#define A_lw w6 -#define A_h x7 -#define A_hw w7 #define tmp1 x14 +#define A_x x6 +#define B_x x7 +#define A_w w6 +#define B_w w7 + +#define A_q q0 +#define B_q q1 +#define C_q q2 +#define D_q q3 +#define E_q q4 +#define F_q q5 +#define G_q q6 /* Copies are split into 3 main cases: @@ -53,9 +61,9 @@ bumping up the small copies up to 32 bytes allows us to do that without cost and also allows us to reduce the size of the prep code before loop64. - All copies are done only via two registers r6 and r7. This is to ensure - that all loads hit a single hardware prefetcher which can get correctly - trained to prefetch a single stream. + The copy loop uses only one register q0. This is to ensure that all loads + hit a single hardware prefetcher which can get correctly trained to prefetch + a single stream. The non-temporal stores help optimize cache utilization. */ @@ -66,29 +74,29 @@ ENTRY_ALIGN (__memcpy_falkor, 6) add srcend, src, count add dstend, dstin, count b.ls L(copy32) - ldp A_l, A_h, [src] + ldr A_q, [src] cmp count, 128 - stp A_l, A_h, [dstin] + str A_q, [dstin] b.hi L(copy_long) /* Medium copies: 33..128 bytes. */ sub tmp1, count, 1 - ldp A_l, A_h, [src, 16] - stp A_l, A_h, [dstin, 16] + ldr A_q, [src, 16] + ldr B_q, [srcend, -32] + ldr C_q, [srcend, -16] tbz tmp1, 6, 1f - ldp A_l, A_h, [src, 32] - stp A_l, A_h, [dstin, 32] - ldp A_l, A_h, [src, 48] - stp A_l, A_h, [dstin, 48] - ldp A_l, A_h, [srcend, -64] - stp A_l, A_h, [dstend, -64] - ldp A_l, A_h, [srcend, -48] - stp A_l, A_h, [dstend, -48] + ldr D_q, [src, 32] + ldr E_q, [src, 48] + str D_q, [dstin, 32] + str E_q, [dstin, 48] + ldr F_q, [srcend, -64] + ldr G_q, [srcend, -48] + str F_q, [dstend, -64] + str G_q, [dstend, -48] 1: - ldp A_l, A_h, [srcend, -32] - stp A_l, A_h, [dstend, -32] - ldp A_l, A_h, [srcend, -16] - stp A_l, A_h, [dstend, -16] + str A_q, [dstin, 16] + str B_q, [dstend, -32] + str C_q, [dstend, -16] ret .p2align 4 @@ -97,44 +105,44 @@ L(copy32): /* 16-32 */ cmp count, 16 b.lo 1f - ldp A_l, A_h, [src] - stp A_l, A_h, [dstin] - ldp A_l, A_h, [srcend, -16] - stp A_l, A_h, [dstend, -16] + ldr A_q, [src] + ldr B_q, [srcend, -16] + str A_q, [dstin] + str B_q, [dstend, -16] ret .p2align 4 1: /* 8-15 */ tbz count, 3, 1f - ldr A_l, [src] - str A_l, [dstin] - ldr A_l, [srcend, -8] - str A_l, [dstend, -8] + ldr A_x, [src] + ldr B_x, [srcend, -8] + str A_x, [dstin] + str B_x, [dstend, -8] ret .p2align 4 1: /* 4-7 */ tbz count, 2, 1f - ldr A_lw, [src] - str A_lw, [dstin] - ldr A_lw, [srcend, -4] - str A_lw, [dstend, -4] + ldr A_w, [src] + ldr B_w, [srcend, -4] + str A_w, [dstin] + str B_w, [dstend, -4] ret .p2align 4 1: /* 2-3 */ tbz count, 1, 1f - ldrh A_lw, [src] - strh A_lw, [dstin] - ldrh A_lw, [srcend, -2] - strh A_lw, [dstend, -2] + ldrh A_w, [src] + ldrh B_w, [srcend, -2] + strh A_w, [dstin] + strh B_w, [dstend, -2] ret .p2align 4 1: /* 0-1 */ tbz count, 0, 1f - ldrb A_lw, [src] - strb A_lw, [dstin] + ldrb A_w, [src] + strb A_w, [dstin] 1: ret @@ -153,30 +161,29 @@ L(copy_long): add count, count, tmp1 L(loop64): - ldp A_l, A_h, [src, 16]! - stnp A_l, A_h, [dst, 16] - ldp A_l, A_h, [src, 16]! + ldr A_q, [src, 16]! + str A_q, [dst, 16] + ldr A_q, [src, 16]! 
subs count, count, 64 - stnp A_l, A_h, [dst, 32] - ldp A_l, A_h, [src, 16]! - stnp A_l, A_h, [dst, 48] - ldp A_l, A_h, [src, 16]! - stnp A_l, A_h, [dst, 64] - add dst, dst, 64 + str A_q, [dst, 32] + ldr A_q, [src, 16]! + str A_q, [dst, 48] + ldr A_q, [src, 16]! + str A_q, [dst, 64]! b.hi L(loop64) /* Write the last full set of 64 bytes. The remainder is at most 64 bytes, so it is safe to always copy 64 bytes from the end even if there is just 1 byte left. */ L(last64): - ldp A_l, A_h, [srcend, -64] - stnp A_l, A_h, [dstend, -64] - ldp A_l, A_h, [srcend, -48] - stnp A_l, A_h, [dstend, -48] - ldp A_l, A_h, [srcend, -32] - stnp A_l, A_h, [dstend, -32] - ldp A_l, A_h, [srcend, -16] - stnp A_l, A_h, [dstend, -16] + ldr E_q, [srcend, -64] + str E_q, [dstend, -64] + ldr D_q, [srcend, -48] + str D_q, [dstend, -48] + ldr C_q, [srcend, -32] + str C_q, [dstend, -32] + ldr B_q, [srcend, -16] + str B_q, [dstend, -16] ret END (__memcpy_falkor) diff --git a/sysdeps/aarch64/strcmp.S b/sysdeps/aarch64/strcmp.S index e99d6625b7..7eed82cee7 100644 --- a/sysdeps/aarch64/strcmp.S +++ b/sysdeps/aarch64/strcmp.S @@ -72,6 +72,7 @@ L(start_realigned): cbz syndrome, L(loop_aligned) /* End of performance-critical section -- one 64B cache line. */ +L(end): #ifndef __AARCH64EB__ rev syndrome, syndrome rev data1, data1 @@ -145,12 +146,38 @@ L(mutual_align): b L(start_realigned) L(misaligned8): - /* We can do better than this. */ + /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always + checking to make sure that we don't access beyond page boundary in + SRC2. */ + tst src1, #7 + b.eq L(loop_misaligned) +L(do_misaligned): ldrb data1w, [src1], #1 ldrb data2w, [src2], #1 cmp data1w, #1 ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ - b.eq L(misaligned8) + b.ne L(done) + tst src1, #7 + b.ne L(do_misaligned) + +L(loop_misaligned): + /* Test if we are within the last dword of the end of a 4K page. If + yes then jump back to the misaligned loop to copy a byte at a time. */ + and tmp1, src2, #0xff8 + eor tmp1, tmp1, #0xff8 + cbz tmp1, L(do_misaligned) + ldr data1, [src1], #8 + ldr data2, [src2], #8 + + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + eor diff, data1, data2 /* Non-zero if differences found. */ + bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ + orr syndrome, diff, has_nul + cbz syndrome, L(loop_misaligned) + b L(end) + +L(done): sub result, data1, data2 RET END(strcmp) diff --git a/sysdeps/aarch64/strncmp.S b/sysdeps/aarch64/strncmp.S index 3e4d88a5d7..fc1f633348 100644 --- a/sysdeps/aarch64/strncmp.S +++ b/sysdeps/aarch64/strncmp.S @@ -49,6 +49,7 @@ #define limit_wd x13 #define mask x14 #define endloop x15 +#define count mask ENTRY_ALIGN_AND_PAD (strncmp, 6, 7) DELOUSE (0) @@ -58,9 +59,9 @@ ENTRY_ALIGN_AND_PAD (strncmp, 6, 7) eor tmp1, src1, src2 mov zeroones, #REP8_01 tst tmp1, #7 + and count, src1, #7 b.ne L(misaligned8) - ands tmp1, src1, #7 - b.ne L(mutual_align) + cbnz count, L(mutual_align) /* Calculate the number of full and partial words -1. */ sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */ @@ -165,43 +166,107 @@ L(mutual_align): bic src1, src1, #7 bic src2, src2, #7 ldr data1, [src1], #8 - neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */ + neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */ ldr data2, [src2], #8 mov tmp2, #~0 sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ #ifdef __AARCH64EB__ /* Big-endian. Early bytes are at MSB. 
*/ - lsl tmp2, tmp2, tmp3 /* Shift (tmp1 & 63). */ + lsl tmp2, tmp2, tmp3 /* Shift (count & 63). */ #else /* Little-endian. Early bytes are at LSB. */ - lsr tmp2, tmp2, tmp3 /* Shift (tmp1 & 63). */ + lsr tmp2, tmp2, tmp3 /* Shift (count & 63). */ #endif and tmp3, limit_wd, #7 lsr limit_wd, limit_wd, #3 /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */ - add limit, limit, tmp1 - add tmp3, tmp3, tmp1 + add limit, limit, count + add tmp3, tmp3, count orr data1, data1, tmp2 orr data2, data2, tmp2 add limit_wd, limit_wd, tmp3, lsr #3 b L(start_realigned) -L(ret0): - mov result, #0 - RET - .p2align 6 + /* Don't bother with dwords for up to 16 bytes. */ L(misaligned8): - sub limit, limit, #1 -1: + cmp limit, #16 + b.hs L(try_misaligned_words) + +L(byte_loop): /* Perhaps we can do better than this. */ ldrb data1w, [src1], #1 ldrb data2w, [src2], #1 subs limit, limit, #1 - ccmp data1w, #1, #0, cs /* NZCV = 0b0000. */ + ccmp data1w, #1, #0, hi /* NZCV = 0b0000. */ ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ - b.eq 1b + b.eq L(byte_loop) +L(done): sub result, data1, data2 RET + + /* Align the SRC1 to a dword by doing a bytewise compare and then do + the dword loop. */ +L(try_misaligned_words): + lsr limit_wd, limit, #3 + cbz count, L(do_misaligned) + + neg count, count + and count, count, #7 + sub limit, limit, count + lsr limit_wd, limit, #3 + +L(page_end_loop): + ldrb data1w, [src1], #1 + ldrb data2w, [src2], #1 + cmp data1w, #1 + ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ + b.ne L(done) + subs count, count, #1 + b.hi L(page_end_loop) + +L(do_misaligned): + /* Prepare ourselves for the next page crossing. Unlike the aligned + loop, we fetch 1 less dword because we risk crossing bounds on + SRC2. */ + mov count, #8 + subs limit_wd, limit_wd, #1 + b.lo L(done_loop) +L(loop_misaligned): + and tmp2, src2, #0xff8 + eor tmp2, tmp2, #0xff8 + cbz tmp2, L(page_end_loop) + + ldr data1, [src1], #8 + ldr data2, [src2], #8 + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + eor diff, data1, data2 /* Non-zero if differences found. */ + bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ + ccmp diff, #0, #0, eq + b.ne L(not_limit) + subs limit_wd, limit_wd, #1 + b.pl L(loop_misaligned) + +L(done_loop): + /* We found a difference or a NULL before the limit was reached. */ + and limit, limit, #7 + cbz limit, L(not_limit) + /* Read the last word. */ + sub src1, src1, 8 + sub src2, src2, 8 + ldr data1, [src1, limit] + ldr data2, [src2, limit] + sub tmp1, data1, zeroones + orr tmp2, data1, #REP8_7f + eor diff, data1, data2 /* Non-zero if differences found. */ + bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ + ccmp diff, #0, #0, eq + b.ne L(not_limit) + +L(ret0): + mov result, #0 + RET + END (strncmp) libc_hidden_builtin_def (strncmp) diff --git a/sysdeps/generic/mmap_info.h b/sysdeps/generic/mmap_info.h new file mode 100644 index 0000000000..b3087df2d3 --- /dev/null +++ b/sysdeps/generic/mmap_info.h @@ -0,0 +1,16 @@ +/* As default architectures with sizeof (off_t) < sizeof (off64_t) the mmap is + implemented with __SYS_mmap2 syscall and the offset is represented in + multiples of page size. For offset larger than + '1 << (page_shift + 8 * sizeof (off_t))' (that is, 1<<44 on system with + page size of 4096 bytes) the system call silently truncates the offset. + For this case, glibc mmap implementation returns EINVAL. */ + +/* Return the maximum value expected as offset argument in mmap64 call. 
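For example, with 4 KiB pages (page_shift == 12) and a 32-bit off_t the helper below returns (1 << 44) - 1, i.e. 0xfffffffffff. An illustrative caller-side sketch, not part of this patch (offset is a hypothetical local variable); __NR_mmap2 would silently truncate anything larger, so glibc's mmap64 fails with EINVAL instead:

   if ((uint64_t) offset > mmap64_maximum_offset (12))
     {
       __set_errno (EINVAL);
       return MAP_FAILED;
     }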
*/ +static inline uint64_t +mmap64_maximum_offset (long int page_shift) +{ + if (sizeof (off_t) < sizeof (off64_t)) + return (UINT64_C(1) << (page_shift + (8 * sizeof (off_t)))) - 1; + else + return UINT64_MAX; +} diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c index 0c7e13f4fa..50297bc409 100644 --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c @@ -30,6 +30,7 @@ struct cpu_list static struct cpu_list cpu_list[] = { {"falkor", 0x510FC000}, {"thunderxt88", 0x430F0A10}, + {"ares", 0x411FD0C0}, {"generic", 0x0} }; diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h index 73cb53da9a..d2ad5c63b9 100644 --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h +++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h @@ -44,6 +44,9 @@ #define IS_FALKOR(midr) (MIDR_IMPLEMENTOR(midr) == 'Q' \ && MIDR_PARTNUM(midr) == 0xc00) +#define IS_ARES(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ + && MIDR_PARTNUM(midr) == 0xd0c) + struct cpu_features { uint64_t midr_el1; diff --git a/sysdeps/unix/sysv/linux/mips/Makefile b/sysdeps/unix/sysv/linux/mips/Makefile index bca11d39e0..a58e6954a7 100644 --- a/sysdeps/unix/sysv/linux/mips/Makefile +++ b/sysdeps/unix/sysv/linux/mips/Makefile @@ -99,14 +99,25 @@ sysdep-dl-routines += dl-static sysdep_routines += dl-vdso endif - -# Supporting non-executable stacks on MIPS requires changes to both -# the Linux kernel and glibc. See -# and -# . +# If the compiler doesn't use GNU.stack note, +# this test is expected to fail. +ifneq ($(mips-has-gnustack),yes) test-xfail-check-execstack = yes endif +endif ifeq ($(subdir),stdlib) gen-as-const-headers += ucontext_i.sym endif + +ifeq ($(mips-force-execstack),yes) +CFLAGS-.o += -Wa,-execstack +CFLAGS-.os += -Wa,-execstack +CFLAGS-.op += -Wa,-execstack +CFLAGS-.oS += -Wa,-execstack + +ASFLAGS-.o += -Wa,-execstack +ASFLAGS-.os += -Wa,-execstack +ASFLAGS-.op += -Wa,-execstack +ASFLAGS-.oS += -Wa,-execstack +endif diff --git a/sysdeps/unix/sysv/linux/mips/configure b/sysdeps/unix/sysv/linux/mips/configure index a5513fad48..2ec86a3121 100644 --- a/sysdeps/unix/sysv/linux/mips/configure +++ b/sysdeps/unix/sysv/linux/mips/configure @@ -475,3 +475,44 @@ if test -z "$arch_minimum_kernel"; then arch_minimum_kernel=4.5.0 fi fi + +# Check if we are supposed to run on kernels older than 4.8.0. If so, +# force executable stack to avoid potential runtime problems with fpu +# emulation. +# NOTE: The check below assumes that in absence of user-provided minumum_kernel +# we will default to arch_minimum_kernel which is currently less than 4.8.0 for +# all known configurations. If this changes, the check must be updated. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the compiler must use executable stack" >&5 +$as_echo_n "checking whether the compiler must use executable stack... 
" >&6; } +if ${libc_cv_mips_force_execstack+:} false; then : + $as_echo_n "(cached) " >&6 +else + libc_cv_mips_force_execstack=no + if test $libc_mips_float = hard; then + if test -n "$minimum_kernel"; then + + min_version=$((`echo "$minimum_kernel.0.0.0" | sed 's/\([0-9]*\)\.\([0-9]*\)\.\([0-9]*\).*/\1 \* 65536 + \2 \* 256 + \3/'`)) + + if test $min_version -lt 264192; then + libc_cv_mips_force_execstack=yes + fi + else + libc_cv_mips_force_execstack=yes + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mips_force_execstack" >&5 +$as_echo "$libc_cv_mips_force_execstack" >&6; } + +libc_mips_has_gnustack=$libc_cv_as_noexecstack + +if test $libc_cv_mips_force_execstack = yes; then + libc_mips_has_gnustack=no + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: forcing executable stack for pre-4.8.0 Linux kernels" >&5 +$as_echo "$as_me: WARNING: forcing executable stack for pre-4.8.0 Linux kernels" >&2;} +fi + +config_vars="$config_vars +mips-force-execstack = ${libc_cv_mips_force_execstack}" +config_vars="$config_vars +mips-has-gnustack = ${libc_mips_has_gnustack}" diff --git a/sysdeps/unix/sysv/linux/mips/configure.ac b/sysdeps/unix/sysv/linux/mips/configure.ac index 9147aa4582..3db1b32b08 100644 --- a/sysdeps/unix/sysv/linux/mips/configure.ac +++ b/sysdeps/unix/sysv/linux/mips/configure.ac @@ -134,3 +134,35 @@ if test -z "$arch_minimum_kernel"; then arch_minimum_kernel=4.5.0 fi fi + +# Check if we are supposed to run on kernels older than 4.8.0. If so, +# force executable stack to avoid potential runtime problems with fpu +# emulation. +# NOTE: The check below assumes that in absence of user-provided minumum_kernel +# we will default to arch_minimum_kernel which is currently less than 4.8.0 for +# all known configurations. If this changes, the check must be updated. +AC_CACHE_CHECK([whether the compiler must use executable stack], + libc_cv_mips_force_execstack, [dnl +libc_cv_mips_force_execstack=no + if test $libc_mips_float = hard; then + if test -n "$minimum_kernel"; then + changequote(,) + min_version=$((`echo "$minimum_kernel.0.0.0" | sed 's/\([0-9]*\)\.\([0-9]*\)\.\([0-9]*\).*/\1 \* 65536 + \2 \* 256 + \3/'`)) + changequote([,]) + if test $min_version -lt 264192; then + libc_cv_mips_force_execstack=yes + fi + else + libc_cv_mips_force_execstack=yes + fi + fi]) + +libc_mips_has_gnustack=$libc_cv_as_noexecstack + +if test $libc_cv_mips_force_execstack = yes; then + libc_mips_has_gnustack=no + AC_MSG_WARN([forcing executable stack for pre-4.8.0 Linux kernels]) +fi + +LIBC_CONFIG_VAR([mips-force-execstack],[${libc_cv_mips_force_execstack}]) +LIBC_CONFIG_VAR([mips-has-gnustack],[${libc_mips_has_gnustack}]) diff --git a/sysdeps/unix/sysv/linux/mips/mmap_info.h b/sysdeps/unix/sysv/linux/mips/mmap_info.h new file mode 100644 index 0000000000..07c9e3a044 --- /dev/null +++ b/sysdeps/unix/sysv/linux/mips/mmap_info.h @@ -0,0 +1,13 @@ +/* mips64n32 uses __NR_mmap for mmap64 while still having sizeof (off_t) + smaller than sizeof (off64_t). So it allows mapping large offsets + using mmap64 than 32-bit archs which uses __NR_mmap2. 
*/ + +static inline uint64_t +mmap64_maximum_offset (long int page_shift) +{ +#if _MIPS_SIM == _ABIN32 || _MIPS_SIM == _ABI64 + return UINT64_MAX; +#else + return (UINT64_C(1) << (page_shift + (8 * sizeof (off_t)))) - 1; +#endif +} diff --git a/sysdeps/unix/sysv/linux/mmap64.c b/sysdeps/unix/sysv/linux/mmap64.c index e8d519b17a..8441a9caa7 100644 --- a/sysdeps/unix/sysv/linux/mmap64.c +++ b/sysdeps/unix/sysv/linux/mmap64.c @@ -23,11 +23,18 @@ #include #include +#ifdef __NR_mmap2 /* To avoid silent truncation of offset when using mmap2, do not accept offset larger than 1 << (page_shift + off_t bits). For archictures with 32 bits off_t and page size of 4096 it would be 1^44. */ -#define MMAP_OFF_HIGH_MASK \ +# define MMAP_OFF_HIGH_MASK \ ((-(MMAP2_PAGE_UNIT << 1) << (8 * sizeof (off_t) - 1))) +#else +/* Some ABIs might use __NR_mmap while having sizeof (off_t) smaller than + sizeof (off64_t) (currently only MIPS64n32). For this case just set + zero the higher bits so mmap with large offset does not fail. */ +# define MMAP_OFF_HIGH_MASK 0x0 +#endif #define MMAP_OFF_MASK (MMAP_OFF_HIGH_MASK | MMAP_OFF_LOW_MASK) diff --git a/sysdeps/unix/sysv/linux/x86_64/64/dl-librecon.h b/sysdeps/unix/sysv/linux/x86_64/64/dl-librecon.h index 8d474d0d04..37f0b14adb 100644 --- a/sysdeps/unix/sysv/linux/x86_64/64/dl-librecon.h +++ b/sysdeps/unix/sysv/linux/x86_64/64/dl-librecon.h @@ -31,7 +31,8 @@ environment variable, LD_PREFER_MAP_32BIT_EXEC. */ #define EXTRA_LD_ENVVARS \ case 21: \ - if (memcmp (envline, "PREFER_MAP_32BIT_EXEC", 21) == 0) \ + if (!__libc_enable_secure \ + && memcmp (envline, "PREFER_MAP_32BIT_EXEC", 21) == 0) \ GLRO(dl_x86_cpu_features).feature[index_arch_Prefer_MAP_32BIT_EXEC] \ |= bit_arch_Prefer_MAP_32BIT_EXEC; \ break; diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S index f5f05f6c8c..fd20f64f9b 100644 --- a/sysdeps/x86_64/memchr.S +++ b/sysdeps/x86_64/memchr.S @@ -34,12 +34,16 @@ ENTRY(MEMCHR) mov %edi, %ecx #ifdef USE_AS_WMEMCHR - test %rdx, %rdx + test %RDX_LP, %RDX_LP jz L(return_null) - shl $2, %rdx + shl $2, %RDX_LP #else +# ifdef __ILP32__ + /* Clear the upper 32 bits. */ + movl %edx, %edx +# endif punpcklbw %xmm1, %xmm1 - test %rdx, %rdx + test %RDX_LP, %RDX_LP jz L(return_null) punpcklbw %xmm1, %xmm1 #endif diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S index 0828a22534..9ae90661c8 100644 --- a/sysdeps/x86_64/memcmp.S +++ b/sysdeps/x86_64/memcmp.S @@ -21,14 +21,18 @@ .text ENTRY (memcmp) - test %rdx, %rdx +#ifdef __ILP32__ + /* Clear the upper 32 bits. */ + movl %edx, %edx +#endif + test %RDX_LP, %RDX_LP jz L(finz) cmpq $1, %rdx - jle L(finr1b) + jbe L(finr1b) subq %rdi, %rsi movq %rdx, %r10 cmpq $32, %r10 - jge L(gt32) + jae L(gt32) /* Handle small chunks and last block of less than 32 bytes. */ L(small): testq $1, %r10 @@ -156,7 +160,7 @@ L(A32): movq %r11, %r10 andq $-32, %r10 cmpq %r10, %rdi - jge L(mt16) + jae L(mt16) /* Pre-unroll to be ready for unrolled 64B loop. 
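(A related detail in the hunks above and below: the signed conditional branches jle and jge that follow length comparisons are replaced with their unsigned counterparts jbe and jae. The length in RDX is a size_t and must be compared as an unsigned value; a length large enough to set the sign bit would otherwise send the signed forms down the wrong path.)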
*/ testq $32, %rdi jz L(A64) @@ -178,7 +182,7 @@ L(A64): movq %r11, %r10 andq $-64, %r10 cmpq %r10, %rdi - jge L(mt32) + jae L(mt32) L(A64main): movdqu (%rdi,%rsi), %xmm0 @@ -216,7 +220,7 @@ L(mt32): movq %r11, %r10 andq $-32, %r10 cmpq %r10, %rdi - jge L(mt16) + jae L(mt16) L(A32main): movdqu (%rdi,%rsi), %xmm0 @@ -254,7 +258,7 @@ L(ATR): movq %r11, %r10 andq $-32, %r10 cmpq %r10, %rdi - jge L(mt16) + jae L(mt16) testq $16, %rdi jz L(ATR32) @@ -325,7 +329,7 @@ L(ATR64main): movq %r11, %r10 andq $-32, %r10 cmpq %r10, %rdi - jge L(mt16) + jae L(mt16) L(ATR32res): movdqa (%rdi,%rsi), %xmm0 diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S index 5fa0fe9c1c..44ef5c213a 100644 --- a/sysdeps/x86_64/memrchr.S +++ b/sysdeps/x86_64/memrchr.S @@ -24,13 +24,13 @@ ENTRY (__memrchr) movd %esi, %xmm1 - sub $16, %rdx + sub $16, %RDX_LP jbe L(length_less16) punpcklbw %xmm1, %xmm1 punpcklbw %xmm1, %xmm1 - add %rdx, %rdi + add %RDX_LP, %RDI_LP pshufd $0, %xmm1, %xmm1 movdqu (%rdi), %xmm0 diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S index a7275ed7e1..85bdca5b4f 100644 --- a/sysdeps/x86_64/multiarch/memchr-avx2.S +++ b/sysdeps/x86_64/multiarch/memchr-avx2.S @@ -40,16 +40,20 @@ ENTRY (MEMCHR) # ifndef USE_AS_RAWMEMCHR /* Check for zero length. */ - testq %rdx, %rdx + test %RDX_LP, %RDX_LP jz L(null) # endif movl %edi, %ecx /* Broadcast CHAR to YMM0. */ vmovd %esi, %xmm0 # ifdef USE_AS_WMEMCHR - shl $2, %rdx + shl $2, %RDX_LP vpbroadcastd %xmm0, %ymm0 # else +# ifdef __ILP32__ + /* Clear the upper 32 bits. */ + movl %edx, %edx +# endif vpbroadcastb %xmm0, %ymm0 # endif /* Check if we may cross page boundary with one vector load. */ diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S index 16f46301ca..fb12e13ddf 100644 --- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S +++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S @@ -58,9 +58,12 @@ .section .text.avx,"ax",@progbits ENTRY (MEMCMP) # ifdef USE_AS_WMEMCMP - shl $2, %rdx + shl $2, %RDX_LP +# elif defined __ILP32__ + /* Clear the upper 32 bits. */ + movl %edx, %edx # endif - cmpq $VEC_SIZE, %rdx + cmp $VEC_SIZE, %RDX_LP jb L(less_vec) /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */ diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S index 771639f662..834b84cf72 100644 --- a/sysdeps/x86_64/multiarch/memcmp-sse4.S +++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S @@ -42,13 +42,16 @@ .section .text.sse4.1,"ax",@progbits ENTRY (MEMCMP) # ifdef USE_AS_WMEMCMP - shl $2, %rdx + shl $2, %RDX_LP +# elif defined __ILP32__ + /* Clear the upper 32 bits. */ + mov %edx, %edx # endif pxor %xmm0, %xmm0 - cmp $79, %rdx + cmp $79, %RDX_LP ja L(79bytesormore) # ifndef USE_AS_WMEMCMP - cmp $1, %rdx + cmp $1, %RDX_LP je L(firstbyte) # endif add %rdx, %rsi diff --git a/sysdeps/x86_64/multiarch/memcmp-ssse3.S b/sysdeps/x86_64/multiarch/memcmp-ssse3.S index 8d7d2fe67b..af8724e9db 100644 --- a/sysdeps/x86_64/multiarch/memcmp-ssse3.S +++ b/sysdeps/x86_64/multiarch/memcmp-ssse3.S @@ -33,9 +33,12 @@ atom_text_section ENTRY (MEMCMP) # ifdef USE_AS_WMEMCMP - shl $2, %rdx - test %rdx, %rdx + shl $2, %RDX_LP + test %RDX_LP, %RDX_LP jz L(equal) +# elif defined __ILP32__ + /* Clear the upper 32 bits. 
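On x86-64, writing to a 32-bit register zero-extends the result into the full 64-bit register, so mov %edx, %edx is the cheapest way to discard whatever the caller left in bits 63:32 of RDX: under the x32 ABI only the low 32 bits of a register carry a size_t argument and the upper half is not guaranteed to be zero. The RDX_LP-style macros used throughout these files serve the same purpose, expanding to the 32-bit register names on x32 and to the 64-bit names on LP64.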
*/ + mov %edx, %edx # endif mov %rdx, %rcx mov %rdi, %rdx diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S index 4e060a27fd..7388e7412c 100644 --- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S +++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S @@ -48,28 +48,33 @@ .section .text.ssse3,"ax",@progbits #if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE ENTRY (MEMPCPY_CHK) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END (MEMPCPY_CHK) ENTRY (MEMPCPY) - movq %rdi, %rax - addq %rdx, %rax + mov %RDI_LP, %RAX_LP + add %RDX_LP, %RAX_LP jmp L(start) END (MEMPCPY) #endif #if !defined USE_AS_BCOPY ENTRY (MEMCPY_CHK) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END (MEMCPY_CHK) #endif ENTRY (MEMCPY) - mov %rdi, %rax + mov %RDI_LP, %RAX_LP #ifdef USE_AS_MEMPCPY - add %rdx, %rax + add %RDX_LP, %RAX_LP +#endif + +#ifdef __ILP32__ + /* Clear the upper 32 bits. */ + mov %edx, %edx #endif #ifdef USE_AS_MEMMOVE diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S index f3ea52a46c..74306d7daf 100644 --- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S +++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S @@ -48,28 +48,33 @@ .section .text.ssse3,"ax",@progbits #if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE ENTRY (MEMPCPY_CHK) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END (MEMPCPY_CHK) ENTRY (MEMPCPY) - movq %rdi, %rax - addq %rdx, %rax + mov %RDI_LP, %RAX_LP + add %RDX_LP, %RAX_LP jmp L(start) END (MEMPCPY) #endif #if !defined USE_AS_BCOPY ENTRY (MEMCPY_CHK) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END (MEMCPY_CHK) #endif ENTRY (MEMCPY) - mov %rdi, %rax + mov %RDI_LP, %RAX_LP #ifdef USE_AS_MEMPCPY - add %rdx, %rax + add %RDX_LP, %RAX_LP +#endif + +#ifdef __ILP32__ + /* Clear the upper 32 bits. */ + mov %edx, %edx #endif #ifdef USE_AS_MEMMOVE diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S index ae84ddc667..dae0616019 100644 --- a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S +++ b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S @@ -25,30 +25,34 @@ .section .text.avx512,"ax",@progbits # if defined SHARED && !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE ENTRY (__mempcpy_chk_avx512_no_vzeroupper) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END (__mempcpy_chk_avx512_no_vzeroupper) ENTRY (__mempcpy_avx512_no_vzeroupper) - movq %rdi, %rax - addq %rdx, %rax + mov %RDI_LP, %RAX_LP + add %RDX_LP, %RAX_LP jmp L(start) END (__mempcpy_avx512_no_vzeroupper) # endif # ifdef SHARED ENTRY (__memmove_chk_avx512_no_vzeroupper) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END (__memmove_chk_avx512_no_vzeroupper) # endif ENTRY (__memmove_avx512_no_vzeroupper) - mov %rdi, %rax + mov %RDI_LP, %RAX_LP # ifdef USE_AS_MEMPCPY - add %rdx, %rax + add %RDX_LP, %RAX_LP # endif L(start): +# ifdef __ILP32__ + /* Clear the upper 32 bits. 
*/ + mov %edx, %edx +# endif lea (%rsi, %rdx), %rcx lea (%rdi, %rdx), %r9 cmp $512, %rdx diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S index d694e8b2be..9225fea1d7 100644 --- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S @@ -107,22 +107,22 @@ .section SECTION(.text),"ax",@progbits #if defined SHARED && IS_IN (libc) ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned)) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned)) #endif #if VEC_SIZE == 16 || defined SHARED ENTRY (MEMPCPY_SYMBOL (__mempcpy, unaligned)) - movq %rdi, %rax - addq %rdx, %rax + mov %RDI_LP, %RAX_LP + add %RDX_LP, %RAX_LP jmp L(start) END (MEMPCPY_SYMBOL (__mempcpy, unaligned)) #endif #if defined SHARED && IS_IN (libc) ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned)) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned)) #endif @@ -130,9 +130,13 @@ END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned)) ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned)) movq %rdi, %rax L(start): - cmpq $VEC_SIZE, %rdx +# ifdef __ILP32__ + /* Clear the upper 32 bits. */ + movl %edx, %edx +# endif + cmp $VEC_SIZE, %RDX_LP jb L(less_vec) - cmpq $(VEC_SIZE * 2), %rdx + cmp $(VEC_SIZE * 2), %RDX_LP ja L(more_2x_vec) #if !defined USE_MULTIARCH || !IS_IN (libc) L(last_2x_vec): @@ -153,33 +157,33 @@ END (MEMMOVE_SYMBOL (__memmove, unaligned)) # if VEC_SIZE == 16 # if defined SHARED ENTRY (__mempcpy_chk_erms) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END (__mempcpy_chk_erms) /* Only used to measure performance of REP MOVSB. */ ENTRY (__mempcpy_erms) - movq %rdi, %rax - addq %rdx, %rax + mov %RDI_LP, %RAX_LP + add %RDX_LP, %RAX_LP jmp L(start_movsb) END (__mempcpy_erms) # endif ENTRY (__memmove_chk_erms) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END (__memmove_chk_erms) ENTRY (__memmove_erms) movq %rdi, %rax L(start_movsb): - movq %rdx, %rcx - cmpq %rsi, %rdi + mov %RDX_LP, %RCX_LP + cmp %RSI_LP, %RDI_LP jb 1f /* Source == destination is less common. */ je 2f - leaq (%rsi,%rcx), %rdx - cmpq %rdx, %rdi + lea (%rsi,%rcx), %RDX_LP + cmp %RDX_LP, %RDI_LP jb L(movsb_backward) 1: rep movsb @@ -201,18 +205,18 @@ strong_alias (__memmove_chk_erms, __memcpy_chk_erms) # ifdef SHARED ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms)) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms)) ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms)) - movq %rdi, %rax - addq %rdx, %rax + mov %RDI_LP, %RAX_LP + add %RDX_LP, %RAX_LP jmp L(start_erms) END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms)) ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms)) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms)) # endif @@ -220,9 +224,13 @@ END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms)) ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms)) movq %rdi, %rax L(start_erms): - cmpq $VEC_SIZE, %rdx +# ifdef __ILP32__ + /* Clear the upper 32 bits. */ + movl %edx, %edx +# endif + cmp $VEC_SIZE, %RDX_LP jb L(less_vec) - cmpq $(VEC_SIZE * 2), %rdx + cmp $(VEC_SIZE * 2), %RDX_LP ja L(movsb_more_2x_vec) L(last_2x_vec): /* From VEC and to 2 * VEC. 
No branch when size == VEC_SIZE. */ @@ -249,7 +257,7 @@ L(movsb): # endif jb L(more_8x_vec_backward) 1: - movq %rdx, %rcx + mov %RDX_LP, %RCX_LP rep movsb L(nop): ret diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S index 3ee02e1cc3..40e1dba301 100644 --- a/sysdeps/x86_64/multiarch/memrchr-avx2.S +++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S @@ -32,10 +32,10 @@ ENTRY (__memrchr_avx2) vmovd %esi, %xmm0 vpbroadcastb %xmm0, %ymm0 - subq $VEC_SIZE, %rdx + sub $VEC_SIZE, %RDX_LP jbe L(last_vec_or_less) - addq %rdx, %rdi + add %RDX_LP, %RDI_LP /* Check the last VEC_SIZE bytes. */ vpcmpeqb (%rdi), %ymm0, %ymm1 diff --git a/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S index 1f66602398..5be12bd06b 100644 --- a/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S +++ b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S @@ -29,12 +29,16 @@ .section .text.avx512,"ax",@progbits #if defined PIC ENTRY (MEMSET_CHK) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END (MEMSET_CHK) #endif ENTRY (MEMSET) +# ifdef __ILP32__ + /* Clear the upper 32 bits. */ + mov %edx, %edx +# endif vpxor %xmm0, %xmm0, %xmm0 vmovd %esi, %xmm1 lea (%rdi, %rdx), %rsi diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S index 8ed470283e..2023a8e108 100644 --- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S @@ -75,8 +75,8 @@ .section SECTION(.text),"ax",@progbits #if VEC_SIZE == 16 && IS_IN (libc) ENTRY (__bzero) - movq %rdi, %rax /* Set return value. */ - movq %rsi, %rdx /* Set n. */ + mov %RDI_LP, %RAX_LP /* Set return value. */ + mov %RSI_LP, %RDX_LP /* Set n. */ pxor %xmm0, %xmm0 jmp L(entry_from_bzero) END (__bzero) @@ -86,13 +86,13 @@ weak_alias (__bzero, bzero) #if IS_IN (libc) # if defined SHARED ENTRY_CHK (WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned)) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END_CHK (WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned)) # endif ENTRY (WMEMSET_SYMBOL (__wmemset, unaligned)) - shlq $2, %rdx + shl $2, %RDX_LP WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi) jmp L(entry_from_bzero) END (WMEMSET_SYMBOL (__wmemset, unaligned)) @@ -100,13 +100,17 @@ END (WMEMSET_SYMBOL (__wmemset, unaligned)) #if defined SHARED && IS_IN (libc) ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned)) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned)) #endif ENTRY (MEMSET_SYMBOL (__memset, unaligned)) MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi) +# ifdef __ILP32__ + /* Clear the upper 32 bits. */ + mov %edx, %edx +# endif L(entry_from_bzero): cmpq $VEC_SIZE, %rdx jb L(less_vec) @@ -122,7 +126,7 @@ END (MEMSET_SYMBOL (__memset, unaligned)) # if VEC_SIZE == 16 ENTRY (__memset_chk_erms) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END (__memset_chk_erms) @@ -135,11 +139,11 @@ ENTRY (MEMSET_SYMBOL (__memset, erms)) L(stosb): /* Issue vzeroupper before rep stosb. 
*/ VZEROUPPER - movq %rdx, %rcx + mov %RDX_LP, %RCX_LP movzbl %sil, %eax - movq %rdi, %rdx + mov %RDI_LP, %RDX_LP rep stosb - movq %rdx, %rax + mov %RDX_LP, %RAX_LP ret # if VEC_SIZE == 16 END (__memset_erms) @@ -149,16 +153,20 @@ END (MEMSET_SYMBOL (__memset, erms)) # if defined SHARED && IS_IN (libc) ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms)) - cmpq %rdx, %rcx + cmp %RDX_LP, %RCX_LP jb HIDDEN_JUMPTARGET (__chk_fail) END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms)) # endif ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms)) MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi) - cmpq $VEC_SIZE, %rdx +# ifdef __ILP32__ + /* Clear the upper 32 bits. */ + mov %edx, %edx +# endif + cmp $VEC_SIZE, %RDX_LP jb L(less_vec) - cmpq $(VEC_SIZE * 2), %rdx + cmp $(VEC_SIZE * 2), %RDX_LP ja L(stosb_more_2x_vec) /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S index 4aeb14e175..9e5f93cb86 100644 --- a/sysdeps/x86_64/multiarch/strcmp-sse42.S +++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S @@ -155,11 +155,11 @@ STRCMP_SSE42: #endif #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - test %rdx, %rdx + test %RDX_LP, %RDX_LP je LABEL(strcmp_exitz) - cmp $1, %rdx + cmp $1, %RDX_LP je LABEL(Byte0) - mov %rdx, %r11 + mov %RDX_LP, %R11_LP #endif mov %esi, %ecx mov %edi, %eax diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S index 6a5ab7ab26..b46f6f8aed 100644 --- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S +++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S @@ -40,8 +40,8 @@ .text ENTRY (STRCPY) # ifdef USE_AS_STRNCPY - mov %rdx, %r8 - test %r8, %r8 + mov %RDX_LP, %R8_LP + test %R8_LP, %R8_LP jz L(ExitZero) # endif mov %rsi, %rcx diff --git a/sysdeps/x86_64/multiarch/strcpy-ssse3.S b/sysdeps/x86_64/multiarch/strcpy-ssse3.S index 47aaeae671..83134f3b2c 100644 --- a/sysdeps/x86_64/multiarch/strcpy-ssse3.S +++ b/sysdeps/x86_64/multiarch/strcpy-ssse3.S @@ -31,13 +31,13 @@ ENTRY (STRCPY) mov %rsi, %rcx # ifdef USE_AS_STRNCPY - mov %rdx, %r8 + mov %RDX_LP, %R8_LP # endif mov %rdi, %rdx # ifdef USE_AS_STRNCPY - test %r8, %r8 + test %R8_LP, %R8_LP jz L(Exit0) - cmp $8, %r8 + cmp $8, %R8_LP jbe L(StrncpyExit8Bytes) # endif cmpb $0, (%rcx) diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S index 1dc823af0a..2ce10f59e6 100644 --- a/sysdeps/x86_64/multiarch/strlen-avx2.S +++ b/sysdeps/x86_64/multiarch/strlen-avx2.S @@ -42,12 +42,15 @@ ENTRY (STRLEN) # ifdef USE_AS_STRNLEN /* Check for zero length. */ - testq %rsi, %rsi + test %RSI_LP, %RSI_LP jz L(zero) # ifdef USE_AS_WCSLEN - shl $2, %rsi + shl $2, %RSI_LP +# elif defined __ILP32__ + /* Clear the upper 32 bits. */ + movl %esi, %esi # endif - movq %rsi, %r8 + mov %RSI_LP, %R8_LP # endif movl %edi, %ecx movq %rdi, %rdx diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S index 076be04df5..2aa301997f 100644 --- a/sysdeps/x86_64/strcmp.S +++ b/sysdeps/x86_64/strcmp.S @@ -135,11 +135,11 @@ ENTRY (STRCMP) * This implementation uses SSE to compare up to 16 bytes at a time. 
*/ #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L - test %rdx, %rdx + test %RDX_LP, %RDX_LP je LABEL(strcmp_exitz) - cmp $1, %rdx + cmp $1, %RDX_LP je LABEL(Byte0) - mov %rdx, %r11 + mov %RDX_LP, %R11_LP #endif mov %esi, %ecx mov %edi, %eax diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S index b5ab117c79..30cec693c1 100644 --- a/sysdeps/x86_64/strlen.S +++ b/sysdeps/x86_64/strlen.S @@ -59,21 +59,21 @@ ENTRY(strlen) #ifdef AS_STRNLEN /* Do not read anything when n==0. */ - test %rsi, %rsi + test %RSI_LP, %RSI_LP jne L(n_nonzero) xor %rax, %rax ret L(n_nonzero): # ifdef AS_WCSLEN - shlq $2, %rsi + shl $2, %RSI_LP # endif /* Initialize long lived registers. */ - add %rdi, %rsi - mov %rsi, %r10 - and $-64, %r10 - mov %rsi, %r11 + add %RDI_LP, %RSI_LP + mov %RSI_LP, %R10_LP + and $-64, %R10_LP + mov %RSI_LP, %R11_LP #endif pxor %xmm0, %xmm0 diff --git a/sysdeps/x86_64/x32/Makefile b/sysdeps/x86_64/x32/Makefile index f2ebc24fb0..8748956563 100644 --- a/sysdeps/x86_64/x32/Makefile +++ b/sysdeps/x86_64/x32/Makefile @@ -4,3 +4,15 @@ ifeq ($(subdir),math) # 64-bit llround. Add -fno-builtin-lround to silence the compiler. CFLAGS-s_llround.c += -fno-builtin-lround endif + +ifeq ($(subdir),string) +tests += tst-size_t-memchr tst-size_t-memcmp tst-size_t-memcpy \ + tst-size_t-memrchr tst-size_t-memset tst-size_t-strncasecmp \ + tst-size_t-strncmp tst-size_t-strncpy tst-size_t-strnlen \ + tst-size_t-memcmp-2 +endif + +ifeq ($(subdir),wcsmbs) +tests += tst-size_t-wmemchr tst-size_t-wmemcmp tst-size_t-wmemset \ + tst-size_t-wcsncmp tst-size_t-wcsnlen +endif diff --git a/sysdeps/x86_64/x32/test-size_t.h b/sysdeps/x86_64/x32/test-size_t.h new file mode 100644 index 0000000000..78a940863e --- /dev/null +++ b/sysdeps/x86_64/x32/test-size_t.h @@ -0,0 +1,35 @@ +/* Test string/memory functions with size_t in the lower 32 bits of + 64-bit register. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define TEST_MAIN +#include + +/* On x32, parameter_t may be passed in a 64-bit register with the LEN + field in the lower 32 bits. When the LEN field of 64-bit register + is passed to string/memory function as the size_t parameter, only + the lower 32 bits can be used. */ +typedef struct +{ + union + { + size_t len; + void (*fn) (void); + }; + void *p; +} parameter_t; diff --git a/sysdeps/x86_64/x32/tst-size_t-memchr.c b/sysdeps/x86_64/x32/tst-size_t-memchr.c new file mode 100644 index 0000000000..29a3daf102 --- /dev/null +++ b/sysdeps/x86_64/x32/tst-size_t-memchr.c @@ -0,0 +1,72 @@ +/* Test memchr with size_t in the lower 32 bits of 64-bit register. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef WIDE +# define TEST_NAME "memchr" +#else +# define TEST_NAME "wmemchr" +#endif /* WIDE */ +#include "test-size_t.h" + +#ifndef WIDE +# define MEMCHR memchr +# define CHAR char +# define UCHAR unsigned char +#else +# include +# define MEMCHR wmemchr +# define CHAR wchar_t +# define UCHAR wchar_t +#endif /* WIDE */ + +IMPL (MEMCHR, 1) + +typedef CHAR * (*proto_t) (const CHAR*, int, size_t); + +static CHAR * +__attribute__ ((noinline, noclone)) +do_memchr (parameter_t a, parameter_t b) +{ + return CALL (&b, a.p, (uintptr_t) b.p, a.len); +} + +static int +test_main (void) +{ + test_init (); + + parameter_t src = { { page_size / sizeof (CHAR) }, buf2 }; + parameter_t c = { { 0 }, (void *) (uintptr_t) 0x12 }; + + int ret = 0; + FOR_EACH_IMPL (impl, 0) + { + c.fn = impl->fn; + CHAR *res = do_memchr (src, c); + if (res) + { + error (0, 0, "Wrong result in function %s: %p != NULL", + impl->name, res); + ret = 1; + } + } + + return ret ? EXIT_FAILURE : EXIT_SUCCESS; +} + +#include diff --git a/sysdeps/x86_64/x32/tst-size_t-memcmp-2.c b/sysdeps/x86_64/x32/tst-size_t-memcmp-2.c new file mode 100644 index 0000000000..d8ae1a0813 --- /dev/null +++ b/sysdeps/x86_64/x32/tst-size_t-memcmp-2.c @@ -0,0 +1,79 @@ +/* Test memcmp with size_t in the lower 32 bits of 64-bit register. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#define TEST_MAIN +#ifdef WIDE +# define TEST_NAME "wmemcmp" +#else +# define TEST_NAME "memcmp" +#endif + +#include "test-size_t.h" + +#ifdef WIDE +# include +# include + +# define MEMCMP wmemcmp +# define CHAR wchar_t +#else +# define MEMCMP memcmp +# define CHAR char +#endif + +IMPL (MEMCMP, 1) + +typedef int (*proto_t) (const CHAR *, const CHAR *, size_t); + +static int +__attribute__ ((noinline, noclone)) +do_memcmp (parameter_t a, parameter_t b) +{ + return CALL (&b, a.p, b.p, a.len); +} + +static int +test_main (void) +{ + test_init (); + + parameter_t dest = { { page_size / sizeof (CHAR) }, buf1 }; + parameter_t src = { { 0 }, buf2 }; + + memcpy (buf1, buf2, page_size); + + CHAR *p = (CHAR *) buf1; + p[page_size / sizeof (CHAR) - 1] = (CHAR) 1; + + int ret = 0; + FOR_EACH_IMPL (impl, 0) + { + src.fn = impl->fn; + int res = do_memcmp (dest, src); + if (res >= 0) + { + error (0, 0, "Wrong result in function %s: %i >= 0", + impl->name, res); + ret = 1; + } + } + + return ret ? EXIT_FAILURE : EXIT_SUCCESS; +} + +#include diff --git a/sysdeps/x86_64/x32/tst-size_t-memcmp.c b/sysdeps/x86_64/x32/tst-size_t-memcmp.c new file mode 100644 index 0000000000..9bd6fdb45a --- /dev/null +++ b/sysdeps/x86_64/x32/tst-size_t-memcmp.c @@ -0,0 +1,76 @@ +/* Test memcmp with size_t in the lower 32 bits of 64-bit register. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define TEST_MAIN +#ifdef WIDE +# define TEST_NAME "wmemcmp" +#else +# define TEST_NAME "memcmp" +#endif + +#include "test-size_t.h" + +#ifdef WIDE +# include +# include + +# define MEMCMP wmemcmp +# define CHAR wchar_t +#else +# define MEMCMP memcmp +# define CHAR char +#endif + +IMPL (MEMCMP, 1) + +typedef int (*proto_t) (const CHAR *, const CHAR *, size_t); + +static int +__attribute__ ((noinline, noclone)) +do_memcmp (parameter_t a, parameter_t b) +{ + return CALL (&b, a.p, b.p, a.len); +} + +static int +test_main (void) +{ + test_init (); + + parameter_t dest = { { page_size / sizeof (CHAR) }, buf1 }; + parameter_t src = { { 0 }, buf2 }; + + memcpy (buf1, buf2, page_size); + + int ret = 0; + FOR_EACH_IMPL (impl, 0) + { + src.fn = impl->fn; + int res = do_memcmp (dest, src); + if (res) + { + error (0, 0, "Wrong result in function %s: %i != 0", + impl->name, res); + ret = 1; + } + } + + return ret ? EXIT_FAILURE : EXIT_SUCCESS; +} + +#include diff --git a/sysdeps/x86_64/x32/tst-size_t-memcpy.c b/sysdeps/x86_64/x32/tst-size_t-memcpy.c new file mode 100644 index 0000000000..66b71e1749 --- /dev/null +++ b/sysdeps/x86_64/x32/tst-size_t-memcpy.c @@ -0,0 +1,58 @@ +/* Test memcpy with size_t in the lower 32 bits of 64-bit register. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define TEST_NAME "memcpy" +#include "test-size_t.h" + +IMPL (memcpy, 1) + +typedef void *(*proto_t) (void *, const void *, size_t); + +static void * +__attribute__ ((noinline, noclone)) +do_memcpy (parameter_t a, parameter_t b) +{ + return CALL (&b, a.p, b.p, a.len); +} + +static int +test_main (void) +{ + test_init (); + + parameter_t dest = { { page_size }, buf1 }; + parameter_t src = { { 0 }, buf2 }; + + int ret = 0; + FOR_EACH_IMPL (impl, 0) + { + src.fn = impl->fn; + do_memcpy (dest, src); + int res = memcmp (dest.p, src.p, dest.len); + if (res) + { + error (0, 0, "Wrong result in function %s: %i != 0", + impl->name, res); + ret = 1; + } + } + + return ret ? EXIT_FAILURE : EXIT_SUCCESS; +} + +#include diff --git a/sysdeps/x86_64/x32/tst-size_t-memrchr.c b/sysdeps/x86_64/x32/tst-size_t-memrchr.c new file mode 100644 index 0000000000..c83699c097 --- /dev/null +++ b/sysdeps/x86_64/x32/tst-size_t-memrchr.c @@ -0,0 +1,57 @@ +/* Test memrchr with size_t in the lower 32 bits of 64-bit register. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define TEST_NAME "memrchr" +#include "test-size_t.h" + +IMPL (memchr, 1) + +typedef void * (*proto_t) (const void *, int, size_t); + +static void * +__attribute__ ((noinline, noclone)) +do_memrchr (parameter_t a, parameter_t b) +{ + return CALL (&b, a.p, (uintptr_t) b.p, a.len); +} + +static int +test_main (void) +{ + test_init (); + + parameter_t src = { { page_size }, buf2 }; + parameter_t c = { { 0 }, (void *) (uintptr_t) 0x12 }; + + int ret = 0; + FOR_EACH_IMPL (impl, 0) + { + c.fn = impl->fn; + void * res = do_memrchr (src, c); + if (res) + { + error (0, 0, "Wrong result in function %s: %p != NULL", + impl->name, res); + ret = 1; + } + } + + return ret ? EXIT_FAILURE : EXIT_SUCCESS; +} + +#include diff --git a/sysdeps/x86_64/x32/tst-size_t-memset.c b/sysdeps/x86_64/x32/tst-size_t-memset.c new file mode 100644 index 0000000000..2c367af6cd --- /dev/null +++ b/sysdeps/x86_64/x32/tst-size_t-memset.c @@ -0,0 +1,73 @@ +/* Test memset with size_t in the lower 32 bits of 64-bit register. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifdef WIDE +# define TEST_NAME "wmemset" +#else +# define TEST_NAME "memset" +#endif /* WIDE */ + +#include "test-size_t.h" + +#ifdef WIDE +# include +# define MEMSET wmemset +# define CHAR wchar_t +#else +# define MEMSET memset +# define CHAR char +#endif /* WIDE */ + +IMPL (MEMSET, 1) + +typedef CHAR *(*proto_t) (CHAR *, int, size_t); + +static void * +__attribute__ ((noinline, noclone)) +do_memset (parameter_t a, parameter_t b) +{ + return CALL (&b, a.p, (uintptr_t) b.p, a.len); +} + +static int +test_main (void) +{ + test_init (); + + CHAR ch = 0x23; + parameter_t src = { { page_size / sizeof (CHAR) }, buf2 }; + parameter_t c = { { 0 }, (void *) (uintptr_t) ch }; + + int ret = 0; + FOR_EACH_IMPL (impl, 0) + { + c.fn = impl->fn; + CHAR *p = (CHAR *) do_memset (src, c); + size_t i; + for (i = 0; i < src.len; i++) + if (p[i] != ch) + { + error (0, 0, "Wrong result in function %s", impl->name); + ret = 1; + } + } + + return ret ? EXIT_FAILURE : EXIT_SUCCESS; +} + +#include diff --git a/sysdeps/x86_64/x32/tst-size_t-strncasecmp.c b/sysdeps/x86_64/x32/tst-size_t-strncasecmp.c new file mode 100644 index 0000000000..862335937b --- /dev/null +++ b/sysdeps/x86_64/x32/tst-size_t-strncasecmp.c @@ -0,0 +1,59 @@ +/* Test strncaecmp with size_t in the lower 32 bits of 64-bit register. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define TEST_NAME "strncasecmp" +#include "test-size_t.h" + +IMPL (strncasecmp, 1) + +typedef int (*proto_t) (const char *, const char *, size_t); + +static int +__attribute__ ((noinline, noclone)) +do_strncasecmp (parameter_t a, parameter_t b) +{ + return CALL (&b, a.p, b.p, a.len); +} + +static int +test_main (void) +{ + test_init (); + + parameter_t dest = { { page_size }, buf1 }; + parameter_t src = { { 0 }, buf2 }; + + strncpy ((char *) buf1, (const char *) buf2, page_size); + + int ret = 0; + FOR_EACH_IMPL (impl, 0) + { + src.fn = impl->fn; + int res = do_strncasecmp (dest, src); + if (res) + { + error (0, 0, "Wrong result in function %s: %i != 0", + impl->name, res); + ret = 1; + } + } + + return ret ? 
EXIT_FAILURE : EXIT_SUCCESS; +} + +#include diff --git a/sysdeps/x86_64/x32/tst-size_t-strncmp.c b/sysdeps/x86_64/x32/tst-size_t-strncmp.c new file mode 100644 index 0000000000..54e6bd83ef --- /dev/null +++ b/sysdeps/x86_64/x32/tst-size_t-strncmp.c @@ -0,0 +1,78 @@ +/* Test strncmp with size_t in the lower 32 bits of 64-bit register. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifdef WIDE +# define TEST_NAME "wcsncmp" +#else +# define TEST_NAME "strncmp" +#endif + +#include "test-size_t.h" + +#ifdef WIDE +# include + +# define STRNCMP wcsncmp +# define STRNCPY wcsncpy +# define CHAR wchar_t +#else +# define STRNCMP strncmp +# define STRNCPY strncpy +# define CHAR char +#endif + +IMPL (STRNCMP, 1) + +typedef int (*proto_t) (const CHAR *, const CHAR *, size_t); + + +static int +__attribute__ ((noinline, noclone)) +do_strncmp (parameter_t a, parameter_t b) +{ + return CALL (&b, a.p, b.p, a.len); +} + +static int +test_main (void) +{ + test_init (); + + size_t size = page_size / sizeof (CHAR); + parameter_t dest = { { size }, buf1 }; + parameter_t src = { { 0 }, buf2 }; + + STRNCPY ((CHAR *) buf1, (const CHAR *) buf2, size); + + int ret = 0; + FOR_EACH_IMPL (impl, 0) + { + src.fn = impl->fn; + int res = do_strncmp (dest, src); + if (res) + { + error (0, 0, "Wrong result in function %s: %i != 0", + impl->name, res); + ret = 1; + } + } + + return ret ? EXIT_FAILURE : EXIT_SUCCESS; +} + +#include diff --git a/sysdeps/x86_64/x32/tst-size_t-strncpy.c b/sysdeps/x86_64/x32/tst-size_t-strncpy.c new file mode 100644 index 0000000000..4dec71e6b3 --- /dev/null +++ b/sysdeps/x86_64/x32/tst-size_t-strncpy.c @@ -0,0 +1,58 @@ +/* Test strncpy with size_t in the lower 32 bits of 64-bit register. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#define TEST_NAME "strncpy" +#include "test-size_t.h" + +IMPL (strncpy, 1) + +typedef char *(*proto_t) (char *, const char*, size_t); + +static void * +__attribute__ ((noinline, noclone)) +do_strncpy (parameter_t a, parameter_t b) +{ + return CALL (&b, a.p, b.p, a.len); +} + +static int +test_main (void) +{ + test_init (); + + parameter_t dest = { { page_size }, buf1 }; + parameter_t src = { { 0 }, buf2 }; + + int ret = 0; + FOR_EACH_IMPL (impl, 0) + { + src.fn = impl->fn; + do_strncpy (dest, src); + int res = strncmp (dest.p, src.p, dest.len); + if (res) + { + error (0, 0, "Wrong result in function %s: %i != 0", + impl->name, res); + ret = 1; + } + } + + return ret ? EXIT_FAILURE : EXIT_SUCCESS; +} + +#include diff --git a/sysdeps/x86_64/x32/tst-size_t-strnlen.c b/sysdeps/x86_64/x32/tst-size_t-strnlen.c new file mode 100644 index 0000000000..690a4a8a31 --- /dev/null +++ b/sysdeps/x86_64/x32/tst-size_t-strnlen.c @@ -0,0 +1,72 @@ +/* Test strnlen with size_t in the lower 32 bits of 64-bit register. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifdef WIDE +# define TEST_NAME "wcsnlen" +#else +# define TEST_NAME "strnlen" +#endif /* WIDE */ + +#include "test-size_t.h" + +#ifdef WIDE +# include +# define STRNLEN wcsnlen +# define CHAR wchar_t +#else +# define STRNLEN strnlen +# define CHAR char +#endif /* WIDE */ + +IMPL (STRNLEN, 1) + +typedef size_t (*proto_t) (const CHAR *, size_t); + +static size_t +__attribute__ ((noinline, noclone)) +do_strnlen (parameter_t a, parameter_t b) +{ + return CALL (&a, a.p, b.len); +} + +static int +test_main (void) +{ + test_init (); + + size_t size = page_size / sizeof (CHAR); + parameter_t src = { { 0 }, buf2 }; + parameter_t c = { { size }, (void *) (uintptr_t) 'a' }; + + int ret = 0; + FOR_EACH_IMPL (impl, 0) + { + src.fn = impl->fn; + size_t res = do_strnlen (src, c); + if (res != size) + { + error (0, 0, "Wrong result in function %s: 0x%x != 0x%x", + impl->name, res, size); + ret = 1; + } + } + + return ret ? EXIT_FAILURE : EXIT_SUCCESS; +} + +#include diff --git a/sysdeps/x86_64/x32/tst-size_t-wcsncmp.c b/sysdeps/x86_64/x32/tst-size_t-wcsncmp.c new file mode 100644 index 0000000000..4829647c19 --- /dev/null +++ b/sysdeps/x86_64/x32/tst-size_t-wcsncmp.c @@ -0,0 +1,20 @@ +/* Test wcsncmp with size_t in the lower 32 bits of 64-bit register. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define WIDE 1 +#include "tst-size_t-strncmp.c" diff --git a/sysdeps/x86_64/x32/tst-size_t-wcsnlen.c b/sysdeps/x86_64/x32/tst-size_t-wcsnlen.c new file mode 100644 index 0000000000..093b4bbe1b --- /dev/null +++ b/sysdeps/x86_64/x32/tst-size_t-wcsnlen.c @@ -0,0 +1,20 @@ +/* Test wcsnlen with size_t in the lower 32 bits of 64-bit register. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define WIDE 1 +#include "tst-size_t-strnlen.c" diff --git a/sysdeps/x86_64/x32/tst-size_t-wmemchr.c b/sysdeps/x86_64/x32/tst-size_t-wmemchr.c new file mode 100644 index 0000000000..877801d646 --- /dev/null +++ b/sysdeps/x86_64/x32/tst-size_t-wmemchr.c @@ -0,0 +1,20 @@ +/* Test wmemchr with size_t in the lower 32 bits of 64-bit register. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define WIDE 1 +#include "tst-size_t-memchr.c" diff --git a/sysdeps/x86_64/x32/tst-size_t-wmemcmp.c b/sysdeps/x86_64/x32/tst-size_t-wmemcmp.c new file mode 100644 index 0000000000..e8b5ffd0d5 --- /dev/null +++ b/sysdeps/x86_64/x32/tst-size_t-wmemcmp.c @@ -0,0 +1,20 @@ +/* Test wmemcmp with size_t in the lower 32 bits of 64-bit register. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define WIDE 1 +#include "tst-size_t-memcmp.c" diff --git a/sysdeps/x86_64/x32/tst-size_t-wmemset.c b/sysdeps/x86_64/x32/tst-size_t-wmemset.c new file mode 100644 index 0000000000..955eb488c2 --- /dev/null +++ b/sysdeps/x86_64/x32/tst-size_t-wmemset.c @@ -0,0 +1,20 @@ +/* Test wmemset with size_t in the lower 32 bits of 64-bit register. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define WIDE 1 +#include "tst-size_t-memset.c"
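All of these x32 tests rely on the layout documented in test-size_t.h above: a parameter_t is small enough to be passed in a single 64-bit register, with the 32-bit length in the low half and an unrelated pointer in the high half, so an implementation that consumes the whole register as its size_t argument sees an enormous bogus length and returns a wrong result or reads memory it must not touch. A minimal standalone sketch of the same failure mode, independent of glibc (hypothetical variable names; it only models the register contents, no x32 toolchain required):

  #include <stdint.h>
  #include <stdio.h>

  int
  main (void)
  {
    uint32_t len = 4096;            /* the intended size_t argument */
    uint32_t junk = 0xdeadbeef;     /* e.g. a pointer stored next to it */

    /* Model of the 64-bit argument register as seen by the callee.  */
    uint64_t reg = ((uint64_t) junk << 32) | len;

    /* A routine that truncates correctly sees the real length...  */
    printf ("low 32 bits:   %u\n", (uint32_t) reg);
    /* ...one that uses the whole register sees a huge bogus length.  */
    printf ("full register: %llu\n", (unsigned long long) reg);
    return 0;
  }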