mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-22 04:50:07 +00:00
tilegx: small performance fix for string routines
We were multiplying a byte by 0x0101010101010101ULL to create a constant for SIMD ops, but the compiler isn't good at optimizing this case (the fact that one operand is a byte is lost by the time it would be possible to do the optimization). So instead we add a helper routine that explicitly uses SIMD ops to create the constant.
This commit is contained in:
parent
575298fcd2
commit
64d76ca064
@ -1,3 +1,14 @@
|
||||
2012-05-12 Chris Metcalf <cmetcalf@tilera.com>
|
||||
|
||||
* sysdeps/tile/tilegx/memchr.c: Use new copy_byte() function
|
||||
to efficiently generate a large constant for masking.
|
||||
* sysdeps/tile/tilegx/memset.c: Likewise.
|
||||
* sysdeps/tile/tilegx/rawmemchr.c: Likewise.
|
||||
* sysdeps/tile/tilegx/strchr.c: Likewise.
|
||||
* sysdeps/tile/tilegx/strchrnul.c: Likewise.
|
||||
* sysdeps/tile/tilegx/strrchr.c: Likewise.
|
||||
* sysdeps/tile/tilegx/string-endian.h (copy_byte): New function.
|
||||
|
||||
2012-05-12 Chris Metcalf <cmetcalf@tilera.com>
|
||||
|
||||
* sysdeps/tile/tilegx/memcpy.c: Allow memcpy(p, p, n)
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
||||
|
||||
@ -41,7 +41,7 @@ __memchr (const void *s, int c, size_t n)
|
||||
p = (const uint64_t *) (s_int & -8);
|
||||
|
||||
/* Create eight copies of the byte for which we are looking. */
|
||||
goal = 0x0101010101010101ULL * (uint8_t) c;
|
||||
goal = copy_byte(c);
|
||||
|
||||
/* Read the first word, but munge it so that bytes before the array
|
||||
will not match goal. */
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
||||
|
||||
@ -19,6 +19,7 @@
|
||||
#include <arch/chip.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "string-endian.h"
|
||||
|
||||
void *
|
||||
__memset (void *s, int c, size_t n)
|
||||
@ -71,7 +72,7 @@ __memset (void *s, int c, size_t n)
|
||||
n64 = n >> 3;
|
||||
|
||||
/* Tile input byte out to 64 bits. */
|
||||
v64 = 0x0101010101010101ULL * (uint8_t) c;
|
||||
v64 = copy_byte(c);
|
||||
|
||||
/* This must be at least 8 or the following loop doesn't work. */
|
||||
#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8)
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
||||
|
||||
@ -28,7 +28,7 @@ __rawmemchr (const void *s, int c)
|
||||
const uint64_t *p = (const uint64_t *) (s_int & -8);
|
||||
|
||||
/* Create eight copies of the byte for which we are looking. */
|
||||
const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
|
||||
const uint64_t goal = copy_byte(c);
|
||||
|
||||
/* Read the first word, but munge it so that bytes before the array
|
||||
will not match goal. */
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
||||
|
||||
@ -32,7 +32,7 @@ strchr (const char *s, int c)
|
||||
const uint64_t *p = (const uint64_t *) (s_int & -8);
|
||||
|
||||
/* Create eight copies of the byte for which we are looking. */
|
||||
const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
|
||||
const uint64_t goal = copy_byte(c);
|
||||
|
||||
/* Read the first aligned word, but force bytes before the string to
|
||||
match neither zero nor goal (we make sure the high bit of each byte
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
||||
|
||||
@ -30,7 +30,7 @@ __strchrnul (const char *s, int c)
|
||||
const uint64_t *p = (const uint64_t *) (s_int & -8);
|
||||
|
||||
/* Create eight copies of the byte for which we are looking. */
|
||||
const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
|
||||
const uint64_t goal = copy_byte(c);
|
||||
|
||||
/* Read the first aligned word, but force bytes before the string to
|
||||
match neither zero nor goal (we make sure the high bit of each byte
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
||||
|
||||
@ -33,3 +33,13 @@
|
||||
#define CFZ(x) __insn_clz(x)
|
||||
#define REVCZ(x) __insn_ctz(x)
|
||||
#endif
|
||||
|
||||
/* Replicate BYTE into every byte position of a 64-bit word, yielding
   the same value as 0x0101010101010101ULL * byte.  The result is built
   with three __insn_bfins calls over the bit ranges 8-15, 16-31 and
   32-63, each one fed the word produced by the previous step, so the
   replicated pattern grows from 8 to 16 to 32 to 64 bits.  Doing this
   explicitly avoids relying on the compiler to optimize the multiply.  */
static inline uint64_t copy_byte(uint8_t byte)
{
  const uint64_t lanes8 = byte;
  const uint64_t lanes16 = __insn_bfins(lanes8, lanes8, 8, 15);
  const uint64_t lanes32 = __insn_bfins(lanes16, lanes16, 16, 31);

  return __insn_bfins(lanes32, lanes32, 32, 63);
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
||||
|
||||
@ -28,7 +28,7 @@ strrchr (const char *s, int c)
|
||||
const uint64_t *p = (const uint64_t *) (s_int & -8);
|
||||
|
||||
/* Create eight copies of the byte for which we are looking. */
|
||||
const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
|
||||
const uint64_t goal = copy_byte(c);
|
||||
|
||||
/* Read the first aligned word, but force bytes before the string to
|
||||
match neither zero nor goal (we make sure the high bit of each byte
|
||||
|
Loading…
Reference in New Issue
Block a user