tilegx: small performance fix for string routines

We were multiplying a byte by 0x0101010101010101ULL to create a
constant for SIMD ops, but the compiler isn't good at optimizing
this case (the fact that one operand is a byte is lost by the time
it would be possible to do the optimization).  So instead we add
a helper routine that explicitly uses SIMD ops to create the constant.
This commit is contained in:
Chris Metcalf 2012-05-11 18:03:58 -04:00
parent 575298fcd2
commit 64d76ca064
8 changed files with 35 additions and 13 deletions

View File

@ -1,3 +1,14 @@
2012-05-12 Chris Metcalf <cmetcalf@tilera.com>
* sysdeps/tile/tilegx/memchr.c: Use new copy_byte() function
to efficiently generate a large constant for masking.
* sysdeps/tile/tilegx/memset.c: Likewise.
* sysdeps/tile/tilegx/rawmemchr.c: Likewise.
* sysdeps/tile/tilegx/strchr.c: Likewise.
* sysdeps/tile/tilegx/strchrnul.c: Likewise.
* sysdeps/tile/tilegx/strrchr.c: Likewise.
* sysdeps/tile/tilegx/string-endian.h (copy_byte): New function.
2012-05-12 Chris Metcalf <cmetcalf@tilera.com> 2012-05-12 Chris Metcalf <cmetcalf@tilera.com>
* sysdeps/tile/tilegx/memcpy.c: Allow memcpy(p, p, n) * sysdeps/tile/tilegx/memcpy.c: Allow memcpy(p, p, n)

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 Free Software Foundation, Inc. /* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@ -41,7 +41,7 @@ __memchr (const void *s, int c, size_t n)
p = (const uint64_t *) (s_int & -8); p = (const uint64_t *) (s_int & -8);
/* Create eight copies of the byte for which we are looking. */ /* Create eight copies of the byte for which we are looking. */
goal = 0x0101010101010101ULL * (uint8_t) c; goal = copy_byte(c);
/* Read the first word, but munge it so that bytes before the array /* Read the first word, but munge it so that bytes before the array
will not match goal. */ will not match goal. */

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 Free Software Foundation, Inc. /* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@ -19,6 +19,7 @@
#include <arch/chip.h> #include <arch/chip.h>
#include <string.h> #include <string.h>
#include <stdint.h> #include <stdint.h>
#include "string-endian.h"
void * void *
__memset (void *s, int c, size_t n) __memset (void *s, int c, size_t n)
@ -71,7 +72,7 @@ __memset (void *s, int c, size_t n)
n64 = n >> 3; n64 = n >> 3;
/* Tile input byte out to 64 bits. */ /* Tile input byte out to 64 bits. */
v64 = 0x0101010101010101ULL * (uint8_t) c; v64 = copy_byte(c);
/* This must be at least 8 or the following loop doesn't work. */ /* This must be at least 8 or the following loop doesn't work. */
#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8) #define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8)

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 Free Software Foundation, Inc. /* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@ -28,7 +28,7 @@ __rawmemchr (const void *s, int c)
const uint64_t *p = (const uint64_t *) (s_int & -8); const uint64_t *p = (const uint64_t *) (s_int & -8);
/* Create eight copies of the byte for which we are looking. */ /* Create eight copies of the byte for which we are looking. */
const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c; const uint64_t goal = copy_byte(c);
/* Read the first word, but munge it so that bytes before the array /* Read the first word, but munge it so that bytes before the array
will not match goal. */ will not match goal. */

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 Free Software Foundation, Inc. /* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@ -32,7 +32,7 @@ strchr (const char *s, int c)
const uint64_t *p = (const uint64_t *) (s_int & -8); const uint64_t *p = (const uint64_t *) (s_int & -8);
/* Create eight copies of the byte for which we are looking. */ /* Create eight copies of the byte for which we are looking. */
const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c; const uint64_t goal = copy_byte(c);
/* Read the first aligned word, but force bytes before the string to /* Read the first aligned word, but force bytes before the string to
match neither zero nor goal (we make sure the high bit of each byte match neither zero nor goal (we make sure the high bit of each byte

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 Free Software Foundation, Inc. /* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@ -30,7 +30,7 @@ __strchrnul (const char *s, int c)
const uint64_t *p = (const uint64_t *) (s_int & -8); const uint64_t *p = (const uint64_t *) (s_int & -8);
/* Create eight copies of the byte for which we are looking. */ /* Create eight copies of the byte for which we are looking. */
const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c; const uint64_t goal = copy_byte(c);
/* Read the first aligned word, but force bytes before the string to /* Read the first aligned word, but force bytes before the string to
match neither zero nor goal (we make sure the high bit of each byte match neither zero nor goal (we make sure the high bit of each byte

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 Free Software Foundation, Inc. /* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@ -33,3 +33,13 @@
#define CFZ(x) __insn_clz(x) #define CFZ(x) __insn_clz(x)
#define REVCZ(x) __insn_ctz(x) #define REVCZ(x) __insn_ctz(x)
#endif #endif
/* Return a 64-bit word in which every one of the eight bytes equals BYTE.
   The value is built by repeated doubling: each step inserts the word
   built so far into the bit range directly above it, so the populated
   width grows 8 -> 16 -> 32 -> 64 bits.
   NOTE(review): this relies on __insn_bfins (dest, src, first, last)
   inserting the low bits of SRC into bits FIRST..LAST of DEST, inclusive;
   confirm against the TILE-Gx intrinsic documentation.  */
static inline uint64_t copy_byte(uint8_t byte)
{
  const uint64_t x8 = byte;
  const uint64_t x16 = __insn_bfins(x8, x8, 8, 15);
  const uint64_t x32 = __insn_bfins(x16, x16, 16, 31);

  return __insn_bfins(x32, x32, 32, 63);
}

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 Free Software Foundation, Inc. /* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011. Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@ -28,7 +28,7 @@ strrchr (const char *s, int c)
const uint64_t *p = (const uint64_t *) (s_int & -8); const uint64_t *p = (const uint64_t *) (s_int & -8);
/* Create eight copies of the byte for which we are looking. */ /* Create eight copies of the byte for which we are looking. */
const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c; const uint64_t goal = copy_byte(c);
/* Read the first aligned word, but force bytes before the string to /* Read the first aligned word, but force bytes before the string to
match neither zero nor goal (we make sure the high bit of each byte match neither zero nor goal (we make sure the high bit of each byte