tilegx: small performance fix for string routines

We were multiplying a byte by 0x0101010101010101ULL to create a
constant for SIMD ops, but the compiler isn't good at optimizing
this case (the fact that one operand is a byte is lost by the time
it would be possible to do the optimization).  So instead we add
a helper routine that explicitly uses SIMD ops to create the constant.
This commit is contained in:
Chris Metcalf 2012-05-11 18:03:58 -04:00
parent 575298fcd2
commit 64d76ca064
8 changed files with 35 additions and 13 deletions

View File

@ -1,3 +1,14 @@
2012-05-12 Chris Metcalf <cmetcalf@tilera.com>
* sysdeps/tile/tilegx/memchr.c: Use new copy_byte() function
to efficiently generate a large constant for masking.
* sysdeps/tile/tilegx/memset.c: Likewise.
* sysdeps/tile/tilegx/rawmemchr.c: Likewise.
* sysdeps/tile/tilegx/strchr.c: Likewise.
* sysdeps/tile/tilegx/strchrnul.c: Likewise.
* sysdeps/tile/tilegx/strrchr.c: Likewise.
* sysdeps/tile/tilegx/string-endian.h (copy_byte): New function.
2012-05-12 Chris Metcalf <cmetcalf@tilera.com>
* sysdeps/tile/tilegx/memcpy.c: Allow memcpy(p, p, n)

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 Free Software Foundation, Inc.
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@ -41,7 +41,7 @@ __memchr (const void *s, int c, size_t n)
p = (const uint64_t *) (s_int & -8);
/* Create eight copies of the byte for which we are looking. */
goal = 0x0101010101010101ULL * (uint8_t) c;
goal = copy_byte(c);
/* Read the first word, but munge it so that bytes before the array
will not match goal. */

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 Free Software Foundation, Inc.
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@ -19,6 +19,7 @@
#include <arch/chip.h>
#include <string.h>
#include <stdint.h>
#include "string-endian.h"
void *
__memset (void *s, int c, size_t n)
@ -71,7 +72,7 @@ __memset (void *s, int c, size_t n)
n64 = n >> 3;
/* Tile input byte out to 64 bits. */
v64 = 0x0101010101010101ULL * (uint8_t) c;
v64 = copy_byte(c);
/* This must be at least 8 or the following loop doesn't work. */
#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8)

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 Free Software Foundation, Inc.
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@ -28,7 +28,7 @@ __rawmemchr (const void *s, int c)
const uint64_t *p = (const uint64_t *) (s_int & -8);
/* Create eight copies of the byte for which we are looking. */
const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
const uint64_t goal = copy_byte(c);
/* Read the first word, but munge it so that bytes before the array
will not match goal. */

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 Free Software Foundation, Inc.
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@ -32,7 +32,7 @@ strchr (const char *s, int c)
const uint64_t *p = (const uint64_t *) (s_int & -8);
/* Create eight copies of the byte for which we are looking. */
const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
const uint64_t goal = copy_byte(c);
/* Read the first aligned word, but force bytes before the string to
match neither zero nor goal (we make sure the high bit of each byte

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 Free Software Foundation, Inc.
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@ -30,7 +30,7 @@ __strchrnul (const char *s, int c)
const uint64_t *p = (const uint64_t *) (s_int & -8);
/* Create eight copies of the byte for which we are looking. */
const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
const uint64_t goal = copy_byte(c);
/* Read the first aligned word, but force bytes before the string to
match neither zero nor goal (we make sure the high bit of each byte

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 Free Software Foundation, Inc.
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@ -33,3 +33,13 @@
#define CFZ(x) __insn_clz(x)
#define REVCZ(x) __insn_ctz(x)
#endif
/* Replicate BYTE into every byte position of a uint64_t.
   The value is built in three bit-field-insert steps whose target bit
   ranges double in width each time (8-15, 16-31, 32-63), instead of
   multiplying by 0x0101010101010101ULL.
   NOTE(review): presumably __insn_bfins inserts the low bits of its
   second operand into the inclusive bit range of its first operand --
   confirm against the Tile-Gx intrinsic reference.  */
static inline uint64_t copy_byte(uint8_t byte)
{
  /* Start from the zero-extended input byte.  */
  const uint64_t one = byte;
  /* Fill bits 8-15 from the value built so far.  */
  const uint64_t two = __insn_bfins(one, one, 8, 15);
  /* Fill bits 16-31 from the value built so far.  */
  const uint64_t four = __insn_bfins(two, two, 16, 31);
  /* Fill bits 32-63 and hand back the finished word.  */
  return __insn_bfins(four, four, 32, 63);
}

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2011 Free Software Foundation, Inc.
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@ -28,7 +28,7 @@ strrchr (const char *s, int c)
const uint64_t *p = (const uint64_t *) (s_int & -8);
/* Create eight copies of the byte for which we are looking. */
const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
const uint64_t goal = copy_byte(c);
/* Read the first aligned word, but force bytes before the string to
match neither zero nor goal (we make sure the high bit of each byte