mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-25 14:30:06 +00:00
tilegx: small performance fix for string routines
We were multiplying a byte by 0x0101010101010101ULL to create a constant for SIMD ops, but the compiler isn't good at optimizing this case (the fact that one operand is a byte is lost by the time it would be possible to do the optimization). So instead we add a helper routine that explicitly uses SIMD ops to create the constant.
This commit is contained in:
parent
575298fcd2
commit
64d76ca064
@ -1,3 +1,14 @@
|
|||||||
|
2012-05-12 Chris Metcalf <cmetcalf@tilera.com>
|
||||||
|
|
||||||
|
* sysdeps/tile/tilegx/memchr.c: Use new copy_byte() function
|
||||||
|
to efficiently generate a large constant for masking.
|
||||||
|
* sysdeps/tile/tilegx/memset.c: Likewise.
|
||||||
|
* sysdeps/tile/tilegx/rawmemchr.c: Likewise.
|
||||||
|
* sysdeps/tile/tilegx/strchr.c: Likewise.
|
||||||
|
* sysdeps/tile/tilegx/strchrnul.c: Likewise.
|
||||||
|
* sysdeps/tile/tilegx/strrchr.c: Likewise.
|
||||||
|
* sysdeps/tile/tilegx/string-endian.h (copy_byte): New function.
|
||||||
|
|
||||||
2012-05-12 Chris Metcalf <cmetcalf@tilera.com>
|
2012-05-12 Chris Metcalf <cmetcalf@tilera.com>
|
||||||
|
|
||||||
* sysdeps/tile/tilegx/memcpy.c: Allow memcpy(p, p, n)
|
* sysdeps/tile/tilegx/memcpy.c: Allow memcpy(p, p, n)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
||||||
|
|
||||||
@ -41,7 +41,7 @@ __memchr (const void *s, int c, size_t n)
|
|||||||
p = (const uint64_t *) (s_int & -8);
|
p = (const uint64_t *) (s_int & -8);
|
||||||
|
|
||||||
/* Create eight copies of the byte for which we are looking. */
|
/* Create eight copies of the byte for which we are looking. */
|
||||||
goal = 0x0101010101010101ULL * (uint8_t) c;
|
goal = copy_byte(c);
|
||||||
|
|
||||||
/* Read the first word, but munge it so that bytes before the array
|
/* Read the first word, but munge it so that bytes before the array
|
||||||
will not match goal. */
|
will not match goal. */
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
||||||
|
|
||||||
@ -19,6 +19,7 @@
|
|||||||
#include <arch/chip.h>
|
#include <arch/chip.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include "string-endian.h"
|
||||||
|
|
||||||
void *
|
void *
|
||||||
__memset (void *s, int c, size_t n)
|
__memset (void *s, int c, size_t n)
|
||||||
@ -71,7 +72,7 @@ __memset (void *s, int c, size_t n)
|
|||||||
n64 = n >> 3;
|
n64 = n >> 3;
|
||||||
|
|
||||||
/* Tile input byte out to 64 bits. */
|
/* Tile input byte out to 64 bits. */
|
||||||
v64 = 0x0101010101010101ULL * (uint8_t) c;
|
v64 = copy_byte(c);
|
||||||
|
|
||||||
/* This must be at least 8 or the following loop doesn't work. */
|
/* This must be at least 8 or the following loop doesn't work. */
|
||||||
#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8)
|
#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
||||||
|
|
||||||
@ -28,7 +28,7 @@ __rawmemchr (const void *s, int c)
|
|||||||
const uint64_t *p = (const uint64_t *) (s_int & -8);
|
const uint64_t *p = (const uint64_t *) (s_int & -8);
|
||||||
|
|
||||||
/* Create eight copies of the byte for which we are looking. */
|
/* Create eight copies of the byte for which we are looking. */
|
||||||
const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
|
const uint64_t goal = copy_byte(c);
|
||||||
|
|
||||||
/* Read the first word, but munge it so that bytes before the array
|
/* Read the first word, but munge it so that bytes before the array
|
||||||
will not match goal. */
|
will not match goal. */
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
||||||
|
|
||||||
@ -32,7 +32,7 @@ strchr (const char *s, int c)
|
|||||||
const uint64_t *p = (const uint64_t *) (s_int & -8);
|
const uint64_t *p = (const uint64_t *) (s_int & -8);
|
||||||
|
|
||||||
/* Create eight copies of the byte for which we are looking. */
|
/* Create eight copies of the byte for which we are looking. */
|
||||||
const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
|
const uint64_t goal = copy_byte(c);
|
||||||
|
|
||||||
/* Read the first aligned word, but force bytes before the string to
|
/* Read the first aligned word, but force bytes before the string to
|
||||||
match neither zero nor goal (we make sure the high bit of each byte
|
match neither zero nor goal (we make sure the high bit of each byte
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
||||||
|
|
||||||
@ -30,7 +30,7 @@ __strchrnul (const char *s, int c)
|
|||||||
const uint64_t *p = (const uint64_t *) (s_int & -8);
|
const uint64_t *p = (const uint64_t *) (s_int & -8);
|
||||||
|
|
||||||
/* Create eight copies of the byte for which we are looking. */
|
/* Create eight copies of the byte for which we are looking. */
|
||||||
const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
|
const uint64_t goal = copy_byte(c);
|
||||||
|
|
||||||
/* Read the first aligned word, but force bytes before the string to
|
/* Read the first aligned word, but force bytes before the string to
|
||||||
match neither zero nor goal (we make sure the high bit of each byte
|
match neither zero nor goal (we make sure the high bit of each byte
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
||||||
|
|
||||||
@ -33,3 +33,13 @@
|
|||||||
#define CFZ(x) __insn_clz(x)
|
#define CFZ(x) __insn_clz(x)
|
||||||
#define REVCZ(x) __insn_ctz(x)
|
#define REVCZ(x) __insn_ctz(x)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Create eight copies of the byte in a uint64_t.

   BYTE is first widened into a 64-bit word; three successive
   __insn_bfins calls then update that word, each taking the word itself
   as both of its first two arguments, with (8, 15), (16, 31) and
   (32, 63) as the remaining pair.

   NOTE(review): presumably __insn_bfins (dest, src, first, last) inserts
   the low-order bits of SRC into bits FIRST..LAST of DEST, so that each
   step doubles the width of the replicated pattern (8 -> 16 -> 32 -> 64
   bits) -- confirm against the TILE-Gx ISA reference.

   The string routines changed alongside this helper pass their
   "int c" argument directly; the uint8_t parameter type reduces it to
   its low eight bits, which is what the "(uint8_t) c" cast in the
   replaced multiply expression did.  */
|
||||||
|
static inline uint64_t copy_byte(uint8_t byte)
|
||||||
|
{
|
||||||
|
uint64_t word = byte;
|
||||||
|
word = __insn_bfins(word, word, 8, 15);
|
||||||
|
word = __insn_bfins(word, word, 16, 31);
|
||||||
|
word = __insn_bfins(word, word, 32, 63);
|
||||||
|
return word;
|
||||||
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
/* Copyright (C) 2011 Free Software Foundation, Inc.
|
/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
|
||||||
This file is part of the GNU C Library.
|
This file is part of the GNU C Library.
|
||||||
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
|
||||||
|
|
||||||
@ -28,7 +28,7 @@ strrchr (const char *s, int c)
|
|||||||
const uint64_t *p = (const uint64_t *) (s_int & -8);
|
const uint64_t *p = (const uint64_t *) (s_int & -8);
|
||||||
|
|
||||||
/* Create eight copies of the byte for which we are looking. */
|
/* Create eight copies of the byte for which we are looking. */
|
||||||
const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
|
const uint64_t goal = copy_byte(c);
|
||||||
|
|
||||||
/* Read the first aligned word, but force bytes before the string to
|
/* Read the first aligned word, but force bytes before the string to
|
||||||
match neither zero nor goal (we make sure the high bit of each byte
|
match neither zero nor goal (we make sure the high bit of each byte
|
||||||
|
Loading…
Reference in New Issue
Block a user