/* Copyright (C) 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ #include #include typedef unsigned long word; static inline word ldq_u(const void *s) { return *(const word *)((word)s & -8); } #define unlikely(X) __builtin_expect ((X), 0) #define prefetch(X) __builtin_prefetch ((void *)(X), 0) #define cmpbeq0(X) __builtin_alpha_cmpbge(0, (X)) #define find(X, Y) cmpbeq0 ((X) ^ (Y)) /* Search no more than N bytes of S for C. */ void * __memchr (const void *s, int xc, size_t n) { const word *s_align; word t, current, found, mask, offset; if (unlikely (n == 0)) return 0; current = ldq_u (s); /* Replicate low byte of XC into all bytes of C. */ t = xc & 0xff; /* 0000000c */ t = (t << 8) | t; /* 000000cc */ t = (t << 16) | t; /* 0000cccc */ const word c = (t << 32) | t; /* cccccccc */ /* Align the source, and decrement the count by the number of bytes searched in the first word. */ s_align = (const word *)((word)s & -8); n += ((word)s & 7); /* Deal with misalignment in the first word for the comparison. */ mask = (1ul << ((word)s & 7)) - 1; /* If the entire string fits within one word, we may need masking at both the front and the back of the string. */ if (unlikely (n <= 8)) { mask |= -1ul << n; goto last_quad; } found = find (current, c) & ~mask; if (unlikely (found)) goto found_it; s_align++; n -= 8; /* If the block is sufficiently large, align to cacheline and prefetch. */ if (unlikely (n >= 256)) { /* Prefetch 3 cache lines beyond the one we're working on. */ prefetch (s_align + 8); prefetch (s_align + 16); prefetch (s_align + 24); while ((word)s_align & 63) { current = *s_align; found = find (current, c); if (found) goto found_it; s_align++; n -= 8; } /* Within each cacheline, advance the load for the next word before the test for the previous word is complete. This allows us to hide the 3 cycle L1 cache load latency. We only perform this advance load within a cacheline to prevent reading across page boundary. */ #define CACHELINE_LOOP \ do { \ word i, next = s_align[0]; \ for (i = 0; i < 7; ++i) \ { \ current = next; \ next = s_align[1]; \ found = find (current, c); \ if (unlikely (found)) \ goto found_it; \ s_align++; \ } \ current = next; \ found = find (current, c); \ if (unlikely (found)) \ goto found_it; \ s_align++; \ n -= 64; \ } while (0) /* While there's still lots more data to potentially be read, continue issuing prefetches for the 4th cacheline out. */ while (n >= 256) { prefetch (s_align + 24); CACHELINE_LOOP; } /* Up to 3 cache lines remaining. Continue issuing advanced loads, but stop prefetching. */ while (n >= 64) CACHELINE_LOOP; /* We may have exhausted the buffer. */ if (n == 0) return NULL; } /* Quadword aligned loop. */ current = *s_align; while (n > 8) { found = find (current, c); if (unlikely (found)) goto found_it; current = *++s_align; n -= 8; } /* The last word may need masking at the tail of the compare. */ mask = -1ul << n; last_quad: found = find (current, c) & ~mask; if (found == 0) return NULL; found_it: #ifdef __alpha_cix__ offset = __builtin_alpha_cttz (found); #else /* Extract LSB. */ found &= -found; /* Binary search for the LSB. */ offset = (found & 0x0f ? 0 : 4); offset += (found & 0x33 ? 0 : 2); offset += (found & 0x55 ? 0 : 1); #endif return (void *)((word)s_align + offset); } #ifdef weak_alias weak_alias (__memchr, BP_SYM (memchr)) #endif libc_hidden_builtin_def (memchr)