Prefetch the backreference hashtable bucket.

Issue the prefetch before the last-distance checks, so that the bucket has time to arrive in cache before it is read.

PiperOrigin-RevId: 626228820
This commit is contained in:
Brotli 2024-04-18 19:59:28 -07:00 committed by Copybara-Service
parent 443af10a80
commit 1b3a5ccb6e
3 changed files with 28 additions and 7 deletions

View File

@ -519,6 +519,21 @@ BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) {
#if BROTLI_ENABLE_DUMP #if BROTLI_ENABLE_DUMP
BROTLI_UNUSED(&BrotliDump); BROTLI_UNUSED(&BrotliDump);
#endif #endif
/* Cache prefetch hints.
 *
 * PREFETCH_L1(ptr) and PREFETCH_L2(ptr) ask the CPU to start loading the
 * memory at `ptr` for reading; PREFETCH_L1 requests the closest cache level /
 * highest temporal locality, PREFETCH_L2 one step below. Each macro expands
 * to a single statement-like expression and evaluates `ptr` exactly once.
 * Where no prefetch primitive is known, the macros only evaluate `ptr`.
 *
 * NOTE(review): in the diff this group sits before the closing brace of
 * BrotliSuppressUnusedFunctions(); confirm that including a system header at
 * that position is intended.
 */
#if defined(_MSC_VER) && \
    (defined(_M_X64) || defined(_M_IX86) || defined(_M_I86)) && \
    !defined(_M_ARM64EC)
/* _mm_prefetch() is not defined outside of x86/x64.
   32-bit x86 MSVC targets define _M_IX86; _M_I86 is the legacy 16-bit macro
   and is kept only for backward compatibility. */
/* _mm_prefetch() and _MM_HINT_T* are SSE intrinsics, declared in
   <xmmintrin.h>; see
   https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# include <xmmintrin.h>
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
#elif BROTLI_GNUC_HAS_BUILTIN(__builtin_prefetch, 3, 1, 0)
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
#elif defined(__aarch64__)
/* The "Q" constraint passes *(ptr) as a memory operand addressed through a
   single base register; PRFM is only a hint to the memory system. */
# define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
# define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
#else
/* No known prefetch primitive: evaluate the argument and do nothing else. */
# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
#endif
} }
#endif /* BROTLI_COMMON_PLATFORM_H_ */ #endif /* BROTLI_COMMON_PLATFORM_H_ */

View File

@ -170,6 +170,11 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
score_t best_score = out->score; score_t best_score = out->score;
size_t best_len = out->len; size_t best_len = out->len;
size_t i; size_t i;
/* Precalculate the hash key and prefetch the bucket. */
const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
PREFETCH_L1(bucket);
if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16);
out->len = 0; out->len = 0;
out->len_code_delta = 0; out->len_code_delta = 0;
@ -220,8 +225,6 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
best_len = 3; best_len = 3;
} }
{ {
const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
const size_t down = const size_t down =
(num[key] > self->block_size_) ? (num[key] > self->block_size_) ?
(num[key] - self->block_size_) : 0u; (num[key] - self->block_size_) : 0u;

View File

@ -169,11 +169,17 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
score_t best_score = out->score; score_t best_score = out->score;
size_t best_len = out->len; size_t best_len = out->len;
size_t i; size_t i;
/* Precalculate the hash key and prefetch the bucket. */
const uint32_t key =
FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
PREFETCH_L1(bucket);
if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16);
out->len = 0;
out->len_code_delta = 0;
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask); BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
out->len = 0;
out->len_code_delta = 0;
/* Try last distance first. */ /* Try last distance first. */
for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) { for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
const size_t backward = (size_t)distance_cache[i]; const size_t backward = (size_t)distance_cache[i];
@ -219,9 +225,6 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
best_len = 3; best_len = 3;
} }
{ {
const uint32_t key =
FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
const size_t down = const size_t down =
(num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u; (num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u;
for (i = num[key]; i > down;) { for (i = num[key]; i > down;) {