Speed up encoder on q5-9 / 1MB+ files

PiperOrigin-RevId: 553087469
This commit is contained in:
Evgenii Kliuchnikov 2023-08-02 03:04:52 -07:00 committed by Copybara-Service
parent 4125f2587c
commit 117b68b745
4 changed files with 51 additions and 13 deletions

View File

@ -181,6 +181,7 @@ void BrotliCreateBackwardReferences(size_t num_bytes,
CASE_(65)
#undef CASE_
default:
BROTLI_DCHECK(false);
break;
}
}
@ -196,6 +197,7 @@ void BrotliCreateBackwardReferences(size_t num_bytes,
FOR_GENERIC_HASHERS(CASE_)
#undef CASE_
default:
BROTLI_DCHECK(false);
break;
}
}

View File

@ -78,8 +78,7 @@ typedef struct HasherSearchResult {
for this use.
* The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1E35A7BD;
static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD);
static const uint64_t kHashMul64Long =
static const uint64_t kHashMul64 =
BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {

View File

@ -20,13 +20,12 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
/* HashBytes is the function that chooses the bucket to place the address in. */
static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data,
const int shift) {
const uint64_t mask = (~((uint64_t)0U)) >> 24; /* Use only 5 bytes. */
const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(data) & mask) * kHashMul64Long;
static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data,
uint64_t hash_mul) {
const uint64_t h = BROTLI_UNALIGNED_LOAD64LE(data) * hash_mul;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
return (uint32_t)(h >> shift);
return (size_t)(h >> (64 - 15));
}
typedef struct HashLongestMatch {
@ -35,8 +34,8 @@ typedef struct HashLongestMatch {
/* Only block_size_ newest backward references are kept,
and the older are forgotten. */
size_t block_size_;
/* Right-shift for computing hash bucket index from hash value. */
int hash_shift_;
/* Hash multiplier tuned to match length. */
uint64_t hash_mul_;
/* Mask for accessing entries in a block (in a ring-buffer manner). */
uint32_t block_mask_;
@ -61,7 +60,8 @@ static void FN(Initialize)(
self->common_ = common;
BROTLI_UNUSED(params);
self->hash_shift_ = 64 - common->params.bucket_bits;
self->hash_mul_ = kHashMul64 << (64 - 5 * 8);
BROTLI_DCHECK(common->params.bucket_bits == 15);
self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
self->block_bits_ = common->params.block_bits;
self->block_size_ = (size_t)1 << common->params.block_bits;
@ -81,7 +81,7 @@ static void FN(Prepare)(
if (one_shot && input_size <= partial_prepare_threshold) {
size_t i;
for (i = 0; i < input_size; ++i) {
const uint32_t key = FN(HashBytes)(&data[i], self->hash_shift_);
const size_t key = FN(HashBytes)(&data[i], self->hash_mul_);
num[key] = 0;
}
} else {
@ -107,7 +107,7 @@ static BROTLI_INLINE void FN(Store)(
const size_t mask, const size_t ix) {
uint16_t* BROTLI_RESTRICT num = self->num_;
uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_shift_);
const size_t key = FN(HashBytes)(&data[ix & mask], self->hash_mul_);
const size_t minor_ix = num[key] & self->block_mask_;
const size_t offset = minor_ix + (key << self->block_bits_);
++num[key];
@ -212,7 +212,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
}
}
{
const uint32_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
const size_t down =
(num[key] > self->block_size_) ?

View File

@ -119,6 +119,41 @@ static BROTLI_INLINE size_t LiteralSpreeLengthForSparseSearch(
return params->quality < 9 ? 64 : 512;
}
/* Quality to hasher mapping:
- q02: h02 (longest_match_quickly), b16, l5
- q03: h03 (longest_match_quickly), b17, l5
- q04: h04 (longest_match_quickly), b17, l5
- q04: h54 (longest_match_quickly), b20, l7 | for large files
- q05: h05 (longest_match ), b14, l4
- q05: h06 (longest_match64 ), b15, l5 | for large files
- q05: h40 (forgetful_chain ), b15, l4 | for small window
- q06: h05 (longest_match ), b14, l4
- q06: h06 (longest_match64 ), b15, l5 | for large files
- q06: h40 (forgetful_chain ), b15, l4 | for small window
- q07: h05 (longest_match ), b15, l4
- q07: h06 (longest_match64 ), b15, l5 | for large files
- q07: h41 (forgetful_chain ), b15, l4 | for small window
- q08: h05 (longest_match ), b15, l4
- q08: h06 (longest_match64 ), b15, l5 | for large files
- q08: h41 (forgetful_chain ), b15, l4 | for small window
- q09: h05 (longest_match ), b15, l4
- q09: h06 (longest_match64 ), b15, l5 | for large files
- q09: h42 (forgetful_chain ), b15, l4 | for small window
- q10: t10 (to_binary_tree ), b17, l128
- q11: t10 (to_binary_tree ), b17, l128
Where "q" is quality, "h" is hasher type, "b" is bucket bits,
"l" is source len. */
static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params,
BrotliHasherParams* hparams) {
if (params->quality > 9) {
@ -136,6 +171,8 @@ static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params,
hparams->num_last_distances_to_check =
params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
} else {
/* TODO(eustas): the previous setting (H6) is often faster and denser;
consider adding an option to use it. */
hparams->type = 5;
hparams->block_bits = params->quality - 1;
hparams->bucket_bits = params->quality < 7 ? 14 : 15;