mirror of
https://github.com/google/brotli.git
synced 2024-12-28 10:41:06 +00:00
Use uint32_t positions in the hasher and compute distances modulo 2^32.
This commit is contained in:
parent
0be6b0af54
commit
a89b57b90c
@ -506,7 +506,7 @@ void CreateBackwardReferences(size_t num_bytes,
|
||||
const Command * const orig_commands = commands;
|
||||
int insert_length = *last_insert_len;
|
||||
size_t i = position & ringbuffer_mask;
|
||||
const int i_diff = position - i;
|
||||
const size_t i_diff = position - i;
|
||||
const size_t i_end = i + num_bytes;
|
||||
|
||||
// For speed up heuristics for random data.
|
||||
|
@ -84,7 +84,8 @@ class BrotliCompressor {
|
||||
// Encodes the data in input_buffer as a meta-block and writes it to
|
||||
// encoded_buffer (*encoded_size should be set to the size of
|
||||
// encoded_buffer) and sets *encoded_size to the number of bytes that
|
||||
// was written. Returns 0 if there was an error and 1 otherwise.
|
||||
// was written. The input_size must be <= input_block_size().
|
||||
// Returns 0 if there was an error and 1 otherwise.
|
||||
bool WriteMetaBlock(const size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
const bool is_last,
|
||||
|
132
enc/hash.h
132
enc/hash.h
@ -136,20 +136,13 @@ class HashLongestMatchQuickly {
|
||||
// Look at 4 bytes at data.
|
||||
// Compute a hash from these, and store the value somewhere within
|
||||
// [ix .. ix+3].
|
||||
inline void Store(const uint8_t *data, const int ix) {
|
||||
inline void Store(const uint8_t *data, const uint32_t ix) {
|
||||
const uint32_t key = HashBytes(data);
|
||||
// Wiggle the value with the bucket sweep range.
|
||||
const uint32_t off = (static_cast<uint32_t>(ix) >> 3) % kBucketSweep;
|
||||
const uint32_t off = (ix >> 3) % kBucketSweep;
|
||||
buckets_[key + off] = ix;
|
||||
}
|
||||
|
||||
// Store hashes for a range of data.
|
||||
void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
|
||||
for (int p = 0; p < len; ++p) {
|
||||
Store(&data[p & mask], startix + p);
|
||||
}
|
||||
}
|
||||
|
||||
// Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
|
||||
// up to the length of max_length.
|
||||
//
|
||||
@ -174,7 +167,7 @@ class HashLongestMatchQuickly {
|
||||
double best_score = *best_score_out;
|
||||
int best_len = best_len_in;
|
||||
int cached_backward = distance_cache[0];
|
||||
size_t prev_ix = cur_ix - cached_backward;
|
||||
uint32_t prev_ix = cur_ix - cached_backward;
|
||||
bool match_found = false;
|
||||
if (prev_ix < cur_ix) {
|
||||
prev_ix &= ring_buffer_mask;
|
||||
@ -202,7 +195,7 @@ class HashLongestMatchQuickly {
|
||||
if (kBucketSweep == 1) {
|
||||
// Only one to look for, don't bother to prepare for a loop.
|
||||
prev_ix = buckets_[key];
|
||||
int backward = cur_ix - prev_ix;
|
||||
uint32_t backward = cur_ix - prev_ix;
|
||||
prev_ix &= ring_buffer_mask;
|
||||
if (compare_char != ring_buffer[prev_ix + best_len_in]) {
|
||||
return false;
|
||||
@ -224,7 +217,7 @@ class HashLongestMatchQuickly {
|
||||
uint32_t *bucket = buckets_ + key;
|
||||
prev_ix = *bucket++;
|
||||
for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
|
||||
const int backward = cur_ix - prev_ix;
|
||||
const uint32_t backward = cur_ix - prev_ix;
|
||||
prev_ix &= ring_buffer_mask;
|
||||
if (compare_char != ring_buffer[prev_ix + best_len]) {
|
||||
continue;
|
||||
@ -335,20 +328,13 @@ class HashLongestMatch {
|
||||
|
||||
// Look at 3 bytes at data.
|
||||
// Compute a hash from these, and store the value of ix at that position.
|
||||
inline void Store(const uint8_t *data, const int ix) {
|
||||
inline void Store(const uint8_t *data, const uint32_t ix) {
|
||||
const uint32_t key = HashBytes(data);
|
||||
const int minor_ix = num_[key] & kBlockMask;
|
||||
buckets_[key][minor_ix] = ix;
|
||||
++num_[key];
|
||||
}
|
||||
|
||||
// Store hashes for a range of data.
|
||||
void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
|
||||
for (int p = 0; p < len; ++p) {
|
||||
Store(&data[p & mask], startix + p);
|
||||
}
|
||||
}
|
||||
|
||||
// Find a longest backward match of &data[cur_ix] up to the length of
|
||||
// max_length.
|
||||
//
|
||||
@ -379,7 +365,7 @@ class HashLongestMatch {
|
||||
for (int i = 0; i < kNumLastDistancesToCheck; ++i) {
|
||||
const int idx = kDistanceCacheIndex[i];
|
||||
const int backward = distance_cache[idx] + kDistanceCacheOffset[i];
|
||||
size_t prev_ix = cur_ix - backward;
|
||||
uint32_t prev_ix = cur_ix - backward;
|
||||
if (prev_ix >= cur_ix) {
|
||||
continue;
|
||||
}
|
||||
@ -413,38 +399,36 @@ class HashLongestMatch {
|
||||
}
|
||||
}
|
||||
const uint32_t key = HashBytes(&data[cur_ix_masked]);
|
||||
const int * __restrict const bucket = &buckets_[key][0];
|
||||
const uint32_t * __restrict const bucket = &buckets_[key][0];
|
||||
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
|
||||
for (int i = num_[key] - 1; i >= down; --i) {
|
||||
int prev_ix = bucket[i & kBlockMask];
|
||||
if (prev_ix >= 0) {
|
||||
const size_t backward = cur_ix - prev_ix;
|
||||
if (PREDICT_FALSE(backward > max_backward)) {
|
||||
break;
|
||||
}
|
||||
prev_ix &= ring_buffer_mask;
|
||||
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
||||
prev_ix + best_len > ring_buffer_mask ||
|
||||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
const size_t len =
|
||||
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 4) {
|
||||
// Comparing for >= 3 does not change the semantics, but just saves
|
||||
// for a few unnecessary binary logarithms in backward reference
|
||||
// score, since we are not interested in such short matches.
|
||||
double score = BackwardReferenceScore(len, backward);
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
*best_len_out = best_len;
|
||||
*best_len_code_out = best_len;
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = best_score;
|
||||
match_found = true;
|
||||
}
|
||||
uint32_t prev_ix = bucket[i & kBlockMask];
|
||||
const uint32_t backward = cur_ix - prev_ix;
|
||||
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
||||
break;
|
||||
}
|
||||
prev_ix &= ring_buffer_mask;
|
||||
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
||||
prev_ix + best_len > ring_buffer_mask ||
|
||||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
const size_t len =
|
||||
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 4) {
|
||||
// Comparing for >= 3 does not change the semantics, but just saves
|
||||
// for a few unnecessary binary logarithms in backward reference
|
||||
// score, since we are not interested in such short matches.
|
||||
double score = BackwardReferenceScore(len, backward);
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
*best_len_out = best_len;
|
||||
*best_len_code_out = best_len;
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = best_score;
|
||||
match_found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -530,31 +514,29 @@ class HashLongestMatch {
|
||||
}
|
||||
}
|
||||
const uint32_t key = HashBytes(&data[cur_ix_masked]);
|
||||
const int * __restrict const bucket = &buckets_[key][0];
|
||||
const uint32_t * __restrict const bucket = &buckets_[key][0];
|
||||
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
|
||||
for (int i = num_[key] - 1; i >= down; --i) {
|
||||
int prev_ix = bucket[i & kBlockMask];
|
||||
if (prev_ix >= 0) {
|
||||
const size_t backward = cur_ix - prev_ix;
|
||||
if (PREDICT_FALSE(backward > max_backward)) {
|
||||
break;
|
||||
}
|
||||
prev_ix &= ring_buffer_mask;
|
||||
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
||||
prev_ix + best_len > ring_buffer_mask ||
|
||||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
const size_t len =
|
||||
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len > best_len) {
|
||||
best_len = len;
|
||||
if (len > kMaxZopfliLen) {
|
||||
matches = orig_matches;
|
||||
}
|
||||
*matches++ = BackwardMatch(backward, len);
|
||||
uint32_t prev_ix = bucket[i & kBlockMask];
|
||||
const uint32_t backward = cur_ix - prev_ix;
|
||||
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
||||
break;
|
||||
}
|
||||
prev_ix &= ring_buffer_mask;
|
||||
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
||||
prev_ix + best_len > ring_buffer_mask ||
|
||||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
const size_t len =
|
||||
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len > best_len) {
|
||||
best_len = len;
|
||||
if (len > kMaxZopfliLen) {
|
||||
matches = orig_matches;
|
||||
}
|
||||
*matches++ = BackwardMatch(backward, len);
|
||||
}
|
||||
}
|
||||
std::vector<int> dict_matches(kMaxDictionaryMatchLen + 1, kInvalidMatch);
|
||||
@ -601,7 +583,7 @@ class HashLongestMatch {
|
||||
uint16_t num_[kBucketSize];
|
||||
|
||||
// Buckets containing kBlockSize of backward references.
|
||||
int buckets_[kBucketSize][kBlockSize];
|
||||
uint32_t buckets_[kBucketSize][kBlockSize];
|
||||
|
||||
size_t num_dict_lookups_;
|
||||
size_t num_dict_matches_;
|
||||
@ -654,7 +636,7 @@ struct Hashers {
|
||||
template<typename Hasher>
|
||||
void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
|
||||
for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {
|
||||
hasher->Store(dict, i);
|
||||
hasher->Store(&dict[i], static_cast<uint32_t>(i));
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user