Use uint32_t positions in the hasher and compute distances modulo 2^32.

This commit is contained in:
Zoltan Szabadka 2015-10-26 17:08:57 +01:00
parent 0be6b0af54
commit a89b57b90c
3 changed files with 60 additions and 77 deletions

View File

@ -506,7 +506,7 @@ void CreateBackwardReferences(size_t num_bytes,
const Command * const orig_commands = commands;
int insert_length = *last_insert_len;
size_t i = position & ringbuffer_mask;
const int i_diff = position - i;
const size_t i_diff = position - i;
const size_t i_end = i + num_bytes;
// For speed up heuristics for random data.

View File

@ -84,7 +84,8 @@ class BrotliCompressor {
// Encodes the data in input_buffer as a meta-block and writes it to
// encoded_buffer (*encoded_size should be set to the size of
// encoded_buffer) and sets *encoded_size to the number of bytes that
// was written. Returns 0 if there was an error and 1 otherwise.
// was written. The input_size must be <= input_block_size().
// Returns 0 if there was an error and 1 otherwise.
bool WriteMetaBlock(const size_t input_size,
const uint8_t* input_buffer,
const bool is_last,

View File

@ -136,20 +136,13 @@ class HashLongestMatchQuickly {
// Look at 4 bytes at data.
// Compute a hash from these, and store the value somewhere within
// [ix .. ix+3].
inline void Store(const uint8_t *data, const int ix) {
inline void Store(const uint8_t *data, const uint32_t ix) {
const uint32_t key = HashBytes(data);
// Wiggle the value with the bucket sweep range.
const uint32_t off = (static_cast<uint32_t>(ix) >> 3) % kBucketSweep;
const uint32_t off = (ix >> 3) % kBucketSweep;
buckets_[key + off] = ix;
}
// Store hashes for a range of data.
void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
for (int p = 0; p < len; ++p) {
Store(&data[p & mask], startix + p);
}
}
// Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
// up to the length of max_length.
//
@ -174,7 +167,7 @@ class HashLongestMatchQuickly {
double best_score = *best_score_out;
int best_len = best_len_in;
int cached_backward = distance_cache[0];
size_t prev_ix = cur_ix - cached_backward;
uint32_t prev_ix = cur_ix - cached_backward;
bool match_found = false;
if (prev_ix < cur_ix) {
prev_ix &= ring_buffer_mask;
@ -202,7 +195,7 @@ class HashLongestMatchQuickly {
if (kBucketSweep == 1) {
// Only one to look for, don't bother to prepare for a loop.
prev_ix = buckets_[key];
int backward = cur_ix - prev_ix;
uint32_t backward = cur_ix - prev_ix;
prev_ix &= ring_buffer_mask;
if (compare_char != ring_buffer[prev_ix + best_len_in]) {
return false;
@ -224,7 +217,7 @@ class HashLongestMatchQuickly {
uint32_t *bucket = buckets_ + key;
prev_ix = *bucket++;
for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
const int backward = cur_ix - prev_ix;
const uint32_t backward = cur_ix - prev_ix;
prev_ix &= ring_buffer_mask;
if (compare_char != ring_buffer[prev_ix + best_len]) {
continue;
@ -335,20 +328,13 @@ class HashLongestMatch {
// Look at 3 bytes at data.
// Compute a hash from these, and store the value of ix at that position.
inline void Store(const uint8_t *data, const int ix) {
inline void Store(const uint8_t *data, const uint32_t ix) {
const uint32_t key = HashBytes(data);
const int minor_ix = num_[key] & kBlockMask;
buckets_[key][minor_ix] = ix;
++num_[key];
}
// Store hashes for a range of data.
void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
for (int p = 0; p < len; ++p) {
Store(&data[p & mask], startix + p);
}
}
// Find a longest backward match of &data[cur_ix] up to the length of
// max_length.
//
@ -379,7 +365,7 @@ class HashLongestMatch {
for (int i = 0; i < kNumLastDistancesToCheck; ++i) {
const int idx = kDistanceCacheIndex[i];
const int backward = distance_cache[idx] + kDistanceCacheOffset[i];
size_t prev_ix = cur_ix - backward;
uint32_t prev_ix = cur_ix - backward;
if (prev_ix >= cur_ix) {
continue;
}
@ -413,38 +399,36 @@ class HashLongestMatch {
}
}
const uint32_t key = HashBytes(&data[cur_ix_masked]);
const int * __restrict const bucket = &buckets_[key][0];
const uint32_t * __restrict const bucket = &buckets_[key][0];
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (int i = num_[key] - 1; i >= down; --i) {
int prev_ix = bucket[i & kBlockMask];
if (prev_ix >= 0) {
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len >= 4) {
// Comparing for >= 3 does not change the semantics, but just saves
// for a few unnecessary binary logarithms in backward reference
// score, since we are not interested in such short matches.
double score = BackwardReferenceScore(len, backward);
if (best_score < score) {
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
*best_score_out = best_score;
match_found = true;
}
uint32_t prev_ix = bucket[i & kBlockMask];
const uint32_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len >= 4) {
// Comparing for >= 3 does not change the semantics, but just saves
// for a few unnecessary binary logarithms in backward reference
// score, since we are not interested in such short matches.
double score = BackwardReferenceScore(len, backward);
if (best_score < score) {
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
*best_score_out = best_score;
match_found = true;
}
}
}
@ -530,31 +514,29 @@ class HashLongestMatch {
}
}
const uint32_t key = HashBytes(&data[cur_ix_masked]);
const int * __restrict const bucket = &buckets_[key][0];
const uint32_t * __restrict const bucket = &buckets_[key][0];
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (int i = num_[key] - 1; i >= down; --i) {
int prev_ix = bucket[i & kBlockMask];
if (prev_ix >= 0) {
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len > best_len) {
best_len = len;
if (len > kMaxZopfliLen) {
matches = orig_matches;
}
*matches++ = BackwardMatch(backward, len);
uint32_t prev_ix = bucket[i & kBlockMask];
const uint32_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len > best_len) {
best_len = len;
if (len > kMaxZopfliLen) {
matches = orig_matches;
}
*matches++ = BackwardMatch(backward, len);
}
}
std::vector<int> dict_matches(kMaxDictionaryMatchLen + 1, kInvalidMatch);
@ -601,7 +583,7 @@ class HashLongestMatch {
uint16_t num_[kBucketSize];
// Buckets containing kBlockSize of backward references.
int buckets_[kBucketSize][kBlockSize];
uint32_t buckets_[kBucketSize][kBlockSize];
size_t num_dict_lookups_;
size_t num_dict_matches_;
@ -654,7 +636,7 @@ struct Hashers {
template<typename Hasher>
void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {
hasher->Store(dict, i);
hasher->Store(&dict[i], static_cast<uint32_t>(i));
}
}