mirror of
https://github.com/google/brotli.git
synced 2024-11-22 11:40:06 +00:00
7f740f1308
- fix formatting - fix type conversion - fix no-op arithmetic with null-pointer - improve performance of hash_longest_match64 - go: detect read after close - java decoder: support compound dictionary - remove executable flag on non-scripts
213 lines
7.0 KiB
C
213 lines
7.0 KiB
C
/* NOLINT(build/header_guard) */
|
|
/* Copyright 2018 Google Inc. All Rights Reserved.
|
|
|
|
Distributed under MIT license.
|
|
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
|
*/
|
|
|
|
/* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */
|
|
/* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */
|
|
/* JUMP = skip bytes for speedup */
|
|
|
|
/* Rolling hash for long distance long string matches. Stores one position
|
|
per bucket, bucket key is computed over a long region. */
|
|
|
|
#define HashRolling HASHER()
|
|
|
|
static const uint32_t FN(kRollingHashMul32) = 69069;
|
|
static const uint32_t FN(kInvalidPos) = 0xffffffff;
|
|
|
|
/* This hasher uses a longer forward length, but returning a higher value here
|
|
will hurt compression by the main hasher when combined with a composite
|
|
hasher. The hasher tests for forward itself instead. */
|
|
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
|
|
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
|
|
|
|
/* Computes a code from a single byte. A lookup table of 256 values could be
|
|
used, but simply adding 1 works about as good. */
|
|
static uint32_t FN(HashByte)(uint8_t byte) {
|
|
return (uint32_t)byte + 1u;
|
|
}
|
|
|
|
static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add,
|
|
uint32_t factor) {
|
|
return (uint32_t)(factor * state + FN(HashByte)(add));
|
|
}
|
|
|
|
static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add,
|
|
uint8_t rem, uint32_t factor,
|
|
uint32_t factor_remove) {
|
|
return (uint32_t)(factor * state +
|
|
FN(HashByte)(add) - factor_remove * FN(HashByte)(rem));
|
|
}
|
|
|
|
typedef struct HashRolling {
|
|
uint32_t state;
|
|
uint32_t* table;
|
|
size_t next_ix;
|
|
|
|
uint32_t chunk_len;
|
|
uint32_t factor;
|
|
uint32_t factor_remove;
|
|
} HashRolling;
|
|
|
|
static void FN(Initialize)(
|
|
HasherCommon* common, HashRolling* BROTLI_RESTRICT self,
|
|
const BrotliEncoderParams* params) {
|
|
size_t i;
|
|
self->state = 0;
|
|
self->next_ix = 0;
|
|
|
|
self->factor = FN(kRollingHashMul32);
|
|
|
|
/* Compute the factor of the oldest byte to remove: factor**steps modulo
|
|
0xffffffff (the multiplications rely on 32-bit overflow) */
|
|
self->factor_remove = 1;
|
|
for (i = 0; i < CHUNKLEN; i += JUMP) {
|
|
self->factor_remove *= self->factor;
|
|
}
|
|
|
|
self->table = (uint32_t*)common->extra;
|
|
for (i = 0; i < NUMBUCKETS; i++) {
|
|
self->table[i] = FN(kInvalidPos);
|
|
}
|
|
|
|
BROTLI_UNUSED(params);
|
|
}
|
|
|
|
static void FN(Prepare)(HashRolling* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
|
|
size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
|
|
size_t i;
|
|
/* Too small size, cannot use this hasher. */
|
|
if (input_size < CHUNKLEN) return;
|
|
self->state = 0;
|
|
for (i = 0; i < CHUNKLEN; i += JUMP) {
|
|
self->state = FN(HashRollingFunctionInitial)(
|
|
self->state, data[i], self->factor);
|
|
}
|
|
BROTLI_UNUSED(one_shot);
|
|
}
|
|
|
|
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
|
|
const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
|
|
size_t input_size) {
|
|
return NUMBUCKETS * sizeof(uint32_t);
|
|
BROTLI_UNUSED(params);
|
|
BROTLI_UNUSED(one_shot);
|
|
BROTLI_UNUSED(input_size);
|
|
}
|
|
|
|
static BROTLI_INLINE void FN(Store)(HashRolling* BROTLI_RESTRICT self,
|
|
const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
|
|
BROTLI_UNUSED(self);
|
|
BROTLI_UNUSED(data);
|
|
BROTLI_UNUSED(mask);
|
|
BROTLI_UNUSED(ix);
|
|
}
|
|
|
|
static BROTLI_INLINE void FN(StoreRange)(HashRolling* BROTLI_RESTRICT self,
|
|
const uint8_t* BROTLI_RESTRICT data, const size_t mask,
|
|
const size_t ix_start, const size_t ix_end) {
|
|
BROTLI_UNUSED(self);
|
|
BROTLI_UNUSED(data);
|
|
BROTLI_UNUSED(mask);
|
|
BROTLI_UNUSED(ix_start);
|
|
BROTLI_UNUSED(ix_end);
|
|
}
|
|
|
|
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
|
|
HashRolling* BROTLI_RESTRICT self,
|
|
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
|
|
size_t ring_buffer_mask) {
|
|
/* In this case we must re-initialize the hasher from scratch from the
|
|
current position. */
|
|
size_t position_masked;
|
|
size_t available = num_bytes;
|
|
if ((position & (JUMP - 1)) != 0) {
|
|
size_t diff = JUMP - (position & (JUMP - 1));
|
|
available = (diff > available) ? 0 : (available - diff);
|
|
position += diff;
|
|
}
|
|
position_masked = position & ring_buffer_mask;
|
|
/* wrapping around ringbuffer not handled. */
|
|
if (available > ring_buffer_mask - position_masked) {
|
|
available = ring_buffer_mask - position_masked;
|
|
}
|
|
|
|
FN(Prepare)(self, BROTLI_FALSE, available,
|
|
ringbuffer + (position & ring_buffer_mask));
|
|
self->next_ix = position;
|
|
BROTLI_UNUSED(num_bytes);
|
|
}
|
|
|
|
static BROTLI_INLINE void FN(PrepareDistanceCache)(
|
|
HashRolling* BROTLI_RESTRICT self,
|
|
int* BROTLI_RESTRICT distance_cache) {
|
|
BROTLI_UNUSED(self);
|
|
BROTLI_UNUSED(distance_cache);
|
|
}
|
|
|
|
static BROTLI_INLINE void FN(FindLongestMatch)(
|
|
HashRolling* BROTLI_RESTRICT self,
|
|
const BrotliEncoderDictionary* dictionary,
|
|
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
|
|
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
|
|
const size_t max_length, const size_t max_backward,
|
|
const size_t dictionary_distance, const size_t max_distance,
|
|
HasherSearchResult* BROTLI_RESTRICT out) {
|
|
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
|
size_t pos;
|
|
|
|
if ((cur_ix & (JUMP - 1)) != 0) return;
|
|
|
|
/* Not enough lookahead */
|
|
if (max_length < CHUNKLEN) return;
|
|
|
|
for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) {
|
|
uint32_t code = self->state & MASK;
|
|
|
|
uint8_t rem = data[pos & ring_buffer_mask];
|
|
uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask];
|
|
size_t found_ix = FN(kInvalidPos);
|
|
|
|
self->state = FN(HashRollingFunction)(
|
|
self->state, add, rem, self->factor, self->factor_remove);
|
|
|
|
if (code < NUMBUCKETS) {
|
|
found_ix = self->table[code];
|
|
self->table[code] = (uint32_t)pos;
|
|
if (pos == cur_ix && found_ix != FN(kInvalidPos)) {
|
|
/* The cast to 32-bit makes backward distances up to 4GB work even
|
|
if cur_ix is above 4GB, despite using 32-bit values in the table. */
|
|
size_t backward = (uint32_t)(cur_ix - found_ix);
|
|
if (backward <= max_backward) {
|
|
const size_t found_ix_masked = found_ix & ring_buffer_mask;
|
|
const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked],
|
|
&data[cur_ix_masked],
|
|
max_length);
|
|
if (len >= 4 && len > out->len) {
|
|
score_t score = BackwardReferenceScore(len, backward);
|
|
if (score > out->score) {
|
|
out->len = len;
|
|
out->distance = backward;
|
|
out->score = score;
|
|
out->len_code_delta = 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
self->next_ix = cur_ix + JUMP;
|
|
|
|
/* NOTE: this hasher does not search in the dictionary. It is used as
|
|
backup-hasher, the main hasher already searches in it. */
|
|
BROTLI_UNUSED(dictionary);
|
|
BROTLI_UNUSED(distance_cache);
|
|
BROTLI_UNUSED(dictionary_distance);
|
|
BROTLI_UNUSED(max_distance);
|
|
}
|
|
|
|
#undef HashRolling
|