Mirror of https://github.com/google/brotli.git, synced 2024-12-26 17:51:04 +00:00
Update common, decoder, encoder, java (#520)
Common:
* wrap dictionary data into `BrotliDictionary` structure
* replace public constant with getter `BrotliGetDictionary`
* reformat dictionary data

Decoder:
* adopt common changes
* clarify acceptable instance usage patterns
* hold reference to dictionary in state

Encoder:
* adopt common changes
* eliminate PIC spots in `CreateBackwardReferences`
* add per-chunk ratio guards for q0 and q1
* precompute relative distances to avoid repeated calculations
* postpone hasher allocation/initialization
* refactor Hashers to be a class-like structure
* further improvements for 1MiB+ inputs
* add new hasher type; make hashers more configurable

Java:
* pull byte->int magic to `IntReader` from `BitReader`
Parent: aaa4424d9b
Commit: cdca91b6f5

common/dictionary.c: 15339 changed lines (file diff suppressed because it is too large)
@@ -16,9 +16,29 @@
extern "C" {
#endif

BROTLI_COMMON_API extern const uint8_t kBrotliDictionary[122784];
BROTLI_COMMON_API extern const uint32_t kBrotliDictionaryOffsetsByLength[32];
BROTLI_COMMON_API extern const uint8_t kBrotliDictionarySizeBitsByLength[32];
typedef struct BrotliDictionary {
  /**
   * Number of bits to encode index of dictionary word in a bucket.
   *
   * Specification: Appendix A. Static Dictionary Data
   *
   * Words in a dictionary are bucketed by length.
   * @c 0 means that there are no words of a given length.
   * Dictionary consists of words with length of [4..24] bytes.
   * Values at [0..3] and [25..31] indices should not be addressed.
   */
  uint8_t size_bits_by_length[32];

  /* assert(offset[i + 1] == offset[i] + (bits[i] ? (i << bits[i]) : 0)) */
  uint32_t offsets_by_length[32];

  /* Data array is not bound, and should obey to size_bits_by_length values.
     Specified size matches default (RFC 7932) dictionary. */
  /* assert(sizeof(data) == offsets_by_length[31]) */
  uint8_t data[122784];
} BrotliDictionary;

BROTLI_COMMON_API extern const BrotliDictionary* BrotliGetDictionary();

#define BROTLI_MIN_DICTIONARY_WORD_LENGTH 4
#define BROTLI_MAX_DICTIONARY_WORD_LENGTH 24
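A minimal sketch (not part of this commit) of how a consumer can fetch the dictionary through the new getter and verify the bucket layout documented in the struct comments; the include path and the helper name are assumptions made for the sketch.

/* Sketch, not part of the commit. */
#include <assert.h>
#include <stdint.h>
#include "common/dictionary.h"  /* assumed include path */

/* Check the documented layout: bucket i holds (1 << size_bits_by_length[i])
   words of i bytes each, laid out back to back inside data[]. */
void CheckBrotliDictionaryLayout(void) {
  const BrotliDictionary* dict = BrotliGetDictionary();
  int i;
  for (i = 0; i < 31; ++i) {
    uint32_t bits = dict->size_bits_by_length[i];
    uint32_t bucket_bytes = bits ? ((uint32_t)i << bits) : 0;
    assert(dict->offsets_by_length[i + 1] ==
           dict->offsets_by_length[i] + bucket_bytes);
  }
  /* Restates the comment: sizeof(data) == offsets_by_length[31]. */
  assert(dict->offsets_by_length[31] == sizeof(dict->data));
}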
@@ -1730,9 +1730,9 @@ postReadDistance:
    if (s->distance_code > s->max_distance) {
      if (i >= BROTLI_MIN_DICTIONARY_WORD_LENGTH &&
          i <= BROTLI_MAX_DICTIONARY_WORD_LENGTH) {
        int offset = (int)kBrotliDictionaryOffsetsByLength[i];
        int offset = (int)s->dictionary->offsets_by_length[i];
        int word_id = s->distance_code - s->max_distance - 1;
        uint32_t shift = kBrotliDictionarySizeBitsByLength[i];
        uint32_t shift = s->dictionary->size_bits_by_length[i];
        int mask = (int)BitMask(shift);
        int word_idx = word_id & mask;
        int transform_idx = word_id >> shift;
@@ -1740,7 +1740,7 @@ postReadDistance:
        s->dist_rb_idx += s->distance_context;
        offset += word_idx * i;
        if (transform_idx < kNumTransforms) {
          const uint8_t* word = &kBrotliDictionary[offset];
          const uint8_t* word = &s->dictionary->data[offset];
          int len = i;
          if (transform_idx == 0) {
            memcpy(&s->ringbuffer[pos], word, (size_t)len);
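The hunk above maps an out-of-window distance onto a static dictionary word. A small stand-alone sketch of the same decomposition (function and parameter names are invented for illustration; only the arithmetic comes from the hunk, and `shift` stands for size_bits_by_length of the copy length):

/* Sketch, not part of the commit. */
#include <stdint.h>

int DictionaryWordOffset(int distance_code, int max_distance, uint32_t shift,
                         int copy_length, const uint32_t* offsets_by_length,
                         int* transform_idx) {
  int word_id = distance_code - max_distance - 1;  /* 0-based word id */
  int mask = (1 << shift) - 1;                     /* same as BitMask(shift) */
  int word_idx = word_id & mask;                   /* index inside the bucket */
  *transform_idx = word_id >> shift;               /* transform to apply */
  return (int)offsets_by_length[copy_length] + word_idx * copy_length;
}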
@@ -51,6 +51,8 @@ void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s,
  s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
  s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;

  s->dictionary = BrotliGetDictionary();

  s->buffer_length = 0;
  s->loop_counter = 0;
  s->pos = 0;
@@ -10,6 +10,7 @@
#define BROTLI_DEC_STATE_H_

#include "../common/constants.h"
#include "../common/dictionary.h"
#include <brotli/types.h>
#include "./bit_reader.h"
#include "./huffman.h"
@@ -222,6 +223,7 @@ struct BrotliDecoderStateStruct {
  uint32_t num_literal_htrees;
  uint8_t* context_map;
  uint8_t* context_modes;
  const BrotliDictionary* dictionary;

  uint32_t trivial_literal_contexts[8]; /* 256 bits */
};
@@ -9,8 +9,10 @@
#include "./backward_references.h"

#include "../common/constants.h"
#include "../common/dictionary.h"
#include <brotli/types.h>
#include "./command.h"
#include "./dictionary_hash.h"
#include "./memory.h"
#include "./port.h"
#include "./quality.h"
@@ -72,21 +74,6 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
#include "./backward_references_inc.h"
#undef HASHER

#define HASHER() H7
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER

#define HASHER() H8
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER

#define HASHER() H9
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER

#define HASHER() H40
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
@@ -111,23 +98,25 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
#undef CAT
#undef EXPAND_CAT

void BrotliCreateBackwardReferences(size_t num_bytes,
void BrotliCreateBackwardReferences(const BrotliDictionary* dictionary,
                                    size_t num_bytes,
                                    size_t position,
                                    const uint8_t* ringbuffer,
                                    size_t ringbuffer_mask,
                                    const BrotliEncoderParams* params,
                                    Hashers* hashers,
                                    HasherHandle hasher,
                                    int* dist_cache,
                                    size_t* last_insert_len,
                                    Command* commands,
                                    size_t* num_commands,
                                    size_t* num_literals) {
  switch (ChooseHasher(params)) {
#define CASE_(N) \
    case N: \
      CreateBackwardReferencesH ## N(num_bytes, position, \
          ringbuffer, ringbuffer_mask, params, hashers->h ## N, dist_cache, \
          last_insert_len, commands, num_commands, num_literals); \
  switch (params->hasher.type) {
#define CASE_(N) \
    case N: \
      CreateBackwardReferencesH ## N(dictionary, \
          kStaticDictionaryHash, num_bytes, position, ringbuffer, \
          ringbuffer_mask, params, hasher, dist_cache, \
          last_insert_len, commands, num_commands, num_literals); \
      break;
    FOR_GENERIC_HASHERS(CASE_)
#undef CASE_
@@ -10,6 +10,7 @@
#define BROTLI_ENC_BACKWARD_REFERENCES_H_

#include "../common/constants.h"
#include "../common/dictionary.h"
#include <brotli/types.h>
#include "./command.h"
#include "./hash.h"
@@ -25,9 +26,9 @@ extern "C" {
   CreateBackwardReferences calls, and must be incremented by the amount written
   by this call. */
BROTLI_INTERNAL void BrotliCreateBackwardReferences(
    size_t num_bytes, size_t position,
    const BrotliDictionary* dictionary, size_t num_bytes, size_t position,
    const uint8_t* ringbuffer, size_t ringbuffer_mask,
    const BrotliEncoderParams* params, Hashers* hashers, int* dist_cache,
    const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
    size_t* last_insert_len, Command* commands, size_t* num_commands,
    size_t* num_literals);
@ -27,6 +27,13 @@ extern "C" {
|
||||
|
||||
static const float kInfinity = 1.7e38f; /* ~= 2 ^ 127 */
|
||||
|
||||
static const uint32_t kDistanceCacheIndex[] = {
|
||||
0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
|
||||
};
|
||||
static const int kDistanceCacheOffset[] = {
|
||||
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
|
||||
};
|
||||
|
||||
void BrotliInitZopfliNodes(ZopfliNode* array, size_t length) {
|
||||
ZopfliNode stub;
|
||||
size_t i;
|
||||
@ -604,6 +611,7 @@ static size_t ZopfliIterate(size_t num_bytes,
|
||||
|
||||
|
||||
size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
|
||||
const BrotliDictionary* dictionary,
|
||||
size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
@ -611,7 +619,7 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
|
||||
const BrotliEncoderParams* params,
|
||||
const size_t max_backward_limit,
|
||||
const int* dist_cache,
|
||||
H10* hasher,
|
||||
HasherHandle hasher,
|
||||
ZopfliNode* nodes) {
|
||||
const size_t max_zopfli_len = MaxZopfliLen(params);
|
||||
ZopfliCostModel model;
|
||||
@ -630,8 +638,8 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
|
||||
for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; i++) {
|
||||
const size_t pos = position + i;
|
||||
const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
|
||||
size_t num_matches = FindAllMatchesH10(hasher, ringbuffer, ringbuffer_mask,
|
||||
pos, num_bytes - i, max_distance, params, matches);
|
||||
size_t num_matches = FindAllMatchesH10(hasher, dictionary, ringbuffer,
|
||||
ringbuffer_mask, pos, num_bytes - i, max_distance, params, matches);
|
||||
size_t skip;
|
||||
if (num_matches > 0 &&
|
||||
BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
|
||||
@ -664,9 +672,9 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
|
||||
}
|
||||
|
||||
void BrotliCreateZopfliBackwardReferences(
|
||||
MemoryManager* m, size_t num_bytes, size_t position,
|
||||
const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params, H10* hasher, int* dist_cache,
|
||||
MemoryManager* m, const BrotliDictionary* dictionary, size_t num_bytes,
|
||||
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
|
||||
size_t* last_insert_len, Command* commands, size_t* num_commands,
|
||||
size_t* num_literals) {
|
||||
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
|
||||
@ -674,8 +682,8 @@ void BrotliCreateZopfliBackwardReferences(
|
||||
nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BrotliInitZopfliNodes(nodes, num_bytes + 1);
|
||||
*num_commands += BrotliZopfliComputeShortestPath(m, num_bytes, position,
|
||||
ringbuffer, ringbuffer_mask, params, max_backward_limit,
|
||||
*num_commands += BrotliZopfliComputeShortestPath(m, dictionary, num_bytes,
|
||||
position, ringbuffer, ringbuffer_mask, params, max_backward_limit,
|
||||
dist_cache, hasher, nodes);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BrotliZopfliCreateCommands(num_bytes, position, max_backward_limit, nodes,
|
||||
@ -684,9 +692,9 @@ void BrotliCreateZopfliBackwardReferences(
|
||||
}
|
||||
|
||||
void BrotliCreateHqZopfliBackwardReferences(
|
||||
MemoryManager* m, size_t num_bytes, size_t position,
|
||||
const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params, H10* hasher, int* dist_cache,
|
||||
MemoryManager* m, const BrotliDictionary* dictionary, size_t num_bytes,
|
||||
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
|
||||
size_t* last_insert_len, Command* commands, size_t* num_commands,
|
||||
size_t* num_literals) {
|
||||
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
|
||||
@ -715,8 +723,9 @@ void BrotliCreateHqZopfliBackwardReferences(
|
||||
BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
|
||||
cur_match_pos + MAX_NUM_MATCHES_H10);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
num_found_matches = FindAllMatchesH10(hasher, ringbuffer, ringbuffer_mask,
|
||||
pos, max_length, max_distance, params, &matches[cur_match_pos]);
|
||||
num_found_matches = FindAllMatchesH10(hasher, dictionary, ringbuffer,
|
||||
ringbuffer_mask, pos, max_length, max_distance, params,
|
||||
&matches[cur_match_pos]);
|
||||
cur_match_end = cur_match_pos + num_found_matches;
|
||||
for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {
|
||||
assert(BackwardMatchLength(&matches[j]) <
|
||||
|
@ -10,6 +10,7 @@
|
||||
#define BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include "../common/dictionary.h"
|
||||
#include <brotli/types.h>
|
||||
#include "./command.h"
|
||||
#include "./hash.h"
|
||||
@ -22,16 +23,16 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
BROTLI_INTERNAL void BrotliCreateZopfliBackwardReferences(
|
||||
MemoryManager* m, size_t num_bytes, size_t position,
|
||||
const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params, H10* hasher, int* dist_cache,
|
||||
MemoryManager* m, const BrotliDictionary* dictionary, size_t num_bytes,
|
||||
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
|
||||
size_t* last_insert_len, Command* commands, size_t* num_commands,
|
||||
size_t* num_literals);
|
||||
|
||||
BROTLI_INTERNAL void BrotliCreateHqZopfliBackwardReferences(
|
||||
MemoryManager* m, size_t num_bytes, size_t position,
|
||||
const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params, H10* hasher, int* dist_cache,
|
||||
MemoryManager* m, const BrotliDictionary* dictionary, size_t num_bytes,
|
||||
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
|
||||
size_t* last_insert_len, Command* commands, size_t* num_commands,
|
||||
size_t* num_literals);
|
||||
|
||||
@ -77,10 +78,10 @@ BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
|
||||
(2) nodes[i].command_length() <= i and
|
||||
(3) nodes[i - nodes[i].command_length()].cost < kInfinity */
|
||||
BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(
|
||||
MemoryManager* m, size_t num_bytes, size_t position,
|
||||
const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
MemoryManager* m, const BrotliDictionary* dictionary, size_t num_bytes,
|
||||
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params, const size_t max_backward_limit,
|
||||
const int* dist_cache, H10* hasher, ZopfliNode* nodes);
|
||||
const int* dist_cache, HasherHandle hasher, ZopfliNode* nodes);
|
||||
|
||||
BROTLI_INTERNAL void BrotliZopfliCreateCommands(const size_t num_bytes,
|
||||
const size_t block_start,
|
||||
|
@ -7,12 +7,11 @@
|
||||
|
||||
/* template parameters: FN */
|
||||
|
||||
#define Hasher HASHER()
|
||||
|
||||
static BROTLI_NOINLINE void FN(CreateBackwardReferences)(
|
||||
const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
|
||||
size_t num_bytes, size_t position,
|
||||
const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params, Hasher* hasher, int* dist_cache,
|
||||
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
|
||||
size_t* last_insert_len, Command* commands, size_t* num_commands,
|
||||
size_t* num_literals) {
|
||||
/* Set maximum distance, see section 9.1. of the spec. */
|
||||
@ -30,7 +29,9 @@ static BROTLI_NOINLINE void FN(CreateBackwardReferences)(
|
||||
size_t apply_random_heuristics = position + random_heuristics_window_size;
|
||||
|
||||
/* Minimum score to accept a backward reference. */
|
||||
const score_t kMinScore = BROTLI_SCORE_BASE + 400;
|
||||
const score_t kMinScore = BROTLI_SCORE_BASE + 100;
|
||||
|
||||
FN(PrepareDistanceCache)(hasher, dist_cache);
|
||||
|
||||
while (position + FN(HashTypeLength)() < pos_end) {
|
||||
size_t max_length = pos_end - position;
|
||||
@ -40,13 +41,14 @@ static BROTLI_NOINLINE void FN(CreateBackwardReferences)(
|
||||
sr.len_x_code = 0;
|
||||
sr.distance = 0;
|
||||
sr.score = kMinScore;
|
||||
if (FN(FindLongestMatch)(hasher, ringbuffer, ringbuffer_mask, dist_cache,
|
||||
if (FN(FindLongestMatch)(hasher, dictionary, dictionary_hash,
|
||||
ringbuffer, ringbuffer_mask, dist_cache,
|
||||
position, max_length, max_distance, &sr)) {
|
||||
/* Found a match. Let's look for something even better ahead. */
|
||||
int delayed_backward_references_in_row = 0;
|
||||
--max_length;
|
||||
for (;; --max_length) {
|
||||
const score_t cost_diff_lazy = 700;
|
||||
const score_t cost_diff_lazy = 175;
|
||||
BROTLI_BOOL is_match_found;
|
||||
HasherSearchResult sr2;
|
||||
sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
|
||||
@ -55,9 +57,9 @@ static BROTLI_NOINLINE void FN(CreateBackwardReferences)(
|
||||
sr2.distance = 0;
|
||||
sr2.score = kMinScore;
|
||||
max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
|
||||
is_match_found = FN(FindLongestMatch)(hasher, ringbuffer,
|
||||
ringbuffer_mask, dist_cache, position + 1, max_length, max_distance,
|
||||
&sr2);
|
||||
is_match_found = FN(FindLongestMatch)(hasher, dictionary,
|
||||
dictionary_hash, ringbuffer, ringbuffer_mask, dist_cache,
|
||||
position + 1, max_length, max_distance, &sr2);
|
||||
if (is_match_found && sr2.score >= sr.score + cost_diff_lazy) {
|
||||
/* Ok, let's just write one byte for now and start a match from the
|
||||
next byte. */
|
||||
@ -84,6 +86,7 @@ static BROTLI_NOINLINE void FN(CreateBackwardReferences)(
|
||||
dist_cache[2] = dist_cache[1];
|
||||
dist_cache[1] = dist_cache[0];
|
||||
dist_cache[0] = (int)sr.distance;
|
||||
FN(PrepareDistanceCache)(hasher, dist_cache);
|
||||
}
|
||||
InitCommand(commands++, insert_length, sr.len, sr.len ^ sr.len_x_code,
|
||||
distance_code);
|
||||
@ -138,5 +141,3 @@ static BROTLI_NOINLINE void FN(CreateBackwardReferences)(
|
||||
*last_insert_len = insert_length;
|
||||
*num_commands += (size_t)(commands - orig_commands);
|
||||
}
|
||||
|
||||
#undef Hasher
|
||||
|
@@ -65,11 +65,17 @@ static BROTLI_INLINE uint16_t CombineLengthCodes(
  if (use_last_distance && inscode < 8 && copycode < 16) {
    return (copycode < 8) ? bits64 : (bits64 | 64);
  } else {
    /* "To convert an insert-and-copy length code to an insert length code and
       a copy length code, the following table can be used" */
    static const uint16_t cells[9] = { 128u, 192u, 384u, 256u, 320u, 512u,
                                       448u, 576u, 640u };
    return cells[(copycode >> 3) + 3 * (inscode >> 3)] | bits64;
    /* Specification: 5 Encoding of ... (last table) */
    /* offset = 2 * index, where index is in range [0..8] */
    int offset = 2 * ((copycode >> 3) + 3 * (inscode >> 3));
    /* All values in specification are K * 64,
       where K = [2, 3, 6, 4, 5, 8, 7, 9, 10],
       i + 1 = [1, 2, 3, 4, 5, 6, 7, 8, 9],
       K - i - 1 = [1, 1, 3, 0, 0, 2, 0, 1, 2] = D.
       All values in D require only 2 bits to encode.
       Magic constant is shifted 6 bits left, to avoid final multiplication. */
    offset = (offset << 5) + 0x40 + ((0x520D40 >> offset) & 0xC0);
    return (uint16_t)offset | bits64;
  }
}
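The magic-constant form above is meant to reproduce the removed cells[] table exactly. A stand-alone check (a sketch, not part of the patch) that the two agree for all nine indices:

/* Sketch, not part of the commit. */
#include <assert.h>
#include <stdint.h>

int main(void) {
  static const uint16_t cells[9] = { 128u, 192u, 384u, 256u, 320u, 512u,
                                     448u, 576u, 640u };
  int index;
  for (index = 0; index < 9; ++index) {
    int offset = 2 * index;
    /* Same formula as the new CombineLengthCodes code path. */
    offset = (offset << 5) + 0x40 + ((0x520D40 >> offset) & 0xC0);
    assert(offset == cells[index]);
  }
  return 0;
}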
@ -325,21 +325,20 @@ static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
|
||||
}
|
||||
}
|
||||
|
||||
/* REQUIRES: len <= 1 << 20. */
|
||||
/* REQUIRES: len <= 1 << 24. */
|
||||
static void BrotliStoreMetaBlockHeader(
|
||||
size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
size_t nibbles = 6;
|
||||
/* ISLAST */
|
||||
BrotliWriteBits(1, 0, storage_ix, storage);
|
||||
if (len <= (1U << 16)) {
|
||||
/* MNIBBLES is 4 */
|
||||
BrotliWriteBits(2, 0, storage_ix, storage);
|
||||
BrotliWriteBits(16, len - 1, storage_ix, storage);
|
||||
} else {
|
||||
/* MNIBBLES is 5 */
|
||||
BrotliWriteBits(2, 1, storage_ix, storage);
|
||||
BrotliWriteBits(20, len - 1, storage_ix, storage);
|
||||
nibbles = 4;
|
||||
} else if (len <= (1U << 20)) {
|
||||
nibbles = 5;
|
||||
}
|
||||
BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
|
||||
BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
|
||||
/* ISUNCOMPRESSED */
|
||||
BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
|
||||
}
|
||||
@ -463,14 +462,6 @@ static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
|
||||
|
||||
const size_t shift = 64u - table_bits;
|
||||
|
||||
if (input_size == 0) {
|
||||
assert(is_last);
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
return;
|
||||
}
|
||||
|
||||
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
|
||||
/* No block splits, no contexts. */
|
||||
BrotliWriteBits(13, 0, storage_ix, storage);
|
||||
@ -728,11 +719,7 @@ next_block:
|
||||
goto emit_commands;
|
||||
}
|
||||
|
||||
if (is_last) {
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
} else {
|
||||
if (!is_last) {
|
||||
/* If this is not the last block, update the command and distance prefix
|
||||
codes for the next block and store the compressed forms. */
|
||||
cmd_code[0] = 0;
|
||||
@ -761,7 +748,17 @@ void BrotliCompressFragmentFast(
|
||||
BROTLI_BOOL is_last, int* table, size_t table_size, uint8_t cmd_depth[128],
|
||||
uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
const size_t initial_storage_ix = *storage_ix;
|
||||
const size_t table_bits = Log2FloorNonZero(table_size);
|
||||
|
||||
if (input_size == 0) {
|
||||
assert(is_last);
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
return;
|
||||
}
|
||||
|
||||
switch (table_bits) {
|
||||
#define CASE_(B) \
|
||||
case B: \
|
||||
@ -773,6 +770,18 @@ void BrotliCompressFragmentFast(
|
||||
#undef CASE_
|
||||
default: assert(0); break;
|
||||
}
|
||||
|
||||
/* If output is larger than single uncompressed block, rewrite it. */
|
||||
if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
|
||||
EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
|
||||
storage_ix, storage);
|
||||
}
|
||||
|
||||
if (is_last) {
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
}
|
||||
}
|
||||
|
||||
#undef FOR_TABLE_BITS_
|
||||
|
@@ -37,6 +37,7 @@ extern "C" {
   updated to represent the updated "cmd_depth" and "cmd_bits".

   REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
   REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
   REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
   REQUIRES: "table_size" is an odd (9, 11, 13, 15) power of two
   OUTPUT: maximal copy distance <= |input_size|
@ -216,21 +216,20 @@ static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
|
||||
++(*commands);
|
||||
}
|
||||
|
||||
/* REQUIRES: len <= 1 << 20. */
|
||||
/* REQUIRES: len <= 1 << 24. */
|
||||
static void BrotliStoreMetaBlockHeader(
|
||||
size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
size_t nibbles = 6;
|
||||
/* ISLAST */
|
||||
BrotliWriteBits(1, 0, storage_ix, storage);
|
||||
if (len <= (1U << 16)) {
|
||||
/* MNIBBLES is 4 */
|
||||
BrotliWriteBits(2, 0, storage_ix, storage);
|
||||
BrotliWriteBits(16, len - 1, storage_ix, storage);
|
||||
} else {
|
||||
/* MNIBBLES is 5 */
|
||||
BrotliWriteBits(2, 1, storage_ix, storage);
|
||||
BrotliWriteBits(20, len - 1, storage_ix, storage);
|
||||
nibbles = 4;
|
||||
} else if (len <= (1U << 20)) {
|
||||
nibbles = 5;
|
||||
}
|
||||
BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
|
||||
BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
|
||||
/* ISUNCOMPRESSED */
|
||||
BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
|
||||
}
|
||||
@ -507,6 +506,23 @@ static BROTLI_BOOL ShouldCompress(
|
||||
}
|
||||
}
|
||||
|
||||
static void RewindBitPosition(const size_t new_storage_ix,
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
const size_t bitpos = new_storage_ix & 7;
|
||||
const size_t mask = (1u << bitpos) - 1;
|
||||
storage[new_storage_ix >> 3] &= (uint8_t)mask;
|
||||
*storage_ix = new_storage_ix;
|
||||
}
|
||||
|
||||
static void EmitUncompressedMetaBlock(const uint8_t* input, size_t input_size,
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
BrotliStoreMetaBlockHeader(input_size, 1, storage_ix, storage);
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
memcpy(&storage[*storage_ix >> 3], input, input_size);
|
||||
*storage_ix += input_size << 3;
|
||||
storage[*storage_ix >> 3] = 0;
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
|
||||
MemoryManager* m, const uint8_t* input, size_t input_size,
|
||||
BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
|
||||
@ -514,6 +530,7 @@ static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
|
||||
/* Save the start of the first block for position and distance computations.
|
||||
*/
|
||||
const uint8_t* base_ip = input;
|
||||
BROTLI_UNUSED(is_last);
|
||||
|
||||
while (input_size > 0) {
|
||||
size_t block_size =
|
||||
@ -536,21 +553,11 @@ static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
|
||||
/* Since we did not find many backward references and the entropy of
|
||||
the data is close to 8 bits, we can simply emit an uncompressed block.
|
||||
This makes compression speed of uncompressible data about 3x faster. */
|
||||
BrotliStoreMetaBlockHeader(block_size, 1, storage_ix, storage);
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
memcpy(&storage[*storage_ix >> 3], input, block_size);
|
||||
*storage_ix += block_size << 3;
|
||||
storage[*storage_ix >> 3] = 0;
|
||||
EmitUncompressedMetaBlock(input, block_size, storage_ix, storage);
|
||||
}
|
||||
input += block_size;
|
||||
input_size -= block_size;
|
||||
}
|
||||
|
||||
if (is_last) {
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
}
|
||||
}
|
||||
|
||||
#define FOR_TABLE_BITS_(X) \
|
||||
@ -571,6 +578,7 @@ void BrotliCompressFragmentTwoPass(
|
||||
MemoryManager* m, const uint8_t* input, size_t input_size,
|
||||
BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
|
||||
int* table, size_t table_size, size_t* storage_ix, uint8_t* storage) {
|
||||
const size_t initial_storage_ix = *storage_ix;
|
||||
const size_t table_bits = Log2FloorNonZero(table_size);
|
||||
switch (table_bits) {
|
||||
#define CASE_(B) \
|
||||
@ -583,6 +591,18 @@ void BrotliCompressFragmentTwoPass(
|
||||
#undef CASE_
|
||||
default: assert(0); break;
|
||||
}
|
||||
|
||||
/* If output is larger than single uncompressed block, rewrite it. */
|
||||
if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
|
||||
RewindBitPosition(initial_storage_ix, storage_ix, storage);
|
||||
EmitUncompressedMetaBlock(input, input_size, storage_ix, storage);
|
||||
}
|
||||
|
||||
if (is_last) {
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
}
|
||||
}
|
||||
|
||||
#undef FOR_TABLE_BITS_
|
||||
|
@@ -29,6 +29,7 @@ static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
   If "is_last" is 1, emits an additional empty last meta-block.

   REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
   REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
   REQUIRES: "command_buf" and "literal_buf" point to at least
             kCompressFragmentTwoPassBlockSize long arrays.
   REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
enc/encode.c: 80 changed lines
@ -57,7 +57,7 @@ typedef struct BrotliEncoderStateStruct {
|
||||
|
||||
MemoryManager memory_manager_;
|
||||
|
||||
Hashers hashers_;
|
||||
HasherHandle hasher_;
|
||||
uint64_t input_pos_;
|
||||
RingBuffer ringbuffer_;
|
||||
size_t cmd_alloc_size_;
|
||||
@ -67,7 +67,7 @@ typedef struct BrotliEncoderStateStruct {
|
||||
size_t last_insert_len_;
|
||||
uint64_t last_flush_pos_;
|
||||
uint64_t last_processed_pos_;
|
||||
int dist_cache_[4];
|
||||
int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
|
||||
int saved_dist_cache_[4];
|
||||
uint8_t last_byte_;
|
||||
uint8_t last_byte_bits_;
|
||||
@ -580,10 +580,6 @@ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
|
||||
s->cmd_code_, &s->cmd_code_numbits_);
|
||||
}
|
||||
|
||||
/* Initialize hashers. */
|
||||
HashersSetup(&s->memory_manager_, &s->hashers_, ChooseHasher(&s->params));
|
||||
if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE;
|
||||
|
||||
s->is_initialized_ = BROTLI_TRUE;
|
||||
return BROTLI_TRUE;
|
||||
}
|
||||
@ -609,6 +605,7 @@ static void BrotliEncoderInitState(BrotliEncoderState* s) {
|
||||
s->prev_byte2_ = 0;
|
||||
s->storage_size_ = 0;
|
||||
s->storage_ = 0;
|
||||
s->hasher_ = NULL;
|
||||
s->large_table_ = NULL;
|
||||
s->large_table_size_ = 0;
|
||||
s->cmd_code_numbits_ = 0;
|
||||
@ -621,8 +618,6 @@ static void BrotliEncoderInitState(BrotliEncoderState* s) {
|
||||
s->is_last_block_emitted_ = BROTLI_FALSE;
|
||||
s->is_initialized_ = BROTLI_FALSE;
|
||||
|
||||
InitHashers(&s->hashers_);
|
||||
|
||||
RingBufferInit(&s->ringbuffer_);
|
||||
|
||||
s->commands_ = 0;
|
||||
@ -635,7 +630,7 @@ static void BrotliEncoderInitState(BrotliEncoderState* s) {
|
||||
s->dist_cache_[3] = 16;
|
||||
/* Save the state of the distance cache in case we need to restore it for
|
||||
emitting an uncompressed block. */
|
||||
memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->dist_cache_));
|
||||
memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
|
||||
}
|
||||
|
||||
BrotliEncoderState* BrotliEncoderCreateInstance(brotli_alloc_func alloc_func,
|
||||
@ -666,7 +661,7 @@ static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
|
||||
BROTLI_FREE(m, s->storage_);
|
||||
BROTLI_FREE(m, s->commands_);
|
||||
RingBufferFree(m, &s->ringbuffer_);
|
||||
DestroyHashers(m, &s->hashers_);
|
||||
DestroyHasher(m, &s->hasher_);
|
||||
BROTLI_FREE(m, s->large_table_);
|
||||
BROTLI_FREE(m, s->command_buf_);
|
||||
BROTLI_FREE(m, s->literal_buf_);
|
||||
@ -774,7 +769,7 @@ void BrotliEncoderSetCustomDictionary(BrotliEncoderState* s, size_t size,
|
||||
if (dict_size > 1) {
|
||||
s->prev_byte2_ = dict[dict_size - 2];
|
||||
}
|
||||
HashersPrependCustomDictionary(m, &s->hashers_, &s->params, dict_size, dict);
|
||||
HasherPrependCustomDictionary(m, &s->hasher_, &s->params, dict_size, dict);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
}
|
||||
|
||||
@ -809,6 +804,7 @@ static BROTLI_BOOL EncodeData(
|
||||
uint8_t* data;
|
||||
uint32_t mask;
|
||||
MemoryManager* m = &s->memory_manager_;
|
||||
const BrotliDictionary* dictionary = BrotliGetDictionary();
|
||||
|
||||
if (!EnsureInitialized(s)) return BROTLI_FALSE;
|
||||
data = s->ringbuffer_.buffer_;
|
||||
@ -893,26 +889,28 @@ static BROTLI_BOOL EncodeData(
|
||||
}
|
||||
}
|
||||
|
||||
InitOrStitchToPreviousBlock(m, &s->hashers_, data, mask, &s->params,
|
||||
InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,
|
||||
wrapped_last_processed_pos, bytes, is_last);
|
||||
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
||||
|
||||
if (s->params.quality == ZOPFLIFICATION_QUALITY) {
|
||||
assert(s->params.hasher.type == 10);
|
||||
BrotliCreateZopfliBackwardReferences(
|
||||
m, bytes, wrapped_last_processed_pos, data, mask,
|
||||
&s->params, s->hashers_.h10, s->dist_cache_, &s->last_insert_len_,
|
||||
m, dictionary, bytes, wrapped_last_processed_pos, data, mask,
|
||||
&s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_,
|
||||
&s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_);
|
||||
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
||||
} else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
|
||||
assert(s->params.hasher.type == 10);
|
||||
BrotliCreateHqZopfliBackwardReferences(
|
||||
m, bytes, wrapped_last_processed_pos, data, mask,
|
||||
&s->params, s->hashers_.h10, s->dist_cache_, &s->last_insert_len_,
|
||||
m, dictionary, bytes, wrapped_last_processed_pos, data, mask,
|
||||
&s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_,
|
||||
&s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_);
|
||||
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
|
||||
} else {
|
||||
BrotliCreateBackwardReferences(
|
||||
bytes, wrapped_last_processed_pos, data, mask,
|
||||
&s->params, &s->hashers_, s->dist_cache_, &s->last_insert_len_,
|
||||
dictionary, bytes, wrapped_last_processed_pos, data, mask,
|
||||
&s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_,
|
||||
&s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_);
|
||||
}
|
||||
|
||||
@ -936,7 +934,7 @@ static BROTLI_BOOL EncodeData(
|
||||
s->num_commands_ < max_commands) {
|
||||
/* Merge with next input block. Everything will happen later. */
|
||||
if (UpdateLastProcessedPos(s)) {
|
||||
HashersReset(&s->hashers_, ChooseHasher(&s->params));
|
||||
HasherReset(s->hasher_);
|
||||
}
|
||||
*out_size = 0;
|
||||
return BROTLI_TRUE;
|
||||
@ -976,7 +974,7 @@ static BROTLI_BOOL EncodeData(
|
||||
s->last_byte_bits_ = storage_ix & 7u;
|
||||
s->last_flush_pos_ = s->input_pos_;
|
||||
if (UpdateLastProcessedPos(s)) {
|
||||
HashersReset(&s->hashers_, ChooseHasher(&s->params));
|
||||
HasherReset(s->hasher_);
|
||||
}
|
||||
if (s->last_flush_pos_ > 0) {
|
||||
s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];
|
||||
@ -988,7 +986,7 @@ static BROTLI_BOOL EncodeData(
|
||||
s->num_literals_ = 0;
|
||||
/* Save the state of the distance cache in case we need to restore it for
|
||||
emitting an uncompressed block. */
|
||||
memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->dist_cache_));
|
||||
memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
|
||||
*output = &storage[0];
|
||||
*out_size = storage_ix >> 3;
|
||||
return BROTLI_TRUE;
|
||||
@ -1037,12 +1035,13 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
|
||||
size_t total_out_size = 0;
|
||||
uint8_t last_byte;
|
||||
uint8_t last_byte_bits;
|
||||
H10* hasher;
|
||||
HasherHandle hasher = NULL;
|
||||
|
||||
const size_t hasher_eff_size =
|
||||
BROTLI_MIN(size_t, input_size, max_backward_limit + BROTLI_WINDOW_GAP);
|
||||
|
||||
BrotliEncoderParams params;
|
||||
const BrotliDictionary* dictionary = BrotliGetDictionary();
|
||||
|
||||
const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);
|
||||
size_t max_block_size;
|
||||
@ -1064,12 +1063,11 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
|
||||
|
||||
assert(input_size <= mask + 1);
|
||||
EncodeWindowBits(lgwin, &last_byte, &last_byte_bits);
|
||||
hasher = BROTLI_ALLOC(m, H10, 1);
|
||||
if (BROTLI_IS_OOM(m)) goto oom;
|
||||
InitializeH10(hasher);
|
||||
InitH10(m, hasher, input_buffer, ¶ms, 0, hasher_eff_size, 1);
|
||||
InitOrStitchToPreviousBlock(m, &hasher, input_buffer, mask, ¶ms,
|
||||
0, hasher_eff_size, BROTLI_TRUE);
|
||||
if (BROTLI_IS_OOM(m)) goto oom;
|
||||
|
||||
|
||||
while (ok && metablock_start < input_size) {
|
||||
const size_t metablock_end =
|
||||
BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size);
|
||||
@ -1097,7 +1095,7 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
|
||||
StitchToPreviousBlockH10(hasher, block_size, block_start,
|
||||
input_buffer, mask);
|
||||
path_size = BrotliZopfliComputeShortestPath(
|
||||
m, block_size, block_start, input_buffer, mask, ¶ms,
|
||||
m, dictionary, block_size, block_start, input_buffer, mask, ¶ms,
|
||||
max_backward_limit, dist_cache, hasher, nodes);
|
||||
if (BROTLI_IS_OOM(m)) goto oom;
|
||||
/* We allocate a command buffer in the first iteration of this loop that
|
||||
@ -1227,8 +1225,7 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
|
||||
}
|
||||
|
||||
*encoded_size = total_out_size;
|
||||
CleanupH10(m, hasher);
|
||||
BROTLI_FREE(m, hasher);
|
||||
DestroyHasher(m, &hasher);
|
||||
return ok;
|
||||
|
||||
oom:
|
||||
@ -1588,15 +1585,25 @@ static BROTLI_BOOL ProcessMetadata(
|
||||
return BROTLI_TRUE;
|
||||
}
|
||||
|
||||
static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) {
|
||||
if (s->params.size_hint == 0) {
|
||||
uint64_t delta = UnprocessedInputSize(s);
|
||||
uint64_t tail = available_in;
|
||||
uint32_t limit = 1u << 30;
|
||||
uint32_t total;
|
||||
if ((delta >= limit) || (tail >= limit) || ((delta + tail) >= limit)) {
|
||||
total = limit;
|
||||
} else {
|
||||
total = (uint32_t)(delta + tail);
|
||||
}
|
||||
s->params.size_hint = total;
|
||||
}
|
||||
}
|
||||
|
||||
BROTLI_BOOL BrotliEncoderCompressStream(
|
||||
BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
|
||||
const uint8_t** next_in, size_t* available_out,uint8_t** next_out,
|
||||
size_t* total_out) {
|
||||
/* If we don't have any size hint, set it based on the size of the first
|
||||
input chunk. */
|
||||
if (s->params.size_hint == 0) {
|
||||
s->params.size_hint = (uint32_t)*available_in;
|
||||
}
|
||||
if (!EnsureInitialized(s)) return BROTLI_FALSE;
|
||||
|
||||
/* Unfinished metadata block; check requirements. */
|
||||
@ -1606,6 +1613,7 @@ BROTLI_BOOL BrotliEncoderCompressStream(
|
||||
}
|
||||
|
||||
if (op == BROTLI_OPERATION_EMIT_METADATA) {
|
||||
UpdateSizeHint(s, 0); /* First data metablock might be emitted here. */
|
||||
return ProcessMetadata(
|
||||
s, available_in, next_in, available_out, next_out, total_out);
|
||||
}
|
||||
@ -1648,7 +1656,9 @@ BROTLI_BOOL BrotliEncoderCompressStream(
|
||||
(*available_in == 0) && op == BROTLI_OPERATION_FINISH);
|
||||
BROTLI_BOOL force_flush = TO_BROTLI_BOOL(
|
||||
(*available_in == 0) && op == BROTLI_OPERATION_FLUSH);
|
||||
BROTLI_BOOL result = EncodeData(s, is_last, force_flush,
|
||||
BROTLI_BOOL result;
|
||||
UpdateSizeHint(s, *available_in);
|
||||
result = EncodeData(s, is_last, force_flush,
|
||||
&s->available_out_, &s->next_out_);
|
||||
if (!result) return BROTLI_FALSE;
|
||||
if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
|
||||
|
enc/hash.h: 338 changed lines
@@ -15,7 +15,6 @@
#include "../common/constants.h"
#include "../common/dictionary.h"
#include <brotli/types.h>
#include "./dictionary_hash.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./memory.h"
@@ -27,19 +26,39 @@
extern "C" {
#endif

/* Pointer to hasher data.
 *
 * Excluding initialization and destruction, hasher can be passed as
 * HasherHandle by value.
 *
 * Typically hasher data consists of 3 sections:
 * * HasherCommon structure
 * * private structured hasher data, depending on hasher type
 * * private dynamic hasher data, depending on hasher type and parameters
 */
typedef uint8_t* HasherHandle;

typedef struct {
  BrotliHasherParams params;

  /* False if hasher needs to be "prepared" before use. */
  BROTLI_BOOL is_prepared_;

  size_t dict_num_lookups;
  size_t dict_num_matches;
} HasherCommon;

static BROTLI_INLINE HasherCommon* GetHasherCommon(HasherHandle handle) {
  return (HasherCommon*)handle;
}

#define score_t size_t

static const uint32_t kDistanceCacheIndex[] = {
  0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
};
static const int kDistanceCacheOffset[] = {
  0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
};

static const uint32_t kCutoffTransformsCount = 10;
static const uint8_t kCutoffTransforms[] = {
  0, 12, 27, 23, 42, 63, 56, 48, 59, 64
};
/* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */
/* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
static const uint64_t kCutoffTransforms =
    BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200);
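The packed kCutoffTransforms constant replaces the removed uint8_t table shown directly above it. A stand-alone sketch (not part of the patch; the 64-bit constructor macro is reproduced locally under the usual high/low-word assumption) checking that the 6-bit fields plus the cut << 2 term recover the old values:

/* Sketch, not part of the commit. */
#include <assert.h>
#include <stdint.h>

/* Local stand-in for the port.h macro (assumed high/low 32-bit halves). */
#define MAKE_UINT64(high, low) ((((uint64_t)(high)) << 32) | (uint64_t)(low))

int main(void) {
  static const uint8_t old_cutoff_transforms[10] = {
    0, 12, 27, 23, 42, 63, 56, 48, 59, 64
  };
  const uint64_t packed = MAKE_UINT64(0x071B520A, 0xDA2D3200);
  unsigned cut;
  for (cut = 0; cut < 10; ++cut) {
    /* Same extraction as the new TestStaticDictionaryItem code. */
    unsigned transform_id =
        (cut << 2) + (unsigned)((packed >> (cut * 6)) & 0x3F);
    assert(transform_id == old_cutoff_transforms[cut]);
  }
  return 0;
}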
typedef struct HasherSearchResult {
  size_t len;
@@ -48,11 +67,6 @@ typedef struct HasherSearchResult {
  score_t score;
} HasherSearchResult;

typedef struct DictionarySearchStatictics {
  size_t num_lookups;
  size_t num_matches;
} DictionarySearchStatictics;

/* kHashMul32 multiplier has these properties:
   * The multiplier must be odd. Otherwise we may lose the highest bit.
   * No long streaks of ones or zeros.
@@ -61,6 +75,8 @@ typedef struct DictionarySearchStatictics {
   * The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;
static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1e35a7bd, 0x1e35a7bd);
static const uint64_t kHashMul64Long =
    BROTLI_MAKE_UINT64_T(0x1fe35a7bU, 0xd3579bd3U);

static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
  uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
@@ -69,8 +85,30 @@ static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
  return h >> (32 - 14);
}

#define BROTLI_LITERAL_BYTE_SCORE 540
#define BROTLI_DISTANCE_BIT_PENALTY 120
static BROTLI_INLINE void PrepareDistanceCache(
    int* BROTLI_RESTRICT distance_cache, const int num_distances) {
  if (num_distances > 4) {
    int last_distance = distance_cache[0];
    distance_cache[4] = last_distance - 1;
    distance_cache[5] = last_distance + 1;
    distance_cache[6] = last_distance - 2;
    distance_cache[7] = last_distance + 2;
    distance_cache[8] = last_distance - 3;
    distance_cache[9] = last_distance + 3;
    if (num_distances > 10) {
      int next_last_distance = distance_cache[1];
      distance_cache[10] = next_last_distance - 1;
      distance_cache[11] = next_last_distance + 1;
      distance_cache[12] = next_last_distance - 2;
      distance_cache[13] = next_last_distance + 2;
      distance_cache[14] = next_last_distance - 3;
      distance_cache[15] = next_last_distance + 3;
    }
  }
}

#define BROTLI_LITERAL_BYTE_SCORE 135
#define BROTLI_DISTANCE_BIT_PENALTY 30
/* Score must be positive after applying maximal penalty. */
#define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
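PrepareDistanceCache precomputes the extra distance candidates that are otherwise derived from the kDistanceCacheIndex and kDistanceCacheOffset tables appearing elsewhere in this commit. A stand-alone sketch (not part of the patch; the sample distances are arbitrary) checking that the two formulations agree for slots 4..15:

/* Sketch, not part of the commit. */
#include <assert.h>
#include <string.h>

static const unsigned kIndex[16] = {
  0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1
};
static const int kOffset[16] = {
  0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
};

int main(void) {
  int cache[16] = { 11, 4, 25, 16 };  /* arbitrary last four distances */
  int original[4];
  int j;
  memcpy(original, cache, sizeof(original));
  /* Body of PrepareDistanceCache for num_distances == 16, inlined. */
  {
    int last = cache[0];
    int next_last = cache[1];
    cache[4] = last - 1;        cache[5] = last + 1;
    cache[6] = last - 2;        cache[7] = last + 2;
    cache[8] = last - 3;        cache[9] = last + 3;
    cache[10] = next_last - 1;  cache[11] = next_last + 1;
    cache[12] = next_last - 2;  cache[13] = next_last + 2;
    cache[14] = next_last - 3;  cache[15] = next_last + 3;
  }
  for (j = 4; j < 16; ++j) {
    assert(cache[j] == original[kIndex[j]] + kOffset[j]);
  }
  return 0;
}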
@ -96,44 +134,20 @@ static BROTLI_INLINE score_t BackwardReferenceScore(
|
||||
BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);
|
||||
}
|
||||
|
||||
static const score_t kDistanceShortCodeCost[BROTLI_NUM_DISTANCE_SHORT_CODES] = {
|
||||
/* Repeat last */
|
||||
BROTLI_SCORE_BASE + 60,
|
||||
/* 2nd, 3rd, 4th last */
|
||||
BROTLI_SCORE_BASE - 95,
|
||||
BROTLI_SCORE_BASE - 117,
|
||||
BROTLI_SCORE_BASE - 127,
|
||||
/* Last with offset */
|
||||
BROTLI_SCORE_BASE - 93,
|
||||
BROTLI_SCORE_BASE - 93,
|
||||
BROTLI_SCORE_BASE - 96,
|
||||
BROTLI_SCORE_BASE - 96,
|
||||
BROTLI_SCORE_BASE - 99,
|
||||
BROTLI_SCORE_BASE - 99,
|
||||
/* 2nd last with offset */
|
||||
BROTLI_SCORE_BASE - 105,
|
||||
BROTLI_SCORE_BASE - 105,
|
||||
BROTLI_SCORE_BASE - 115,
|
||||
BROTLI_SCORE_BASE - 115,
|
||||
BROTLI_SCORE_BASE - 125,
|
||||
BROTLI_SCORE_BASE - 125
|
||||
};
|
||||
|
||||
static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
|
||||
size_t copy_length, size_t distance_short_code) {
|
||||
size_t copy_length) {
|
||||
return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +
|
||||
kDistanceShortCodeCost[distance_short_code];
|
||||
BROTLI_SCORE_BASE + 15;
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void DictionarySearchStaticticsReset(
|
||||
DictionarySearchStatictics* self) {
|
||||
self->num_lookups = 0;
|
||||
self->num_matches = 0;
|
||||
static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
|
||||
size_t distance_short_code) {
|
||||
return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE);
|
||||
}
|
||||
|
||||
static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
|
||||
size_t item, const uint8_t* data, size_t max_length, size_t max_backward,
|
||||
HasherSearchResult* out) {
|
||||
const BrotliDictionary* dictionary, size_t item, const uint8_t* data,
|
||||
size_t max_length, size_t max_backward, HasherSearchResult* out) {
|
||||
size_t len;
|
||||
size_t dist;
|
||||
size_t offset;
|
||||
@ -142,19 +156,22 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
|
||||
score_t score;
|
||||
len = item & 0x1F;
|
||||
dist = item >> 5;
|
||||
offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
|
||||
offset = dictionary->offsets_by_length[len] + len * dist;
|
||||
if (len > max_length) {
|
||||
return BROTLI_FALSE;
|
||||
}
|
||||
|
||||
matchlen = FindMatchLengthWithLimit(data, &kBrotliDictionary[offset], len);
|
||||
matchlen =
|
||||
FindMatchLengthWithLimit(data, &dictionary->data[offset], len);
|
||||
if (matchlen + kCutoffTransformsCount <= len || matchlen == 0) {
|
||||
return BROTLI_FALSE;
|
||||
}
|
||||
{
|
||||
size_t transform_id = kCutoffTransforms[len - matchlen];
|
||||
size_t cut = len - matchlen;
|
||||
size_t transform_id =
|
||||
(cut << 2) + (size_t)((kCutoffTransforms >> (cut * 6)) & 0x3F);
|
||||
backward = max_backward + dist + 1 +
|
||||
(transform_id << kBrotliDictionarySizeBitsByLength[len]);
|
||||
(transform_id << dictionary->size_bits_by_length[len]);
|
||||
}
|
||||
score = BackwardReferenceScore(matchlen, backward);
|
||||
if (score < out->score) {
|
||||
@ -168,22 +185,27 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
|
||||
}
|
||||
|
||||
static BROTLI_INLINE BROTLI_BOOL SearchInStaticDictionary(
|
||||
DictionarySearchStatictics* self, const uint8_t* data, size_t max_length,
|
||||
const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
|
||||
HasherHandle handle, const uint8_t* data, size_t max_length,
|
||||
size_t max_backward, HasherSearchResult* out, BROTLI_BOOL shallow) {
|
||||
size_t key;
|
||||
size_t i;
|
||||
BROTLI_BOOL is_match_found = BROTLI_FALSE;
|
||||
if (self->num_matches < (self->num_lookups >> 7)) {
|
||||
HasherCommon* self = GetHasherCommon(handle);
|
||||
if (self->dict_num_matches < (self->dict_num_lookups >> 7)) {
|
||||
return BROTLI_FALSE;
|
||||
}
|
||||
key = Hash14(data) << 1;
|
||||
for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {
|
||||
size_t item = kStaticDictionaryHash[key];
|
||||
self->num_lookups++;
|
||||
if (item != 0 &&
|
||||
TestStaticDictionaryItem(item, data, max_length, max_backward, out)) {
|
||||
self->num_matches++;
|
||||
is_match_found = BROTLI_TRUE;
|
||||
size_t item = dictionary_hash[key];
|
||||
self->dict_num_lookups++;
|
||||
if (item != 0) {
|
||||
BROTLI_BOOL item_matches = TestStaticDictionaryItem(
|
||||
dictionary, item, data, max_length, max_backward, out);
|
||||
if (item_matches) {
|
||||
self->dict_num_matches++;
|
||||
is_match_found = BROTLI_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
return is_match_found;
|
||||
@ -267,43 +289,11 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
|
||||
#undef HASHER
|
||||
|
||||
#define HASHER() H5
|
||||
#define BUCKET_BITS 14
|
||||
#define BLOCK_BITS 4
|
||||
#define NUM_LAST_DISTANCES_TO_CHECK 4
|
||||
#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
|
||||
#undef BLOCK_BITS
|
||||
#undef HASHER
|
||||
|
||||
#define HASHER() H6
|
||||
#define BLOCK_BITS 5
|
||||
#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
|
||||
#undef NUM_LAST_DISTANCES_TO_CHECK
|
||||
#undef BLOCK_BITS
|
||||
#undef BUCKET_BITS
|
||||
#undef HASHER
|
||||
|
||||
#define HASHER() H7
|
||||
#define BUCKET_BITS 15
|
||||
#define BLOCK_BITS 6
|
||||
#define NUM_LAST_DISTANCES_TO_CHECK 10
|
||||
#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
|
||||
#undef BLOCK_BITS
|
||||
#undef HASHER
|
||||
|
||||
#define HASHER() H8
|
||||
#define BLOCK_BITS 7
|
||||
#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
|
||||
#undef NUM_LAST_DISTANCES_TO_CHECK
|
||||
#undef BLOCK_BITS
|
||||
#undef HASHER
|
||||
|
||||
#define HASHER() H9
|
||||
#define BLOCK_BITS 8
|
||||
#define NUM_LAST_DISTANCES_TO_CHECK 16
|
||||
#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
|
||||
#undef NUM_LAST_DISTANCES_TO_CHECK
|
||||
#undef BLOCK_BITS
|
||||
#undef BUCKET_BITS
|
||||
#include "./hash_longest_match64_inc.h" /* NOLINT(build/include) */
|
||||
#undef HASHER
|
||||
|
||||
#define BUCKET_BITS 15
|
||||
@ -352,97 +342,120 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
|
||||
#undef CAT
|
||||
#undef EXPAND_CAT
|
||||
|
||||
#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(7) H(8) H(9) \
|
||||
H(40) H(41) H(42) H(54)
|
||||
#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)
|
||||
#define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
|
||||
|
||||
typedef struct Hashers {
|
||||
#define MEMBER_(N) H ## N* h ## N;
|
||||
FOR_ALL_HASHERS(MEMBER_)
|
||||
#undef MEMBER_
|
||||
} Hashers;
|
||||
|
||||
static BROTLI_INLINE void InitHashers(Hashers* self) {
|
||||
#define INIT_(N) self->h ## N = 0;
|
||||
FOR_ALL_HASHERS(INIT_)
|
||||
#undef INIT_
|
||||
static BROTLI_INLINE void DestroyHasher(
|
||||
MemoryManager* m, HasherHandle* handle) {
|
||||
if (*handle == NULL) return;
|
||||
BROTLI_FREE(m, *handle);
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void DestroyHashers(MemoryManager* m, Hashers* self) {
|
||||
#define CLEANUP_(N) if (self->h ## N) CleanupH ## N(m, self->h ## N); \
|
||||
BROTLI_FREE(m, self->h ## N);
|
||||
FOR_ALL_HASHERS(CLEANUP_)
|
||||
#undef CLEANUP_
|
||||
static BROTLI_INLINE void HasherReset(HasherHandle handle) {
|
||||
if (handle == NULL) return;
|
||||
GetHasherCommon(handle)->is_prepared_ = BROTLI_FALSE;
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void HashersReset(Hashers* self, int type) {
|
||||
switch (type) {
|
||||
#define RESET_(N) case N: ResetH ## N(self->h ## N); break;
|
||||
FOR_ALL_HASHERS(RESET_)
|
||||
#undef RESET_
|
||||
default: break;
|
||||
static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params,
|
||||
BROTLI_BOOL one_shot, const size_t input_size) {
|
||||
size_t result = sizeof(HasherCommon);
|
||||
switch (params->hasher.type) {
|
||||
#define SIZE_(N) \
|
||||
case N: \
|
||||
result += HashMemAllocInBytesH ## N(params, one_shot, input_size); \
|
||||
break;
|
||||
FOR_ALL_HASHERS(SIZE_)
|
||||
#undef SIZE_
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void HashersSetup(
|
||||
MemoryManager* m, Hashers* self, int type) {
|
||||
switch (type) {
|
||||
#define SETUP_(N) case N: self->h ## N = BROTLI_ALLOC(m, H ## N, 1); break;
|
||||
FOR_ALL_HASHERS(SETUP_)
|
||||
#undef SETUP_
|
||||
default: break;
|
||||
}
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
switch (type) {
|
||||
#define INITIALIZE_(N) case N: InitializeH ## N(self->h ## N); break;
|
||||
FOR_ALL_HASHERS(INITIALIZE_);
|
||||
static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle,
|
||||
BrotliEncoderParams* params, const uint8_t* data, size_t position,
|
||||
size_t input_size, BROTLI_BOOL is_last) {
|
||||
HasherHandle self = NULL;
|
||||
HasherCommon* common = NULL;
|
||||
BROTLI_BOOL one_shot = (position == 0 && is_last);
|
||||
if (*handle == NULL) {
|
||||
size_t alloc_size;
|
||||
ChooseHasher(params, ¶ms->hasher);
|
||||
alloc_size = HasherSize(params, one_shot, input_size);
|
||||
self = BROTLI_ALLOC(m, uint8_t, alloc_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
*handle = self;
|
||||
common = GetHasherCommon(self);
|
||||
common->params = params->hasher;
|
||||
switch (common->params.type) {
|
||||
#define INITIALIZE_(N) \
|
||||
case N: \
|
||||
InitializeH ## N(*handle, params); \
|
||||
break;
|
||||
FOR_ALL_HASHERS(INITIALIZE_);
|
||||
#undef INITIALIZE_
|
||||
default: break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
HasherReset(*handle);
|
||||
}
|
||||
HashersReset(self, type);
|
||||
}
|
||||
|
||||
#define WARMUP_HASH_(N) \
|
||||
static BROTLI_INLINE void WarmupHashH ## N(MemoryManager* m, \
|
||||
const BrotliEncoderParams* params, const size_t size, const uint8_t* dict, \
|
||||
H ## N* hasher) { \
|
||||
size_t overlap = (StoreLookaheadH ## N()) - 1; \
|
||||
size_t i; \
|
||||
InitH ## N(m, hasher, dict, params, 0, size, BROTLI_FALSE); \
|
||||
if (BROTLI_IS_OOM(m)) return; \
|
||||
for (i = 0; i + overlap < size; i++) { \
|
||||
StoreH ## N(hasher, dict, ~(size_t)0, i); \
|
||||
} \
|
||||
self = *handle;
|
||||
common = GetHasherCommon(self);
|
||||
if (!common->is_prepared_) {
|
||||
switch (common->params.type) {
|
||||
#define PREPARE_(N) \
|
||||
case N: \
|
||||
PrepareH ## N(self, one_shot, input_size, data); \
|
||||
break;
|
||||
FOR_ALL_HASHERS(PREPARE_)
|
||||
#undef PREPARE_
|
||||
default: break;
|
||||
}
|
||||
if (position == 0) {
|
||||
common->dict_num_lookups = 0;
|
||||
common->dict_num_matches = 0;
|
||||
}
|
||||
common->is_prepared_ = BROTLI_TRUE;
|
||||
}
|
||||
}
|
||||
FOR_ALL_HASHERS(WARMUP_HASH_)
|
||||
#undef WARMUP_HASH_
|
||||
|
||||
/* Custom LZ77 window. */
|
||||
static BROTLI_INLINE void HashersPrependCustomDictionary(
|
||||
MemoryManager* m, Hashers* self, const BrotliEncoderParams* params,
|
||||
static BROTLI_INLINE void HasherPrependCustomDictionary(
|
||||
MemoryManager* m, HasherHandle* handle, BrotliEncoderParams* params,
|
||||
const size_t size, const uint8_t* dict) {
|
||||
int hasher_type = ChooseHasher(params);
|
||||
switch (hasher_type) {
|
||||
#define PREPEND_(N) \
|
||||
case N: WarmupHashH ## N(m, params, size, dict, self->h ## N); break;
|
||||
size_t overlap;
|
||||
size_t i;
|
||||
HasherHandle self;
|
||||
HasherSetup(m, handle, params, dict, 0, size, BROTLI_FALSE);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
self = *handle;
|
||||
switch (GetHasherCommon(self)->params.type) {
|
||||
#define PREPEND_(N) \
|
||||
case N: \
|
||||
overlap = (StoreLookaheadH ## N()) - 1; \
|
||||
for (i = 0; i + overlap < size; i++) { \
|
||||
StoreH ## N(self, dict, ~(size_t)0, i); \
|
||||
} \
|
||||
break;
|
||||
FOR_ALL_HASHERS(PREPEND_)
|
||||
#undef PREPEND_
|
||||
default: break;
|
||||
}
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void InitOrStitchToPreviousBlock(
|
||||
MemoryManager* m, Hashers* self, const uint8_t* data, size_t mask,
|
||||
const BrotliEncoderParams* params, size_t position,
|
||||
size_t bytes, BROTLI_BOOL is_last) {
|
||||
int hasher_type = ChooseHasher(params);
|
||||
switch (hasher_type) {
|
||||
#define INIT_(N) \
|
||||
case N: \
|
||||
InitH ## N(m, self->h ## N, data, params, position, bytes, is_last); \
|
||||
if (BROTLI_IS_OOM(m)) return; \
|
||||
StitchToPreviousBlockH ## N(self->h ## N, bytes, position, data, mask); \
|
||||
MemoryManager* m, HasherHandle* handle, const uint8_t* data, size_t mask,
|
||||
BrotliEncoderParams* params, size_t position, size_t input_size,
|
||||
BROTLI_BOOL is_last) {
|
||||
HasherHandle self;
|
||||
HasherSetup(m, handle, params, data, position, input_size, is_last);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
self = *handle;
|
||||
switch (GetHasherCommon(self)->params.type) {
|
||||
#define INIT_(N) \
|
||||
case N: \
|
||||
StitchToPreviousBlockH ## N(self, input_size, position, data, mask); \
|
||||
break;
|
||||
FOR_ALL_HASHERS(INIT_)
|
||||
#undef INIT_
|
||||
@ -450,7 +463,6 @@ static BROTLI_INLINE void InitOrStitchToPreviousBlock(
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
@ -51,74 +51,57 @@ typedef struct HashForgetfulChain {
|
||||
uint8_t tiny_hash[65536];
|
||||
FN(Bank) banks[NUM_BANKS];
|
||||
uint16_t free_slot_idx[NUM_BANKS];
|
||||
BROTLI_BOOL is_dirty_;
|
||||
DictionarySearchStatictics dict_search_stats_;
|
||||
size_t max_hops;
|
||||
} HashForgetfulChain;
|
||||
|
||||
static void FN(Initialize)(HashForgetfulChain* self) {
|
||||
BROTLI_UNUSED(self);
|
||||
static BROTLI_INLINE HashForgetfulChain* FN(Self)(HasherHandle handle) {
|
||||
return (HashForgetfulChain*)&(GetHasherCommon(handle)[1]);
|
||||
}
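/* Editorial sketch (not part of the patch; helper name invented): the
   class-like hasher refactoring keeps one flat allocation per hasher, with a
   HasherCommon header at offset 0 and the hasher-specific state immediately
   after it. That layout is what FN(Self) above relies on. */
static BROTLI_INLINE HashForgetfulChain* SelfLayoutSketch(HasherHandle handle) {
  HasherCommon* common = GetHasherCommon(handle);  /* header at offset 0 */
  return (HashForgetfulChain*)&common[1];          /* payload starts right after */
}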
|
||||
|
||||
static void FN(Cleanup)(MemoryManager* m, HashForgetfulChain* self) {
|
||||
BROTLI_UNUSED(m);
|
||||
BROTLI_UNUSED(self);
|
||||
static void FN(Initialize)(
|
||||
HasherHandle handle, const BrotliEncoderParams* params) {
|
||||
FN(Self)(handle)->max_hops =
|
||||
(params->quality > 6 ? 7u : 8u) << (params->quality - 4);
|
||||
}
|
||||
|
||||
static void FN(Reset)(HashForgetfulChain* self) {
|
||||
self->is_dirty_ = BROTLI_TRUE;
|
||||
DictionarySearchStaticticsReset(&self->dict_search_stats_);
|
||||
}
|
||||
|
||||
static void FN(InitEmpty)(HashForgetfulChain* self) {
|
||||
if (self->is_dirty_) {
|
||||
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
|
||||
size_t input_size, const uint8_t* data) {
|
||||
HashForgetfulChain* self = FN(Self)(handle);
|
||||
/* Partial preparation is 100 times slower (per socket). */
|
||||
size_t partial_prepare_threshold = BUCKET_SIZE >> 6;
|
||||
if (one_shot && input_size <= partial_prepare_threshold) {
|
||||
size_t i;
|
||||
for (i = 0; i < input_size; ++i) {
|
||||
size_t bucket = FN(HashBytes)(&data[i]);
|
||||
/* See InitEmpty comment. */
|
||||
self->addr[bucket] = 0xCCCCCCCC;
|
||||
self->head[bucket] = 0xCCCC;
|
||||
}
|
||||
} else {
|
||||
/* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position
|
||||
processed by hasher never reaches 3GB + 64M; this makes all new chains
|
||||
to be terminated after the first node. */
|
||||
memset(self->addr, 0xCC, sizeof(self->addr));
|
||||
memset(self->head, 0, sizeof(self->head));
|
||||
memset(self->tiny_hash, 0, sizeof(self->tiny_hash));
|
||||
memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
|
||||
self->is_dirty_ = BROTLI_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
static void FN(InitForData)(HashForgetfulChain* self, const uint8_t* data,
|
||||
size_t num) {
|
||||
size_t i;
|
||||
for (i = 0; i < num; ++i) {
|
||||
size_t bucket = FN(HashBytes)(&data[i]);
|
||||
/* See InitEmpty comment. */
|
||||
self->addr[bucket] = 0xCCCCCCCC;
|
||||
self->head[bucket] = 0xCCCC;
|
||||
}
|
||||
memset(self->tiny_hash, 0, sizeof(self->tiny_hash));
|
||||
memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
|
||||
if (num != 0) {
|
||||
self->is_dirty_ = BROTLI_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
static void FN(Init)(
|
||||
MemoryManager* m, HashForgetfulChain* self, const uint8_t* data,
|
||||
const BrotliEncoderParams* params, size_t position, size_t bytes,
|
||||
BROTLI_BOOL is_last) {
|
||||
/* Choose which initialization method is faster.
|
||||
Init() is about 100 times faster than InitForData(). */
|
||||
const size_t kMaxBytesForPartialHashInit = BUCKET_SIZE >> 6;
|
||||
BROTLI_UNUSED(m);
|
||||
self->max_hops = (params->quality > 6 ? 7u : 8u) << (params->quality - 4);
|
||||
if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) {
|
||||
FN(InitForData)(self, data, bytes);
|
||||
} else {
|
||||
FN(InitEmpty)(self);
|
||||
}
|
||||
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
|
||||
const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
|
||||
size_t input_size) {
|
||||
BROTLI_UNUSED(params);
|
||||
BROTLI_UNUSED(one_shot);
|
||||
BROTLI_UNUSED(input_size);
|
||||
return sizeof(HashForgetfulChain);
|
||||
}
|
||||
|
||||
/* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
|
||||
node to corresponding chain; also update tiny_hash for current position. */
|
||||
static BROTLI_INLINE void FN(Store)(HashForgetfulChain* BROTLI_RESTRICT self,
|
||||
static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
|
||||
const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
|
||||
HashForgetfulChain* self = FN(Self)(handle);
|
||||
const size_t key = FN(HashBytes)(&data[ix & mask]);
|
||||
const size_t bank = key & (NUM_BANKS - 1);
|
||||
const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);
|
||||
@ -131,40 +114,52 @@ static BROTLI_INLINE void FN(Store)(HashForgetfulChain* BROTLI_RESTRICT self,
|
||||
self->head[key] = (uint16_t)idx;
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(StoreRange)(HashForgetfulChain* self,
|
||||
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
|
||||
const uint8_t *data, const size_t mask, const size_t ix_start,
|
||||
const size_t ix_end) {
|
||||
size_t i;
|
||||
for (i = ix_start; i < ix_end; ++i) {
|
||||
FN(Store)(self, data, mask, i);
|
||||
FN(Store)(handle, data, mask, i);
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashForgetfulChain* self,
|
||||
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
|
||||
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
|
||||
size_t ring_buffer_mask) {
|
||||
if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
|
||||
/* Prepare the hashes for three last bytes of the last write.
|
||||
These could not be calculated before, since they require knowledge
|
||||
of both the previous and the current block. */
|
||||
FN(Store)(self, ringbuffer, ring_buffer_mask, position - 3);
|
||||
FN(Store)(self, ringbuffer, ring_buffer_mask, position - 2);
|
||||
FN(Store)(self, ringbuffer, ring_buffer_mask, position - 1);
|
||||
FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 3);
|
||||
FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 2);
|
||||
FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 1);
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(PrepareDistanceCache)(
|
||||
HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
|
||||
BROTLI_UNUSED(handle);
|
||||
PrepareDistanceCache(distance_cache, NUM_LAST_DISTANCES_TO_CHECK);
|
||||
}
|
||||
|
||||
/* Find a longest backward match of &data[cur_ix] up to the length of
|
||||
max_length and stores the position cur_ix in the hash table.
|
||||
|
||||
REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
|
||||
values; if this method is invoked repeatedly with the same distance
|
||||
cache values, it is enough to invoke FN(PrepareDistanceCache) once.
|
||||
|
||||
Does not look for matches longer than max_length.
|
||||
Does not look for matches further away than max_backward.
|
||||
Writes the best match into |out|.
|
||||
Returns 1 when match is found, otherwise 0. */
|
||||
static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
|
||||
HashForgetfulChain* self, const uint8_t* BROTLI_RESTRICT data,
|
||||
const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
|
||||
static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(HasherHandle handle,
|
||||
const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
|
||||
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
|
||||
const int* BROTLI_RESTRICT distance_cache,
|
||||
const size_t cur_ix, const size_t max_length, const size_t max_backward,
|
||||
HasherSearchResult* BROTLI_RESTRICT out) {
|
||||
HashForgetfulChain* self = FN(Self)(handle);
|
||||
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
||||
BROTLI_BOOL is_match_found = BROTLI_FALSE;
|
||||
/* Don't accept a short copy from far away. */
|
||||
@ -177,9 +172,7 @@ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
|
||||
out->len_x_code = 0;
|
||||
/* Try last distance first. */
|
||||
for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
|
||||
const size_t idx = kDistanceCacheIndex[i];
|
||||
const size_t backward =
|
||||
(size_t)(distance_cache[idx] + kDistanceCacheOffset[i]);
|
||||
const size_t backward = (size_t)distance_cache[i];
|
||||
size_t prev_ix = (cur_ix - backward);
|
||||
/* For distance code 0 we want to consider 2-byte matches. */
|
||||
if (i > 0 && self->tiny_hash[(uint16_t)prev_ix] != tiny_hash) continue;
|
||||
@ -192,14 +185,17 @@ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
|
||||
&data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 2) {
|
||||
score_t score = BackwardReferenceScoreUsingLastDistance(len, i);
|
||||
score_t score = BackwardReferenceScoreUsingLastDistance(len);
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
out->len = best_len;
|
||||
out->distance = backward;
|
||||
out->score = best_score;
|
||||
is_match_found = BROTLI_TRUE;
|
||||
if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
out->len = best_len;
|
||||
out->distance = backward;
|
||||
out->score = best_score;
|
||||
is_match_found = BROTLI_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -243,11 +239,12 @@ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
|
||||
}
|
||||
}
|
||||
}
|
||||
FN(Store)(self, data, ring_buffer_mask, cur_ix);
|
||||
FN(Store)(handle, data, ring_buffer_mask, cur_ix);
|
||||
}
|
||||
if (!is_match_found) {
|
||||
is_match_found = SearchInStaticDictionary(&self->dict_search_stats_,
|
||||
&data[cur_ix_masked], max_length, max_backward, out, BROTLI_FALSE);
|
||||
is_match_found = SearchInStaticDictionary(dictionary, dictionary_hash,
|
||||
handle, &data[cur_ix_masked], max_length, max_backward, out,
|
||||
BROTLI_FALSE);
|
||||
}
|
||||
return is_match_found;
|
||||
}
|
||||
|
269
enc/hash_longest_match64_inc.h
Executable file
@ -0,0 +1,269 @@
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: FN */
|
||||
|
||||
/* A (forgetful) hash table to the data seen by the compressor, to
|
||||
help create backward references to previous data.
|
||||
|
||||
This is a hash map of fixed size (bucket_size_) to a ring buffer of
|
||||
fixed size (block_size_). The ring buffer contains the last block_size_
|
||||
index positions of the given hash key in the compressed data. */
|
||||
|
||||
#define HashLongestMatch HASHER()
|
||||
|
||||
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
|
||||
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
|
||||
|
||||
/* HashBytes is the function that chooses the bucket to place the address in. */
|
||||
static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t *data,
|
||||
const uint64_t mask,
|
||||
const int shift) {
|
||||
const uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) & mask) * kHashMul64Long;
|
||||
/* The higher bits contain more mixture from the multiplication,
|
||||
so we take our results from there. */
|
||||
return (uint32_t)(h >> shift);
|
||||
}
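/* Editorial sketch (invented helper): how the configurable H6 parameters feed
   the mask/shift used above. hash_len selects how many low bytes of the
   64-bit load participate; bucket_bits selects how many top bits of the
   product survive, matching hash_mask_ and hash_shift_ as set up in
   FN(Initialize) below. */
static BROTLI_INLINE uint32_t HashBytesSketchH6(
    const uint8_t* data, int hash_len, int bucket_bits) {
  const uint64_t mask = (~((uint64_t)0U)) >> (64 - 8 * hash_len);
  const int shift = 64 - bucket_bits;
  const uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) & mask) * kHashMul64Long;
  return (uint32_t)(h >> shift);  /* index into (1 << bucket_bits) buckets */
}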
|
||||
|
||||
typedef struct HashLongestMatch {
|
||||
/* Number of hash buckets. */
|
||||
size_t bucket_size_;
|
||||
/* Only block_size_ newest backward references are kept,
|
||||
and the older are forgotten. */
|
||||
size_t block_size_;
|
||||
/* Left-shift for computing hash bucket index from hash value. */
|
||||
int hash_shift_;
|
||||
/* Mask for selecting the next 4-8 bytes of input */
|
||||
uint64_t hash_mask_;
|
||||
/* Mask for accessing entries in a block (in a ring-buffer manner). */
|
||||
uint32_t block_mask_;
|
||||
|
||||
/* --- Dynamic size members --- */
|
||||
|
||||
/* Number of entries in a particular bucket. */
|
||||
/* uint16_t num[bucket_size]; */
|
||||
|
||||
/* Buckets containing block_size_ of backward references. */
|
||||
/* uint32_t* buckets[bucket_size * block_size]; */
|
||||
} HashLongestMatch;
|
||||
|
||||
static BROTLI_INLINE HashLongestMatch* FN(Self)(HasherHandle handle) {
|
||||
return (HashLongestMatch*)&(GetHasherCommon(handle)[1]);
|
||||
}
|
||||
|
||||
static BROTLI_INLINE uint16_t* FN(Num)(HashLongestMatch* self) {
|
||||
return (uint16_t*)(&self[1]);
|
||||
}
|
||||
|
||||
static BROTLI_INLINE uint32_t* FN(Buckets)(HashLongestMatch* self) {
|
||||
return (uint32_t*)(&FN(Num)(self)[self->bucket_size_]);
|
||||
}
|
||||
|
||||
static void FN(Initialize)(
|
||||
HasherHandle handle, const BrotliEncoderParams* params) {
|
||||
HasherCommon* common = GetHasherCommon(handle);
|
||||
HashLongestMatch* self = FN(Self)(handle);
|
||||
BROTLI_UNUSED(params);
|
||||
self->hash_shift_ = 64 - common->params.bucket_bits;
|
||||
self->hash_mask_ = (~((uint64_t)0U)) >> (64 - 8 * common->params.hash_len);
|
||||
self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
|
||||
self->block_size_ = (size_t)1 << common->params.block_bits;
|
||||
self->block_mask_ = (uint32_t)(self->block_size_ - 1);
|
||||
}
|
||||
|
||||
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
|
||||
size_t input_size, const uint8_t* data) {
|
||||
HashLongestMatch* self = FN(Self)(handle);
|
||||
uint16_t* num = FN(Num)(self);
|
||||
/* Partial preparation is 100 times slower (per socket). */
|
||||
size_t partial_prepare_threshold = self->bucket_size_ >> 6;
|
||||
if (one_shot && input_size <= partial_prepare_threshold) {
|
||||
size_t i;
|
||||
for (i = 0; i < input_size; ++i) {
|
||||
const uint32_t key = FN(HashBytes)(&data[i], self->hash_mask_,
|
||||
self->hash_shift_);
|
||||
num[key] = 0;
|
||||
}
|
||||
} else {
|
||||
memset(num, 0, self->bucket_size_ * sizeof(num[0]));
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
|
||||
const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
|
||||
size_t input_size) {
|
||||
size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
|
||||
size_t block_size = (size_t)1 << params->hasher.block_bits;
|
||||
BROTLI_UNUSED(one_shot);
|
||||
BROTLI_UNUSED(input_size);
|
||||
return sizeof(HashLongestMatch) + bucket_size * (2 + 4 * block_size);
|
||||
}
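/* Editorial sketch: the size returned above covers the part of one flat
   allocation that follows the HasherCommon header, laid out as
     [HashLongestMatch][uint16_t num[bucket_size]]
     [uint32_t buckets[bucket_size * block_size]]
   which FN(Num) and FN(Buckets) recover by pointer arithmetic. The
   spelled-out form below is illustrative, not from the patch. */
static BROTLI_INLINE size_t HashMemSizeSketch(int bucket_bits, int block_bits) {
  size_t bucket_size = (size_t)1 << bucket_bits;
  size_t block_size = (size_t)1 << block_bits;
  return sizeof(HashLongestMatch) +
      bucket_size * (sizeof(uint16_t) +             /* num[] */
                     sizeof(uint32_t) * block_size  /* buckets[] */);
}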
|
||||
|
||||
/* Look at 4 bytes at &data[ix & mask].
|
||||
Compute a hash from these, and store the value of ix at that position. */
|
||||
static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data,
|
||||
const size_t mask, const size_t ix) {
|
||||
HashLongestMatch* self = FN(Self)(handle);
|
||||
uint16_t* num = FN(Num)(self);
|
||||
const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_mask_,
|
||||
self->hash_shift_);
|
||||
const size_t minor_ix = num[key] & self->block_mask_;
|
||||
const size_t offset =
|
||||
minor_ix + (key << GetHasherCommon(handle)->params.block_bits);
|
||||
FN(Buckets)(self)[offset] = (uint32_t)ix;
|
||||
++num[key];
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
|
||||
const uint8_t *data, const size_t mask, const size_t ix_start,
|
||||
const size_t ix_end) {
|
||||
size_t i;
|
||||
for (i = ix_start; i < ix_end; ++i) {
|
||||
FN(Store)(handle, data, mask, i);
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
|
||||
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask) {
|
||||
if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
|
||||
/* Prepare the hashes for three last bytes of the last write.
|
||||
These could not be calculated before, since they require knowledge
|
||||
of both the previous and the current block. */
|
||||
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 3);
|
||||
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 2);
|
||||
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 1);
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(PrepareDistanceCache)(
|
||||
HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
|
||||
PrepareDistanceCache(distance_cache,
|
||||
GetHasherCommon(handle)->params.num_last_distances_to_check);
|
||||
}
|
||||
|
||||
/* Find a longest backward match of &data[cur_ix] up to the length of
|
||||
max_length and stores the position cur_ix in the hash table.
|
||||
|
||||
REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
|
||||
values; if this method is invoked repeatedly with the same distance
|
||||
cache values, it is enough to invoke FN(PrepareDistanceCache) once.
|
||||
|
||||
Does not look for matches longer than max_length.
|
||||
Does not look for matches further away than max_backward.
|
||||
Writes the best match into |out|.
|
||||
Returns true when match is found, otherwise false. */
|
||||
static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(HasherHandle handle,
|
||||
const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
|
||||
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
|
||||
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
|
||||
const size_t max_length, const size_t max_backward,
|
||||
HasherSearchResult* BROTLI_RESTRICT out) {
|
||||
HasherCommon* common = GetHasherCommon(handle);
|
||||
HashLongestMatch* self = FN(Self)(handle);
|
||||
uint16_t* num = FN(Num)(self);
|
||||
uint32_t* buckets = FN(Buckets)(self);
|
||||
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
||||
BROTLI_BOOL is_match_found = BROTLI_FALSE;
|
||||
/* Don't accept a short copy from far away. */
|
||||
score_t best_score = out->score;
|
||||
size_t best_len = out->len;
|
||||
size_t i;
|
||||
out->len = 0;
|
||||
out->len_x_code = 0;
|
||||
/* Try last distance first. */
|
||||
for (i = 0; i < (size_t)common->params.num_last_distances_to_check; ++i) {
|
||||
const size_t backward = (size_t)distance_cache[i];
|
||||
size_t prev_ix = (size_t)(cur_ix - backward);
|
||||
if (prev_ix >= cur_ix) {
|
||||
continue;
|
||||
}
|
||||
if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
|
||||
continue;
|
||||
}
|
||||
prev_ix &= ring_buffer_mask;
|
||||
|
||||
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
||||
prev_ix + best_len > ring_buffer_mask ||
|
||||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
{
|
||||
const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
|
||||
&data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 3 || (len == 2 && i < 2)) {
|
||||
/* Comparing for >= 2 does not change the semantics, but just saves for
|
||||
a few unnecessary binary logarithms in backward reference score,
|
||||
since we are not interested in such short matches. */
|
||||
score_t score = BackwardReferenceScoreUsingLastDistance(len);
|
||||
if (best_score < score) {
|
||||
if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
out->len = best_len;
|
||||
out->distance = backward;
|
||||
out->score = best_score;
|
||||
is_match_found = BROTLI_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
const uint32_t key = FN(HashBytes)(
|
||||
&data[cur_ix_masked], self->hash_mask_, self->hash_shift_);
|
||||
uint32_t* BROTLI_RESTRICT bucket =
|
||||
&buckets[key << common->params.block_bits];
|
||||
const size_t down =
|
||||
(num[key] > self->block_size_) ?
|
||||
(num[key] - self->block_size_) : 0u;
|
||||
for (i = num[key]; i > down;) {
|
||||
size_t prev_ix = bucket[--i & self->block_mask_];
|
||||
const size_t backward = cur_ix - prev_ix;
|
||||
if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
|
||||
break;
|
||||
}
|
||||
prev_ix &= ring_buffer_mask;
|
||||
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
||||
prev_ix + best_len > ring_buffer_mask ||
|
||||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
{
|
||||
const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
|
||||
&data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 4) {
|
||||
/* Comparing for >= 3 does not change the semantics, but just saves
|
||||
for a few unnecessary binary logarithms in backward reference
|
||||
score, since we are not interested in such short matches. */
|
||||
score_t score = BackwardReferenceScore(len, backward);
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
out->len = best_len;
|
||||
out->distance = backward;
|
||||
out->score = best_score;
|
||||
is_match_found = BROTLI_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;
|
||||
++num[key];
|
||||
}
|
||||
if (!is_match_found) {
|
||||
is_match_found = SearchInStaticDictionary(dictionary, dictionary_hash,
|
||||
handle, &data[cur_ix_masked], max_length, max_backward, out,
|
||||
BROTLI_FALSE);
|
||||
}
|
||||
return is_match_found;
|
||||
}
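/* Editorial usage sketch of the REQUIRES contract above (the surrounding loop
   and variable names are invented): as long as the distance cache is not
   modified, a single PrepareDistanceCache call covers any number of
   FindLongestMatch calls. */
FN(PrepareDistanceCache)(handle, dist_cache);
for (pos = block_start; pos < block_end; ++pos) {
  /* dist_cache is unchanged inside the loop, so no re-preparation is needed. */
  FN(FindLongestMatch)(handle, dictionary, dictionary_hash, data,
                       ring_buffer_mask, dist_cache, pos, max_length,
                       max_backward, &match);
}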
|
||||
|
||||
#undef HashLongestMatch
|
@ -5,149 +5,161 @@
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: FN, BUCKET_BITS, BLOCK_BITS,
|
||||
NUM_LAST_DISTANCES_TO_CHECK */
|
||||
/* template parameters: FN */
|
||||
|
||||
/* A (forgetful) hash table to the data seen by the compressor, to
|
||||
help create backward references to previous data.
|
||||
|
||||
This is a hash map of fixed size (BUCKET_SIZE) to a ring buffer of
|
||||
fixed size (BLOCK_SIZE). The ring buffer contains the last BLOCK_SIZE
|
||||
This is a hash map of fixed size (bucket_size_) to a ring buffer of
|
||||
fixed size (block_size_). The ring buffer contains the last block_size_
|
||||
index positions of the given hash key in the compressed data. */
|
||||
|
||||
#define HashLongestMatch HASHER()
|
||||
|
||||
/* Number of hash buckets. */
|
||||
#define BUCKET_SIZE (1 << BUCKET_BITS)
|
||||
|
||||
/* Only BLOCK_SIZE newest backward references are kept,
|
||||
and the older are forgotten. */
|
||||
#define BLOCK_SIZE (1u << BLOCK_BITS)
|
||||
|
||||
/* Mask for accessing entries in a block (in a ring-buffer manner). */
|
||||
#define BLOCK_MASK ((1 << BLOCK_BITS) - 1)
|
||||
|
||||
#define HASH_MAP_SIZE (2 << BUCKET_BITS)
|
||||
|
||||
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
|
||||
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
|
||||
|
||||
/* HashBytes is the function that chooses the bucket to place
|
||||
the address in. The HashLongestMatch and HashLongestMatchQuickly
|
||||
classes have separate, different implementations of hashing. */
|
||||
static uint32_t FN(HashBytes)(const uint8_t *data) {
|
||||
/* HashBytes is the function that chooses the bucket to place the address in. */
|
||||
static uint32_t FN(HashBytes)(const uint8_t *data, const int shift) {
|
||||
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
|
||||
/* The higher bits contain more mixture from the multiplication,
|
||||
so we take our results from there. */
|
||||
return h >> (32 - BUCKET_BITS);
|
||||
return (uint32_t)(h >> shift);
|
||||
}
|
||||
|
||||
typedef struct HashLongestMatch {
|
||||
/* Number of hash buckets. */
|
||||
size_t bucket_size_;
|
||||
/* Only block_size_ newest backward references are kept,
|
||||
and the older are forgotten. */
|
||||
size_t block_size_;
|
||||
/* Left-shift for computing hash bucket index from hash value. */
|
||||
int hash_shift_;
|
||||
/* Mask for accessing entries in a block (in a ring-buffer manner). */
|
||||
uint32_t block_mask_;
|
||||
|
||||
/* --- Dynamic size members --- */
|
||||
|
||||
/* Number of entries in a particular bucket. */
|
||||
uint16_t num_[BUCKET_SIZE];
|
||||
/* uint16_t num[bucket_size]; */
|
||||
|
||||
/* Buckets containing BLOCK_SIZE of backward references. */
|
||||
uint32_t buckets_[BLOCK_SIZE << BUCKET_BITS];
|
||||
|
||||
/* True if num_ array needs to be initialized. */
|
||||
BROTLI_BOOL is_dirty_;
|
||||
|
||||
DictionarySearchStatictics dict_search_stats_;
|
||||
/* Buckets containing block_size_ of backward references. */
|
||||
/* uint32_t* buckets[bucket_size * block_size]; */
|
||||
} HashLongestMatch;
|
||||
|
||||
static void FN(Initialize)(HashLongestMatch* self) {
|
||||
BROTLI_UNUSED(self);
|
||||
static BROTLI_INLINE HashLongestMatch* FN(Self)(HasherHandle handle) {
|
||||
return (HashLongestMatch*)&(GetHasherCommon(handle)[1]);
|
||||
}
|
||||
|
||||
static void FN(Cleanup)(MemoryManager* m, HashLongestMatch* self) {
|
||||
BROTLI_UNUSED(m);
|
||||
BROTLI_UNUSED(self);
|
||||
static BROTLI_INLINE uint16_t* FN(Num)(HashLongestMatch* self) {
|
||||
return (uint16_t*)(&self[1]);
|
||||
}
|
||||
|
||||
static void FN(Reset)(HashLongestMatch* self) {
|
||||
self->is_dirty_ = BROTLI_TRUE;
|
||||
DictionarySearchStaticticsReset(&self->dict_search_stats_);
|
||||
static BROTLI_INLINE uint32_t* FN(Buckets)(HashLongestMatch* self) {
|
||||
return (uint32_t*)(&FN(Num)(self)[self->bucket_size_]);
|
||||
}
|
||||
|
||||
static void FN(InitEmpty)(HashLongestMatch* self) {
|
||||
if (self->is_dirty_) {
|
||||
memset(self->num_, 0, sizeof(self->num_));
|
||||
self->is_dirty_ = BROTLI_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
static void FN(InitForData)(HashLongestMatch* self, const uint8_t* data,
|
||||
size_t num) {
|
||||
size_t i;
|
||||
for (i = 0; i < num; ++i) {
|
||||
const uint32_t key = FN(HashBytes)(&data[i]);
|
||||
self->num_[key] = 0;
|
||||
}
|
||||
if (num != 0) {
|
||||
self->is_dirty_ = BROTLI_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
static void FN(Init)(
|
||||
MemoryManager* m, HashLongestMatch* self, const uint8_t* data,
|
||||
const BrotliEncoderParams* params, size_t position, size_t bytes,
|
||||
BROTLI_BOOL is_last) {
|
||||
/* Choose which initialization method is faster.
|
||||
Init() is about 100 times faster than InitForData(). */
|
||||
const size_t kMaxBytesForPartialHashInit = HASH_MAP_SIZE >> 7;
|
||||
BROTLI_UNUSED(m);
|
||||
static void FN(Initialize)(
|
||||
HasherHandle handle, const BrotliEncoderParams* params) {
|
||||
HasherCommon* common = GetHasherCommon(handle);
|
||||
HashLongestMatch* self = FN(Self)(handle);
|
||||
BROTLI_UNUSED(params);
|
||||
if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) {
|
||||
FN(InitForData)(self, data, bytes);
|
||||
self->hash_shift_ = 32 - common->params.bucket_bits;
|
||||
self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
|
||||
self->block_size_ = (size_t)1 << common->params.block_bits;
|
||||
self->block_mask_ = (uint32_t)(self->block_size_ - 1);
|
||||
}
|
||||
|
||||
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
|
||||
size_t input_size, const uint8_t* data) {
|
||||
HashLongestMatch* self = FN(Self)(handle);
|
||||
uint16_t* num = FN(Num)(self);
|
||||
/* Partial preparation is 100 times slower (per socket). */
|
||||
size_t partial_prepare_threshold = self->bucket_size_ >> 6;
|
||||
if (one_shot && input_size <= partial_prepare_threshold) {
|
||||
size_t i;
|
||||
for (i = 0; i < input_size; ++i) {
|
||||
const uint32_t key = FN(HashBytes)(&data[i], self->hash_shift_);
|
||||
num[key] = 0;
|
||||
}
|
||||
} else {
|
||||
FN(InitEmpty)(self);
|
||||
memset(num, 0, self->bucket_size_ * sizeof(num[0]));
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
|
||||
const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
|
||||
size_t input_size) {
|
||||
size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
|
||||
size_t block_size = (size_t)1 << params->hasher.block_bits;
|
||||
BROTLI_UNUSED(one_shot);
|
||||
BROTLI_UNUSED(input_size);
|
||||
return sizeof(HashLongestMatch) + bucket_size * (2 + 4 * block_size);
|
||||
}
|
||||
|
||||
/* Look at 4 bytes at &data[ix & mask].
|
||||
Compute a hash from these, and store the value of ix at that position. */
|
||||
static BROTLI_INLINE void FN(Store)(HashLongestMatch* self, const uint8_t *data,
|
||||
static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,
|
||||
const size_t mask, const size_t ix) {
|
||||
const uint32_t key = FN(HashBytes)(&data[ix & mask]);
|
||||
const size_t minor_ix = self->num_[key] & BLOCK_MASK;
|
||||
self->buckets_[minor_ix + (key << BLOCK_BITS)] = (uint32_t)ix;
|
||||
++self->num_[key];
|
||||
HashLongestMatch* self = FN(Self)(handle);
|
||||
uint16_t* num = FN(Num)(self);
|
||||
const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_shift_);
|
||||
const size_t minor_ix = num[key] & self->block_mask_;
|
||||
const size_t offset =
|
||||
minor_ix + (key << GetHasherCommon(handle)->params.block_bits);
|
||||
FN(Buckets)(self)[offset] = (uint32_t)ix;
|
||||
++num[key];
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* self,
|
||||
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
|
||||
const uint8_t *data, const size_t mask, const size_t ix_start,
|
||||
const size_t ix_end) {
|
||||
size_t i;
|
||||
for (i = ix_start; i < ix_end; ++i) {
|
||||
FN(Store)(self, data, mask, i);
|
||||
FN(Store)(handle, data, mask, i);
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashLongestMatch* self,
|
||||
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
|
||||
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask) {
|
||||
if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
|
||||
/* Prepare the hashes for three last bytes of the last write.
|
||||
These could not be calculated before, since they require knowledge
|
||||
of both the previous and the current block. */
|
||||
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
|
||||
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
|
||||
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
|
||||
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 3);
|
||||
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 2);
|
||||
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 1);
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(PrepareDistanceCache)(
|
||||
HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
|
||||
PrepareDistanceCache(distance_cache,
|
||||
GetHasherCommon(handle)->params.num_last_distances_to_check);
|
||||
}
|
||||
|
||||
/* Find a longest backward match of &data[cur_ix] up to the length of
|
||||
max_length and stores the position cur_ix in the hash table.
|
||||
|
||||
REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
|
||||
values; if this method is invoked repeatedly with the same distance
|
||||
cache values, it is enough to invoke FN(PrepareDistanceCache) once.
|
||||
|
||||
Does not look for matches longer than max_length.
|
||||
Does not look for matches further away than max_backward.
|
||||
Writes the best match into |out|.
|
||||
Returns true when match is found, otherwise false. */
|
||||
static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(HashLongestMatch* self,
|
||||
static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(HasherHandle handle,
|
||||
const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
|
||||
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
|
||||
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
|
||||
const size_t max_length, const size_t max_backward,
|
||||
HasherSearchResult* BROTLI_RESTRICT out) {
|
||||
HasherCommon* common = GetHasherCommon(handle);
|
||||
HashLongestMatch* self = FN(Self)(handle);
|
||||
uint16_t* num = FN(Num)(self);
|
||||
uint32_t* buckets = FN(Buckets)(self);
|
||||
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
||||
BROTLI_BOOL is_match_found = BROTLI_FALSE;
|
||||
/* Don't accept a short copy from far away. */
|
||||
@ -157,10 +169,8 @@ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(HashLongestMatch* self,
|
||||
out->len = 0;
|
||||
out->len_x_code = 0;
|
||||
/* Try last distance first. */
|
||||
for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
|
||||
const size_t idx = kDistanceCacheIndex[i];
|
||||
const size_t backward =
|
||||
(size_t)(distance_cache[idx] + kDistanceCacheOffset[i]);
|
||||
for (i = 0; i < (size_t)common->params.num_last_distances_to_check; ++i) {
|
||||
const size_t backward = (size_t)distance_cache[i];
|
||||
size_t prev_ix = (size_t)(cur_ix - backward);
|
||||
if (prev_ix >= cur_ix) {
|
||||
continue;
|
||||
@ -183,25 +193,30 @@ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(HashLongestMatch* self,
|
||||
/* Comparing for >= 2 does not change the semantics, but just saves for
|
||||
a few unnecessary binary logarithms in backward reference score,
|
||||
since we are not interested in such short matches. */
|
||||
score_t score = BackwardReferenceScoreUsingLastDistance(len, i);
|
||||
score_t score = BackwardReferenceScoreUsingLastDistance(len);
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
out->len = best_len;
|
||||
out->distance = backward;
|
||||
out->score = best_score;
|
||||
is_match_found = BROTLI_TRUE;
|
||||
if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
out->len = best_len;
|
||||
out->distance = backward;
|
||||
out->score = best_score;
|
||||
is_match_found = BROTLI_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
|
||||
uint32_t* BROTLI_RESTRICT bucket = &self->buckets_[key << BLOCK_BITS];
|
||||
const uint32_t key =
|
||||
FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
|
||||
uint32_t* BROTLI_RESTRICT bucket =
|
||||
&buckets[key << common->params.block_bits];
|
||||
const size_t down =
|
||||
(self->num_[key] > BLOCK_SIZE) ? (self->num_[key] - BLOCK_SIZE) : 0u;
|
||||
for (i = self->num_[key]; i > down;) {
|
||||
size_t prev_ix = bucket[--i & BLOCK_MASK];
|
||||
(num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u;
|
||||
for (i = num[key]; i > down;) {
|
||||
size_t prev_ix = bucket[--i & self->block_mask_];
|
||||
const size_t backward = cur_ix - prev_ix;
|
||||
if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
|
||||
break;
|
||||
@ -232,19 +247,15 @@ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(HashLongestMatch* self,
|
||||
}
|
||||
}
|
||||
}
|
||||
bucket[self->num_[key] & BLOCK_MASK] = (uint32_t)cur_ix;
|
||||
++self->num_[key];
|
||||
bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;
|
||||
++num[key];
|
||||
}
|
||||
if (!is_match_found) {
|
||||
is_match_found = SearchInStaticDictionary(&self->dict_search_stats_,
|
||||
&data[cur_ix_masked], max_length, max_backward, out, BROTLI_FALSE);
|
||||
is_match_found = SearchInStaticDictionary(dictionary, dictionary_hash,
|
||||
handle, &data[cur_ix_masked], max_length, max_backward, out,
|
||||
BROTLI_FALSE);
|
||||
}
|
||||
return is_match_found;
|
||||
}
|
||||
|
||||
#undef HASH_MAP_SIZE
|
||||
#undef BLOCK_MASK
|
||||
#undef BLOCK_SIZE
|
||||
#undef BUCKET_SIZE
|
||||
|
||||
#undef HashLongestMatch
|
||||
|
@ -21,7 +21,7 @@ static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
|
||||
/* HashBytes is the function that chooses the bucket to place
|
||||
the address in. The HashLongestMatch and HashLongestMatchQuickly
|
||||
classes have separate, different implementations of hashing. */
|
||||
static uint32_t FN(HashBytes)(const uint8_t *data) {
|
||||
static uint32_t FN(HashBytes)(const uint8_t* data) {
|
||||
const uint64_t h = ((BROTLI_UNALIGNED_LOAD64(data) << (64 - 8 * HASH_LEN)) *
|
||||
kHashMul64);
|
||||
/* The higher bits contain more mixture from the multiplication,
|
||||
@ -36,97 +36,86 @@ static uint32_t FN(HashBytes)(const uint8_t *data) {
|
||||
given index, BUCKET_SWEEP buckets are used to store values of a key. */
|
||||
typedef struct HashLongestMatchQuickly {
|
||||
uint32_t buckets_[BUCKET_SIZE + BUCKET_SWEEP];
|
||||
/* True if buckets_ array needs to be initialized. */
|
||||
BROTLI_BOOL is_dirty_;
|
||||
DictionarySearchStatictics dict_search_stats_;
|
||||
} HashLongestMatchQuickly;
|
||||
|
||||
static void FN(Initialize)(HashLongestMatchQuickly* self) {
|
||||
BROTLI_UNUSED(self);
|
||||
static BROTLI_INLINE HashLongestMatchQuickly* FN(Self)(HasherHandle handle) {
|
||||
return (HashLongestMatchQuickly*)&(GetHasherCommon(handle)[1]);
|
||||
}
|
||||
|
||||
static void FN(Cleanup)(MemoryManager* m, HashLongestMatchQuickly* self) {
|
||||
BROTLI_UNUSED(m);
|
||||
BROTLI_UNUSED(self);
|
||||
static void FN(Initialize)(
|
||||
HasherHandle handle, const BrotliEncoderParams* params) {
|
||||
BROTLI_UNUSED(handle);
|
||||
BROTLI_UNUSED(params);
|
||||
}
|
||||
|
||||
static void FN(Reset)(HashLongestMatchQuickly* self) {
|
||||
self->is_dirty_ = BROTLI_TRUE;
|
||||
DictionarySearchStaticticsReset(&self->dict_search_stats_);
|
||||
}
|
||||
|
||||
static void FN(InitEmpty)(HashLongestMatchQuickly* self) {
|
||||
if (self->is_dirty_) {
|
||||
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
|
||||
size_t input_size, const uint8_t* data) {
|
||||
HashLongestMatchQuickly* self = FN(Self)(handle);
|
||||
/* Partial preparation is 100 times slower (per socket). */
|
||||
size_t partial_prepare_threshold = HASH_MAP_SIZE >> 7;
|
||||
if (one_shot && input_size <= partial_prepare_threshold) {
|
||||
size_t i;
|
||||
for (i = 0; i < input_size; ++i) {
|
||||
const uint32_t key = FN(HashBytes)(&data[i]);
|
||||
memset(&self->buckets_[key], 0, BUCKET_SWEEP * sizeof(self->buckets_[0]));
|
||||
}
|
||||
} else {
|
||||
/* It is not strictly necessary to fill this buffer here, but
|
||||
not filling will make the results of the compression stochastic
|
||||
(but correct). This is because random data would cause the
|
||||
system to find accidentally good backward references here and there. */
|
||||
memset(&self->buckets_[0], 0, sizeof(self->buckets_));
|
||||
self->is_dirty_ = BROTLI_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
static void FN(InitForData)(HashLongestMatchQuickly* self, const uint8_t* data,
|
||||
size_t num) {
|
||||
size_t i;
|
||||
for (i = 0; i < num; ++i) {
|
||||
const uint32_t key = FN(HashBytes)(&data[i]);
|
||||
memset(&self->buckets_[key], 0, BUCKET_SWEEP * sizeof(self->buckets_[0]));
|
||||
}
|
||||
if (num != 0) {
|
||||
self->is_dirty_ = BROTLI_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
static void FN(Init)(
|
||||
MemoryManager* m, HashLongestMatchQuickly* self, const uint8_t* data,
|
||||
const BrotliEncoderParams* params, size_t position, size_t bytes,
|
||||
BROTLI_BOOL is_last) {
|
||||
/* Choose which initialization method is faster.
|
||||
Init() is about 100 times faster than InitForData(). */
|
||||
const size_t kMaxBytesForPartialHashInit = HASH_MAP_SIZE >> 7;
|
||||
BROTLI_UNUSED(m);
|
||||
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
|
||||
const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
|
||||
size_t input_size) {
|
||||
BROTLI_UNUSED(params);
|
||||
if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) {
|
||||
FN(InitForData)(self, data, bytes);
|
||||
} else {
|
||||
FN(InitEmpty)(self);
|
||||
}
|
||||
BROTLI_UNUSED(one_shot);
|
||||
BROTLI_UNUSED(input_size);
|
||||
return sizeof(HashLongestMatchQuickly);
|
||||
}
|
||||
|
||||
/* Look at 5 bytes at &data[ix & mask].
|
||||
Compute a hash from these, and store the value somewhere within
|
||||
[ix .. ix+3]. */
|
||||
static BROTLI_INLINE void FN(Store)(HashLongestMatchQuickly* self,
|
||||
static BROTLI_INLINE void FN(Store)(HasherHandle handle,
|
||||
const uint8_t *data, const size_t mask, const size_t ix) {
|
||||
const uint32_t key = FN(HashBytes)(&data[ix & mask]);
|
||||
/* Wiggle the value with the bucket sweep range. */
|
||||
const uint32_t off = (ix >> 3) % BUCKET_SWEEP;
|
||||
self->buckets_[key + off] = (uint32_t)ix;
|
||||
FN(Self)(handle)->buckets_[key + off] = (uint32_t)ix;
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(StoreRange)(HashLongestMatchQuickly* self,
|
||||
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
|
||||
const uint8_t *data, const size_t mask, const size_t ix_start,
|
||||
const size_t ix_end) {
|
||||
size_t i;
|
||||
for (i = ix_start; i < ix_end; ++i) {
|
||||
FN(Store)(self, data, mask, i);
|
||||
FN(Store)(handle, data, mask, i);
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
|
||||
HashLongestMatchQuickly* self, size_t num_bytes, size_t position,
|
||||
HasherHandle handle, size_t num_bytes, size_t position,
|
||||
const uint8_t* ringbuffer, size_t ringbuffer_mask) {
|
||||
if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
|
||||
/* Prepare the hashes for three last bytes of the last write.
|
||||
These could not be calculated before, since they require knowledge
|
||||
of both the previous and the current block. */
|
||||
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
|
||||
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
|
||||
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
|
||||
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 3);
|
||||
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 2);
|
||||
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 1);
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(PrepareDistanceCache)(
|
||||
HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
|
||||
BROTLI_UNUSED(handle);
|
||||
BROTLI_UNUSED(distance_cache);
|
||||
}
|
||||
|
||||
/* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
|
||||
up to the length of max_length and stores the position cur_ix in the
|
||||
hash table.
|
||||
@ -136,10 +125,12 @@ static BROTLI_INLINE void FN(StitchToPreviousBlock)(
|
||||
Writes the best match into |out|.
|
||||
Returns true if match is found, otherwise false. */
|
||||
static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
|
||||
HashLongestMatchQuickly* self, const uint8_t* BROTLI_RESTRICT data,
|
||||
HasherHandle handle, const BrotliDictionary* dictionary,
|
||||
const uint16_t* dictionary_hash, const uint8_t* BROTLI_RESTRICT data,
|
||||
const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
|
||||
const size_t cur_ix, const size_t max_length, const size_t max_backward,
|
||||
HasherSearchResult* BROTLI_RESTRICT out) {
|
||||
HashLongestMatchQuickly* self = FN(Self)(handle);
|
||||
const size_t best_len_in = out->len;
|
||||
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
||||
const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
|
||||
@ -157,7 +148,7 @@ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
|
||||
&data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 4) {
|
||||
best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
|
||||
best_score = BackwardReferenceScoreUsingLastDistance(len);
|
||||
best_len = len;
|
||||
out->len = len;
|
||||
out->distance = cached_backward;
|
||||
@ -227,8 +218,9 @@ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
|
||||
}
|
||||
}
|
||||
if (USE_DICTIONARY && !is_match_found) {
|
||||
is_match_found = SearchInStaticDictionary(&self->dict_search_stats_,
|
||||
&data[cur_ix_masked], max_length, max_backward, out, BROTLI_TRUE);
|
||||
is_match_found = SearchInStaticDictionary(dictionary, dictionary_hash,
|
||||
handle, &data[cur_ix_masked], max_length, max_backward, out,
|
||||
BROTLI_TRUE);
|
||||
}
|
||||
self->buckets_[key + ((cur_ix >> 3) % BUCKET_SWEEP)] = (uint32_t)cur_ix;
|
||||
return is_match_found;
|
||||
|
@ -38,61 +38,58 @@ typedef struct HashToBinaryTree {
|
||||
tree of sequences that share this hash bucket. */
|
||||
uint32_t buckets_[BUCKET_SIZE];
|
||||
|
||||
/* The union of the binary trees of each hash bucket. The root of the tree
|
||||
corresponding to a hash is a sequence starting at buckets_[hash] and
|
||||
the left and right children of a sequence starting at pos are
|
||||
forest_[2 * pos] and forest_[2 * pos + 1]. */
|
||||
uint32_t* forest_;
|
||||
|
||||
/* A position used to mark a non-existent sequence, i.e. a tree is empty if
|
||||
its root is at invalid_pos_ and a node is a leaf if both its children
|
||||
are at invalid_pos_. */
|
||||
uint32_t invalid_pos_;
|
||||
|
||||
size_t forest_size_;
|
||||
BROTLI_BOOL is_dirty_;
|
||||
/* --- Dynamic size members --- */
|
||||
|
||||
/* The union of the binary trees of each hash bucket. The root of the tree
|
||||
corresponding to a hash is a sequence starting at buckets_[hash] and
|
||||
the left and right children of a sequence starting at pos are
|
||||
forest_[2 * pos] and forest_[2 * pos + 1]. */
|
||||
/* uint32_t forest[2 * num_nodes] */
|
||||
} HashToBinaryTree;
|
||||
|
||||
static void FN(Reset)(HashToBinaryTree* self) {
|
||||
self->is_dirty_ = BROTLI_TRUE;
|
||||
static BROTLI_INLINE HashToBinaryTree* FN(Self)(HasherHandle handle) {
|
||||
return (HashToBinaryTree*)&(GetHasherCommon(handle)[1]);
|
||||
}
|
||||
|
||||
static void FN(Initialize)(HashToBinaryTree* self) {
|
||||
self->forest_ = NULL;
|
||||
self->forest_size_ = 0;
|
||||
FN(Reset)(self);
|
||||
static BROTLI_INLINE uint32_t* FN(Forest)(HashToBinaryTree* self) {
|
||||
return (uint32_t*)(&self[1]);
|
||||
}
|
||||
|
||||
static void FN(Cleanup)(MemoryManager* m, HashToBinaryTree* self) {
|
||||
BROTLI_FREE(m, self->forest_);
|
||||
static void FN(Initialize)(
|
||||
HasherHandle handle, const BrotliEncoderParams* params) {
|
||||
HashToBinaryTree* self = FN(Self)(handle);
|
||||
self->window_mask_ = (1u << params->lgwin) - 1u;
|
||||
self->invalid_pos_ = (uint32_t)(0 - self->window_mask_);
|
||||
}
|
||||
|
||||
static void FN(Init)(
|
||||
MemoryManager* m, HashToBinaryTree* self, const uint8_t* data,
|
||||
const BrotliEncoderParams* params, size_t position, size_t bytes,
|
||||
BROTLI_BOOL is_last) {
|
||||
if (self->is_dirty_) {
|
||||
uint32_t invalid_pos;
|
||||
size_t num_nodes;
|
||||
uint32_t i;
|
||||
BROTLI_UNUSED(data);
|
||||
self->window_mask_ = (1u << params->lgwin) - 1u;
|
||||
invalid_pos = (uint32_t)(0 - self->window_mask_);
|
||||
self->invalid_pos_ = invalid_pos;
|
||||
for (i = 0; i < BUCKET_SIZE; i++) {
|
||||
self->buckets_[i] = invalid_pos;
|
||||
}
|
||||
num_nodes = (position == 0 && is_last) ? bytes : self->window_mask_ + 1;
|
||||
if (num_nodes > self->forest_size_) {
|
||||
BROTLI_FREE(m, self->forest_);
|
||||
self->forest_ = BROTLI_ALLOC(m, uint32_t, 2 * num_nodes);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
self->forest_size_ = num_nodes;
|
||||
}
|
||||
self->is_dirty_ = BROTLI_FALSE;
|
||||
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
|
||||
size_t input_size, const uint8_t* data) {
|
||||
HashToBinaryTree* self = FN(Self)(handle);
|
||||
uint32_t invalid_pos = self->invalid_pos_;
|
||||
uint32_t i;
|
||||
BROTLI_UNUSED(data);
|
||||
BROTLI_UNUSED(one_shot);
|
||||
BROTLI_UNUSED(input_size);
|
||||
for (i = 0; i < BUCKET_SIZE; i++) {
|
||||
self->buckets_[i] = invalid_pos;
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
|
||||
const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
|
||||
size_t input_size) {
|
||||
size_t num_nodes = (size_t)1 << params->lgwin;
|
||||
if (one_shot && input_size < num_nodes) {
|
||||
num_nodes = input_size;
|
||||
}
|
||||
return sizeof(HashToBinaryTree) + 2 * sizeof(uint32_t) * num_nodes;
|
||||
}
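/* Editorial note in code form: a concrete instance of the formula above,
   with an invented wrapper. For a streamed (non one-shot) input the forest is
   sized by the window, so lgwin == 22 costs 32 MiB for the forest alone. */
static BROTLI_INLINE size_t ForestBytesExample(void) {
  size_t num_nodes = (size_t)1 << 22;       /* lgwin == 22, streaming input */
  return 2 * sizeof(uint32_t) * num_nodes;  /* 32 MiB on top of the header */
}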
|
||||
|
||||
static BROTLI_INLINE size_t FN(LeftChildIndex)(HashToBinaryTree* self,
|
||||
const size_t pos) {
|
||||
return 2 * (pos & self->window_mask_);
|
||||
@ -124,6 +121,7 @@ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
|
||||
const BROTLI_BOOL should_reroot_tree =
|
||||
TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH);
|
||||
const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
|
||||
uint32_t* forest = FN(Forest)(self);
|
||||
size_t prev_ix = self->buckets_[key];
|
||||
/* The forest index of the rightmost node of the left subtree of the new
|
||||
root, updated as we traverse and re-root the tree of the hash bucket. */
|
||||
@ -146,8 +144,8 @@ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
|
||||
const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
|
||||
if (backward == 0 || backward > max_backward || depth_remaining == 0) {
|
||||
if (should_reroot_tree) {
|
||||
self->forest_[node_left] = self->invalid_pos_;
|
||||
self->forest_[node_right] = self->invalid_pos_;
|
||||
forest[node_left] = self->invalid_pos_;
|
||||
forest[node_right] = self->invalid_pos_;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -166,27 +164,25 @@ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
|
||||
}
|
||||
if (len >= max_comp_len) {
|
||||
if (should_reroot_tree) {
|
||||
self->forest_[node_left] =
|
||||
self->forest_[FN(LeftChildIndex)(self, prev_ix)];
|
||||
self->forest_[node_right] =
|
||||
self->forest_[FN(RightChildIndex)(self, prev_ix)];
|
||||
forest[node_left] = forest[FN(LeftChildIndex)(self, prev_ix)];
|
||||
forest[node_right] = forest[FN(RightChildIndex)(self, prev_ix)];
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
|
||||
best_len_left = len;
|
||||
if (should_reroot_tree) {
|
||||
self->forest_[node_left] = (uint32_t)prev_ix;
|
||||
forest[node_left] = (uint32_t)prev_ix;
|
||||
}
|
||||
node_left = FN(RightChildIndex)(self, prev_ix);
|
||||
prev_ix = self->forest_[node_left];
|
||||
prev_ix = forest[node_left];
|
||||
} else {
|
||||
best_len_right = len;
|
||||
if (should_reroot_tree) {
|
||||
self->forest_[node_right] = (uint32_t)prev_ix;
|
||||
forest[node_right] = (uint32_t)prev_ix;
|
||||
}
|
||||
node_right = FN(LeftChildIndex)(self, prev_ix);
|
||||
prev_ix = self->forest_[node_right];
|
||||
prev_ix = forest[node_right];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -200,8 +196,9 @@ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
|
||||
matches in matches[0] to matches[*num_matches - 1]. The matches will be
|
||||
sorted by strictly increasing length and (non-strictly) increasing
|
||||
distance. */
|
||||
static BROTLI_INLINE size_t FN(FindAllMatches)(HashToBinaryTree* self,
|
||||
const uint8_t* data, const size_t ring_buffer_mask, const size_t cur_ix,
|
||||
static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle,
|
||||
const BrotliDictionary* dictionary, const uint8_t* data,
|
||||
const size_t ring_buffer_mask, const size_t cur_ix,
|
||||
const size_t max_length, const size_t max_backward,
|
||||
const BrotliEncoderParams* params, BackwardMatch* matches) {
|
||||
BackwardMatch* const orig_matches = matches;
|
||||
@ -235,16 +232,16 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HashToBinaryTree* self,
|
||||
}
|
||||
}
|
||||
if (best_len < max_length) {
|
||||
matches = FN(StoreAndFindMatches)(self, data, cur_ix, ring_buffer_mask,
|
||||
max_length, max_backward, &best_len, matches);
|
||||
matches = FN(StoreAndFindMatches)(FN(Self)(handle), data, cur_ix,
|
||||
ring_buffer_mask, max_length, max_backward, &best_len, matches);
|
||||
}
|
||||
for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {
|
||||
dict_matches[i] = kInvalidMatch;
|
||||
}
|
||||
{
|
||||
size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1);
|
||||
if (BrotliFindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen,
|
||||
max_length, &dict_matches[0])) {
|
||||
if (BrotliFindAllStaticDictionaryMatches(dictionary,
|
||||
&data[cur_ix_masked], minlen, max_length, &dict_matches[0])) {
|
||||
size_t maxlen = BROTLI_MIN(
|
||||
size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length);
|
||||
size_t l;
|
||||
@ -263,15 +260,16 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HashToBinaryTree* self,
|
||||
/* Stores the hash of the next 4 bytes and re-roots the binary tree at the
|
||||
current sequence, without returning any matches.
|
||||
REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */
|
||||
static BROTLI_INLINE void FN(Store)(HashToBinaryTree* self, const uint8_t *data,
|
||||
static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data,
|
||||
const size_t mask, const size_t ix) {
|
||||
HashToBinaryTree* self = FN(Self)(handle);
|
||||
/* Maximum distance is window size - 16, see section 9.1. of the spec. */
|
||||
const size_t max_backward = self->window_mask_ - BROTLI_WINDOW_GAP + 1;
|
||||
FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,
|
||||
max_backward, NULL, NULL);
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* self,
|
||||
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
|
||||
const uint8_t *data, const size_t mask, const size_t ix_start,
|
||||
const size_t ix_end) {
|
||||
size_t i = ix_start;
|
||||
@ -281,17 +279,18 @@ static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* self,
|
||||
}
|
||||
if (ix_start + 512 <= i) {
|
||||
for (; j < i; j += 8) {
|
||||
FN(Store)(self, data, mask, j);
|
||||
FN(Store)(handle, data, mask, j);
|
||||
}
|
||||
}
|
||||
for (; i < ix_end; ++i) {
|
||||
FN(Store)(self, data, mask, i);
|
||||
FN(Store)(handle, data, mask, i);
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashToBinaryTree* self,
|
||||
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
|
||||
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask) {
|
||||
HashToBinaryTree* self = FN(Self)(handle);
|
||||
if (num_bytes >= FN(HashTypeLength)() - 1 &&
|
||||
position >= MAX_TREE_COMP_LENGTH) {
|
||||
/* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.
|
||||
|
@ -31,6 +31,14 @@
|
||||
so we buffer at most this much literals and commands. */
|
||||
#define MAX_NUM_DELAYED_SYMBOLS 0x2fff
|
||||
|
||||
typedef struct BrotliHasherParams {
|
||||
int type;
|
||||
int bucket_bits;
|
||||
int block_bits;
|
||||
int hash_len;
|
||||
int num_last_distances_to_check;
|
||||
} BrotliHasherParams;
|
||||
|
||||
/* Encoding parameters */
|
||||
typedef struct BrotliEncoderParams {
|
||||
BrotliEncoderMode mode;
|
||||
@ -39,6 +47,7 @@ typedef struct BrotliEncoderParams {
|
||||
int lgblock;
|
||||
size_t size_hint;
|
||||
BROTLI_BOOL disable_literal_context_modeling;
|
||||
BrotliHasherParams hasher;
|
||||
} BrotliEncoderParams;
|
||||
|
||||
/* Returns hash-table size for quality levels 0 and 1. */
|
||||
@ -122,17 +131,30 @@ static BROTLI_INLINE size_t LiteralSpreeLengthForSparseSearch(
|
||||
return params->quality < 9 ? 64 : 512;
|
||||
}
|
||||
|
||||
static BROTLI_INLINE int ChooseHasher(const BrotliEncoderParams* params) {
|
||||
static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params,
|
||||
BrotliHasherParams* hparams) {
|
||||
if (params->quality > 9) {
|
||||
return 10;
|
||||
hparams->type = 10;
|
||||
} else if (params->quality == 4 && params->size_hint >= (1 << 20)) {
|
||||
return 54;
|
||||
hparams->type = 54;
|
||||
} else if (params->quality < 5) {
|
||||
return params->quality;
|
||||
hparams->type = params->quality;
|
||||
} else if (params->lgwin <= 16) {
|
||||
return params->quality < 7 ? 40 : params->quality < 9 ? 41 : 42;
|
||||
hparams->type = params->quality < 7 ? 40 : params->quality < 9 ? 41 : 42;
|
||||
} else if (params->size_hint >= (1 << 20) && params->lgwin >= 19) {
|
||||
hparams->type = 6;
|
||||
hparams->block_bits = params->quality - 1;
|
||||
hparams->bucket_bits = 15;
|
||||
hparams->hash_len = 5;
|
||||
hparams->num_last_distances_to_check =
|
||||
params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
|
||||
} else {
|
||||
hparams->type = 5;
|
||||
hparams->block_bits = params->quality - 1;
|
||||
hparams->bucket_bits = params->quality < 7 ? 14 : 15;
|
||||
hparams->num_last_distances_to_check =
|
||||
params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
|
||||
}
|
||||
return params->quality;
|
||||
}
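/* Editorial sketch of how the new parameterization plays out (values traced
   from the branches above, not measured; the wrapper function is invented).
   A 4 MiB size hint at quality 9 with a 22-bit window selects the new H6. */
static void ChooseHasherExample(void) {
  BrotliEncoderParams params;
  memset(&params, 0, sizeof(params));
  params.quality = 9;
  params.lgwin = 22;
  params.size_hint = (size_t)4 << 20;
  ChooseHasher(&params, &params.hasher);
  /* Expected: type == 6, bucket_bits == 15, block_bits == 8, hash_len == 5,
     num_last_distances_to_check == 16. */
}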
|
||||
|
||||
#endif /* BROTLI_ENC_QUALITY_H_ */
|
||||
|
@ -33,23 +33,24 @@ static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
|
||||
matches[len] = BROTLI_MIN(uint32_t, matches[len], match);
|
||||
}
|
||||
|
||||
static BROTLI_INLINE size_t DictMatchLength(const uint8_t* data,
|
||||
static BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary,
|
||||
const uint8_t* data,
|
||||
size_t id,
|
||||
size_t len,
|
||||
size_t maxlen) {
|
||||
const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
|
||||
return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,
|
||||
const size_t offset = dictionary->offsets_by_length[len] + len * id;
|
||||
return FindMatchLengthWithLimit(&dictionary->data[offset], data,
|
||||
BROTLI_MIN(size_t, len, maxlen));
|
||||
}
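/* Editorial sketch (helper name invented): DictMatchLength above resolves a
   word through the new BrotliDictionary fields instead of the old globals.
   Given a length bucket and a word index, the word bytes sit at a fixed
   offset inside dictionary->data. */
static BROTLI_INLINE const uint8_t* DictWordSketch(
    const BrotliDictionary* dictionary, size_t len, size_t word_idx) {
  /* Each bucket holds (1 << size_bits_by_length[len]) words of exactly
     `len` bytes, stored back to back from offsets_by_length[len]. */
  const size_t offset = dictionary->offsets_by_length[len] + len * word_idx;
  return &dictionary->data[offset];
}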
|
||||
|
||||
static BROTLI_INLINE BROTLI_BOOL IsMatch(
|
||||
static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
|
||||
DictWord w, const uint8_t* data, size_t max_length) {
|
||||
if (w.len > max_length) {
|
||||
return BROTLI_FALSE;
|
||||
} else {
|
||||
const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] +
|
||||
const size_t offset = dictionary->offsets_by_length[w.len] +
|
||||
(size_t)w.len * (size_t)w.idx;
|
||||
const uint8_t* dict = &kBrotliDictionary[offset];
|
||||
const uint8_t* dict = &dictionary->data[offset];
|
||||
if (w.transform == 0) {
|
||||
/* Match against base dictionary word. */
|
||||
return
|
||||
@ -78,8 +79,8 @@ static BROTLI_INLINE BROTLI_BOOL IsMatch(
|
||||
}
|
||||
|
||||
BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
||||
const uint8_t* data, size_t min_length, size_t max_length,
|
||||
uint32_t* matches) {
|
||||
const BrotliDictionary* dictionary, const uint8_t* data, size_t min_length,
|
||||
size_t max_length, uint32_t* matches) {
|
||||
BROTLI_BOOL has_found_match = BROTLI_FALSE;
|
||||
{
|
||||
size_t offset = kStaticDictionaryBuckets[Hash(data)];
|
||||
@ -87,12 +88,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
||||
while (!end) {
|
||||
DictWord w = kStaticDictionaryWords[offset++];
|
||||
const size_t l = w.len & 0x1F;
|
||||
const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
|
||||
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
||||
const size_t id = w.idx;
|
||||
end = !!(w.len & 0x80);
|
||||
w.len = (uint8_t)l;
|
||||
if (w.transform == 0) {
|
||||
const size_t matchlen = DictMatchLength(data, id, l, max_length);
|
||||
const size_t matchlen =
|
||||
DictMatchLength(dictionary, data, id, l, max_length);
|
||||
const uint8_t* s;
|
||||
size_t minlen;
|
||||
size_t maxlen;
|
||||
@ -276,7 +278,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
||||
const BROTLI_BOOL is_all_caps =
|
||||
TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
|
||||
const uint8_t* s;
|
||||
if (!IsMatch(w, data, max_length)) {
|
||||
if (!IsMatch(dictionary, w, data, max_length)) {
|
||||
continue;
|
||||
}
|
||||
/* Transform "" + kUppercase{First,All} + "" */
|
||||
@ -326,13 +328,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
||||
while (!end) {
|
||||
DictWord w = kStaticDictionaryWords[offset++];
|
||||
const size_t l = w.len & 0x1F;
|
||||
const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
|
||||
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
||||
const size_t id = w.idx;
|
||||
end = !!(w.len & 0x80);
|
||||
w.len = (uint8_t)l;
|
||||
if (w.transform == 0) {
|
||||
const uint8_t* s;
|
||||
if (!IsMatch(w, &data[1], max_length - 1)) {
|
||||
if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms " " + kIdentity + "" and "." + kIdentity + "" */
|
||||
@ -373,7 +375,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
||||
const BROTLI_BOOL is_all_caps =
|
||||
TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
|
||||
const uint8_t* s;
|
||||
if (!IsMatch(w, &data[1], max_length - 1)) {
|
||||
if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms " " + kUppercase{First,All} + "" */
|
||||
@ -418,11 +420,12 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
||||
while (!end) {
|
||||
DictWord w = kStaticDictionaryWords[offset++];
|
||||
const size_t l = w.len & 0x1F;
|
||||
const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
|
||||
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
||||
const size_t id = w.idx;
|
||||
end = !!(w.len & 0x80);
|
||||
w.len = (uint8_t)l;
|
||||
if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
|
||||
if (w.transform == 0 &&
|
||||
IsMatch(dictionary, w, &data[2], max_length - 2)) {
|
||||
if (data[0] == 0xc2) {
|
||||
AddMatch(id + 102 * n, l + 2, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
@ -446,11 +449,12 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
||||
while (!end) {
|
||||
DictWord w = kStaticDictionaryWords[offset++];
|
||||
const size_t l = w.len & 0x1F;
|
||||
const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
|
||||
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
||||
const size_t id = w.idx;
|
||||
end = !!(w.len & 0x80);
|
||||
w.len = (uint8_t)l;
|
||||
if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
|
||||
if (w.transform == 0 &&
|
||||
IsMatch(dictionary, w, &data[5], max_length - 5)) {
|
||||
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
if (l + 5 < max_length) {
|
||||
|
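
Both DictMatchLength and IsMatch address a dictionary word as data[offsets_by_length[len] + len * idx]. Below is a hedged sketch of that addressing using the public BrotliGetDictionary() accessor; the DictionaryWord() helper and the relative include path are illustrative assumptions, not brotli API.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#include "../common/dictionary.h"  /* BrotliDictionary, BrotliGetDictionary */

/* Returns a pointer to the idx-th word of the given length, or NULL when the
   dictionary has no words of that length. Not a brotli helper. */
static const uint8_t* DictionaryWord(const BrotliDictionary* dict,
                                     size_t len, size_t idx) {
  if (len > 31 || dict->size_bits_by_length[len] == 0) return NULL;
  if (idx >= ((size_t)1 << dict->size_bits_by_length[len])) return NULL;
  return &dict->data[dict->offsets_by_length[len] + len * idx];
}

int main(void) {
  const BrotliDictionary* dict = BrotliGetDictionary();
  const uint8_t* word = DictionaryWord(dict, 4, 0);  /* first 4-byte word */
  if (word != NULL) {
    printf("%.4s\n", (const char*)word);  /* words are not NUL-terminated */
  }
  return 0;
}
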
@ -9,6 +9,7 @@
#ifndef BROTLI_ENC_STATIC_DICT_H_
#define BROTLI_ENC_STATIC_DICT_H_

#include "../common/dictionary.h"
#include <brotli/types.h>
#include "./port.h"

@ -27,6 +28,7 @@ static const uint32_t kInvalidMatch = 0xfffffff;
matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
all elements are initialized to kInvalidMatch */
BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
const BrotliDictionary* dictionary,
const uint8_t* data, size_t min_length, size_t max_length,
uint32_t* matches);
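
For reference, a sketch of how an encoder-internal call site might look with the extra dictionary argument. The FindMatchesExample() wrapper and the chosen min_length are illustrative, and since the function is BROTLI_INTERNAL this only compiles inside the library tree.

#include <stddef.h>
#include <stdint.h>

#include "./static_dict.h"  /* also pulls in ../common/dictionary.h */

/* Sketch of an encoder-internal call under the new signature. */
static void FindMatchesExample(const uint8_t* data, size_t max_length) {
  uint32_t matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
  size_t i;
  for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {
    matches[i] = kInvalidMatch;  /* contract documented above */
  }
  if (BrotliFindAllStaticDictionaryMatches(
          BrotliGetDictionary(), data, BROTLI_MIN_DICTIONARY_WORD_LENGTH,
          max_length, matches)) {
    /* matches[len] now holds the smallest match code found for length len. */
  }
}
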
@ -128,6 +128,10 @@ typedef enum {
/**
* Creates an instance of ::BrotliDecoderState and initializes it.
*
* The instance can be used once for decoding and should then be destroyed with
* ::BrotliDecoderDestroyInstance; it cannot be reused for a new decoding
* session.
*
* @p alloc_func and @p free_func @b MUST be both zero or both non-zero. In the
* case they are both zero, default memory allocators are used. @p opaque is
* passed to @p alloc_func and @p free_func when they are called.
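
A minimal sketch of the documented single-use pattern, assuming the whole compressed stream and a large-enough output buffer are already in memory; DecodeOnce() is an illustrative wrapper, not a brotli function.

#include <stddef.h>
#include <stdint.h>

#include <brotli/decode.h>

/* Decodes one complete stream held in memory into a caller-provided buffer.
   Error handling is kept minimal for the sketch. */
static BROTLI_BOOL DecodeOnce(const uint8_t* input, size_t input_size,
                              uint8_t* output, size_t output_capacity,
                              size_t* output_size) {
  size_t available_in = input_size;
  const uint8_t* next_in = input;
  size_t available_out = output_capacity;
  uint8_t* next_out = output;
  BrotliDecoderResult result;
  BrotliDecoderState* state = BrotliDecoderCreateInstance(NULL, NULL, NULL);
  if (state == NULL) return BROTLI_FALSE;
  result = BrotliDecoderDecompressStream(
      state, &available_in, &next_in, &available_out, &next_out, output_size);
  /* Single-use contract: destroy the instance whatever the outcome and
     create a fresh one for the next stream. */
  BrotliDecoderDestroyInstance(state);
  return TO_BROTLI_BOOL(result == BROTLI_DECODER_RESULT_SUCCESS);
}
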
@ -50,7 +50,7 @@ typedef enum BrotliEncoderMode {
* properties of the input.
*/
BROTLI_MODE_GENERIC = 0,
/** Compression mode for UTF-8 formated text input. */
/** Compression mode for UTF-8 formatted text input. */
BROTLI_MODE_TEXT = 1,
/** Compression mode used in WOFF 2.0. */
BROTLI_MODE_FONT = 2
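
A short sketch of selecting BROTLI_MODE_TEXT (whose doc comment is fixed above) through the one-shot public API; quality 11 and the 22-bit window are arbitrary illustrative choices.

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <brotli/encode.h>

/* One-shot compression of a UTF-8 text buffer. */
int main(void) {
  static const char kText[] = "Brotli compresses UTF-8 text rather well.";
  size_t input_size = strlen(kText);
  size_t max_size = BrotliEncoderMaxCompressedSize(input_size);
  size_t encoded_size = max_size;
  uint8_t* encoded;
  if (max_size == 0) return 1;  /* bound not representable */
  encoded = (uint8_t*)malloc(max_size);
  if (encoded == NULL) return 1;
  if (!BrotliEncoderCompress(11, 22, BROTLI_MODE_TEXT, input_size,
                             (const uint8_t*)kText, &encoded_size, encoded)) {
    free(encoded);
    return 1;
  }
  /* encoded_size now holds the number of bytes actually written. */
  free(encoded);
  return 0;
}
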
@ -8,25 +8,20 @@ package org.brotli.dec;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.IntBuffer;

/**
* Bit reading helpers.
*/
class BitReader {
final class BitReader {

/**
* Input byte buffer, consists of a ring-buffer and a "slack" region where bytes from the start of
* the ring-buffer are copied.
*/
private static final int READ_SIZE = 4096;
private static final int BUF_SIZE = READ_SIZE + 64;
private static final int BUF_SIZE = IntReader.CAPACITY << 2;
private static final int READ_SIZE = BUF_SIZE - 64;

private final ByteBuffer byteBuffer =
ByteBuffer.allocateDirect(BUF_SIZE).order(ByteOrder.LITTLE_ENDIAN);
private final IntBuffer intBuffer = byteBuffer.asIntBuffer();
private final IntReader intReader = new IntReader();
private final byte[] shadowBuffer = new byte[BUF_SIZE];

private InputStream input;

@ -73,7 +68,7 @@ class BitReader {
}
throw new BrotliRuntimeException("No more input");
}
int readOffset = br.intBuffer.position() << 2;
int readOffset = IntReader.position(br.intReader) << 2;
int bytesRead = READ_SIZE - readOffset;
System.arraycopy(br.shadowBuffer, readOffset, br.shadowBuffer, 0, bytesRead);
try {

@ -91,9 +86,7 @@ class BitReader {
} catch (IOException e) {
throw new BrotliRuntimeException("Failed to read input", e);
}
br.byteBuffer.clear();
br.byteBuffer.put(br.shadowBuffer, 0, bytesRead & 0xFFFC);
br.intBuffer.rewind();
IntReader.reload(br.intReader, br.shadowBuffer, 0, bytesRead >> 2);
br.available = bytesRead >> 2;
}

@ -115,7 +108,7 @@ class BitReader {
*/
static void fillBitWindow(BitReader br) {
if (br.bitOffset >= 32) {
br.accumulator = ((long) br.intBuffer.get() << 32) | (br.accumulator >>> 32);
br.accumulator = ((long) IntReader.read(br.intReader) << 32) | (br.accumulator >>> 32);
br.bitOffset -= 32;
br.available--;
}

@ -146,7 +139,7 @@ class BitReader {
}
br.input = input;
br.accumulator = 0;
br.intBuffer.position(READ_SIZE >> 2);
IntReader.setPosition(br.intReader, READ_SIZE >> 2);
br.bitOffset = 64;
br.available = 0;
br.endOfStreamReached = false;

48
java/org/brotli/dec/IntReader.java
Executable file
@ -0,0 +1,48 @@
/* Copyright 2017 Google Inc. All Rights Reserved.

Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

package org.brotli.dec;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.IntBuffer;

/**
* Byte-to-int conversion magic.
*/
final class IntReader {

static final int CAPACITY = 1024 + 16;

private final ByteBuffer byteBuffer =
ByteBuffer.allocateDirect(CAPACITY << 2).order(ByteOrder.LITTLE_ENDIAN);
private final IntBuffer intBuffer = byteBuffer.asIntBuffer();

/**
* Reinitialize reader with new data chunk.
*
* NB: byteSize == 4 * intLen!
* NB: intLen should be less than or equal to {@link CAPACITY}
*/
static void reload(IntReader ir, byte[] data, int offset, int intLen) {
ir.byteBuffer.clear();
ir.byteBuffer.put(data, offset, intLen << 2);
ir.intBuffer.rewind();
}

static int position(IntReader ir) {
return ir.intBuffer.position();
}

static void setPosition(IntReader ir, int position) {
ir.intBuffer.position(position);
}

static int read(IntReader ir) {
// Advances position by 1.
return ir.intBuffer.get();
}
}
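
For readers unfamiliar with the ByteBuffer/IntBuffer pairing, here is a rough C sketch (illustrative only, not the Java class) of the byte-to-int step IntReader sets up: groups of four little-endian bytes are packed into 32-bit values once, so the hot bit-reading path only ever consumes whole ints.

#include <stddef.h>
#include <stdint.h>

/* Packs byte_len little-endian bytes (a multiple of 4) into 32-bit values,
   mirroring what IntReader.reload arranges via its buffers.
   Returns the number of ints written. */
static size_t ReloadInts(const uint8_t* data, size_t byte_len, uint32_t* out) {
  size_t i;
  size_t int_len = byte_len >> 2;
  for (i = 0; i < int_len; ++i) {
    out[i] = (uint32_t)data[4 * i]
        | ((uint32_t)data[4 * i + 1] << 8)
        | ((uint32_t)data[4 * i + 2] << 16)
        | ((uint32_t)data[4 * i + 3] << 24);
  }
  return int_len;
}

Each of those 32-bit values is what fillBitWindow above shifts into the 64-bit accumulator.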