Update common, decoder, encoder, java (#520)

Common:
 * wrap dictionary data into `BrotliDictionary` structure
 * replace public constant with getter `BrotliGetDictionary`
 * reformat dictionary data

Decoder:
 * adopt common changes
 * clarify acceptable instance usage patterns
 * hold reference to dictionary in state

Encoder:
 * adopt common changes
 * eliminate PIC spots in `CreateBackwardReferences`
 * add per-chunk ratio guards for q0 and q1
 * precompute relative distances to avoid repeated calculations
 * postpone hasher allocation/initialization
 * refactor Hashers to be class-like structure
 * further improvements for 1MiB+ inputs
 * add new hasher type; make hashers more configurable

Java:
 * Pull byte->int magic to `IntReader` from `BitReader`
This commit is contained in:
Eugene Kliuchnikov 2017-03-06 14:22:45 +01:00 committed by GitHub
parent aaa4424d9b
commit cdca91b6f5
30 changed files with 6927 additions and 10104 deletions

File diff suppressed because it is too large Load Diff

View File

@ -16,9 +16,29 @@
extern "C" {
#endif
BROTLI_COMMON_API extern const uint8_t kBrotliDictionary[122784];
BROTLI_COMMON_API extern const uint32_t kBrotliDictionaryOffsetsByLength[32];
BROTLI_COMMON_API extern const uint8_t kBrotliDictionarySizeBitsByLength[32];
/* Static dictionary data, bucketed by word length.
   Callers obtain an instance via BrotliGetDictionary(). */
typedef struct BrotliDictionary {
/**
 * Number of bits to encode index of dictionary word in a bucket.
 *
 * Specification: Appendix A. Static Dictionary Data
 *
 * Words in a dictionary are bucketed by length.
 * @c 0 means that there are no words of a given length.
 * Dictionary consists of words with length of [4..24] bytes.
 * Values at [0..3] and [25..31] indices should not be addressed.
 */
uint8_t size_bits_by_length[32];
/* Byte offset of the first word of each length bucket within |data|.
   Invariant over buckets:
   assert(offset[i + 1] == offset[i] + (bits[i] ? (i << bits[i]) : 0)) */
uint32_t offsets_by_length[32];
/* Data array is not bound, and should obey to size_bits_by_length values.
   Specified size matches default (RFC 7932) dictionary. */
/* assert(sizeof(data) == offsets_by_length[31]) */
uint8_t data[122784];
} BrotliDictionary;
BROTLI_COMMON_API extern const BrotliDictionary* BrotliGetDictionary();
#define BROTLI_MIN_DICTIONARY_WORD_LENGTH 4
#define BROTLI_MAX_DICTIONARY_WORD_LENGTH 24

View File

@ -1730,9 +1730,9 @@ postReadDistance:
if (s->distance_code > s->max_distance) {
if (i >= BROTLI_MIN_DICTIONARY_WORD_LENGTH &&
i <= BROTLI_MAX_DICTIONARY_WORD_LENGTH) {
int offset = (int)kBrotliDictionaryOffsetsByLength[i];
int offset = (int)s->dictionary->offsets_by_length[i];
int word_id = s->distance_code - s->max_distance - 1;
uint32_t shift = kBrotliDictionarySizeBitsByLength[i];
uint32_t shift = s->dictionary->size_bits_by_length[i];
int mask = (int)BitMask(shift);
int word_idx = word_id & mask;
int transform_idx = word_id >> shift;
@ -1740,7 +1740,7 @@ postReadDistance:
s->dist_rb_idx += s->distance_context;
offset += word_idx * i;
if (transform_idx < kNumTransforms) {
const uint8_t* word = &kBrotliDictionary[offset];
const uint8_t* word = &s->dictionary->data[offset];
int len = i;
if (transform_idx == 0) {
memcpy(&s->ringbuffer[pos], word, (size_t)len);

View File

@ -51,6 +51,8 @@ void BrotliDecoderStateInitWithCustomAllocators(BrotliDecoderState* s,
s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;
s->dictionary = BrotliGetDictionary();
s->buffer_length = 0;
s->loop_counter = 0;
s->pos = 0;

View File

@ -10,6 +10,7 @@
#define BROTLI_DEC_STATE_H_
#include "../common/constants.h"
#include "../common/dictionary.h"
#include <brotli/types.h>
#include "./bit_reader.h"
#include "./huffman.h"
@ -222,6 +223,7 @@ struct BrotliDecoderStateStruct {
uint32_t num_literal_htrees;
uint8_t* context_map;
uint8_t* context_modes;
const BrotliDictionary* dictionary;
uint32_t trivial_literal_contexts[8]; /* 256 bits */
};

View File

@ -9,8 +9,10 @@
#include "./backward_references.h"
#include "../common/constants.h"
#include "../common/dictionary.h"
#include <brotli/types.h>
#include "./command.h"
#include "./dictionary_hash.h"
#include "./memory.h"
#include "./port.h"
#include "./quality.h"
@ -72,21 +74,6 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H7
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H8
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H9
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H40
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
@ -111,23 +98,25 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
#undef CAT
#undef EXPAND_CAT
void BrotliCreateBackwardReferences(size_t num_bytes,
void BrotliCreateBackwardReferences(const BrotliDictionary* dictionary,
size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const BrotliEncoderParams* params,
Hashers* hashers,
HasherHandle hasher,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
size_t* num_literals) {
switch (ChooseHasher(params)) {
#define CASE_(N) \
case N: \
CreateBackwardReferencesH ## N(num_bytes, position, \
ringbuffer, ringbuffer_mask, params, hashers->h ## N, dist_cache, \
last_insert_len, commands, num_commands, num_literals); \
switch (params->hasher.type) {
#define CASE_(N) \
case N: \
CreateBackwardReferencesH ## N(dictionary, \
kStaticDictionaryHash, num_bytes, position, ringbuffer, \
ringbuffer_mask, params, hasher, dist_cache, \
last_insert_len, commands, num_commands, num_literals); \
break;
FOR_GENERIC_HASHERS(CASE_)
#undef CASE_

View File

@ -10,6 +10,7 @@
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
#include "../common/constants.h"
#include "../common/dictionary.h"
#include <brotli/types.h>
#include "./command.h"
#include "./hash.h"
@ -25,9 +26,9 @@ extern "C" {
CreateBackwardReferences calls, and must be incremented by the amount written
by this call. */
BROTLI_INTERNAL void BrotliCreateBackwardReferences(
size_t num_bytes, size_t position,
const BrotliDictionary* dictionary, size_t num_bytes, size_t position,
const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params, Hashers* hashers, int* dist_cache,
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
size_t* last_insert_len, Command* commands, size_t* num_commands,
size_t* num_literals);

View File

@ -27,6 +27,13 @@ extern "C" {
static const float kInfinity = 1.7e38f; /* ~= 2 ^ 127 */
/* For each distance short code [0..15], the distance-cache slot the decoded
   distance is derived from (RFC 7932, section 4): codes 0-3 reference the
   four cached distances directly, codes 4-9 derive from the last distance,
   codes 10-15 from the second-to-last. */
static const uint32_t kDistanceCacheIndex[] = {
0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
};
/* Delta applied to the referenced cache entry for each short code; codes 0-3
   reuse the cached distance verbatim, the rest add an offset in [-3..3]. */
static const int kDistanceCacheOffset[] = {
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
};
void BrotliInitZopfliNodes(ZopfliNode* array, size_t length) {
ZopfliNode stub;
size_t i;
@ -604,6 +611,7 @@ static size_t ZopfliIterate(size_t num_bytes,
size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
const BrotliDictionary* dictionary,
size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
@ -611,7 +619,7 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
const BrotliEncoderParams* params,
const size_t max_backward_limit,
const int* dist_cache,
H10* hasher,
HasherHandle hasher,
ZopfliNode* nodes) {
const size_t max_zopfli_len = MaxZopfliLen(params);
ZopfliCostModel model;
@ -630,8 +638,8 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; i++) {
const size_t pos = position + i;
const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
size_t num_matches = FindAllMatchesH10(hasher, ringbuffer, ringbuffer_mask,
pos, num_bytes - i, max_distance, params, matches);
size_t num_matches = FindAllMatchesH10(hasher, dictionary, ringbuffer,
ringbuffer_mask, pos, num_bytes - i, max_distance, params, matches);
size_t skip;
if (num_matches > 0 &&
BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
@ -664,9 +672,9 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
}
void BrotliCreateZopfliBackwardReferences(
MemoryManager* m, size_t num_bytes, size_t position,
const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params, H10* hasher, int* dist_cache,
MemoryManager* m, const BrotliDictionary* dictionary, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
size_t* last_insert_len, Command* commands, size_t* num_commands,
size_t* num_literals) {
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
@ -674,8 +682,8 @@ void BrotliCreateZopfliBackwardReferences(
nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
if (BROTLI_IS_OOM(m)) return;
BrotliInitZopfliNodes(nodes, num_bytes + 1);
*num_commands += BrotliZopfliComputeShortestPath(m, num_bytes, position,
ringbuffer, ringbuffer_mask, params, max_backward_limit,
*num_commands += BrotliZopfliComputeShortestPath(m, dictionary, num_bytes,
position, ringbuffer, ringbuffer_mask, params, max_backward_limit,
dist_cache, hasher, nodes);
if (BROTLI_IS_OOM(m)) return;
BrotliZopfliCreateCommands(num_bytes, position, max_backward_limit, nodes,
@ -684,9 +692,9 @@ void BrotliCreateZopfliBackwardReferences(
}
void BrotliCreateHqZopfliBackwardReferences(
MemoryManager* m, size_t num_bytes, size_t position,
const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params, H10* hasher, int* dist_cache,
MemoryManager* m, const BrotliDictionary* dictionary, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
size_t* last_insert_len, Command* commands, size_t* num_commands,
size_t* num_literals) {
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
@ -715,8 +723,9 @@ void BrotliCreateHqZopfliBackwardReferences(
BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
cur_match_pos + MAX_NUM_MATCHES_H10);
if (BROTLI_IS_OOM(m)) return;
num_found_matches = FindAllMatchesH10(hasher, ringbuffer, ringbuffer_mask,
pos, max_length, max_distance, params, &matches[cur_match_pos]);
num_found_matches = FindAllMatchesH10(hasher, dictionary, ringbuffer,
ringbuffer_mask, pos, max_length, max_distance, params,
&matches[cur_match_pos]);
cur_match_end = cur_match_pos + num_found_matches;
for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {
assert(BackwardMatchLength(&matches[j]) <

View File

@ -10,6 +10,7 @@
#define BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
#include "../common/constants.h"
#include "../common/dictionary.h"
#include <brotli/types.h>
#include "./command.h"
#include "./hash.h"
@ -22,16 +23,16 @@ extern "C" {
#endif
BROTLI_INTERNAL void BrotliCreateZopfliBackwardReferences(
MemoryManager* m, size_t num_bytes, size_t position,
const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params, H10* hasher, int* dist_cache,
MemoryManager* m, const BrotliDictionary* dictionary, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
size_t* last_insert_len, Command* commands, size_t* num_commands,
size_t* num_literals);
BROTLI_INTERNAL void BrotliCreateHqZopfliBackwardReferences(
MemoryManager* m, size_t num_bytes, size_t position,
const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params, H10* hasher, int* dist_cache,
MemoryManager* m, const BrotliDictionary* dictionary, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
size_t* last_insert_len, Command* commands, size_t* num_commands,
size_t* num_literals);
@ -77,10 +78,10 @@ BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
(2) nodes[i].command_length() <= i and
(3) nodes[i - nodes[i].command_length()].cost < kInfinity */
BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(
MemoryManager* m, size_t num_bytes, size_t position,
const uint8_t* ringbuffer, size_t ringbuffer_mask,
MemoryManager* m, const BrotliDictionary* dictionary, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params, const size_t max_backward_limit,
const int* dist_cache, H10* hasher, ZopfliNode* nodes);
const int* dist_cache, HasherHandle hasher, ZopfliNode* nodes);
BROTLI_INTERNAL void BrotliZopfliCreateCommands(const size_t num_bytes,
const size_t block_start,

View File

@ -7,12 +7,11 @@
/* template parameters: FN */
#define Hasher HASHER()
static BROTLI_NOINLINE void FN(CreateBackwardReferences)(
const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
size_t num_bytes, size_t position,
const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params, Hasher* hasher, int* dist_cache,
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
size_t* last_insert_len, Command* commands, size_t* num_commands,
size_t* num_literals) {
/* Set maximum distance, see section 9.1. of the spec. */
@ -30,7 +29,9 @@ static BROTLI_NOINLINE void FN(CreateBackwardReferences)(
size_t apply_random_heuristics = position + random_heuristics_window_size;
/* Minimum score to accept a backward reference. */
const score_t kMinScore = BROTLI_SCORE_BASE + 400;
const score_t kMinScore = BROTLI_SCORE_BASE + 100;
FN(PrepareDistanceCache)(hasher, dist_cache);
while (position + FN(HashTypeLength)() < pos_end) {
size_t max_length = pos_end - position;
@ -40,13 +41,14 @@ static BROTLI_NOINLINE void FN(CreateBackwardReferences)(
sr.len_x_code = 0;
sr.distance = 0;
sr.score = kMinScore;
if (FN(FindLongestMatch)(hasher, ringbuffer, ringbuffer_mask, dist_cache,
if (FN(FindLongestMatch)(hasher, dictionary, dictionary_hash,
ringbuffer, ringbuffer_mask, dist_cache,
position, max_length, max_distance, &sr)) {
/* Found a match. Let's look for something even better ahead. */
int delayed_backward_references_in_row = 0;
--max_length;
for (;; --max_length) {
const score_t cost_diff_lazy = 700;
const score_t cost_diff_lazy = 175;
BROTLI_BOOL is_match_found;
HasherSearchResult sr2;
sr2.len = params->quality < MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH ?
@ -55,9 +57,9 @@ static BROTLI_NOINLINE void FN(CreateBackwardReferences)(
sr2.distance = 0;
sr2.score = kMinScore;
max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
is_match_found = FN(FindLongestMatch)(hasher, ringbuffer,
ringbuffer_mask, dist_cache, position + 1, max_length, max_distance,
&sr2);
is_match_found = FN(FindLongestMatch)(hasher, dictionary,
dictionary_hash, ringbuffer, ringbuffer_mask, dist_cache,
position + 1, max_length, max_distance, &sr2);
if (is_match_found && sr2.score >= sr.score + cost_diff_lazy) {
/* Ok, let's just write one byte for now and start a match from the
next byte. */
@ -84,6 +86,7 @@ static BROTLI_NOINLINE void FN(CreateBackwardReferences)(
dist_cache[2] = dist_cache[1];
dist_cache[1] = dist_cache[0];
dist_cache[0] = (int)sr.distance;
FN(PrepareDistanceCache)(hasher, dist_cache);
}
InitCommand(commands++, insert_length, sr.len, sr.len ^ sr.len_x_code,
distance_code);
@ -138,5 +141,3 @@ static BROTLI_NOINLINE void FN(CreateBackwardReferences)(
*last_insert_len = insert_length;
*num_commands += (size_t)(commands - orig_commands);
}
#undef Hasher

View File

@ -65,11 +65,17 @@ static BROTLI_INLINE uint16_t CombineLengthCodes(
if (use_last_distance && inscode < 8 && copycode < 16) {
return (copycode < 8) ? bits64 : (bits64 | 64);
} else {
/* "To convert an insert-and-copy length code to an insert length code and
a copy length code, the following table can be used" */
static const uint16_t cells[9] = { 128u, 192u, 384u, 256u, 320u, 512u,
448u, 576u, 640u };
return cells[(copycode >> 3) + 3 * (inscode >> 3)] | bits64;
/* Specification: 5 Encoding of ... (last table) */
/* offset = 2 * index, where index is in range [0..8] */
int offset = 2 * ((copycode >> 3) + 3 * (inscode >> 3));
/* All values in specification are K * 64,
where K = [2, 3, 6, 4, 5, 8, 7, 9, 10],
i + 1 = [1, 2, 3, 4, 5, 6, 7, 8, 9],
K - i - 1 = [1, 1, 3, 0, 0, 2, 0, 1, 2] = D.
All values in D require only 2 bits to encode.
Magic constant is shifted 6 bits left, to avoid final multiplication. */
offset = (offset << 5) + 0x40 + ((0x520D40 >> offset) & 0xC0);
return (uint16_t)offset | bits64;
}
}

View File

@ -325,21 +325,20 @@ static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
}
}
/* REQUIRES: len <= 1 << 20. */
/* REQUIRES: len <= 1 << 24. */
static void BrotliStoreMetaBlockHeader(
size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
uint8_t* storage) {
size_t nibbles = 6;
/* ISLAST */
BrotliWriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
/* MNIBBLES is 4 */
BrotliWriteBits(2, 0, storage_ix, storage);
BrotliWriteBits(16, len - 1, storage_ix, storage);
} else {
/* MNIBBLES is 5 */
BrotliWriteBits(2, 1, storage_ix, storage);
BrotliWriteBits(20, len - 1, storage_ix, storage);
nibbles = 4;
} else if (len <= (1U << 20)) {
nibbles = 5;
}
BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
/* ISUNCOMPRESSED */
BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}
@ -463,14 +462,6 @@ static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
const size_t shift = 64u - table_bits;
if (input_size == 0) {
assert(is_last);
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
return;
}
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
/* No block splits, no contexts. */
BrotliWriteBits(13, 0, storage_ix, storage);
@ -728,11 +719,7 @@ next_block:
goto emit_commands;
}
if (is_last) {
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
} else {
if (!is_last) {
/* If this is not the last block, update the command and distance prefix
codes for the next block and store the compressed forms. */
cmd_code[0] = 0;
@ -761,7 +748,17 @@ void BrotliCompressFragmentFast(
BROTLI_BOOL is_last, int* table, size_t table_size, uint8_t cmd_depth[128],
uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
size_t* storage_ix, uint8_t* storage) {
const size_t initial_storage_ix = *storage_ix;
const size_t table_bits = Log2FloorNonZero(table_size);
if (input_size == 0) {
assert(is_last);
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
return;
}
switch (table_bits) {
#define CASE_(B) \
case B: \
@ -773,6 +770,18 @@ void BrotliCompressFragmentFast(
#undef CASE_
default: assert(0); break;
}
/* If output is larger than single uncompressed block, rewrite it. */
if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
storage_ix, storage);
}
if (is_last) {
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
}
}
#undef FOR_TABLE_BITS_

View File

@ -37,6 +37,7 @@ extern "C" {
updated to represent the updated "cmd_depth" and "cmd_bits".
REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
REQUIRES: "table_size" is an odd (9, 11, 13, 15) power of two
OUTPUT: maximal copy distance <= |input_size|

View File

@ -216,21 +216,20 @@ static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
++(*commands);
}
/* REQUIRES: len <= 1 << 20. */
/* REQUIRES: len <= 1 << 24. */
static void BrotliStoreMetaBlockHeader(
size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
uint8_t* storage) {
size_t nibbles = 6;
/* ISLAST */
BrotliWriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
/* MNIBBLES is 4 */
BrotliWriteBits(2, 0, storage_ix, storage);
BrotliWriteBits(16, len - 1, storage_ix, storage);
} else {
/* MNIBBLES is 5 */
BrotliWriteBits(2, 1, storage_ix, storage);
BrotliWriteBits(20, len - 1, storage_ix, storage);
nibbles = 4;
} else if (len <= (1U << 20)) {
nibbles = 5;
}
BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
/* ISUNCOMPRESSED */
BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}
@ -507,6 +506,23 @@ static BROTLI_BOOL ShouldCompress(
}
}
/* Truncates the output bit stream back to |new_storage_ix| bits.
   Clears the stale high bits of the byte that now holds the write position
   (so subsequent bit-writes can OR into it) and resets the bit cursor. */
static void RewindBitPosition(const size_t new_storage_ix,
size_t* storage_ix, uint8_t* storage) {
const size_t bits_in_last_byte = new_storage_ix & 7;
/* Keep only the low |bits_in_last_byte| bits; higher bits belonged to the
   discarded portion of the stream. */
storage[new_storage_ix >> 3] &= (uint8_t)((1u << bits_in_last_byte) - 1);
*storage_ix = new_storage_ix;
}
/* Writes |input_size| bytes of |input| as a single uncompressed metablock:
   metablock header with ISUNCOMPRESSED set, byte alignment, then the raw
   payload copied verbatim. */
static void EmitUncompressedMetaBlock(const uint8_t* input, size_t input_size,
size_t* storage_ix, uint8_t* storage) {
size_t pos;
BrotliStoreMetaBlockHeader(input_size, 1, storage_ix, storage);
/* Raw data must start on a byte boundary. */
pos = (*storage_ix + 7u) & ~7u;
memcpy(&storage[pos >> 3], input, input_size);
pos += input_size << 3;
/* Zero the byte at the new write position so later bit-writes can OR. */
storage[pos >> 3] = 0;
*storage_ix = pos;
}
static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
MemoryManager* m, const uint8_t* input, size_t input_size,
BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
@ -514,6 +530,7 @@ static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
/* Save the start of the first block for position and distance computations.
*/
const uint8_t* base_ip = input;
BROTLI_UNUSED(is_last);
while (input_size > 0) {
size_t block_size =
@ -536,21 +553,11 @@ static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
/* Since we did not find many backward references and the entropy of
the data is close to 8 bits, we can simply emit an uncompressed block.
This makes compression speed of uncompressible data about 3x faster. */
BrotliStoreMetaBlockHeader(block_size, 1, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
memcpy(&storage[*storage_ix >> 3], input, block_size);
*storage_ix += block_size << 3;
storage[*storage_ix >> 3] = 0;
EmitUncompressedMetaBlock(input, block_size, storage_ix, storage);
}
input += block_size;
input_size -= block_size;
}
if (is_last) {
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
}
}
#define FOR_TABLE_BITS_(X) \
@ -571,6 +578,7 @@ void BrotliCompressFragmentTwoPass(
MemoryManager* m, const uint8_t* input, size_t input_size,
BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
int* table, size_t table_size, size_t* storage_ix, uint8_t* storage) {
const size_t initial_storage_ix = *storage_ix;
const size_t table_bits = Log2FloorNonZero(table_size);
switch (table_bits) {
#define CASE_(B) \
@ -583,6 +591,18 @@ void BrotliCompressFragmentTwoPass(
#undef CASE_
default: assert(0); break;
}
/* If output is larger than single uncompressed block, rewrite it. */
if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
RewindBitPosition(initial_storage_ix, storage_ix, storage);
EmitUncompressedMetaBlock(input, input_size, storage_ix, storage);
}
if (is_last) {
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
}
}
#undef FOR_TABLE_BITS_

View File

@ -29,6 +29,7 @@ static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
If "is_last" is 1, emits an additional empty last meta-block.
REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
REQUIRES: "command_buf" and "literal_buf" point to at least
kCompressFragmentTwoPassBlockSize long arrays.
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.

View File

@ -57,7 +57,7 @@ typedef struct BrotliEncoderStateStruct {
MemoryManager memory_manager_;
Hashers hashers_;
HasherHandle hasher_;
uint64_t input_pos_;
RingBuffer ringbuffer_;
size_t cmd_alloc_size_;
@ -67,7 +67,7 @@ typedef struct BrotliEncoderStateStruct {
size_t last_insert_len_;
uint64_t last_flush_pos_;
uint64_t last_processed_pos_;
int dist_cache_[4];
int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
int saved_dist_cache_[4];
uint8_t last_byte_;
uint8_t last_byte_bits_;
@ -580,10 +580,6 @@ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
s->cmd_code_, &s->cmd_code_numbits_);
}
/* Initialize hashers. */
HashersSetup(&s->memory_manager_, &s->hashers_, ChooseHasher(&s->params));
if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE;
s->is_initialized_ = BROTLI_TRUE;
return BROTLI_TRUE;
}
@ -609,6 +605,7 @@ static void BrotliEncoderInitState(BrotliEncoderState* s) {
s->prev_byte2_ = 0;
s->storage_size_ = 0;
s->storage_ = 0;
s->hasher_ = NULL;
s->large_table_ = NULL;
s->large_table_size_ = 0;
s->cmd_code_numbits_ = 0;
@ -621,8 +618,6 @@ static void BrotliEncoderInitState(BrotliEncoderState* s) {
s->is_last_block_emitted_ = BROTLI_FALSE;
s->is_initialized_ = BROTLI_FALSE;
InitHashers(&s->hashers_);
RingBufferInit(&s->ringbuffer_);
s->commands_ = 0;
@ -635,7 +630,7 @@ static void BrotliEncoderInitState(BrotliEncoderState* s) {
s->dist_cache_[3] = 16;
/* Save the state of the distance cache in case we need to restore it for
emitting an uncompressed block. */
memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->dist_cache_));
memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
}
BrotliEncoderState* BrotliEncoderCreateInstance(brotli_alloc_func alloc_func,
@ -666,7 +661,7 @@ static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
BROTLI_FREE(m, s->storage_);
BROTLI_FREE(m, s->commands_);
RingBufferFree(m, &s->ringbuffer_);
DestroyHashers(m, &s->hashers_);
DestroyHasher(m, &s->hasher_);
BROTLI_FREE(m, s->large_table_);
BROTLI_FREE(m, s->command_buf_);
BROTLI_FREE(m, s->literal_buf_);
@ -774,7 +769,7 @@ void BrotliEncoderSetCustomDictionary(BrotliEncoderState* s, size_t size,
if (dict_size > 1) {
s->prev_byte2_ = dict[dict_size - 2];
}
HashersPrependCustomDictionary(m, &s->hashers_, &s->params, dict_size, dict);
HasherPrependCustomDictionary(m, &s->hasher_, &s->params, dict_size, dict);
if (BROTLI_IS_OOM(m)) return;
}
@ -809,6 +804,7 @@ static BROTLI_BOOL EncodeData(
uint8_t* data;
uint32_t mask;
MemoryManager* m = &s->memory_manager_;
const BrotliDictionary* dictionary = BrotliGetDictionary();
if (!EnsureInitialized(s)) return BROTLI_FALSE;
data = s->ringbuffer_.buffer_;
@ -893,26 +889,28 @@ static BROTLI_BOOL EncodeData(
}
}
InitOrStitchToPreviousBlock(m, &s->hashers_, data, mask, &s->params,
InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,
wrapped_last_processed_pos, bytes, is_last);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
if (s->params.quality == ZOPFLIFICATION_QUALITY) {
assert(s->params.hasher.type == 10);
BrotliCreateZopfliBackwardReferences(
m, bytes, wrapped_last_processed_pos, data, mask,
&s->params, s->hashers_.h10, s->dist_cache_, &s->last_insert_len_,
m, dictionary, bytes, wrapped_last_processed_pos, data, mask,
&s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_,
&s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
} else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
assert(s->params.hasher.type == 10);
BrotliCreateHqZopfliBackwardReferences(
m, bytes, wrapped_last_processed_pos, data, mask,
&s->params, s->hashers_.h10, s->dist_cache_, &s->last_insert_len_,
m, dictionary, bytes, wrapped_last_processed_pos, data, mask,
&s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_,
&s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
} else {
BrotliCreateBackwardReferences(
bytes, wrapped_last_processed_pos, data, mask,
&s->params, &s->hashers_, s->dist_cache_, &s->last_insert_len_,
dictionary, bytes, wrapped_last_processed_pos, data, mask,
&s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_,
&s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_);
}
@ -936,7 +934,7 @@ static BROTLI_BOOL EncodeData(
s->num_commands_ < max_commands) {
/* Merge with next input block. Everything will happen later. */
if (UpdateLastProcessedPos(s)) {
HashersReset(&s->hashers_, ChooseHasher(&s->params));
HasherReset(s->hasher_);
}
*out_size = 0;
return BROTLI_TRUE;
@ -976,7 +974,7 @@ static BROTLI_BOOL EncodeData(
s->last_byte_bits_ = storage_ix & 7u;
s->last_flush_pos_ = s->input_pos_;
if (UpdateLastProcessedPos(s)) {
HashersReset(&s->hashers_, ChooseHasher(&s->params));
HasherReset(s->hasher_);
}
if (s->last_flush_pos_ > 0) {
s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];
@ -988,7 +986,7 @@ static BROTLI_BOOL EncodeData(
s->num_literals_ = 0;
/* Save the state of the distance cache in case we need to restore it for
emitting an uncompressed block. */
memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->dist_cache_));
memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
*output = &storage[0];
*out_size = storage_ix >> 3;
return BROTLI_TRUE;
@ -1037,12 +1035,13 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
size_t total_out_size = 0;
uint8_t last_byte;
uint8_t last_byte_bits;
H10* hasher;
HasherHandle hasher = NULL;
const size_t hasher_eff_size =
BROTLI_MIN(size_t, input_size, max_backward_limit + BROTLI_WINDOW_GAP);
BrotliEncoderParams params;
const BrotliDictionary* dictionary = BrotliGetDictionary();
const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);
size_t max_block_size;
@ -1064,12 +1063,11 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
assert(input_size <= mask + 1);
EncodeWindowBits(lgwin, &last_byte, &last_byte_bits);
hasher = BROTLI_ALLOC(m, H10, 1);
if (BROTLI_IS_OOM(m)) goto oom;
InitializeH10(hasher);
InitH10(m, hasher, input_buffer, &params, 0, hasher_eff_size, 1);
InitOrStitchToPreviousBlock(m, &hasher, input_buffer, mask, &params,
0, hasher_eff_size, BROTLI_TRUE);
if (BROTLI_IS_OOM(m)) goto oom;
while (ok && metablock_start < input_size) {
const size_t metablock_end =
BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size);
@ -1097,7 +1095,7 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
StitchToPreviousBlockH10(hasher, block_size, block_start,
input_buffer, mask);
path_size = BrotliZopfliComputeShortestPath(
m, block_size, block_start, input_buffer, mask, &params,
m, dictionary, block_size, block_start, input_buffer, mask, &params,
max_backward_limit, dist_cache, hasher, nodes);
if (BROTLI_IS_OOM(m)) goto oom;
/* We allocate a command buffer in the first iteration of this loop that
@ -1227,8 +1225,7 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
}
*encoded_size = total_out_size;
CleanupH10(m, hasher);
BROTLI_FREE(m, hasher);
DestroyHasher(m, &hasher);
return ok;
oom:
@ -1588,15 +1585,25 @@ static BROTLI_BOOL ProcessMetadata(
return BROTLI_TRUE;
}
/* Sets params.size_hint, if it is still unset, to an estimate of the total
   input: bytes already buffered but unprocessed plus |available_in| bytes
   still pending, capped at 1 GiB. A non-zero existing hint is kept. */
static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) {
uint64_t unprocessed;
uint64_t pending;
const uint32_t kHintCap = 1u << 30;
if (s->params.size_hint != 0) return;
unprocessed = UnprocessedInputSize(s);
pending = available_in;
/* Each term is checked first, so the sum below cannot overflow. */
if (unprocessed >= kHintCap || pending >= kHintCap ||
unprocessed + pending >= kHintCap) {
s->params.size_hint = kHintCap;
} else {
s->params.size_hint = (uint32_t)(unprocessed + pending);
}
}
BROTLI_BOOL BrotliEncoderCompressStream(
BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
const uint8_t** next_in, size_t* available_out,uint8_t** next_out,
size_t* total_out) {
/* If we don't have any size hint, set it based on the size of the first
input chunk. */
if (s->params.size_hint == 0) {
s->params.size_hint = (uint32_t)*available_in;
}
if (!EnsureInitialized(s)) return BROTLI_FALSE;
/* Unfinished metadata block; check requirements. */
@ -1606,6 +1613,7 @@ BROTLI_BOOL BrotliEncoderCompressStream(
}
if (op == BROTLI_OPERATION_EMIT_METADATA) {
UpdateSizeHint(s, 0); /* First data metablock might be emitted here. */
return ProcessMetadata(
s, available_in, next_in, available_out, next_out, total_out);
}
@ -1648,7 +1656,9 @@ BROTLI_BOOL BrotliEncoderCompressStream(
(*available_in == 0) && op == BROTLI_OPERATION_FINISH);
BROTLI_BOOL force_flush = TO_BROTLI_BOOL(
(*available_in == 0) && op == BROTLI_OPERATION_FLUSH);
BROTLI_BOOL result = EncodeData(s, is_last, force_flush,
BROTLI_BOOL result;
UpdateSizeHint(s, *available_in);
result = EncodeData(s, is_last, force_flush,
&s->available_out_, &s->next_out_);
if (!result) return BROTLI_FALSE;
if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;

View File

@ -15,7 +15,6 @@
#include "../common/constants.h"
#include "../common/dictionary.h"
#include <brotli/types.h>
#include "./dictionary_hash.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./memory.h"
@ -27,19 +26,39 @@
extern "C" {
#endif
/* Pointer to hasher data.
*
* Excluding initialization and destruction, hasher can be passed as
* HasherHandle by value.
*
* Typically hasher data consists of 3 sections:
* * HasherCommon structure
* * private structured hasher data, depending on hasher type
* * private dynamic hasher data, depending on hasher type and parameters
*/
typedef uint8_t* HasherHandle;
/* Header shared by all hasher implementations; placed at the start of the
   hasher memory block, followed by hasher-type-specific data. */
typedef struct {
  /* Hasher configuration selected for the current encoder parameters. */
  BrotliHasherParams params;

  /* False if hasher needs to be "prepared" before use. */
  BROTLI_BOOL is_prepared_;

  /* Static-dictionary probe statistics; used to throttle dictionary lookups
     when the hit rate is too low (see SearchInStaticDictionary). */
  size_t dict_num_lookups;
  size_t dict_num_matches;
} HasherCommon;

/* A HasherHandle points directly at the HasherCommon header, so recovering
   the common part is a plain cast. */
static BROTLI_INLINE HasherCommon* GetHasherCommon(HasherHandle handle) {
  return (HasherCommon*)handle;
}
#define score_t size_t
static const uint32_t kDistanceCacheIndex[] = {
0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
};
static const int kDistanceCacheOffset[] = {
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
};
static const uint32_t kCutoffTransformsCount = 10;
static const uint8_t kCutoffTransforms[] = {
0, 12, 27, 23, 42, 63, 56, 48, 59, 64
};
/* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */
/* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
static const uint64_t kCutoffTransforms =
BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200);
typedef struct HasherSearchResult {
size_t len;
@ -48,11 +67,6 @@ typedef struct HasherSearchResult {
score_t score;
} HasherSearchResult;
typedef struct DictionarySearchStatictics {
size_t num_lookups;
size_t num_matches;
} DictionarySearchStatictics;
/* kHashMul32 multiplier has these properties:
* The multiplier must be odd. Otherwise we may lose the highest bit.
* No long streaks of ones or zeros.
@ -61,6 +75,8 @@ typedef struct DictionarySearchStatictics {
* The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;
static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1e35a7bd, 0x1e35a7bd);
static const uint64_t kHashMul64Long =
BROTLI_MAKE_UINT64_T(0x1fe35a7bU, 0xd3579bd3U);
static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
@ -69,8 +85,30 @@ static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
return h >> (32 - 14);
}
#define BROTLI_LITERAL_BYTE_SCORE 540
#define BROTLI_DISTANCE_BIT_PENALTY 120
/* Extends the 4-entry distance cache with derived candidates: slots 4..9 are
   the last distance +-1, +-2, +-3, and (when num_distances > 10) slots 10..15
   are the second-to-last distance +-1, +-2, +-3.  Slots 0..3 are untouched;
   nothing is written when num_distances <= 4. */
static BROTLI_INLINE void PrepareDistanceCache(
    int* BROTLI_RESTRICT distance_cache, const int num_distances) {
  if (num_distances > 4) {
    int base = distance_cache[0];
    int delta;
    for (delta = 1; delta <= 3; ++delta) {
      distance_cache[2 + 2 * delta] = base - delta;
      distance_cache[3 + 2 * delta] = base + delta;
    }
    if (num_distances > 10) {
      base = distance_cache[1];
      for (delta = 1; delta <= 3; ++delta) {
        distance_cache[8 + 2 * delta] = base - delta;
        distance_cache[9 + 2 * delta] = base + delta;
      }
    }
  }
}
#define BROTLI_LITERAL_BYTE_SCORE 135
#define BROTLI_DISTANCE_BIT_PENALTY 30
/* Score must be positive after applying maximal penalty. */
#define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
@ -96,44 +134,20 @@ static BROTLI_INLINE score_t BackwardReferenceScore(
BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);
}
static const score_t kDistanceShortCodeCost[BROTLI_NUM_DISTANCE_SHORT_CODES] = {
/* Repeat last */
BROTLI_SCORE_BASE + 60,
/* 2nd, 3rd, 4th last */
BROTLI_SCORE_BASE - 95,
BROTLI_SCORE_BASE - 117,
BROTLI_SCORE_BASE - 127,
/* Last with offset */
BROTLI_SCORE_BASE - 93,
BROTLI_SCORE_BASE - 93,
BROTLI_SCORE_BASE - 96,
BROTLI_SCORE_BASE - 96,
BROTLI_SCORE_BASE - 99,
BROTLI_SCORE_BASE - 99,
/* 2nd last with offset */
BROTLI_SCORE_BASE - 105,
BROTLI_SCORE_BASE - 105,
BROTLI_SCORE_BASE - 115,
BROTLI_SCORE_BASE - 115,
BROTLI_SCORE_BASE - 125,
BROTLI_SCORE_BASE - 125
};
static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
size_t copy_length, size_t distance_short_code) {
size_t copy_length) {
return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +
kDistanceShortCodeCost[distance_short_code];
BROTLI_SCORE_BASE + 15;
}
static BROTLI_INLINE void DictionarySearchStaticticsReset(
DictionarySearchStatictics* self) {
self->num_lookups = 0;
self->num_matches = 0;
/* Score penalty applied when a match reuses a derived (non-exact) last
   distance.  Caller skips code 0 (exact last distance, no penalty).
   The constant 0x1CA10 packs a small even-valued penalty table indexed by
   (distance_short_code & 0xE), i.e. one entry per pair of codes.
   NOTE(review): constants appear empirically tuned — do not derive from
   first principles; verify against upstream tuning notes. */
static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
    size_t distance_short_code) {
  return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE);
}
static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
size_t item, const uint8_t* data, size_t max_length, size_t max_backward,
HasherSearchResult* out) {
const BrotliDictionary* dictionary, size_t item, const uint8_t* data,
size_t max_length, size_t max_backward, HasherSearchResult* out) {
size_t len;
size_t dist;
size_t offset;
@ -142,19 +156,22 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
score_t score;
len = item & 0x1F;
dist = item >> 5;
offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
offset = dictionary->offsets_by_length[len] + len * dist;
if (len > max_length) {
return BROTLI_FALSE;
}
matchlen = FindMatchLengthWithLimit(data, &kBrotliDictionary[offset], len);
matchlen =
FindMatchLengthWithLimit(data, &dictionary->data[offset], len);
if (matchlen + kCutoffTransformsCount <= len || matchlen == 0) {
return BROTLI_FALSE;
}
{
size_t transform_id = kCutoffTransforms[len - matchlen];
size_t cut = len - matchlen;
size_t transform_id =
(cut << 2) + (size_t)((kCutoffTransforms >> (cut * 6)) & 0x3F);
backward = max_backward + dist + 1 +
(transform_id << kBrotliDictionarySizeBitsByLength[len]);
(transform_id << dictionary->size_bits_by_length[len]);
}
score = BackwardReferenceScore(matchlen, backward);
if (score < out->score) {
@ -168,22 +185,27 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
}
static BROTLI_INLINE BROTLI_BOOL SearchInStaticDictionary(
DictionarySearchStatictics* self, const uint8_t* data, size_t max_length,
const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
HasherHandle handle, const uint8_t* data, size_t max_length,
size_t max_backward, HasherSearchResult* out, BROTLI_BOOL shallow) {
size_t key;
size_t i;
BROTLI_BOOL is_match_found = BROTLI_FALSE;
if (self->num_matches < (self->num_lookups >> 7)) {
HasherCommon* self = GetHasherCommon(handle);
if (self->dict_num_matches < (self->dict_num_lookups >> 7)) {
return BROTLI_FALSE;
}
key = Hash14(data) << 1;
for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {
size_t item = kStaticDictionaryHash[key];
self->num_lookups++;
if (item != 0 &&
TestStaticDictionaryItem(item, data, max_length, max_backward, out)) {
self->num_matches++;
is_match_found = BROTLI_TRUE;
size_t item = dictionary_hash[key];
self->dict_num_lookups++;
if (item != 0) {
BROTLI_BOOL item_matches = TestStaticDictionaryItem(
dictionary, item, data, max_length, max_backward, out);
if (item_matches) {
self->dict_num_matches++;
is_match_found = BROTLI_TRUE;
}
}
}
return is_match_found;
@ -267,43 +289,11 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
#undef HASHER
#define HASHER() H5
#define BUCKET_BITS 14
#define BLOCK_BITS 4
#define NUM_LAST_DISTANCES_TO_CHECK 4
#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
#undef BLOCK_BITS
#undef HASHER
#define HASHER() H6
#define BLOCK_BITS 5
#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
#undef NUM_LAST_DISTANCES_TO_CHECK
#undef BLOCK_BITS
#undef BUCKET_BITS
#undef HASHER
#define HASHER() H7
#define BUCKET_BITS 15
#define BLOCK_BITS 6
#define NUM_LAST_DISTANCES_TO_CHECK 10
#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
#undef BLOCK_BITS
#undef HASHER
#define HASHER() H8
#define BLOCK_BITS 7
#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
#undef NUM_LAST_DISTANCES_TO_CHECK
#undef BLOCK_BITS
#undef HASHER
#define HASHER() H9
#define BLOCK_BITS 8
#define NUM_LAST_DISTANCES_TO_CHECK 16
#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
#undef NUM_LAST_DISTANCES_TO_CHECK
#undef BLOCK_BITS
#undef BUCKET_BITS
#include "./hash_longest_match64_inc.h" /* NOLINT(build/include) */
#undef HASHER
#define BUCKET_BITS 15
@ -352,97 +342,120 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
#undef CAT
#undef EXPAND_CAT
#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(7) H(8) H(9) \
H(40) H(41) H(42) H(54)
#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)
#define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
typedef struct Hashers {
#define MEMBER_(N) H ## N* h ## N;
FOR_ALL_HASHERS(MEMBER_)
#undef MEMBER_
} Hashers;
static BROTLI_INLINE void InitHashers(Hashers* self) {
#define INIT_(N) self->h ## N = 0;
FOR_ALL_HASHERS(INIT_)
#undef INIT_
/* Releases the hasher's single memory block.  Safe to call when the hasher
   was never allocated (*handle == NULL).
   NOTE(review): assumes BROTLI_FREE also resets the freed pointer —
   confirm against memory.h. */
static BROTLI_INLINE void DestroyHasher(
    MemoryManager* m, HasherHandle* handle) {
  if (*handle == NULL) return;
  BROTLI_FREE(m, *handle);
}
static BROTLI_INLINE void DestroyHashers(MemoryManager* m, Hashers* self) {
#define CLEANUP_(N) if (self->h ## N) CleanupH ## N(m, self->h ## N); \
BROTLI_FREE(m, self->h ## N);
FOR_ALL_HASHERS(CLEANUP_)
#undef CLEANUP_
/* Marks the hasher as requiring (re-)preparation before next use; the actual
   table reset is deferred until HasherSetup runs.  NULL handle is a no-op. */
static BROTLI_INLINE void HasherReset(HasherHandle handle) {
  if (handle == NULL) return;
  GetHasherCommon(handle)->is_prepared_ = BROTLI_FALSE;
}
static BROTLI_INLINE void HashersReset(Hashers* self, int type) {
switch (type) {
#define RESET_(N) case N: ResetH ## N(self->h ## N); break;
FOR_ALL_HASHERS(RESET_)
#undef RESET_
default: break;
/* Computes the total allocation size for the hasher selected in |params|:
   the shared HasherCommon header plus the type-specific memory requirement
   reported by the corresponding HashMemAllocInBytesH<N>. */
static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params,
    BROTLI_BOOL one_shot, const size_t input_size) {
  size_t result = sizeof(HasherCommon);
  switch (params->hasher.type) {
#define SIZE_(N)                                                           \
    case N:                                                                \
      result += HashMemAllocInBytesH ## N(params, one_shot, input_size);   \
      break;
    FOR_ALL_HASHERS(SIZE_)
#undef SIZE_
    default:
      break;
  }
  return result;
}
static BROTLI_INLINE void HashersSetup(
MemoryManager* m, Hashers* self, int type) {
switch (type) {
#define SETUP_(N) case N: self->h ## N = BROTLI_ALLOC(m, H ## N, 1); break;
FOR_ALL_HASHERS(SETUP_)
#undef SETUP_
default: break;
}
if (BROTLI_IS_OOM(m)) return;
switch (type) {
#define INITIALIZE_(N) case N: InitializeH ## N(self->h ## N); break;
FOR_ALL_HASHERS(INITIALIZE_);
static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle,
BrotliEncoderParams* params, const uint8_t* data, size_t position,
size_t input_size, BROTLI_BOOL is_last) {
HasherHandle self = NULL;
HasherCommon* common = NULL;
BROTLI_BOOL one_shot = (position == 0 && is_last);
if (*handle == NULL) {
size_t alloc_size;
ChooseHasher(params, &params->hasher);
alloc_size = HasherSize(params, one_shot, input_size);
self = BROTLI_ALLOC(m, uint8_t, alloc_size);
if (BROTLI_IS_OOM(m)) return;
*handle = self;
common = GetHasherCommon(self);
common->params = params->hasher;
switch (common->params.type) {
#define INITIALIZE_(N) \
case N: \
InitializeH ## N(*handle, params); \
break;
FOR_ALL_HASHERS(INITIALIZE_);
#undef INITIALIZE_
default: break;
default:
break;
}
HasherReset(*handle);
}
HashersReset(self, type);
}
#define WARMUP_HASH_(N) \
static BROTLI_INLINE void WarmupHashH ## N(MemoryManager* m, \
const BrotliEncoderParams* params, const size_t size, const uint8_t* dict, \
H ## N* hasher) { \
size_t overlap = (StoreLookaheadH ## N()) - 1; \
size_t i; \
InitH ## N(m, hasher, dict, params, 0, size, BROTLI_FALSE); \
if (BROTLI_IS_OOM(m)) return; \
for (i = 0; i + overlap < size; i++) { \
StoreH ## N(hasher, dict, ~(size_t)0, i); \
} \
self = *handle;
common = GetHasherCommon(self);
if (!common->is_prepared_) {
switch (common->params.type) {
#define PREPARE_(N) \
case N: \
PrepareH ## N(self, one_shot, input_size, data); \
break;
FOR_ALL_HASHERS(PREPARE_)
#undef PREPARE_
default: break;
}
if (position == 0) {
common->dict_num_lookups = 0;
common->dict_num_matches = 0;
}
common->is_prepared_ = BROTLI_TRUE;
}
}
FOR_ALL_HASHERS(WARMUP_HASH_)
#undef WARMUP_HASH_
/* Custom LZ77 window. */
static BROTLI_INLINE void HashersPrependCustomDictionary(
MemoryManager* m, Hashers* self, const BrotliEncoderParams* params,
static BROTLI_INLINE void HasherPrependCustomDictionary(
MemoryManager* m, HasherHandle* handle, BrotliEncoderParams* params,
const size_t size, const uint8_t* dict) {
int hasher_type = ChooseHasher(params);
switch (hasher_type) {
#define PREPEND_(N) \
case N: WarmupHashH ## N(m, params, size, dict, self->h ## N); break;
size_t overlap;
size_t i;
HasherHandle self;
HasherSetup(m, handle, params, dict, 0, size, BROTLI_FALSE);
if (BROTLI_IS_OOM(m)) return;
self = *handle;
switch (GetHasherCommon(self)->params.type) {
#define PREPEND_(N) \
case N: \
overlap = (StoreLookaheadH ## N()) - 1; \
for (i = 0; i + overlap < size; i++) { \
StoreH ## N(self, dict, ~(size_t)0, i); \
} \
break;
FOR_ALL_HASHERS(PREPEND_)
#undef PREPEND_
default: break;
}
if (BROTLI_IS_OOM(m)) return;
}
static BROTLI_INLINE void InitOrStitchToPreviousBlock(
MemoryManager* m, Hashers* self, const uint8_t* data, size_t mask,
const BrotliEncoderParams* params, size_t position,
size_t bytes, BROTLI_BOOL is_last) {
int hasher_type = ChooseHasher(params);
switch (hasher_type) {
#define INIT_(N) \
case N: \
InitH ## N(m, self->h ## N, data, params, position, bytes, is_last); \
if (BROTLI_IS_OOM(m)) return; \
StitchToPreviousBlockH ## N(self->h ## N, bytes, position, data, mask); \
MemoryManager* m, HasherHandle* handle, const uint8_t* data, size_t mask,
BrotliEncoderParams* params, size_t position, size_t input_size,
BROTLI_BOOL is_last) {
HasherHandle self;
HasherSetup(m, handle, params, data, position, input_size, is_last);
if (BROTLI_IS_OOM(m)) return;
self = *handle;
switch (GetHasherCommon(self)->params.type) {
#define INIT_(N) \
case N: \
StitchToPreviousBlockH ## N(self, input_size, position, data, mask); \
break;
FOR_ALL_HASHERS(INIT_)
#undef INIT_
@ -450,7 +463,6 @@ static BROTLI_INLINE void InitOrStitchToPreviousBlock(
}
}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@ -51,74 +51,57 @@ typedef struct HashForgetfulChain {
uint8_t tiny_hash[65536];
FN(Bank) banks[NUM_BANKS];
uint16_t free_slot_idx[NUM_BANKS];
BROTLI_BOOL is_dirty_;
DictionarySearchStatictics dict_search_stats_;
size_t max_hops;
} HashForgetfulChain;
static void FN(Initialize)(HashForgetfulChain* self) {
BROTLI_UNUSED(self);
static BROTLI_INLINE HashForgetfulChain* FN(Self)(HasherHandle handle) {
return (HashForgetfulChain*)&(GetHasherCommon(handle)[1]);
}
static void FN(Cleanup)(MemoryManager* m, HashForgetfulChain* self) {
BROTLI_UNUSED(m);
BROTLI_UNUSED(self);
static void FN(Initialize)(
HasherHandle handle, const BrotliEncoderParams* params) {
FN(Self)(handle)->max_hops =
(params->quality > 6 ? 7u : 8u) << (params->quality - 4);
}
static void FN(Reset)(HashForgetfulChain* self) {
self->is_dirty_ = BROTLI_TRUE;
DictionarySearchStaticticsReset(&self->dict_search_stats_);
}
static void FN(InitEmpty)(HashForgetfulChain* self) {
if (self->is_dirty_) {
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* data) {
HashForgetfulChain* self = FN(Self)(handle);
/* Partial preparation is 100 times slower (per socket). */
size_t partial_prepare_threshold = BUCKET_SIZE >> 6;
if (one_shot && input_size <= partial_prepare_threshold) {
size_t i;
for (i = 0; i < input_size; ++i) {
size_t bucket = FN(HashBytes)(&data[i]);
/* See InitEmpty comment. */
self->addr[bucket] = 0xCCCCCCCC;
self->head[bucket] = 0xCCCC;
}
} else {
/* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position
processed by hasher never reaches 3GB + 64M; this makes all new chains
to be terminated after the first node. */
memset(self->addr, 0xCC, sizeof(self->addr));
memset(self->head, 0, sizeof(self->head));
memset(self->tiny_hash, 0, sizeof(self->tiny_hash));
memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
self->is_dirty_ = BROTLI_FALSE;
}
}
static void FN(InitForData)(HashForgetfulChain* self, const uint8_t* data,
size_t num) {
size_t i;
for (i = 0; i < num; ++i) {
size_t bucket = FN(HashBytes)(&data[i]);
/* See InitEmpty comment. */
self->addr[bucket] = 0xCCCCCCCC;
self->head[bucket] = 0xCCCC;
}
memset(self->tiny_hash, 0, sizeof(self->tiny_hash));
memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
if (num != 0) {
self->is_dirty_ = BROTLI_FALSE;
}
}
static void FN(Init)(
MemoryManager* m, HashForgetfulChain* self, const uint8_t* data,
const BrotliEncoderParams* params, size_t position, size_t bytes,
BROTLI_BOOL is_last) {
/* Choose which initialization method is faster.
Init() is about 100 times faster than InitForData(). */
const size_t kMaxBytesForPartialHashInit = BUCKET_SIZE >> 6;
BROTLI_UNUSED(m);
self->max_hops = (params->quality > 6 ? 7u : 8u) << (params->quality - 4);
if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) {
FN(InitForData)(self, data, bytes);
} else {
FN(InitEmpty)(self);
}
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
size_t input_size) {
BROTLI_UNUSED(params);
BROTLI_UNUSED(one_shot);
BROTLI_UNUSED(input_size);
return sizeof(HashForgetfulChain);
}
/* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
node to corresponding chain; also update tiny_hash for current position. */
static BROTLI_INLINE void FN(Store)(HashForgetfulChain* BROTLI_RESTRICT self,
static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
HashForgetfulChain* self = FN(Self)(handle);
const size_t key = FN(HashBytes)(&data[ix & mask]);
const size_t bank = key & (NUM_BANKS - 1);
const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);
@ -131,40 +114,52 @@ static BROTLI_INLINE void FN(Store)(HashForgetfulChain* BROTLI_RESTRICT self,
self->head[key] = (uint16_t)idx;
}
static BROTLI_INLINE void FN(StoreRange)(HashForgetfulChain* self,
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
const uint8_t *data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
FN(Store)(self, data, mask, i);
FN(Store)(handle, data, mask, i);
}
}
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashForgetfulChain* self,
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ring_buffer_mask) {
if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
/* Prepare the hashes for three last bytes of the last write.
These could not be calculated before, since they require knowledge
of both the previous and the current block. */
FN(Store)(self, ringbuffer, ring_buffer_mask, position - 3);
FN(Store)(self, ringbuffer, ring_buffer_mask, position - 2);
FN(Store)(self, ringbuffer, ring_buffer_mask, position - 1);
FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 3);
FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 2);
FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 1);
}
}
/* Forgetful-chain hashers probe a compile-time-fixed number of last
   distances, so the handle's runtime parameters are not consulted here. */
static BROTLI_INLINE void FN(PrepareDistanceCache)(
    HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
  BROTLI_UNUSED(handle);
  PrepareDistanceCache(distance_cache, NUM_LAST_DISTANCES_TO_CHECK);
}
/* Find a longest backward match of &data[cur_ix] up to the length of
max_length and stores the position cur_ix in the hash table.
REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
values; if this method is invoked repeatedly with the same distance
cache values, it is enough to invoke FN(PrepareDistanceCache) once.
Does not look for matches longer than max_length.
Does not look for matches further away than max_backward.
Writes the best match into |out|.
Returns 1 when match is found, otherwise 0. */
static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
HashForgetfulChain* self, const uint8_t* BROTLI_RESTRICT data,
const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(HasherHandle handle,
const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
const int* BROTLI_RESTRICT distance_cache,
const size_t cur_ix, const size_t max_length, const size_t max_backward,
HasherSearchResult* BROTLI_RESTRICT out) {
HashForgetfulChain* self = FN(Self)(handle);
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
BROTLI_BOOL is_match_found = BROTLI_FALSE;
/* Don't accept a short copy from far away. */
@ -177,9 +172,7 @@ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
out->len_x_code = 0;
/* Try last distance first. */
for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
const size_t idx = kDistanceCacheIndex[i];
const size_t backward =
(size_t)(distance_cache[idx] + kDistanceCacheOffset[i]);
const size_t backward = (size_t)distance_cache[i];
size_t prev_ix = (cur_ix - backward);
/* For distance code 0 we want to consider 2-byte matches. */
if (i > 0 && self->tiny_hash[(uint16_t)prev_ix] != tiny_hash) continue;
@ -192,14 +185,17 @@ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
&data[cur_ix_masked],
max_length);
if (len >= 2) {
score_t score = BackwardReferenceScoreUsingLastDistance(len, i);
score_t score = BackwardReferenceScoreUsingLastDistance(len);
if (best_score < score) {
best_score = score;
best_len = len;
out->len = best_len;
out->distance = backward;
out->score = best_score;
is_match_found = BROTLI_TRUE;
if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
if (best_score < score) {
best_score = score;
best_len = len;
out->len = best_len;
out->distance = backward;
out->score = best_score;
is_match_found = BROTLI_TRUE;
}
}
}
}
@ -243,11 +239,12 @@ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
}
}
}
FN(Store)(self, data, ring_buffer_mask, cur_ix);
FN(Store)(handle, data, ring_buffer_mask, cur_ix);
}
if (!is_match_found) {
is_match_found = SearchInStaticDictionary(&self->dict_search_stats_,
&data[cur_ix_masked], max_length, max_backward, out, BROTLI_FALSE);
is_match_found = SearchInStaticDictionary(dictionary, dictionary_hash,
handle, &data[cur_ix_masked], max_length, max_backward, out,
BROTLI_FALSE);
}
return is_match_found;
}

269
enc/hash_longest_match64_inc.h Executable file
View File

@ -0,0 +1,269 @@
/* NOLINT(build/header_guard) */
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: FN */
/* A (forgetful) hash table to the data seen by the compressor, to
help create backward references to previous data.
This is a hash map of fixed size (bucket_size_) to a ring buffer of
fixed size (block_size_). The ring buffer contains the last block_size_
index positions of the given hash key in the compressed data. */
#define HashLongestMatch HASHER()

/* This hasher reads an 8-byte word per position, so both the hash footprint
   and the required store lookahead are 8 bytes. */
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
/* HashBytes chooses the bucket for a position: it masks the 8-byte word at
   |data| down to the configured hash_len bytes, mixes it multiplicatively,
   and keeps the topmost bits, which carry the most mixture. */
static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t *data,
                                            const uint64_t mask,
                                            const int shift) {
  const uint64_t word = BROTLI_UNALIGNED_LOAD64(data) & mask;
  const uint64_t mixed = word * kHashMul64Long;
  return (uint32_t)(mixed >> shift);
}
typedef struct HashLongestMatch {
  /* Number of hash buckets. */
  size_t bucket_size_;
  /* Only block_size_ newest backward references are kept,
     and the older are forgotten. */
  size_t block_size_;
  /* Right-shift applied to the 64-bit mixed hash to extract the bucket
     index from its top bucket_bits bits (see FN(HashBytes)). */
  int hash_shift_;
  /* Mask for selecting the next 4-8 bytes of input */
  uint64_t hash_mask_;
  /* Mask for accessing entries in a block (in a ring-buffer manner). */
  uint32_t block_mask_;

  /* --- Dynamic size members --- */

  /* Number of entries in a particular bucket. */
  /* uint16_t num[bucket_size]; */

  /* Buckets containing block_size_ of backward references. */
  /* uint32_t* buckets[bucket_size * block_size]; */
} HashLongestMatch;
/* Hasher-specific data lives immediately after the HasherCommon header. */
static BROTLI_INLINE HashLongestMatch* FN(Self)(HasherHandle handle) {
  return (HashLongestMatch*)&(GetHasherCommon(handle)[1]);
}

/* num[] (per-bucket fill counters) follows the fixed-size struct. */
static BROTLI_INLINE uint16_t* FN(Num)(HashLongestMatch* self) {
  return (uint16_t*)(&self[1]);
}

/* buckets[] (ring buffers of positions) follows num[bucket_size_]. */
static BROTLI_INLINE uint32_t* FN(Buckets)(HashLongestMatch* self) {
  return (uint32_t*)(&FN(Num)(self)[self->bucket_size_]);
}
/* Derives the cached geometry fields from the configured hasher parameters.
   Does not touch the dynamic tables; that is FN(Prepare)'s job. */
static void FN(Initialize)(
    HasherHandle handle, const BrotliEncoderParams* params) {
  HasherCommon* common = GetHasherCommon(handle);
  HashLongestMatch* self = FN(Self)(handle);
  BROTLI_UNUSED(params);
  self->hash_shift_ = 64 - common->params.bucket_bits;
  /* Keep only the lowest 8 * hash_len bits of the loaded input word. */
  self->hash_mask_ = (~((uint64_t)0U)) >> (64 - 8 * common->params.hash_len);
  self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
  self->block_size_ = (size_t)1 << common->params.block_bits;
  self->block_mask_ = (uint32_t)(self->block_size_ - 1);
}
/* Clears the bucket counters so the hasher forgets all previous positions.
   For a tiny one-shot input only the buckets that the input can actually
   touch are cleared; otherwise the whole num[] array is zeroed. */
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
    size_t input_size, const uint8_t* data) {
  HashLongestMatch* self = FN(Self)(handle);
  uint16_t* num = FN(Num)(self);
  /* Partial preparation is 100 times slower (per socket). */
  const size_t partial_prepare_threshold = self->bucket_size_ >> 6;
  if (!one_shot || input_size > partial_prepare_threshold) {
    memset(num, 0, self->bucket_size_ * sizeof(num[0]));
  } else {
    size_t pos;
    for (pos = 0; pos < input_size; ++pos) {
      const uint32_t bucket =
          FN(HashBytes)(&data[pos], self->hash_mask_, self->hash_shift_);
      num[bucket] = 0;
    }
  }
}
/* Reports the dynamic memory needed past HasherCommon: the struct itself,
   one uint16_t counter per bucket, and block_size uint32_t slots per bucket. */
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
    size_t input_size) {
  const size_t num_buckets = (size_t)1 << params->hasher.bucket_bits;
  const size_t slots_per_bucket = (size_t)1 << params->hasher.block_bits;
  BROTLI_UNUSED(one_shot);
  BROTLI_UNUSED(input_size);
  return sizeof(HashLongestMatch) +
      num_buckets * (sizeof(uint16_t) + sizeof(uint32_t) * slots_per_bucket);
}
/* Look at hash_len (4..8, per hash_mask_) bytes at &data[ix & mask].
   Compute a hash from these, and store the value of ix at that position. */
static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data,
    const size_t mask, const size_t ix) {
  HashLongestMatch* self = FN(Self)(handle);
  uint16_t* num = FN(Num)(self);
  const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_mask_,
      self->hash_shift_);
  /* num[key] grows without bound; block_mask_ wraps it so the oldest slot
     in the bucket's ring buffer is overwritten. */
  const size_t minor_ix = num[key] & self->block_mask_;
  const size_t offset =
      minor_ix + (key << GetHasherCommon(handle)->params.block_bits);
  FN(Buckets)(self)[offset] = (uint32_t)ix;
  ++num[key];
}
/* Hashes every position in the half-open range [ix_start, ix_end). */
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
    const uint8_t *data, const size_t mask, const size_t ix_start,
    const size_t ix_end) {
  size_t pos = ix_start;
  while (pos < ix_end) {
    FN(Store)(handle, data, mask, pos);
    ++pos;
  }
}
/* Hashes the three last positions of the previous write.  Those positions
   straddle the block boundary, so their hashes require bytes from both the
   previous and the current block and could not be stored earlier. */
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
    size_t ringbuffer_mask) {
  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
    size_t back;
    /* Store positions position-3, position-2, position-1, in that order. */
    for (back = 3; back >= 1; --back) {
      FN(Store)(handle, ringbuffer, ringbuffer_mask, position - back);
    }
  }
}
/* Expands the distance cache with derived candidates; the number of slots to
   populate comes from this hasher's runtime configuration. */
static BROTLI_INLINE void FN(PrepareDistanceCache)(
    HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
  const int num_distances =
      GetHasherCommon(handle)->params.num_last_distances_to_check;
  PrepareDistanceCache(distance_cache, num_distances);
}
/* Find a longest backward match of &data[cur_ix] up to the length of
   max_length and stores the position cur_ix in the hash table.

   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance
     cache values; if this method is invoked repeatedly with the same distance
     cache values, it is enough to invoke FN(PrepareDistanceCache) once.

   Does not look for matches longer than max_length.
   Does not look for matches further away than max_backward.
   Writes the best match into |out| (only when it beats out->score on entry).
   Falls back to the static dictionary when the data yields no match.
   Returns true when match is found, otherwise false. */
static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(HasherHandle handle,
    const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
    const size_t max_length, const size_t max_backward,
    HasherSearchResult* BROTLI_RESTRICT out) {
  HasherCommon* common = GetHasherCommon(handle);
  HashLongestMatch* self = FN(Self)(handle);
  uint16_t* num = FN(Num)(self);
  uint32_t* buckets = FN(Buckets)(self);
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
  BROTLI_BOOL is_match_found = BROTLI_FALSE;
  /* Don't accept a short copy from far away. */
  score_t best_score = out->score;
  size_t best_len = out->len;
  size_t i;
  out->len = 0;
  out->len_x_code = 0;
  /* Try last distance first. */
  for (i = 0; i < (size_t)common->params.num_last_distances_to_check; ++i) {
    const size_t backward = (size_t)distance_cache[i];
    size_t prev_ix = (size_t)(cur_ix - backward);
    /* Unsigned wrap-around: backward reaching before the data start. */
    if (prev_ix >= cur_ix) {
      continue;
    }
    if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
      continue;
    }
    prev_ix &= ring_buffer_mask;
    /* Cheap rejection: the byte just past the current best length must
       match before paying for a full length comparison. */
    if (cur_ix_masked + best_len > ring_buffer_mask ||
        prev_ix + best_len > ring_buffer_mask ||
        data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
      continue;
    }
    {
      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
                                                  &data[cur_ix_masked],
                                                  max_length);
      if (len >= 3 || (len == 2 && i < 2)) {
        /* Comparing for >= 2 does not change the semantics, but just saves
           for a few unnecessary binary logarithms in backward reference
           score, since we are not interested in such short matches. */
        score_t score = BackwardReferenceScoreUsingLastDistance(len);
        if (best_score < score) {
          /* Derived (non-exact) last distances carry an extra penalty;
             re-check the comparison after applying it. */
          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
          if (best_score < score) {
            best_score = score;
            best_len = len;
            out->len = best_len;
            out->distance = backward;
            out->score = best_score;
            is_match_found = BROTLI_TRUE;
          }
        }
      }
    }
  }
  {
    /* Walk the bucket's ring buffer from newest to oldest entry. */
    const uint32_t key = FN(HashBytes)(
        &data[cur_ix_masked], self->hash_mask_, self->hash_shift_);
    uint32_t* BROTLI_RESTRICT bucket =
        &buckets[key << common->params.block_bits];
    /* Only the newest block_size_ entries are still valid. */
    const size_t down =
        (num[key] > self->block_size_) ?
        (num[key] - self->block_size_) : 0u;
    for (i = num[key]; i > down;) {
      size_t prev_ix = bucket[--i & self->block_mask_];
      const size_t backward = cur_ix - prev_ix;
      /* Entries only get older; once out of range, the rest are too. */
      if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
        break;
      }
      prev_ix &= ring_buffer_mask;
      if (cur_ix_masked + best_len > ring_buffer_mask ||
          prev_ix + best_len > ring_buffer_mask ||
          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
        continue;
      }
      {
        const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
                                                    &data[cur_ix_masked],
                                                    max_length);
        if (len >= 4) {
          /* Comparing for >= 3 does not change the semantics, but just saves
             for a few unnecessary binary logarithms in backward reference
             score, since we are not interested in such short matches. */
          score_t score = BackwardReferenceScore(len, backward);
          if (best_score < score) {
            best_score = score;
            best_len = len;
            out->len = best_len;
            out->distance = backward;
            out->score = best_score;
            is_match_found = BROTLI_TRUE;
          }
        }
      }
    }
    /* Register the current position regardless of match outcome. */
    bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;
    ++num[key];
  }
  if (!is_match_found) {
    is_match_found = SearchInStaticDictionary(dictionary, dictionary_hash,
        handle, &data[cur_ix_masked], max_length, max_backward, out,
        BROTLI_FALSE);
  }
  return is_match_found;
}
#undef HashLongestMatch

View File

@ -5,149 +5,161 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: FN, BUCKET_BITS, BLOCK_BITS,
NUM_LAST_DISTANCES_TO_CHECK */
/* template parameters: FN */
/* A (forgetful) hash table to the data seen by the compressor, to
help create backward references to previous data.
This is a hash map of fixed size (BUCKET_SIZE) to a ring buffer of
fixed size (BLOCK_SIZE). The ring buffer contains the last BLOCK_SIZE
This is a hash map of fixed size (bucket_size_) to a ring buffer of
fixed size (block_size_). The ring buffer contains the last block_size_
index positions of the given hash key in the compressed data. */
#define HashLongestMatch HASHER()
/* Number of hash buckets. */
#define BUCKET_SIZE (1 << BUCKET_BITS)
/* Only BLOCK_SIZE newest backward references are kept,
and the older are forgotten. */
#define BLOCK_SIZE (1u << BLOCK_BITS)
/* Mask for accessing entries in a block (in a ring-buffer manner). */
#define BLOCK_MASK ((1 << BLOCK_BITS) - 1)
#define HASH_MAP_SIZE (2 << BUCKET_BITS)
/* Each hash key covers 4 bytes (HashBytes does a 32-bit load). */
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
/* NOTE(review): presumably the number of valid bytes required past a position
   before Store may be called there — confirm against hasher framework. */
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
/* HashBytes is the function that chooses the bucket to place
the address in. The HashLongestMatch and HashLongestMatchQuickly
classes have separate, different implementations of hashing. */
/* HashBytes is the function that chooses the bucket to place the address in.
   Defect fixed: the pre-refactoring one-argument variant (returning
   `h >> (32 - BUCKET_BITS)` with BUCKET_BITS no longer defined) was left
   fused with the new parameterized variant; only the new one is kept. */
static uint32_t FN(HashBytes)(const uint8_t *data, const int shift) {
  uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
  /* The higher bits contain more mixture from the multiplication,
     so we take our results from there. */
  return (uint32_t)(h >> shift);
}
/* Fixed-size header of the hasher; the variable-size num[] and buckets[]
   tables follow it in the same allocation (see FN(Num) / FN(Buckets)).
   Defect fixed: the removed statically-sized members (num_[BUCKET_SIZE],
   buckets_[...], is_dirty_, dict_search_stats_) were left fused in beside
   the comments describing their dynamically-sized replacements. */
typedef struct HashLongestMatch {
  /* Number of hash buckets. */
  size_t bucket_size_;
  /* Only block_size_ newest backward references are kept,
     and the older are forgotten. */
  size_t block_size_;
  /* Left-shift for computing hash bucket index from hash value. */
  int hash_shift_;
  /* Mask for accessing entries in a block (in a ring-buffer manner). */
  uint32_t block_mask_;
  /* --- Dynamic size members --- */
  /* Number of entries in a particular bucket. */
  /* uint16_t num[bucket_size]; */
  /* Buckets containing block_size_ of backward references. */
  /* uint32_t* buckets[bucket_size * block_size]; */
} HashLongestMatch;
static void FN(Initialize)(HashLongestMatch* self) {
BROTLI_UNUSED(self);
static BROTLI_INLINE HashLongestMatch* FN(Self)(HasherHandle handle) {
return (HashLongestMatch*)&(GetHasherCommon(handle)[1]);
}
static void FN(Cleanup)(MemoryManager* m, HashLongestMatch* self) {
BROTLI_UNUSED(m);
BROTLI_UNUSED(self);
static BROTLI_INLINE uint16_t* FN(Num)(HashLongestMatch* self) {
return (uint16_t*)(&self[1]);
}
static void FN(Reset)(HashLongestMatch* self) {
self->is_dirty_ = BROTLI_TRUE;
DictionarySearchStaticticsReset(&self->dict_search_stats_);
static BROTLI_INLINE uint32_t* FN(Buckets)(HashLongestMatch* self) {
return (uint32_t*)(&FN(Num)(self)[self->bucket_size_]);
}
static void FN(InitEmpty)(HashLongestMatch* self) {
if (self->is_dirty_) {
memset(self->num_, 0, sizeof(self->num_));
self->is_dirty_ = BROTLI_FALSE;
}
}
static void FN(InitForData)(HashLongestMatch* self, const uint8_t* data,
size_t num) {
size_t i;
for (i = 0; i < num; ++i) {
const uint32_t key = FN(HashBytes)(&data[i]);
self->num_[key] = 0;
}
if (num != 0) {
self->is_dirty_ = BROTLI_FALSE;
}
}
static void FN(Init)(
MemoryManager* m, HashLongestMatch* self, const uint8_t* data,
const BrotliEncoderParams* params, size_t position, size_t bytes,
BROTLI_BOOL is_last) {
/* Choose which initialization method is faster.
Init() is about 100 times faster than InitForData(). */
const size_t kMaxBytesForPartialHashInit = HASH_MAP_SIZE >> 7;
BROTLI_UNUSED(m);
static void FN(Initialize)(
HasherHandle handle, const BrotliEncoderParams* params) {
HasherCommon* common = GetHasherCommon(handle);
HashLongestMatch* self = FN(Self)(handle);
BROTLI_UNUSED(params);
if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) {
FN(InitForData)(self, data, bytes);
self->hash_shift_ = 32 - common->params.bucket_bits;
self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
self->block_size_ = (size_t)1 << common->params.block_bits;
self->block_mask_ = (uint32_t)(self->block_size_ - 1);
}
/* Clears the num[] table before a new stream; for small one-shot inputs only
   the buckets the input can actually touch are cleared.
   Defect fixed: a stale call to the removed FN(InitEmpty) was fused into the
   else branch; the memset is the complete reset. */
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
    size_t input_size, const uint8_t* data) {
  HashLongestMatch* self = FN(Self)(handle);
  uint16_t* num = FN(Num)(self);
  /* Partial preparation is 100 times slower (per socket). */
  size_t partial_prepare_threshold = self->bucket_size_ >> 6;
  if (one_shot && input_size <= partial_prepare_threshold) {
    size_t i;
    for (i = 0; i < input_size; ++i) {
      const uint32_t key = FN(HashBytes)(&data[i], self->hash_shift_);
      num[key] = 0;
    }
  } else {
    memset(num, 0, self->bucket_size_ * sizeof(num[0]));
  }
}
/* Total allocation size: fixed header plus, per bucket, a uint16_t counter
   (2 bytes) and block_size uint32_t slots (4 bytes each). */
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
    size_t input_size) {
  size_t num_buckets = (size_t)1 << params->hasher.bucket_bits;
  size_t slots_per_bucket = (size_t)1 << params->hasher.block_bits;
  BROTLI_UNUSED(input_size);
  BROTLI_UNUSED(one_shot);
  return sizeof(HashLongestMatch) +
      num_buckets * (2 + 4 * slots_per_bucket);
}
/* Look at 4 bytes at &data[ix & mask].
Compute a hash from these, and store the value of ix at that position. */
static BROTLI_INLINE void FN(Store)(HashLongestMatch* self, const uint8_t *data,
static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,
const size_t mask, const size_t ix) {
const uint32_t key = FN(HashBytes)(&data[ix & mask]);
const size_t minor_ix = self->num_[key] & BLOCK_MASK;
self->buckets_[minor_ix + (key << BLOCK_BITS)] = (uint32_t)ix;
++self->num_[key];
HashLongestMatch* self = FN(Self)(handle);
uint16_t* num = FN(Num)(self);
const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_shift_);
const size_t minor_ix = num[key] & self->block_mask_;
const size_t offset =
minor_ix + (key << GetHasherCommon(handle)->params.block_bits);
FN(Buckets)(self)[offset] = (uint32_t)ix;
++num[key];
}
static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* self,
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
const uint8_t *data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
FN(Store)(self, data, mask, i);
FN(Store)(handle, data, mask, i);
}
}
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashLongestMatch* self,
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask) {
if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
/* Prepare the hashes for three last bytes of the last write.
These could not be calculated before, since they require knowledge
of both the previous and the current block. */
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 3);
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 2);
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 1);
}
}
/* Delegates to the shared helper with this hasher's configured number of
   last distances to check. */
static BROTLI_INLINE void FN(PrepareDistanceCache)(
    HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
  HasherCommon* common = GetHasherCommon(handle);
  PrepareDistanceCache(distance_cache,
                       common->params.num_last_distances_to_check);
}
/* Find a longest backward match of &data[cur_ix] up to the length of
max_length and stores the position cur_ix in the hash table.
REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
values; if this method is invoked repeatedly with the same distance
cache values, it is enough to invoke FN(PrepareDistanceCache) once.
Does not look for matches longer than max_length.
Does not look for matches further away than max_backward.
Writes the best match into |out|.
Returns true when match is found, otherwise false. */
static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(HashLongestMatch* self,
static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(HasherHandle handle,
const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
const size_t max_length, const size_t max_backward,
HasherSearchResult* BROTLI_RESTRICT out) {
HasherCommon* common = GetHasherCommon(handle);
HashLongestMatch* self = FN(Self)(handle);
uint16_t* num = FN(Num)(self);
uint32_t* buckets = FN(Buckets)(self);
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
BROTLI_BOOL is_match_found = BROTLI_FALSE;
/* Don't accept a short copy from far away. */
@ -157,10 +169,8 @@ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(HashLongestMatch* self,
out->len = 0;
out->len_x_code = 0;
/* Try last distance first. */
for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
const size_t idx = kDistanceCacheIndex[i];
const size_t backward =
(size_t)(distance_cache[idx] + kDistanceCacheOffset[i]);
for (i = 0; i < (size_t)common->params.num_last_distances_to_check; ++i) {
const size_t backward = (size_t)distance_cache[i];
size_t prev_ix = (size_t)(cur_ix - backward);
if (prev_ix >= cur_ix) {
continue;
@ -183,25 +193,30 @@ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(HashLongestMatch* self,
/* Comparing for >= 2 does not change the semantics, but just saves for
a few unnecessary binary logarithms in backward reference score,
since we are not interested in such short matches. */
score_t score = BackwardReferenceScoreUsingLastDistance(len, i);
score_t score = BackwardReferenceScoreUsingLastDistance(len);
if (best_score < score) {
best_score = score;
best_len = len;
out->len = best_len;
out->distance = backward;
out->score = best_score;
is_match_found = BROTLI_TRUE;
if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
if (best_score < score) {
best_score = score;
best_len = len;
out->len = best_len;
out->distance = backward;
out->score = best_score;
is_match_found = BROTLI_TRUE;
}
}
}
}
}
{
const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
uint32_t* BROTLI_RESTRICT bucket = &self->buckets_[key << BLOCK_BITS];
const uint32_t key =
FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
uint32_t* BROTLI_RESTRICT bucket =
&buckets[key << common->params.block_bits];
const size_t down =
(self->num_[key] > BLOCK_SIZE) ? (self->num_[key] - BLOCK_SIZE) : 0u;
for (i = self->num_[key]; i > down;) {
size_t prev_ix = bucket[--i & BLOCK_MASK];
(num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u;
for (i = num[key]; i > down;) {
size_t prev_ix = bucket[--i & self->block_mask_];
const size_t backward = cur_ix - prev_ix;
if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
break;
@ -232,19 +247,15 @@ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(HashLongestMatch* self,
}
}
}
bucket[self->num_[key] & BLOCK_MASK] = (uint32_t)cur_ix;
++self->num_[key];
bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;
++num[key];
}
if (!is_match_found) {
is_match_found = SearchInStaticDictionary(&self->dict_search_stats_,
&data[cur_ix_masked], max_length, max_backward, out, BROTLI_FALSE);
is_match_found = SearchInStaticDictionary(dictionary, dictionary_hash,
handle, &data[cur_ix_masked], max_length, max_backward, out,
BROTLI_FALSE);
}
return is_match_found;
}
#undef HASH_MAP_SIZE
#undef BLOCK_MASK
#undef BLOCK_SIZE
#undef BUCKET_SIZE
#undef HashLongestMatch

View File

@ -21,7 +21,7 @@ static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
/* HashBytes is the function that chooses the bucket to place
the address in. The HashLongestMatch and HashLongestMatchQuickly
classes have separate, different implementations of hashing. */
static uint32_t FN(HashBytes)(const uint8_t *data) {
static uint32_t FN(HashBytes)(const uint8_t* data) {
const uint64_t h = ((BROTLI_UNALIGNED_LOAD64(data) << (64 - 8 * HASH_LEN)) *
kHashMul64);
/* The higher bits contain more mixture from the multiplication,
@ -36,97 +36,86 @@ static uint32_t FN(HashBytes)(const uint8_t *data) {
given index, BUCKET_SWEEP buckets are used to store values of a key. */
/* Defect fixed: the removed fields `is_dirty_` and `dict_search_stats_`
   (their lifecycle moved to Prepare / the shared dictionary search) were
   left fused into the struct; only the bucket table remains. */
typedef struct HashLongestMatchQuickly {
  /* Most-recent position for each key; BUCKET_SWEEP extra slots allow a key
     to spill into adjacent buckets. */
  uint32_t buckets_[BUCKET_SIZE + BUCKET_SWEEP];
} HashLongestMatchQuickly;
static void FN(Initialize)(HashLongestMatchQuickly* self) {
BROTLI_UNUSED(self);
static BROTLI_INLINE HashLongestMatchQuickly* FN(Self)(HasherHandle handle) {
return (HashLongestMatchQuickly*)&(GetHasherCommon(handle)[1]);
}
static void FN(Cleanup)(MemoryManager* m, HashLongestMatchQuickly* self) {
BROTLI_UNUSED(m);
BROTLI_UNUSED(self);
static void FN(Initialize)(
HasherHandle handle, const BrotliEncoderParams* params) {
BROTLI_UNUSED(handle);
BROTLI_UNUSED(params);
}
static void FN(Reset)(HashLongestMatchQuickly* self) {
self->is_dirty_ = BROTLI_TRUE;
DictionarySearchStaticticsReset(&self->dict_search_stats_);
}
static void FN(InitEmpty)(HashLongestMatchQuickly* self) {
if (self->is_dirty_) {
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* data) {
HashLongestMatchQuickly* self = FN(Self)(handle);
/* Partial preparation is 100 times slower (per socket). */
size_t partial_prepare_threshold = HASH_MAP_SIZE >> 7;
if (one_shot && input_size <= partial_prepare_threshold) {
size_t i;
for (i = 0; i < input_size; ++i) {
const uint32_t key = FN(HashBytes)(&data[i]);
memset(&self->buckets_[key], 0, BUCKET_SWEEP * sizeof(self->buckets_[0]));
}
} else {
/* It is not strictly necessary to fill this buffer here, but
not filling will make the results of the compression stochastic
(but correct). This is because random data would cause the
system to find accidentally good backward references here and there. */
memset(&self->buckets_[0], 0, sizeof(self->buckets_));
self->is_dirty_ = BROTLI_FALSE;
}
}
static void FN(InitForData)(HashLongestMatchQuickly* self, const uint8_t* data,
size_t num) {
size_t i;
for (i = 0; i < num; ++i) {
const uint32_t key = FN(HashBytes)(&data[i]);
memset(&self->buckets_[key], 0, BUCKET_SWEEP * sizeof(self->buckets_[0]));
}
if (num != 0) {
self->is_dirty_ = BROTLI_FALSE;
}
}
static void FN(Init)(
MemoryManager* m, HashLongestMatchQuickly* self, const uint8_t* data,
const BrotliEncoderParams* params, size_t position, size_t bytes,
BROTLI_BOOL is_last) {
/* Choose which initialization method is faster.
Init() is about 100 times faster than InitForData(). */
const size_t kMaxBytesForPartialHashInit = HASH_MAP_SIZE >> 7;
BROTLI_UNUSED(m);
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
size_t input_size) {
BROTLI_UNUSED(params);
if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) {
FN(InitForData)(self, data, bytes);
} else {
FN(InitEmpty)(self);
}
BROTLI_UNUSED(one_shot);
BROTLI_UNUSED(input_size);
return sizeof(HashLongestMatchQuickly);
}
/* Look at 5 bytes at &data[ix & mask].
Compute a hash from these, and store the value somewhere within
[ix .. ix+3]. */
static BROTLI_INLINE void FN(Store)(HashLongestMatchQuickly* self,
static BROTLI_INLINE void FN(Store)(HasherHandle handle,
const uint8_t *data, const size_t mask, const size_t ix) {
const uint32_t key = FN(HashBytes)(&data[ix & mask]);
/* Wiggle the value with the bucket sweep range. */
const uint32_t off = (ix >> 3) % BUCKET_SWEEP;
self->buckets_[key + off] = (uint32_t)ix;
FN(Self)(handle)->buckets_[key + off] = (uint32_t)ix;
}
static BROTLI_INLINE void FN(StoreRange)(HashLongestMatchQuickly* self,
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
const uint8_t *data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
FN(Store)(self, data, mask, i);
FN(Store)(handle, data, mask, i);
}
}
/* Defect fixed: duplicate old/new parameter lines and duplicated old
   FN(Store)(self, ...) calls fused with the new handle-based calls;
   only the new version is kept. */
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
    HasherHandle handle, size_t num_bytes, size_t position,
    const uint8_t* ringbuffer, size_t ringbuffer_mask) {
  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
    /* Prepare the hashes for three last bytes of the last write.
       These could not be calculated before, since they require knowledge
       of both the previous and the current block. */
    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 3);
    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 2);
    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 1);
  }
}
/* This hasher uses the distance cache as-is; nothing to precompute. */
static BROTLI_INLINE void FN(PrepareDistanceCache)(
    HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
  BROTLI_UNUSED(distance_cache);
  BROTLI_UNUSED(handle);
}
/* Find a longest backward match of &data[cur_ix & ring_buffer_mask]
up to the length of max_length and stores the position cur_ix in the
hash table.
@ -136,10 +125,12 @@ static BROTLI_INLINE void FN(StitchToPreviousBlock)(
Writes the best match into |out|.
Returns true if match is found, otherwise false. */
static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
HashLongestMatchQuickly* self, const uint8_t* BROTLI_RESTRICT data,
HasherHandle handle, const BrotliDictionary* dictionary,
const uint16_t* dictionary_hash, const uint8_t* BROTLI_RESTRICT data,
const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
const size_t cur_ix, const size_t max_length, const size_t max_backward,
HasherSearchResult* BROTLI_RESTRICT out) {
HashLongestMatchQuickly* self = FN(Self)(handle);
const size_t best_len_in = out->len;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
@ -157,7 +148,7 @@ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
&data[cur_ix_masked],
max_length);
if (len >= 4) {
best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
best_score = BackwardReferenceScoreUsingLastDistance(len);
best_len = len;
out->len = len;
out->distance = cached_backward;
@ -227,8 +218,9 @@ static BROTLI_INLINE BROTLI_BOOL FN(FindLongestMatch)(
}
}
if (USE_DICTIONARY && !is_match_found) {
is_match_found = SearchInStaticDictionary(&self->dict_search_stats_,
&data[cur_ix_masked], max_length, max_backward, out, BROTLI_TRUE);
is_match_found = SearchInStaticDictionary(dictionary, dictionary_hash,
handle, &data[cur_ix_masked], max_length, max_backward, out,
BROTLI_TRUE);
}
self->buckets_[key + ((cur_ix >> 3) % BUCKET_SWEEP)] = (uint32_t)cur_ix;
return is_match_found;

View File

@ -38,61 +38,58 @@ typedef struct HashToBinaryTree {
tree of sequences that share this hash bucket. */
uint32_t buckets_[BUCKET_SIZE];
/* The union of the binary trees of each hash bucket. The root of the tree
corresponding to a hash is a sequence starting at buckets_[hash] and
the left and right children of a sequence starting at pos are
forest_[2 * pos] and forest_[2 * pos + 1]. */
uint32_t* forest_;
/* A position used to mark a non-existent sequence, i.e. a tree is empty if
its root is at invalid_pos_ and a node is a leaf if both its children
are at invalid_pos_. */
uint32_t invalid_pos_;
size_t forest_size_;
BROTLI_BOOL is_dirty_;
/* --- Dynamic size members --- */
/* The union of the binary trees of each hash bucket. The root of the tree
corresponding to a hash is a sequence starting at buckets_[hash] and
the left and right children of a sequence starting at pos are
forest_[2 * pos] and forest_[2 * pos + 1]. */
/* uint32_t forest[2 * num_nodes] */
} HashToBinaryTree;
static void FN(Reset)(HashToBinaryTree* self) {
self->is_dirty_ = BROTLI_TRUE;
static BROTLI_INLINE HashToBinaryTree* FN(Self)(HasherHandle handle) {
return (HashToBinaryTree*)&(GetHasherCommon(handle)[1]);
}
static void FN(Initialize)(HashToBinaryTree* self) {
self->forest_ = NULL;
self->forest_size_ = 0;
FN(Reset)(self);
static BROTLI_INLINE uint32_t* FN(Forest)(HashToBinaryTree* self) {
return (uint32_t*)(&self[1]);
}
static void FN(Cleanup)(MemoryManager* m, HashToBinaryTree* self) {
BROTLI_FREE(m, self->forest_);
static void FN(Initialize)(
HasherHandle handle, const BrotliEncoderParams* params) {
HashToBinaryTree* self = FN(Self)(handle);
self->window_mask_ = (1u << params->lgwin) - 1u;
self->invalid_pos_ = (uint32_t)(0 - self->window_mask_);
}
static void FN(Init)(
MemoryManager* m, HashToBinaryTree* self, const uint8_t* data,
const BrotliEncoderParams* params, size_t position, size_t bytes,
BROTLI_BOOL is_last) {
if (self->is_dirty_) {
uint32_t invalid_pos;
size_t num_nodes;
uint32_t i;
BROTLI_UNUSED(data);
self->window_mask_ = (1u << params->lgwin) - 1u;
invalid_pos = (uint32_t)(0 - self->window_mask_);
self->invalid_pos_ = invalid_pos;
for (i = 0; i < BUCKET_SIZE; i++) {
self->buckets_[i] = invalid_pos;
}
num_nodes = (position == 0 && is_last) ? bytes : self->window_mask_ + 1;
if (num_nodes > self->forest_size_) {
BROTLI_FREE(m, self->forest_);
self->forest_ = BROTLI_ALLOC(m, uint32_t, 2 * num_nodes);
if (BROTLI_IS_OOM(m)) return;
self->forest_size_ = num_nodes;
}
self->is_dirty_ = BROTLI_FALSE;
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* data) {
HashToBinaryTree* self = FN(Self)(handle);
uint32_t invalid_pos = self->invalid_pos_;
uint32_t i;
BROTLI_UNUSED(data);
BROTLI_UNUSED(one_shot);
BROTLI_UNUSED(input_size);
for (i = 0; i < BUCKET_SIZE; i++) {
self->buckets_[i] = invalid_pos;
}
}
/* Allocation size: fixed header plus two uint32_t tree links per node.
   One-shot inputs smaller than the window need only one node per byte. */
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
    size_t input_size) {
  size_t window_nodes = (size_t)1 << params->lgwin;
  size_t node_count =
      (one_shot && input_size < window_nodes) ? input_size : window_nodes;
  return sizeof(HashToBinaryTree) + 2 * sizeof(uint32_t) * node_count;
}
static BROTLI_INLINE size_t FN(LeftChildIndex)(HashToBinaryTree* self,
const size_t pos) {
return 2 * (pos & self->window_mask_);
@ -124,6 +121,7 @@ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
const BROTLI_BOOL should_reroot_tree =
TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH);
const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
uint32_t* forest = FN(Forest)(self);
size_t prev_ix = self->buckets_[key];
/* The forest index of the rightmost node of the left subtree of the new
root, updated as we traverse and re-root the tree of the hash bucket. */
@ -146,8 +144,8 @@ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
const size_t prev_ix_masked = prev_ix & ring_buffer_mask;
if (backward == 0 || backward > max_backward || depth_remaining == 0) {
if (should_reroot_tree) {
self->forest_[node_left] = self->invalid_pos_;
self->forest_[node_right] = self->invalid_pos_;
forest[node_left] = self->invalid_pos_;
forest[node_right] = self->invalid_pos_;
}
break;
}
@ -166,27 +164,25 @@ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
}
if (len >= max_comp_len) {
if (should_reroot_tree) {
self->forest_[node_left] =
self->forest_[FN(LeftChildIndex)(self, prev_ix)];
self->forest_[node_right] =
self->forest_[FN(RightChildIndex)(self, prev_ix)];
forest[node_left] = forest[FN(LeftChildIndex)(self, prev_ix)];
forest[node_right] = forest[FN(RightChildIndex)(self, prev_ix)];
}
break;
}
if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) {
best_len_left = len;
if (should_reroot_tree) {
self->forest_[node_left] = (uint32_t)prev_ix;
forest[node_left] = (uint32_t)prev_ix;
}
node_left = FN(RightChildIndex)(self, prev_ix);
prev_ix = self->forest_[node_left];
prev_ix = forest[node_left];
} else {
best_len_right = len;
if (should_reroot_tree) {
self->forest_[node_right] = (uint32_t)prev_ix;
forest[node_right] = (uint32_t)prev_ix;
}
node_right = FN(LeftChildIndex)(self, prev_ix);
prev_ix = self->forest_[node_right];
prev_ix = forest[node_right];
}
}
}
@ -200,8 +196,9 @@ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
matches in matches[0] to matches[*num_matches - 1]. The matches will be
sorted by strictly increasing length and (non-strictly) increasing
distance. */
static BROTLI_INLINE size_t FN(FindAllMatches)(HashToBinaryTree* self,
const uint8_t* data, const size_t ring_buffer_mask, const size_t cur_ix,
static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle,
const BrotliDictionary* dicionary, const uint8_t* data,
const size_t ring_buffer_mask, const size_t cur_ix,
const size_t max_length, const size_t max_backward,
const BrotliEncoderParams* params, BackwardMatch* matches) {
BackwardMatch* const orig_matches = matches;
@ -235,16 +232,16 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HashToBinaryTree* self,
}
}
if (best_len < max_length) {
matches = FN(StoreAndFindMatches)(self, data, cur_ix, ring_buffer_mask,
max_length, max_backward, &best_len, matches);
matches = FN(StoreAndFindMatches)(FN(Self)(handle), data, cur_ix,
ring_buffer_mask, max_length, max_backward, &best_len, matches);
}
for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {
dict_matches[i] = kInvalidMatch;
}
{
size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1);
if (BrotliFindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen,
max_length, &dict_matches[0])) {
if (BrotliFindAllStaticDictionaryMatches(dicionary,
&data[cur_ix_masked], minlen, max_length, &dict_matches[0])) {
size_t maxlen = BROTLI_MIN(
size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length);
size_t l;
@ -263,15 +260,16 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HashToBinaryTree* self,
/* Stores the hash of the next 4 bytes and re-roots the binary tree at the
current sequence, without returning any matches.
REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */
static BROTLI_INLINE void FN(Store)(HashToBinaryTree* self, const uint8_t *data,
static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data,
const size_t mask, const size_t ix) {
HashToBinaryTree* self = FN(Self)(handle);
/* Maximum distance is window size - 16, see section 9.1. of the spec. */
const size_t max_backward = self->window_mask_ - BROTLI_WINDOW_GAP + 1;
FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,
max_backward, NULL, NULL);
}
static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* self,
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
const uint8_t *data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
size_t i = ix_start;
@ -281,17 +279,18 @@ static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* self,
}
if (ix_start + 512 <= i) {
for (; j < i; j += 8) {
FN(Store)(self, data, mask, j);
FN(Store)(handle, data, mask, j);
}
}
for (; i < ix_end; ++i) {
FN(Store)(self, data, mask, i);
FN(Store)(handle, data, mask, i);
}
}
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashToBinaryTree* self,
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask) {
HashToBinaryTree* self = FN(Self)(handle);
if (num_bytes >= FN(HashTypeLength)() - 1 &&
position >= MAX_TREE_COMP_LENGTH) {
/* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.

View File

@ -31,6 +31,14 @@
so we buffer at most this much literals and commands. */
#define MAX_NUM_DELAYED_SYMBOLS 0x2fff
typedef struct BrotliHasherParams {
  /* Hasher implementation selector; ChooseHasher assigns e.g. 5, 6, 10,
     40..42, 54 depending on quality/window/size hint. */
  int type;
  /* log2 of the number of hash buckets (see hash table geometry setup). */
  int bucket_bits;
  /* log2 of entries kept per bucket (ring buffer of recent positions). */
  int block_bits;
  /* NOTE(review): only set for hasher type 6 (value 5) — presumably the
     number of input bytes hashed per key; confirm against that hasher. */
  int hash_len;
  /* How many recent distances FindLongestMatch probes first. */
  int num_last_distances_to_check;
} BrotliHasherParams;
/* Encoding parameters */
typedef struct BrotliEncoderParams {
BrotliEncoderMode mode;
@ -39,6 +47,7 @@ typedef struct BrotliEncoderParams {
int lgblock;
size_t size_hint;
BROTLI_BOOL disable_literal_context_modeling;
BrotliHasherParams hasher;
} BrotliEncoderParams;
/* Returns hash-table size for quality levels 0 and 1. */
@ -122,17 +131,30 @@ static BROTLI_INLINE size_t LiteralSpreeLengthForSparseSearch(
return params->quality < 9 ? 64 : 512;
}
/* Picks a hasher implementation and fills |hparams| from quality, window
   size and input size hint.
   Defect fixed: the old int-returning variant was left fused with the new
   void variant (duplicate signature, stray `return 10;`, `return 54;` and
   `return params->quality;` lines); only the new version is kept.
   Note: hash_len is only configured for type 6. */
static BROTLI_INLINE void ChooseHasher(const BrotliEncoderParams* params,
    BrotliHasherParams* hparams) {
  if (params->quality > 9) {
    hparams->type = 10;
  } else if (params->quality == 4 && params->size_hint >= (1 << 20)) {
    hparams->type = 54;
  } else if (params->quality < 5) {
    hparams->type = params->quality;
  } else if (params->lgwin <= 16) {
    hparams->type = params->quality < 7 ? 40 : params->quality < 9 ? 41 : 42;
  } else if (params->size_hint >= (1 << 20) && params->lgwin >= 19) {
    hparams->type = 6;
    hparams->block_bits = params->quality - 1;
    hparams->bucket_bits = 15;
    hparams->hash_len = 5;
    hparams->num_last_distances_to_check =
        params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
  } else {
    hparams->type = 5;
    hparams->block_bits = params->quality - 1;
    hparams->bucket_bits = params->quality < 7 ? 14 : 15;
    hparams->num_last_distances_to_check =
        params->quality < 7 ? 4 : params->quality < 9 ? 10 : 16;
  }
}
#endif /* BROTLI_ENC_QUALITY_H_ */

View File

@ -33,23 +33,24 @@ static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
matches[len] = BROTLI_MIN(uint32_t, matches[len], match);
}
/* Length of the common prefix of |data| and dictionary word number |id| of
   length |len|, capped at |maxlen|.
   Defect fixed: the old signature and the old body lines accessing the
   removed globals kBrotliDictionaryOffsetsByLength/kBrotliDictionary were
   fused with the new BrotliDictionary-based ones; only the new version is
   kept. */
static BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary,
                                            const uint8_t* data,
                                            size_t id,
                                            size_t len,
                                            size_t maxlen) {
  const size_t offset = dictionary->offsets_by_length[len] + len * id;
  return FindMatchLengthWithLimit(&dictionary->data[offset], data,
                                  BROTLI_MIN(size_t, len, maxlen));
}
static BROTLI_INLINE BROTLI_BOOL IsMatch(
static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
DictWord w, const uint8_t* data, size_t max_length) {
if (w.len > max_length) {
return BROTLI_FALSE;
} else {
const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] +
const size_t offset = dictionary->offsets_by_length[w.len] +
(size_t)w.len * (size_t)w.idx;
const uint8_t* dict = &kBrotliDictionary[offset];
const uint8_t* dict = &dictionary->data[offset];
if (w.transform == 0) {
/* Match against base dictionary word. */
return
@ -78,8 +79,8 @@ static BROTLI_INLINE BROTLI_BOOL IsMatch(
}
BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
const uint8_t* data, size_t min_length, size_t max_length,
uint32_t* matches) {
const BrotliDictionary* dictionary, const uint8_t* data, size_t min_length,
size_t max_length, uint32_t* matches) {
BROTLI_BOOL has_found_match = BROTLI_FALSE;
{
size_t offset = kStaticDictionaryBuckets[Hash(data)];
@ -87,12 +88,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
while (!end) {
DictWord w = kStaticDictionaryWords[offset++];
const size_t l = w.len & 0x1F;
const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0) {
const size_t matchlen = DictMatchLength(data, id, l, max_length);
const size_t matchlen =
DictMatchLength(dictionary, data, id, l, max_length);
const uint8_t* s;
size_t minlen;
size_t maxlen;
@ -276,7 +278,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
const BROTLI_BOOL is_all_caps =
TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
const uint8_t* s;
if (!IsMatch(w, data, max_length)) {
if (!IsMatch(dictionary, w, data, max_length)) {
continue;
}
/* Transform "" + kUppercase{First,All} + "" */
@ -326,13 +328,13 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
while (!end) {
DictWord w = kStaticDictionaryWords[offset++];
const size_t l = w.len & 0x1F;
const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0) {
const uint8_t* s;
if (!IsMatch(w, &data[1], max_length - 1)) {
if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
continue;
}
/* Transforms " " + kIdentity + "" and "." + kIdentity + "" */
@ -373,7 +375,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
const BROTLI_BOOL is_all_caps =
TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
const uint8_t* s;
if (!IsMatch(w, &data[1], max_length - 1)) {
if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
continue;
}
/* Transforms " " + kUppercase{First,All} + "" */
@ -418,11 +420,12 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
while (!end) {
DictWord w = kStaticDictionaryWords[offset++];
const size_t l = w.len & 0x1F;
const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
if (w.transform == 0 &&
IsMatch(dictionary, w, &data[2], max_length - 2)) {
if (data[0] == 0xc2) {
AddMatch(id + 102 * n, l + 2, l, matches);
has_found_match = BROTLI_TRUE;
@ -446,11 +449,12 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
while (!end) {
DictWord w = kStaticDictionaryWords[offset++];
const size_t l = w.len & 0x1F;
const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
if (w.transform == 0 &&
IsMatch(dictionary, w, &data[5], max_length - 5)) {
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
has_found_match = BROTLI_TRUE;
if (l + 5 < max_length) {

View File

@ -9,6 +9,7 @@
#ifndef BROTLI_ENC_STATIC_DICT_H_
#define BROTLI_ENC_STATIC_DICT_H_
#include "../common/dictionary.h"
#include <brotli/types.h>
#include "./port.h"
@ -27,6 +28,7 @@ static const uint32_t kInvalidMatch = 0xfffffff;
matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
all elements are initialized to kInvalidMatch */
BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
const BrotliDictionary* dictionary,
const uint8_t* data, size_t min_length, size_t max_length,
uint32_t* matches);

View File

@ -128,6 +128,10 @@ typedef enum {
/**
* Creates an instance of ::BrotliDecoderState and initializes it.
*
* The instance can be used once for decoding and should then be destroyed with
* ::BrotliDecoderDestroyInstance, it cannot be reused for a new decoding
* session.
*
* @p alloc_func and @p free_func @b MUST be both zero or both non-zero. In the
* case they are both zero, default memory allocators are used. @p opaque is
* passed to @p alloc_func and @p free_func when they are called.

View File

@ -50,7 +50,7 @@ typedef enum BrotliEncoderMode {
* properties of the input.
*/
BROTLI_MODE_GENERIC = 0,
/** Compression mode for UTF-8 formated text input. */
/** Compression mode for UTF-8 formatted text input. */
BROTLI_MODE_TEXT = 1,
/** Compression mode used in WOFF 2.0. */
BROTLI_MODE_FONT = 2

View File

@ -8,25 +8,20 @@ package org.brotli.dec;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.IntBuffer;
/**
* Bit reading helpers.
*/
class BitReader {
final class BitReader {
/**
* Input byte buffer, consist of a ring-buffer and a "slack" region where bytes from the start of
* the ring-buffer are copied.
*/
private static final int READ_SIZE = 4096;
private static final int BUF_SIZE = READ_SIZE + 64;
private static final int BUF_SIZE = IntReader.CAPACITY << 2;
private static final int READ_SIZE = BUF_SIZE - 64;
private final ByteBuffer byteBuffer =
ByteBuffer.allocateDirect(BUF_SIZE).order(ByteOrder.LITTLE_ENDIAN);
private final IntBuffer intBuffer = byteBuffer.asIntBuffer();
private final IntReader intReader = new IntReader();
private final byte[] shadowBuffer = new byte[BUF_SIZE];
private InputStream input;
@ -73,7 +68,7 @@ class BitReader {
}
throw new BrotliRuntimeException("No more input");
}
int readOffset = br.intBuffer.position() << 2;
int readOffset = IntReader.position(br.intReader) << 2;
int bytesRead = READ_SIZE - readOffset;
System.arraycopy(br.shadowBuffer, readOffset, br.shadowBuffer, 0, bytesRead);
try {
@ -91,9 +86,7 @@ class BitReader {
} catch (IOException e) {
throw new BrotliRuntimeException("Failed to read input", e);
}
br.byteBuffer.clear();
br.byteBuffer.put(br.shadowBuffer, 0, bytesRead & 0xFFFC);
br.intBuffer.rewind();
IntReader.reload(br.intReader, br.shadowBuffer, 0, bytesRead >> 2);
br.available = bytesRead >> 2;
}
@ -115,7 +108,7 @@ class BitReader {
*/
static void fillBitWindow(BitReader br) {
if (br.bitOffset >= 32) {
br.accumulator = ((long) br.intBuffer.get() << 32) | (br.accumulator >>> 32);
br.accumulator = ((long) IntReader.read(br.intReader) << 32) | (br.accumulator >>> 32);
br.bitOffset -= 32;
br.available--;
}
@ -146,7 +139,7 @@ class BitReader {
}
br.input = input;
br.accumulator = 0;
br.intBuffer.position(READ_SIZE >> 2);
IntReader.setPosition(br.intReader, READ_SIZE >> 2);
br.bitOffset = 64;
br.available = 0;
br.endOfStreamReached = false;

View File

@ -0,0 +1,48 @@
/* Copyright 2017 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
package org.brotli.dec;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.IntBuffer;
/**
 * Byte-to-int conversion magic.
 *
 * <p>Wraps a little-endian {@link IntBuffer} view over a direct
 * {@link ByteBuffer}, so the decoder can consume input 32 bits at a time.
 */
final class IntReader {
  /** Capacity in 32-bit words; the backing byte buffer holds {@code CAPACITY << 2} bytes. */
  static final int CAPACITY = 1024 + 16;

  private final ByteBuffer byteBuffer =
      ByteBuffer.allocateDirect(CAPACITY << 2).order(ByteOrder.LITTLE_ENDIAN);
  private final IntBuffer intBuffer = byteBuffer.asIntBuffer();

  /**
   * Reinitialize reader with new data chunk.
   *
   * <p>NB: {@code intLen} is a count of 32-bit words, i.e. byteSize == 4 * intLen.
   * NB: {@code intLen} should be less or equal to {@link #CAPACITY}.
   *
   * @param ir reader to reload
   * @param data source byte array
   * @param offset index of the first byte of {@code data} to copy
   * @param intLen number of 32-bit integers to make readable
   */
  static void reload(IntReader ir, byte[] data, int offset, int intLen) {
    ir.byteBuffer.clear();
    ir.byteBuffer.put(data, offset, intLen << 2);
    ir.intBuffer.rewind();
  }

  /** Returns the current read position, in 32-bit words. */
  static int position(IntReader ir) {
    return ir.intBuffer.position();
  }

  /** Sets the read position, in 32-bit words. */
  static void setPosition(IntReader ir, int position) {
    ir.intBuffer.position(position);
  }

  /** Reads the next little-endian 32-bit integer; advances position by 1. */
  static int read(IntReader ir) {
    return ir.intBuffer.get();
  }
}

View File

@ -221,6 +221,7 @@ EXT_MODULES = [
'enc/find_match_length.h',
'enc/hash.h',
'enc/hash_to_binary_tree_inc.h',
'enc/hash_longest_match64_inc.h',
'enc/hash_longest_match_inc.h',
'enc/hash_longest_match_quickly_inc.h',
'enc/histogram.h',