Update:

 * Bazel: fix MSVC configuration
 * C: common: extended documentation and helpers around distance codes
 * C: common: enable BROTLI_DCHECK in "debug" builds
 * C: common: fix implicit trailing zero in `kPrefixSuffix`
 * C: dec: fix possible bit reader discharge for "large-window" mode
 * C: dec: simplify distance decoding via lookup table
 * C: dec: reuse decoder state members memory via union with lookup table
 * C: dec: add decoder state diagram
 * C: enc: clarify access to static dictionary
 * C: enc: improve static dictionary hash
 * C: enc: add "stream offset" parameter for parallel encoding
 * C: enc: reorganize hasher; now Q2-Q3 require exactly 256KiB
           to avoid global TCMalloc lock
 * C: enc: fix rare access to uninitialized data in ring-buffer
 * C: enc: reorganize logging / checks in `write_bits.h`
 * Java: dec: add "large-window" support
 * Java: dec: improve speed
 * Java: dec: debug and 32-bit mode are now activated via system properties
 * Java: dec: demystify some state variables (use better names)
 * Dictionary generator: add single input mode
 * Java: dec: modernize tests
 * Bazel: js: pick working commit for closure rules
This commit is contained in:
Eugene Kliuchnikov 2019-04-12 13:57:42 +02:00 committed by GitHub
parent 9cd01c0437
commit 4b2b2d4f83
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
62 changed files with 4874 additions and 2697 deletions

8
BUILD
View File

@ -39,11 +39,9 @@ config_setting(
visibility = ["//visibility:public"],
)
config_setting(
name = "msvc",
values = {"compiler": "msvc-cl"},
visibility = ["//visibility:public"],
)
load(":compiler_config_setting.bzl", "create_msvc_config")
create_msvc_config()
STRICT_C_OPTIONS = select({
":msvc": [],

View File

@ -32,7 +32,7 @@ The basic commands to build, test and install brotli are:
$ make
$ make test
$ make install
By default, debug binaries are built. To generate a "release" `Makefile`, specify the `--disable-debug` option to `configure-cmake`.
#### Bazel

View File

@ -4,9 +4,17 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/**
* @file
* Common constants used in decoder and encoder API.
*/
#ifndef BROTLI_COMMON_CONSTANTS_H_
#define BROTLI_COMMON_CONSTANTS_H_
#include "./platform.h"
#include <brotli/types.h>
/* Specification: 7.3. Encoding of the context map */
#define BROTLI_CONTEXT_MAP_MAX_RLE 16
@ -29,12 +37,31 @@
#define BROTLI_INITIAL_REPEATED_CODE_LENGTH 8
/* "Large Window Brotli" */
/**
* The theoretical maximum number of distance bits specified for large window
* brotli, for 64-bit encoders and decoders. Even when in practice 32-bit
* encoders and decoders only support up to 30 max distance bits, the value is
* set to 62 because it affects the large window brotli file format.
* Specifically, it affects the encoding of simple huffman tree for distances,
* see Specification RFC 7932 chapter 3.4.
*/
#define BROTLI_LARGE_MAX_DISTANCE_BITS 62U
#define BROTLI_LARGE_MIN_WBITS 10
/**
* The maximum supported large brotli window bits by the encoder and decoder.
* Large window brotli allows up to 62 bits, however the current encoder and
* decoder, designed for 32-bit integers, only support up to 30 bits maximum.
*/
#define BROTLI_LARGE_MAX_WBITS 30
/* Specification: 4. Encoding of distances */
#define BROTLI_NUM_DISTANCE_SHORT_CODES 16
/**
* Maximal number of "postfix" bits.
*
* Number of "postfix" bits is stored as 2 bits in meta-block header.
*/
#define BROTLI_MAX_NPOSTFIX 3
#define BROTLI_MAX_NDIRECT 120
#define BROTLI_MAX_DISTANCE_BITS 24U
@ -45,7 +72,16 @@
#define BROTLI_NUM_DISTANCE_SYMBOLS \
BROTLI_DISTANCE_ALPHABET_SIZE( \
BROTLI_MAX_NDIRECT, BROTLI_MAX_NPOSTFIX, BROTLI_LARGE_MAX_DISTANCE_BITS)
/* ((1 << 26) - 4) is the maximal distance that can be expressed in RFC 7932
brotli stream using NPOSTFIX = 0 and NDIRECT = 0. With other NPOSTFIX and
NDIRECT values distances up to ((1 << 29) + 88) could be expressed. */
#define BROTLI_MAX_DISTANCE 0x3FFFFFC
/* ((1 << 31) - 4) is the safe distance limit. Using this number as a limit
allows safe distance calculation without overflows, given the distance
alphabet size is limited to corresponding size
(see kLargeWindowDistanceCodeLimits). */
#define BROTLI_MAX_ALLOWED_DISTANCE 0x7FFFFFFC
/* 7.1. Context modes and context ID lookup for literals */
@ -61,4 +97,88 @@
#define BROTLI_WINDOW_GAP 16
#define BROTLI_MAX_BACKWARD_LIMIT(W) (((size_t)1 << (W)) - BROTLI_WINDOW_GAP)
/* Pair of limits returned by BrotliCalculateDistanceCodeLimit. */
typedef struct BrotliDistanceCodeLimit {
/* Size of the distance alphabet. */
uint32_t max_alphabet_size;
/* Maximal distance representable by an alphabet of that size. */
uint32_t max_distance;
} BrotliDistanceCodeLimit;
/* This function calculates maximal size of distance alphabet, such that the
distances greater than the given values can not be represented.
These limits are designed to support fast and safe 32-bit decoders.
"32-bit" means that signed integer values up to ((1 << 31) - 1) could be
safely expressed.
Brotli distance alphabet symbols do not represent consecutive distance
ranges. Each distance alphabet symbol (excluding direct distances and short
codes) represents an interleaved (for NPOSTFIX > 0) range of distances.
A "group" of consecutive (1 << NPOSTFIX) symbols represents a non-interleaved
range. Two consecutive groups require the same amount of "extra bits".
It is important that distance alphabet represents complete "groups".
To avoid complex logic on encoder side about interleaved ranges
it was decided to restrict both sides to complete distance code "groups".
*/
BROTLI_UNUSED_FUNCTION BrotliDistanceCodeLimit BrotliCalculateDistanceCodeLimit(
    uint32_t max_distance, uint32_t npostfix, uint32_t ndirect) {
  BrotliDistanceCodeLimit limit;
  uint32_t postfix_mask;
  uint32_t scaled;
  uint32_t probe;
  uint32_t nbits = 0;
  uint32_t group;
  uint32_t range_start;
  uint32_t range_extra;

  /* Not every file that includes "constants.h" calls this function; the
     self-reference keeps the compiler from warning about that. */
  BROTLI_UNUSED(&BrotliCalculateDistanceCodeLimit);

  /* Degenerate case, kept only for completeness: the whole permitted range
     lies inside the "directly" encoded region. */
  if (max_distance <= ndirect) {
    limit.max_distance = max_distance;
    limit.max_alphabet_size = max_distance + BROTLI_NUM_DISTANCE_SHORT_CODES;
    return limit;
  }

  /* Postfix of the last dcode in a group. */
  postfix_mask = (1u << npostfix) - 1;

  /* Take the first prohibited value (max_distance + 1), subtract the
     "directly" encoded region, then remove the postfix bits and add the
     "head-start" of 4. */
  scaled = (((max_distance + 1) - ndirect - 1) >> npostfix) + 4;

  /* Hand-rolled log2floor of (scaled / 2), to avoid extra dependencies. */
  for (probe = scaled / 2; probe != 0; probe >>= 1) {
    ++nbits;
  }
  /* One bit is covered by the subrange ("half") addressing. */
  --nbits;

  /* "Group" part of the dcode that covers the prohibited distance value:
     bit count in the upper bits, subrange selector in the lowest bit. */
  group = ((nbits - 1) << 1) | ((scaled >> nbits) & 1);
  if (group == 0) {
    /* Present for correctness; does not occur for limit > 128. */
    limit.max_distance = ndirect;
    limit.max_alphabet_size = ndirect + BROTLI_NUM_DISTANCE_SHORT_CODES;
    return limit;
  }

  /* Step back to the last permitted "group" and recompute its bit count. */
  --group;
  nbits = (group >> 1) + 1;

  /* The last distance in the subrange has all extra bits set. */
  range_extra = (1u << nbits) - 1;
  /* Region start (nbits >= 1 here), then move to the subregion selected by
     the lowest bit of "group". */
  range_start = ((1u << (nbits + 1)) - 4) + ((group & 1) << nbits);

  /* Alphabet size: last dcode of the group, plus direct and short codes. */
  limit.max_alphabet_size = ((group << npostfix) | postfix_mask) + ndirect +
      BROTLI_NUM_DISTANCE_SHORT_CODES + 1;
  /* Maximal distance representable by that alphabet. */
  limit.max_distance =
      ((range_start + range_extra) << npostfix) + postfix_mask + ndirect + 1;
  return limit;
}
#endif /* BROTLI_COMMON_CONSTANTS_H_ */

View File

@ -466,20 +466,20 @@ static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64LE(void* p, uint64_t v) {
#endif
#if defined(BROTLI_ENABLE_LOG)
#define BROTLI_DCHECK(x) assert(x)
#define BROTLI_LOG(x) printf x
#else
#define BROTLI_DCHECK(x)
#define BROTLI_LOG(x)
#endif
#if defined(BROTLI_DEBUG) || defined(BROTLI_ENABLE_LOG)
#define BROTLI_DCHECK(x) assert(x)
/* Writes "<f>:<l> (<fn>)" followed by a newline to stderr and flushes the
   stream. Invoked through the BROTLI_DUMP() macro, which passes __FILE__,
   __LINE__ and __FUNCTION__. */
static BROTLI_INLINE void BrotliDump(const char* f, int l, const char* fn) {
fprintf(stderr, "%s:%d (%s)\n", f, l, fn);
/* Flush right away so the trace line is not left sitting in a buffer. */
fflush(stderr);
}
#define BROTLI_DUMP() BrotliDump(__FILE__, __LINE__, __FUNCTION__)
#else
#define BROTLI_DCHECK(x)
#define BROTLI_DUMP() (void)(0)
#endif

View File

@ -24,8 +24,8 @@ static const char kPrefixSuffix[217] =
/* 8x _0 _ _3 _8 _C _E _ _1 _7 _F */
" not \3er \3al \4ful \4ive \5less \4es"
/* Ax _5 _9 _D _2 _7 _D */
"t \4ize \2\xc2\xa0\4ous \5 the \2e \0";
/* Cx _2 _7___ ___ _A _F _5 _8 */
"t \4ize \2\xc2\xa0\4ous \5 the \2e "; /* \0 - implicit trailing zero. */
/* Cx _2 _7___ ___ _A _F _5 _8 */
static const uint16_t kPrefixSuffixMap[50] = {
0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25,

View File

@ -43,6 +43,23 @@ BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br) {
return BROTLI_TRUE;
}
/* Reads |n_bits| bits (more than 24 and at most 32, as checked in debug
   builds) in two steps: the low 16 bits first, then the remaining
   |n_bits| - 16 bits. On success stores the combined value in |*val| and
   returns BROTLI_TRUE. If either step fails, the bit reader is rolled back to
   the state it had on entry and BROTLI_FALSE is returned. */
BROTLI_BOOL BrotliSafeReadBits32Slow(BrotliBitReader* const br,
    uint32_t n_bits, uint32_t* val) {
  BrotliBitReaderState checkpoint;
  uint32_t lower;
  uint32_t upper;
  BROTLI_DCHECK(n_bits <= 32);
  BROTLI_DCHECK(n_bits > 24);
  BrotliBitReaderSaveState(br, &checkpoint);
  if (BrotliSafeReadBits(br, 16, &lower) &&
      BrotliSafeReadBits(br, n_bits - 16, &upper)) {
    *val = lower | (upper << 16);
    return BROTLI_TRUE;
  }
  /* A partial read may have consumed bits; undo it. */
  BrotliBitReaderRestoreState(br, &checkpoint);
  return BROTLI_FALSE;
}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@ -65,6 +65,12 @@ BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br);
reading. */
BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br);
/* Fallback for BrotliSafeReadBits32. Extracted as noninlined method to unburden
the main code-path. Never called for RFC brotli streams, required only for
"large-window" mode and other extensions. */
BROTLI_INTERNAL BROTLI_NOINLINE BROTLI_BOOL BrotliSafeReadBits32Slow(
BrotliBitReader* const br, uint32_t n_bits, uint32_t* val);
static BROTLI_INLINE void BrotliBitReaderSaveState(
BrotliBitReader* const from, BrotliBitReaderState* to) {
to->val_ = from->val_;
@ -237,15 +243,17 @@ static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) {
static BROTLI_INLINE void BrotliTakeBits(
BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
*val = (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
BROTLI_LOG(("[BrotliReadBits] %d %d %d val: %6x\n",
BROTLI_LOG(("[BrotliTakeBits] %d %d %d val: %6x\n",
(int)br->avail_in, (int)br->bit_pos_, (int)n_bits, (int)*val));
BrotliDropBits(br, n_bits);
}
/* Reads the specified number of bits from |br| and advances the bit pos.
Assumes that there is enough input to perform BrotliFillBitWindow. */
static BROTLI_INLINE uint32_t BrotliReadBits(
Assumes that there is enough input to perform BrotliFillBitWindow.
Up to 24 bits are allowed to be requested from this method. */
static BROTLI_INLINE uint32_t BrotliReadBits24(
BrotliBitReader* const br, uint32_t n_bits) {
BROTLI_DCHECK(n_bits <= 24);
if (BROTLI_64_BITS || (n_bits <= 16)) {
uint32_t val;
BrotliFillBitWindow(br, n_bits);
@ -262,10 +270,32 @@ static BROTLI_INLINE uint32_t BrotliReadBits(
}
}
/* Counterpart of BrotliReadBits24 that accepts up to 32 bits (checked in
   debug builds). When BROTLI_64_BITS is set, or at most 16 bits are
   requested, the value is taken in a single step; otherwise it is assembled
   from a 16-bit low part and an (|n_bits| - 16)-bit high part. */
static BROTLI_INLINE uint32_t BrotliReadBits32(
    BrotliBitReader* const br, uint32_t n_bits) {
  uint32_t result;
  uint32_t upper;
  BROTLI_DCHECK(n_bits <= 32);
  if (!BROTLI_64_BITS && (n_bits > 16)) {
    /* Split read: low half first, then the rest. */
    BrotliFillBitWindow(br, 16);
    BrotliTakeBits(br, 16, &result);
    BrotliFillBitWindow(br, 16);
    BrotliTakeBits(br, n_bits - 16, &upper);
    return result | (upper << 16);
  }
  BrotliFillBitWindow(br, n_bits);
  BrotliTakeBits(br, n_bits, &result);
  return result;
}
/* Tries to read the specified amount of bits. Returns BROTLI_FALSE, if there
is not enough input. |n_bits| MUST be positive. */
is not enough input. |n_bits| MUST be positive.
Up to 24 bits are allowed to be requested from this method. */
static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits(
BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
BROTLI_DCHECK(n_bits <= 24);
while (BrotliGetAvailableBits(br) < n_bits) {
if (!BrotliPullByte(br)) {
return BROTLI_FALSE;
@ -275,6 +305,23 @@ static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits(
return BROTLI_TRUE;
}
/* Counterpart of BrotliSafeReadBits that accepts up to 32 bits (checked in
   debug builds). Returns BROTLI_FALSE if a byte cannot be pulled before
   |n_bits| bits are available. Requests wider than 24 bits on builds without
   BROTLI_64_BITS are delegated to BrotliSafeReadBits32Slow. */
static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits32(
    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
  BROTLI_DCHECK(n_bits <= 32);
  if (!BROTLI_64_BITS && (n_bits > 24)) {
    return BrotliSafeReadBits32Slow(br, n_bits, val);
  }
  /* Pull input byte by byte until enough bits are buffered. */
  while (BrotliGetAvailableBits(br) < n_bits) {
    if (!BrotliPullByte(br)) return BROTLI_FALSE;
  }
  BrotliTakeBits(br, n_bits, val);
  return BROTLI_TRUE;
}
/* Advances the bit reader position to the next byte boundary and verifies
that any skipped bits are set to zero. */
static BROTLI_INLINE BROTLI_BOOL BrotliJumpToByteBoundary(BrotliBitReader* br) {

View File

@ -470,32 +470,34 @@ static BROTLI_INLINE uint32_t Log2Floor(uint32_t x) {
Totally 1..4 symbols are read, 1..11 bits each.
The list of symbols MUST NOT contain duplicates. */
static BrotliDecoderErrorCode ReadSimpleHuffmanSymbols(
uint32_t alphabet_size, uint32_t max_symbol, BrotliDecoderState* s) {
uint32_t alphabet_size_max, uint32_t alphabet_size_limit,
BrotliDecoderState* s) {
/* max_bits == 1..11; symbol == 0..3; 1..44 bits will be read. */
BrotliBitReader* br = &s->br;
uint32_t max_bits = Log2Floor(alphabet_size - 1);
uint32_t i = s->sub_loop_counter;
uint32_t num_symbols = s->symbol;
BrotliMetablockHeaderArena* h = &s->arena.header;
uint32_t max_bits = Log2Floor(alphabet_size_max - 1);
uint32_t i = h->sub_loop_counter;
uint32_t num_symbols = h->symbol;
while (i <= num_symbols) {
uint32_t v;
if (BROTLI_PREDICT_FALSE(!BrotliSafeReadBits(br, max_bits, &v))) {
s->sub_loop_counter = i;
s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_READ;
h->sub_loop_counter = i;
h->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_READ;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
if (v >= max_symbol) {
if (v >= alphabet_size_limit) {
return
BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_ALPHABET);
}
s->symbols_lists_array[i] = (uint16_t)v;
BROTLI_LOG_UINT(s->symbols_lists_array[i]);
h->symbols_lists_array[i] = (uint16_t)v;
BROTLI_LOG_UINT(h->symbols_lists_array[i]);
++i;
}
for (i = 0; i < num_symbols; ++i) {
uint32_t k = i + 1;
for (; k <= num_symbols; ++k) {
if (s->symbols_lists_array[i] == s->symbols_lists_array[k]) {
if (h->symbols_lists_array[i] == h->symbols_lists_array[k]) {
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_SAME);
}
}
@ -588,27 +590,28 @@ static BROTLI_INLINE void ProcessRepeatedCodeLength(uint32_t code_len,
static BrotliDecoderErrorCode ReadSymbolCodeLengths(
uint32_t alphabet_size, BrotliDecoderState* s) {
BrotliBitReader* br = &s->br;
uint32_t symbol = s->symbol;
uint32_t repeat = s->repeat;
uint32_t space = s->space;
uint32_t prev_code_len = s->prev_code_len;
uint32_t repeat_code_len = s->repeat_code_len;
uint16_t* symbol_lists = s->symbol_lists;
uint16_t* code_length_histo = s->code_length_histo;
int* next_symbol = s->next_symbol;
BrotliMetablockHeaderArena* h = &s->arena.header;
uint32_t symbol = h->symbol;
uint32_t repeat = h->repeat;
uint32_t space = h->space;
uint32_t prev_code_len = h->prev_code_len;
uint32_t repeat_code_len = h->repeat_code_len;
uint16_t* symbol_lists = h->symbol_lists;
uint16_t* code_length_histo = h->code_length_histo;
int* next_symbol = h->next_symbol;
if (!BrotliWarmupBitReader(br)) {
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
while (symbol < alphabet_size && space > 0) {
const HuffmanCode* p = s->table;
const HuffmanCode* p = h->table;
uint32_t code_len;
BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(p);
if (!BrotliCheckInputAmount(br, BROTLI_SHORT_FILL_BIT_WINDOW_READ)) {
s->symbol = symbol;
s->repeat = repeat;
s->prev_code_len = prev_code_len;
s->repeat_code_len = repeat_code_len;
s->space = space;
h->symbol = symbol;
h->repeat = repeat;
h->prev_code_len = prev_code_len;
h->repeat_code_len = repeat_code_len;
h->space = space;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
BrotliFillBitWindow16(br);
@ -630,16 +633,17 @@ static BrotliDecoderErrorCode ReadSymbolCodeLengths(
symbol_lists, code_length_histo, next_symbol);
}
}
s->space = space;
h->space = space;
return BROTLI_DECODER_SUCCESS;
}
static BrotliDecoderErrorCode SafeReadSymbolCodeLengths(
uint32_t alphabet_size, BrotliDecoderState* s) {
BrotliBitReader* br = &s->br;
BrotliMetablockHeaderArena* h = &s->arena.header;
BROTLI_BOOL get_byte = BROTLI_FALSE;
while (s->symbol < alphabet_size && s->space > 0) {
const HuffmanCode* p = s->table;
while (h->symbol < alphabet_size && h->space > 0) {
const HuffmanCode* p = h->table;
uint32_t code_len;
uint32_t available_bits;
uint32_t bits = 0;
@ -659,9 +663,9 @@ static BrotliDecoderErrorCode SafeReadSymbolCodeLengths(
code_len = BROTLI_HC_FAST_LOAD_VALUE(p); /* code_len == 0..17 */
if (code_len < BROTLI_REPEAT_PREVIOUS_CODE_LENGTH) {
BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p));
ProcessSingleCodeLength(code_len, &s->symbol, &s->repeat, &s->space,
&s->prev_code_len, s->symbol_lists, s->code_length_histo,
s->next_symbol);
ProcessSingleCodeLength(code_len, &h->symbol, &h->repeat, &h->space,
&h->prev_code_len, h->symbol_lists, h->code_length_histo,
h->next_symbol);
} else { /* code_len == 16..17, extra_bits == 2..3 */
uint32_t extra_bits = code_len - 14U;
uint32_t repeat_delta = (bits >> BROTLI_HC_FAST_LOAD_BITS(p)) &
@ -672,9 +676,9 @@ static BrotliDecoderErrorCode SafeReadSymbolCodeLengths(
}
BrotliDropBits(br, BROTLI_HC_FAST_LOAD_BITS(p) + extra_bits);
ProcessRepeatedCodeLength(code_len, repeat_delta, alphabet_size,
&s->symbol, &s->repeat, &s->space, &s->prev_code_len,
&s->repeat_code_len, s->symbol_lists, s->code_length_histo,
s->next_symbol);
&h->symbol, &h->repeat, &h->space, &h->prev_code_len,
&h->repeat_code_len, h->symbol_lists, h->code_length_histo,
h->next_symbol);
}
}
return BROTLI_DECODER_SUCCESS;
@ -684,9 +688,10 @@ static BrotliDecoderErrorCode SafeReadSymbolCodeLengths(
Each code is 2..4 bits long. In total 30..72 bits are used. */
static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) {
BrotliBitReader* br = &s->br;
uint32_t num_codes = s->repeat;
unsigned space = s->space;
uint32_t i = s->sub_loop_counter;
BrotliMetablockHeaderArena* h = &s->arena.header;
uint32_t num_codes = h->repeat;
unsigned space = h->space;
uint32_t i = h->sub_loop_counter;
for (; i < BROTLI_CODE_LENGTH_CODES; ++i) {
const uint8_t code_len_idx = kCodeLengthCodeOrder[i];
uint32_t ix;
@ -699,21 +704,21 @@ static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) {
ix = 0;
}
if (kCodeLengthPrefixLength[ix] > available_bits) {
s->sub_loop_counter = i;
s->repeat = num_codes;
s->space = space;
s->substate_huffman = BROTLI_STATE_HUFFMAN_COMPLEX;
h->sub_loop_counter = i;
h->repeat = num_codes;
h->space = space;
h->substate_huffman = BROTLI_STATE_HUFFMAN_COMPLEX;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
}
v = kCodeLengthPrefixValue[ix];
BrotliDropBits(br, kCodeLengthPrefixLength[ix]);
s->code_length_code_lengths[code_len_idx] = (uint8_t)v;
BROTLI_LOG_ARRAY_INDEX(s->code_length_code_lengths, code_len_idx);
h->code_length_code_lengths[code_len_idx] = (uint8_t)v;
BROTLI_LOG_ARRAY_INDEX(h->code_length_code_lengths, code_len_idx);
if (v != 0) {
space = space - (32U >> v);
++num_codes;
++s->code_length_histo[v];
++h->code_length_histo[v];
if (space - 1U >= 32U) {
/* space is 0 or wrapped around. */
break;
@ -737,49 +742,48 @@ static BrotliDecoderErrorCode ReadCodeLengthCodeLengths(BrotliDecoderState* s) {
encoded with predefined entropy code. 32 - 74 bits are used.
B.2) Decoded table is used to decode code lengths of symbols in resulting
Huffman table. In worst case 3520 bits are read. */
static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
uint32_t max_symbol,
static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size_max,
uint32_t alphabet_size_limit,
HuffmanCode* table,
uint32_t* opt_table_size,
BrotliDecoderState* s) {
BrotliBitReader* br = &s->br;
/* Unnecessary masking, but might be good for safety. */
alphabet_size &= 0x7FF;
BrotliMetablockHeaderArena* h = &s->arena.header;
/* State machine. */
for (;;) {
switch (s->substate_huffman) {
switch (h->substate_huffman) {
case BROTLI_STATE_HUFFMAN_NONE:
if (!BrotliSafeReadBits(br, 2, &s->sub_loop_counter)) {
if (!BrotliSafeReadBits(br, 2, &h->sub_loop_counter)) {
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
BROTLI_LOG_UINT(s->sub_loop_counter);
BROTLI_LOG_UINT(h->sub_loop_counter);
/* The value is used as follows:
1 for simple code;
0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */
if (s->sub_loop_counter != 1) {
s->space = 32;
s->repeat = 0; /* num_codes */
memset(&s->code_length_histo[0], 0, sizeof(s->code_length_histo[0]) *
if (h->sub_loop_counter != 1) {
h->space = 32;
h->repeat = 0; /* num_codes */
memset(&h->code_length_histo[0], 0, sizeof(h->code_length_histo[0]) *
(BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH + 1));
memset(&s->code_length_code_lengths[0], 0,
sizeof(s->code_length_code_lengths));
s->substate_huffman = BROTLI_STATE_HUFFMAN_COMPLEX;
memset(&h->code_length_code_lengths[0], 0,
sizeof(h->code_length_code_lengths));
h->substate_huffman = BROTLI_STATE_HUFFMAN_COMPLEX;
continue;
}
/* Fall through. */
case BROTLI_STATE_HUFFMAN_SIMPLE_SIZE:
/* Read symbols, codes & code lengths directly. */
if (!BrotliSafeReadBits(br, 2, &s->symbol)) { /* num_symbols */
s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_SIZE;
if (!BrotliSafeReadBits(br, 2, &h->symbol)) { /* num_symbols */
h->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_SIZE;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
s->sub_loop_counter = 0;
h->sub_loop_counter = 0;
/* Fall through. */
case BROTLI_STATE_HUFFMAN_SIMPLE_READ: {
BrotliDecoderErrorCode result =
ReadSimpleHuffmanSymbols(alphabet_size, max_symbol, s);
ReadSimpleHuffmanSymbols(alphabet_size_max, alphabet_size_limit, s);
if (result != BROTLI_DECODER_SUCCESS) {
return result;
}
@ -788,21 +792,21 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
case BROTLI_STATE_HUFFMAN_SIMPLE_BUILD: {
uint32_t table_size;
if (s->symbol == 3) {
if (h->symbol == 3) {
uint32_t bits;
if (!BrotliSafeReadBits(br, 1, &bits)) {
s->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_BUILD;
h->substate_huffman = BROTLI_STATE_HUFFMAN_SIMPLE_BUILD;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
s->symbol += bits;
h->symbol += bits;
}
BROTLI_LOG_UINT(s->symbol);
BROTLI_LOG_UINT(h->symbol);
table_size = BrotliBuildSimpleHuffmanTable(
table, HUFFMAN_TABLE_BITS, s->symbols_lists_array, s->symbol);
table, HUFFMAN_TABLE_BITS, h->symbols_lists_array, h->symbol);
if (opt_table_size) {
*opt_table_size = table_size;
}
s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
h->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
return BROTLI_DECODER_SUCCESS;
}
@ -813,44 +817,45 @@ static BrotliDecoderErrorCode ReadHuffmanCode(uint32_t alphabet_size,
if (result != BROTLI_DECODER_SUCCESS) {
return result;
}
BrotliBuildCodeLengthsHuffmanTable(s->table,
s->code_length_code_lengths,
s->code_length_histo);
memset(&s->code_length_histo[0], 0, sizeof(s->code_length_histo));
BrotliBuildCodeLengthsHuffmanTable(h->table,
h->code_length_code_lengths,
h->code_length_histo);
memset(&h->code_length_histo[0], 0, sizeof(h->code_length_histo));
for (i = 0; i <= BROTLI_HUFFMAN_MAX_CODE_LENGTH; ++i) {
s->next_symbol[i] = (int)i - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
s->symbol_lists[s->next_symbol[i]] = 0xFFFF;
h->next_symbol[i] = (int)i - (BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1);
h->symbol_lists[h->next_symbol[i]] = 0xFFFF;
}
s->symbol = 0;
s->prev_code_len = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
s->repeat = 0;
s->repeat_code_len = 0;
s->space = 32768;
s->substate_huffman = BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS;
h->symbol = 0;
h->prev_code_len = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
h->repeat = 0;
h->repeat_code_len = 0;
h->space = 32768;
h->substate_huffman = BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS;
}
/* Fall through. */
case BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS: {
uint32_t table_size;
BrotliDecoderErrorCode result = ReadSymbolCodeLengths(max_symbol, s);
BrotliDecoderErrorCode result = ReadSymbolCodeLengths(
alphabet_size_limit, s);
if (result == BROTLI_DECODER_NEEDS_MORE_INPUT) {
result = SafeReadSymbolCodeLengths(max_symbol, s);
result = SafeReadSymbolCodeLengths(alphabet_size_limit, s);
}
if (result != BROTLI_DECODER_SUCCESS) {
return result;
}
if (s->space != 0) {
BROTLI_LOG(("[ReadHuffmanCode] space = %d\n", (int)s->space));
if (h->space != 0) {
BROTLI_LOG(("[ReadHuffmanCode] space = %d\n", (int)h->space));
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_HUFFMAN_SPACE);
}
table_size = BrotliBuildHuffmanTable(
table, HUFFMAN_TABLE_BITS, s->symbol_lists, s->code_length_histo);
table, HUFFMAN_TABLE_BITS, h->symbol_lists, h->code_length_histo);
if (opt_table_size) {
*opt_table_size = table_size;
}
s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
h->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
return BROTLI_DECODER_SUCCESS;
}
@ -868,7 +873,7 @@ static BROTLI_INLINE uint32_t ReadBlockLength(const HuffmanCode* table,
uint32_t nbits;
code = ReadSymbol(table, br);
nbits = kBlockLengthPrefixCode[code].nbits; /* nbits == 2..24 */
return kBlockLengthPrefixCode[code].offset + BrotliReadBits(br, nbits);
return kBlockLengthPrefixCode[code].offset + BrotliReadBits24(br, nbits);
}
/* WARNING: if state is not BROTLI_STATE_READ_BLOCK_LENGTH_NONE, then
@ -952,22 +957,22 @@ static BROTLI_NOINLINE void InverseMoveToFrontTransform(
/* Decodes a series of Huffman table using ReadHuffmanCode function. */
static BrotliDecoderErrorCode HuffmanTreeGroupDecode(
HuffmanTreeGroup* group, BrotliDecoderState* s) {
if (s->substate_tree_group != BROTLI_STATE_TREE_GROUP_LOOP) {
s->next = group->codes;
s->htree_index = 0;
s->substate_tree_group = BROTLI_STATE_TREE_GROUP_LOOP;
BrotliMetablockHeaderArena* h = &s->arena.header;
if (h->substate_tree_group != BROTLI_STATE_TREE_GROUP_LOOP) {
h->next = group->codes;
h->htree_index = 0;
h->substate_tree_group = BROTLI_STATE_TREE_GROUP_LOOP;
}
while (s->htree_index < group->num_htrees) {
while (h->htree_index < group->num_htrees) {
uint32_t table_size;
BrotliDecoderErrorCode result =
ReadHuffmanCode(group->alphabet_size, group->max_symbol,
s->next, &table_size, s);
BrotliDecoderErrorCode result = ReadHuffmanCode(group->alphabet_size_max,
group->alphabet_size_limit, h->next, &table_size, s);
if (result != BROTLI_DECODER_SUCCESS) return result;
group->htrees[s->htree_index] = s->next;
s->next += table_size;
++s->htree_index;
group->htrees[h->htree_index] = h->next;
h->next += table_size;
++h->htree_index;
}
s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
h->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
return BROTLI_DECODER_SUCCESS;
}
@ -985,15 +990,16 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
BrotliDecoderState* s) {
BrotliBitReader* br = &s->br;
BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
BrotliMetablockHeaderArena* h = &s->arena.header;
switch ((int)s->substate_context_map) {
switch ((int)h->substate_context_map) {
case BROTLI_STATE_CONTEXT_MAP_NONE:
result = DecodeVarLenUint8(s, br, num_htrees);
if (result != BROTLI_DECODER_SUCCESS) {
return result;
}
(*num_htrees)++;
s->context_index = 0;
h->context_index = 0;
BROTLI_LOG_UINT(context_map_size);
BROTLI_LOG_UINT(*num_htrees);
*context_map_arg =
@ -1005,7 +1011,7 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
memset(*context_map_arg, 0, (size_t)context_map_size);
return BROTLI_DECODER_SUCCESS;
}
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_READ_PREFIX;
h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_READ_PREFIX;
/* Fall through. */
case BROTLI_STATE_CONTEXT_MAP_READ_PREFIX: {
@ -1016,38 +1022,38 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
if ((bits & 1) != 0) { /* Use RLE for zeros. */
s->max_run_length_prefix = (bits >> 1) + 1;
h->max_run_length_prefix = (bits >> 1) + 1;
BrotliDropBits(br, 5);
} else {
s->max_run_length_prefix = 0;
h->max_run_length_prefix = 0;
BrotliDropBits(br, 1);
}
BROTLI_LOG_UINT(s->max_run_length_prefix);
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_HUFFMAN;
BROTLI_LOG_UINT(h->max_run_length_prefix);
h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_HUFFMAN;
}
/* Fall through. */
case BROTLI_STATE_CONTEXT_MAP_HUFFMAN: {
uint32_t alphabet_size = *num_htrees + s->max_run_length_prefix;
uint32_t alphabet_size = *num_htrees + h->max_run_length_prefix;
result = ReadHuffmanCode(alphabet_size, alphabet_size,
s->context_map_table, NULL, s);
h->context_map_table, NULL, s);
if (result != BROTLI_DECODER_SUCCESS) return result;
s->code = 0xFFFF;
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_DECODE;
h->code = 0xFFFF;
h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_DECODE;
}
/* Fall through. */
case BROTLI_STATE_CONTEXT_MAP_DECODE: {
uint32_t context_index = s->context_index;
uint32_t max_run_length_prefix = s->max_run_length_prefix;
uint32_t context_index = h->context_index;
uint32_t max_run_length_prefix = h->max_run_length_prefix;
uint8_t* context_map = *context_map_arg;
uint32_t code = s->code;
uint32_t code = h->code;
BROTLI_BOOL skip_preamble = (code != 0xFFFF);
while (context_index < context_map_size || skip_preamble) {
if (!skip_preamble) {
if (!SafeReadSymbol(s->context_map_table, br, &code)) {
s->code = 0xFFFF;
s->context_index = context_index;
if (!SafeReadSymbol(h->context_map_table, br, &code)) {
h->code = 0xFFFF;
h->context_index = context_index;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
BROTLI_LOG_UINT(code);
@ -1068,8 +1074,8 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
{
uint32_t reps;
if (!BrotliSafeReadBits(br, code, &reps)) {
s->code = code;
s->context_index = context_index;
h->code = code;
h->context_index = context_index;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
reps += 1U << code;
@ -1089,13 +1095,13 @@ static BrotliDecoderErrorCode DecodeContextMap(uint32_t context_map_size,
case BROTLI_STATE_CONTEXT_MAP_TRANSFORM: {
uint32_t bits;
if (!BrotliSafeReadBits(br, 1, &bits)) {
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_TRANSFORM;
h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_TRANSFORM;
return BROTLI_DECODER_NEEDS_MORE_INPUT;
}
if (bits != 0) {
InverseMoveToFrontTransform(*context_map_arg, context_map_size, s);
}
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
return BROTLI_DECODER_SUCCESS;
}
@ -1457,32 +1463,28 @@ static BrotliDecoderErrorCode ReadContextModes(BrotliDecoderState* s) {
}
static BROTLI_INLINE void TakeDistanceFromRingBuffer(BrotliDecoderState* s) {
if (s->distance_code == 0) {
--s->dist_rb_idx;
s->distance_code = s->dist_rb[s->dist_rb_idx & 3];
int offset = s->distance_code - 3;
if (s->distance_code <= 3) {
/* Compensate double distance-ring-buffer roll for dictionary items. */
s->distance_context = 1;
s->distance_context = 1 >> s->distance_code;
s->distance_code = s->dist_rb[(s->dist_rb_idx - offset) & 3];
s->dist_rb_idx -= s->distance_context;
} else {
int distance_code = s->distance_code << 1;
/* kDistanceShortCodeIndexOffset has 2-bit values from LSB:
3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 */
const uint32_t kDistanceShortCodeIndexOffset = 0xAAAFFF1B;
/* kDistanceShortCodeValueOffset has 2-bit values from LSB:
-0, 0,-0, 0,-1, 1,-2, 2,-3, 3,-1, 1,-2, 2,-3, 3 */
const uint32_t kDistanceShortCodeValueOffset = 0xFA5FA500;
int v = (s->dist_rb_idx +
(int)(kDistanceShortCodeIndexOffset >> distance_code)) & 0x3;
s->distance_code = s->dist_rb[v];
v = (int)(kDistanceShortCodeValueOffset >> distance_code) & 0x3;
if ((distance_code & 0x3) != 0) {
s->distance_code += v;
int index_delta = 3;
int delta;
int base = s->distance_code - 10;
if (s->distance_code < 10) {
base = s->distance_code - 4;
} else {
s->distance_code -= v;
if (s->distance_code <= 0) {
/* A huge distance will cause a BROTLI_FAILURE() soon.
This is a little faster than failing here. */
s->distance_code = 0x7FFFFFFF;
}
index_delta = 2;
}
/* Unpack one of six 4-bit values. */
delta = ((0x605142 >> (4 * base)) & 0xF) - 3;
s->distance_code = s->dist_rb[(s->dist_rb_idx + index_delta) & 0x3] + delta;
if (s->distance_code <= 0) {
/* A huge distance will cause a BROTLI_FAILURE() soon.
This is a little faster than failing here. */
s->distance_code = 0x7FFFFFFF;
}
}
}
@ -1497,62 +1499,153 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadBits(
}
}
/* Reads n_bits bits into *val through BrotliSafeReadBits32.
   A zero-width request does not touch the bit reader: it stores 0 in *val
   and reports success. Otherwise the result of BrotliSafeReadBits32 is
   returned as is. */
static BROTLI_INLINE BROTLI_BOOL SafeReadBits32(
    BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
  if (n_bits == 0) {
    *val = 0;
    return BROTLI_TRUE;
  }
  return BrotliSafeReadBits32(br, n_bits, val);
}
/*
RFC 7932 Section 4 with "..." shortenings and "[]" emendations.
Each distance ... is represented with a pair <distance code, extra bits>...
The distance code is encoded using a prefix code... The number of extra bits
can be 0..24... Two additional parameters: NPOSTFIX (0..3), and ...
NDIRECT (0..120) ... are encoded in the meta-block header...
The first 16 distance symbols ... reference past distances... ring buffer ...
Next NDIRECT distance symbols ... represent distances from 1 to NDIRECT...
[For] distance symbols 16 + NDIRECT and greater ... the number of extra bits
... is given by the following formula:
[ xcode = dcode - NDIRECT - 16 ]
ndistbits = 1 + ([ xcode ] >> (NPOSTFIX + 1))
...
*/
/*
RFC 7932 Section 9.2 with "..." shortenings and "[]" emendations.
... to get the actual value of the parameter NDIRECT, left-shift this
four-bit number by NPOSTFIX bits ...
*/
/* Remaining formulas from RFC 7932 Section 4 could be rewritten as follows:
alphabet_size = 16 + NDIRECT + (max_distbits << (NPOSTFIX + 1))
half = ((xcode >> NPOSTFIX) & 1) << ndistbits
postfix = xcode & ((1 << NPOSTFIX) - 1)
range_start = 2 * ((1 << ndistbits) - 1 - 1)
distance = ((range_start + half + extra) << NPOSTFIX) + postfix + NDIRECT + 1
NB: ndistbits >= 1 -> range_start >= 0
NB: range_start has factor 2, as the range is covered by 2 "halves"
NB: extra -1 offset in range_start formula covers the absence of
ndistbits = 0 case
NB: when NPOSTFIX = 0, NDIRECT is not greater than 15
In other words, xcode has the following binary structure - XXXHPPP:
- XXX represent the number of extra distance bits
- H selects upper / lower range of distances
- PPP represent "postfix"
"Regular" distance encoding has NPOSTFIX = 0; omitting the postfix part
simplifies distance calculation.
Using NPOSTFIX > 0 allows cheaper encoding of regular structures, e.g. where
most of distances have the same remainder of division by 2/4/8. For example,
the table of int32_t values that come from different sources; if it is likely
that 3 highest bytes of values from the same source are the same, then
copy distance often looks like 4x + y.
Distance calculation could be rewritten to:
ndistbits = NDISTBITS(NDIRECT, NPOSTFIX)[dcode]
distance = OFFSET(NDIRECT, NPOSTFIX)[dcode] + (extra << NPOSTFIX)
NDISTBITS and OFFSET could be pre-calculated, as NDIRECT and NPOSTFIX could
change only once per meta-block.
*/
/* Fills the distance lookup tables kept in the meta-block body arena.
   For every distance symbol past the short codes, dist_extra_bits[] receives
   the number of extra bits to read and dist_offset[] receives the distance
   that corresponds to all extra bits being zero.
   NB: it is possible to have all 64 tables precalculated. */
static void CalculateDistanceLut(BrotliDecoderState* s) {
  BrotliMetablockBodyArena* arena = &s->arena.body;
  const uint32_t postfix_bits = s->distance_postfix_bits;
  const uint32_t direct_count = s->num_direct_distance_codes;
  const uint32_t symbol_limit = s->distance_hgroup.alphabet_size_limit;
  /* Number of symbols that share the same extra bits count and "half". */
  const uint32_t group_size = 1u << postfix_bits;
  uint32_t extra_bits = 1;
  uint32_t upper_half = 0;
  uint32_t k;
  /* Short codes come first in the alphabet; no entries are written for
     them. */
  uint32_t symbol = BROTLI_NUM_DISTANCE_SHORT_CODES;

  /* Direct codes: no extra bits, distances 1..NDIRECT. */
  for (k = 1; k <= direct_count; ++k, ++symbol) {
    arena->dist_extra_bits[symbol] = 0;
    arena->dist_offset[symbol] = k;
  }

  /* Regular codes, one postfix group per iteration. The last group is always
     written completely, even if it crosses symbol_limit. */
  while (symbol < symbol_limit) {
    const uint32_t group_base = direct_count +
        ((((2 + upper_half) << extra_bits) - 4) << postfix_bits) + 1;
    for (k = 0; k < group_size; ++k) {
      arena->dist_extra_bits[symbol + k] = (uint8_t)extra_bits;
      arena->dist_offset[symbol + k] = group_base + k;
    }
    symbol += group_size;
    /* Lower half -> upper half -> lower half with one more extra bit. */
    extra_bits += upper_half;
    upper_half ^= 1;
  }
}
/* Precondition: s->distance_code < 0. */
static BROTLI_INLINE BROTLI_BOOL ReadDistanceInternal(
int safe, BrotliDecoderState* s, BrotliBitReader* br) {
int distval;
BrotliMetablockBodyArena* b = &s->arena.body;
uint32_t code;
uint32_t bits;
BrotliBitReaderState memento;
HuffmanCode* distance_tree = s->distance_hgroup.htrees[s->dist_htree_index];
if (!safe) {
s->distance_code = (int)ReadSymbol(distance_tree, br);
code = ReadSymbol(distance_tree, br);
} else {
uint32_t code;
BrotliBitReaderSaveState(br, &memento);
if (!SafeReadSymbol(distance_tree, br, &code)) {
return BROTLI_FALSE;
}
s->distance_code = (int)code;
}
--s->block_length[2];
/* Convert the distance code to the actual distance by possibly
looking up past distances from the s->ringbuffer. */
looking up past distances from the s->dist_rb. */
s->distance_context = 0;
if ((s->distance_code & ~0xF) == 0) {
if ((code & ~0xFu) == 0) {
s->distance_code = (int)code;
TakeDistanceFromRingBuffer(s);
--s->block_length[2];
return BROTLI_TRUE;
}
distval = s->distance_code - (int)s->num_direct_distance_codes;
if (distval >= 0) {
uint32_t nbits;
int postfix;
int offset;
if (!safe && (s->distance_postfix_bits == 0)) {
nbits = ((uint32_t)distval >> 1) + 1;
offset = ((2 + (distval & 1)) << nbits) - 4;
s->distance_code = (int)s->num_direct_distance_codes + offset +
(int)BrotliReadBits(br, nbits);
} else {
/* This branch also works well when s->distance_postfix_bits == 0. */
uint32_t bits;
postfix = distval & s->distance_postfix_mask;
distval >>= s->distance_postfix_bits;
nbits = ((uint32_t)distval >> 1) + 1;
if (safe) {
if (!SafeReadBits(br, nbits, &bits)) {
s->distance_code = -1; /* Restore precondition. */
BrotliBitReaderRestoreState(br, &memento);
return BROTLI_FALSE;
}
} else {
bits = BrotliReadBits(br, nbits);
}
offset = ((2 + (distval & 1)) << nbits) - 4;
s->distance_code = (int)s->num_direct_distance_codes +
((offset + (int)bits) << s->distance_postfix_bits) + postfix;
if (!safe) {
bits = BrotliReadBits32(br, b->dist_extra_bits[code]);
} else {
if (!SafeReadBits32(br, b->dist_extra_bits[code], &bits)) {
++s->block_length[2];
BrotliBitReaderRestoreState(br, &memento);
return BROTLI_FALSE;
}
}
s->distance_code = s->distance_code - BROTLI_NUM_DISTANCE_SHORT_CODES + 1;
--s->block_length[2];
s->distance_code =
(int)(b->dist_offset[code] + (bits << s->distance_postfix_bits));
return BROTLI_TRUE;
}
@ -1588,9 +1681,9 @@ static BROTLI_INLINE BROTLI_BOOL ReadCommandInternal(
*insert_length = v.insert_len_offset;
if (!safe) {
if (BROTLI_PREDICT_FALSE(v.insert_len_extra_bits != 0)) {
insert_len_extra = BrotliReadBits(br, v.insert_len_extra_bits);
insert_len_extra = BrotliReadBits24(br, v.insert_len_extra_bits);
}
copy_length = BrotliReadBits(br, v.copy_len_extra_bits);
copy_length = BrotliReadBits24(br, v.copy_len_extra_bits);
} else {
if (!SafeReadBits(br, v.insert_len_extra_bits, &insert_len_extra) ||
!SafeReadBits(br, v.copy_len_extra_bits, &copy_length)) {
@ -1935,21 +2028,6 @@ static BROTLI_NOINLINE BrotliDecoderErrorCode SafeProcessCommands(
return ProcessCommandsInternal(1, s);
}
/* Returns the maximum number of distance symbols which can only represent
distances not exceeding BROTLI_MAX_ALLOWED_DISTANCE. */
static uint32_t BrotliMaxDistanceSymbol(uint32_t ndirect, uint32_t npostfix) {
static const uint32_t bound[BROTLI_MAX_NPOSTFIX + 1] = {0, 4, 12, 28};
static const uint32_t diff[BROTLI_MAX_NPOSTFIX + 1] = {73, 126, 228, 424};
uint32_t postfix = 1U << npostfix;
if (ndirect < bound[npostfix]) {
return ndirect + diff[npostfix] + postfix;
} else if (ndirect > bound[npostfix] + postfix) {
return ndirect + diff[npostfix];
} else {
return bound[npostfix] + diff[npostfix] + postfix;
}
}
BrotliDecoderResult BrotliDecoderDecompress(
size_t encoded_size, const uint8_t* encoded_buffer, size_t* decoded_size,
uint8_t* decoded_buffer) {
@ -2167,33 +2245,23 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->state = BROTLI_STATE_UNCOMPRESSED;
break;
}
s->state = BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_HEADER;
/* Fall through. */
case BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_HEADER: {
BrotliMetablockHeaderArena* h = &s->arena.header;
s->loop_counter = 0;
/* Initialize compressed metablock header arena. */
h->sub_loop_counter = 0;
/* Make small negative indexes addressable. */
h->symbol_lists =
&h->symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1];
h->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
h->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
h->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
s->state = BROTLI_STATE_HUFFMAN_CODE_0;
break;
case BROTLI_STATE_UNCOMPRESSED: {
result = CopyUncompressedBlockToOutput(
available_out, next_out, total_out, s);
if (result != BROTLI_DECODER_SUCCESS) {
break;
}
s->state = BROTLI_STATE_METABLOCK_DONE;
break;
}
case BROTLI_STATE_METADATA:
for (; s->meta_block_remaining_len > 0; --s->meta_block_remaining_len) {
uint32_t bits;
/* Read one byte and ignore it. */
if (!BrotliSafeReadBits(br, 8, &bits)) {
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
break;
}
}
if (result == BROTLI_DECODER_SUCCESS) {
s->state = BROTLI_STATE_METABLOCK_DONE;
}
break;
/* Fall through. */
case BROTLI_STATE_HUFFMAN_CODE_0:
if (s->loop_counter >= 3) {
@ -2247,6 +2315,30 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
break;
}
case BROTLI_STATE_UNCOMPRESSED: {
result = CopyUncompressedBlockToOutput(
available_out, next_out, total_out, s);
if (result != BROTLI_DECODER_SUCCESS) {
break;
}
s->state = BROTLI_STATE_METABLOCK_DONE;
break;
}
case BROTLI_STATE_METADATA:
for (; s->meta_block_remaining_len > 0; --s->meta_block_remaining_len) {
uint32_t bits;
/* Read one byte and ignore it. */
if (!BrotliSafeReadBits(br, 8, &bits)) {
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
break;
}
}
if (result == BROTLI_DECODER_SUCCESS) {
s->state = BROTLI_STATE_METABLOCK_DONE;
}
break;
case BROTLI_STATE_METABLOCK_HEADER_2: {
uint32_t bits;
if (!BrotliSafeReadBits(br, 6, &bits)) {
@ -2255,11 +2347,9 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
}
s->distance_postfix_bits = bits & BitMask(2);
bits >>= 2;
s->num_direct_distance_codes = BROTLI_NUM_DISTANCE_SHORT_CODES +
(bits << s->distance_postfix_bits);
s->num_direct_distance_codes = bits << s->distance_postfix_bits;
BROTLI_LOG_UINT(s->num_direct_distance_codes);
BROTLI_LOG_UINT(s->distance_postfix_bits);
s->distance_postfix_mask = (int)BitMask(s->distance_postfix_bits);
s->context_modes =
(uint8_t*)BROTLI_DECODER_ALLOC(s, (size_t)s->num_block_types[0]);
if (s->context_modes == 0) {
@ -2291,17 +2381,19 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
/* Fall through. */
case BROTLI_STATE_CONTEXT_MAP_2: {
uint32_t num_direct_codes =
s->num_direct_distance_codes - BROTLI_NUM_DISTANCE_SHORT_CODES;
uint32_t num_distance_codes = BROTLI_DISTANCE_ALPHABET_SIZE(
s->distance_postfix_bits, num_direct_codes,
(s->large_window ? BROTLI_LARGE_MAX_DISTANCE_BITS :
BROTLI_MAX_DISTANCE_BITS));
uint32_t max_distance_symbol = (s->large_window ?
BrotliMaxDistanceSymbol(
num_direct_codes, s->distance_postfix_bits) :
num_distance_codes);
uint32_t npostfix = s->distance_postfix_bits;
uint32_t ndirect = s->num_direct_distance_codes;
uint32_t distance_alphabet_size_max = BROTLI_DISTANCE_ALPHABET_SIZE(
npostfix, ndirect, BROTLI_MAX_DISTANCE_BITS);
uint32_t distance_alphabet_size_limit = distance_alphabet_size_max;
BROTLI_BOOL allocation_success = BROTLI_TRUE;
if (s->large_window) {
BrotliDistanceCodeLimit limit = BrotliCalculateDistanceCodeLimit(
BROTLI_MAX_ALLOWED_DISTANCE, npostfix, ndirect);
distance_alphabet_size_max = BROTLI_DISTANCE_ALPHABET_SIZE(
npostfix, ndirect, BROTLI_LARGE_MAX_DISTANCE_BITS);
distance_alphabet_size_limit = limit.max_alphabet_size;
}
result = DecodeContextMap(
s->num_block_types[2] << BROTLI_DISTANCE_CONTEXT_BITS,
&s->num_dist_htrees, &s->dist_context_map, s);
@ -2315,8 +2407,8 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s, &s->insert_copy_hgroup, BROTLI_NUM_COMMAND_SYMBOLS,
BROTLI_NUM_COMMAND_SYMBOLS, s->num_block_types[1]);
allocation_success &= BrotliDecoderHuffmanTreeGroupInit(
s, &s->distance_hgroup, num_distance_codes,
max_distance_symbol, s->num_dist_htrees);
s, &s->distance_hgroup, distance_alphabet_size_max,
distance_alphabet_size_limit, s->num_dist_htrees);
if (!allocation_success) {
return SaveErrorCode(s,
BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS));
@ -2338,18 +2430,24 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
result = HuffmanTreeGroupDecode(hgroup, s);
if (result != BROTLI_DECODER_SUCCESS) break;
s->loop_counter++;
if (s->loop_counter >= 3) {
PrepareLiteralDecoding(s);
s->dist_context_map_slice = s->dist_context_map;
s->htree_command = s->insert_copy_hgroup.htrees[0];
if (!BrotliEnsureRingBuffer(s)) {
result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_2);
break;
}
s->state = BROTLI_STATE_COMMAND_BEGIN;
if (s->loop_counter < 3) {
break;
}
break;
s->state = BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_BODY;
}
/* Fall through. */
case BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_BODY:
PrepareLiteralDecoding(s);
s->dist_context_map_slice = s->dist_context_map;
s->htree_command = s->insert_copy_hgroup.htrees[0];
if (!BrotliEnsureRingBuffer(s)) {
result = BROTLI_FAILURE(BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_2);
break;
}
CalculateDistanceLut(s);
s->state = BROTLI_STATE_COMMAND_BEGIN;
/* Fall through. */
case BROTLI_STATE_COMMAND_BEGIN:
/* Fall through. */

View File

@ -19,7 +19,8 @@ extern "C" {
#define BROTLI_HUFFMAN_MAX_CODE_LENGTH 15
/* Maximum possible Huffman table size for an alphabet size of (index * 32),
max code length 15 and root table bits 8. */
max code length 15 and root table bits 8. This table describes table sizes
for alphabets containing up to 1152 = 36 * 32 symbols. */
static const uint16_t kMaxHuffmanTableSize[] = {
256, 402, 436, 468, 500, 534, 566, 598, 630, 662, 694, 726, 758, 790, 822,
854, 886, 920, 952, 984, 1016, 1048, 1080, 1112, 1144, 1176, 1208, 1240, 1272,
@ -110,13 +111,13 @@ BROTLI_INTERNAL uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
int root_bits, uint16_t* symbols, uint32_t num_symbols);
/* Contains a collection of Huffman trees with the same alphabet size. */
/* max_symbol is needed due to simple codes since log2(alphabet_size) could be
greater than log2(max_symbol). */
/* alphabet_size_limit is needed due to simple codes, since
log2(alphabet_size_max) could be greater than log2(alphabet_size_limit). */
typedef struct {
HuffmanCode** htrees;
HuffmanCode* codes;
uint16_t alphabet_size;
uint16_t max_symbol;
uint16_t alphabet_size_max;
uint16_t alphabet_size_limit;
uint16_t num_htrees;
} HuffmanTreeGroup;

View File

@ -33,10 +33,7 @@ BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
s->state = BROTLI_STATE_UNINITED;
s->large_window = 0;
s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
s->substate_tree_group = BROTLI_STATE_TREE_GROUP_NONE;
s->substate_context_map = BROTLI_STATE_CONTEXT_MAP_NONE;
s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE;
s->substate_huffman = BROTLI_STATE_HUFFMAN_NONE;
s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;
@ -59,8 +56,6 @@ BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
s->context_map_slice = NULL;
s->dist_context_map_slice = NULL;
s->sub_loop_counter = 0;
s->literal_hgroup.codes = NULL;
s->literal_hgroup.htrees = NULL;
s->insert_copy_hgroup.codes = NULL;
@ -84,9 +79,6 @@ BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
s->block_type_trees = NULL;
s->block_len_trees = NULL;
/* Make small negative indexes addressable. */
s->symbol_lists = &s->symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1];
s->mtf_upper_bound = 63;
s->dictionary = BrotliGetDictionary();
@ -142,17 +134,18 @@ void BrotliDecoderStateCleanup(BrotliDecoderState* s) {
}
BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s,
HuffmanTreeGroup* group, uint32_t alphabet_size, uint32_t max_symbol,
uint32_t ntrees) {
HuffmanTreeGroup* group, uint32_t alphabet_size_max,
uint32_t alphabet_size_limit, uint32_t ntrees) {
/* Pack two allocations into one */
const size_t max_table_size = kMaxHuffmanTableSize[(alphabet_size + 31) >> 5];
const size_t max_table_size =
kMaxHuffmanTableSize[(alphabet_size_limit + 31) >> 5];
const size_t code_size = sizeof(HuffmanCode) * ntrees * max_table_size;
const size_t htree_size = sizeof(HuffmanCode*) * ntrees;
/* Pointer alignment is, hopefully, wider than sizeof(HuffmanCode). */
HuffmanCode** p = (HuffmanCode**)BROTLI_DECODER_ALLOC(s,
code_size + htree_size);
group->alphabet_size = (uint16_t)alphabet_size;
group->max_symbol = (uint16_t)max_symbol;
group->alphabet_size_max = (uint16_t)alphabet_size_max;
group->alphabet_size_limit = (uint16_t)alphabet_size_limit;
group->num_htrees = (uint16_t)ntrees;
group->htrees = p;
group->codes = (HuffmanCode*)(&p[ntrees]);

View File

@ -21,6 +21,95 @@
extern "C" {
#endif
/* Graphviz diagram that describes state transitions:
digraph States {
graph [compound=true]
concentrate=true
node [shape="box"]
UNINITED -> {LARGE_WINDOW_BITS -> INITIALIZE}
subgraph cluster_metablock_workflow {
style="rounded"
label=< <B>METABLOCK CYCLE</B> >
METABLOCK_BEGIN -> METABLOCK_HEADER
METABLOCK_HEADER:sw -> METADATA
METABLOCK_HEADER:s -> UNCOMPRESSED
METABLOCK_HEADER:se -> METABLOCK_DONE:ne
METADATA:s -> METABLOCK_DONE:w
UNCOMPRESSED:s -> METABLOCK_DONE:n
METABLOCK_DONE:e -> METABLOCK_BEGIN:e [constraint="false"]
}
INITIALIZE -> METABLOCK_BEGIN
METABLOCK_DONE -> DONE
subgraph cluster_compressed_metablock {
style="rounded"
label=< <B>COMPRESSED METABLOCK</B> >
subgraph cluster_command {
style="rounded"
label=< <B>HOT LOOP</B> >
_METABLOCK_DONE_PORT_ [shape=point style=invis]
{
// Set different shape for nodes returning from "compressed metablock".
node [shape=invhouse]; CMD_INNER CMD_POST_DECODE_LITERALS;
CMD_POST_WRAP_COPY; CMD_INNER_WRITE; CMD_POST_WRITE_1;
}
CMD_BEGIN -> CMD_INNER -> CMD_POST_DECODE_LITERALS -> CMD_POST_WRAP_COPY
// IO ("write") nodes are not in the hot loop!
CMD_INNER_WRITE [style=dashed]
CMD_INNER -> CMD_INNER_WRITE
CMD_POST_WRITE_1 [style=dashed]
CMD_POST_DECODE_LITERALS -> CMD_POST_WRITE_1
CMD_POST_WRITE_2 [style=dashed]
CMD_POST_WRAP_COPY -> CMD_POST_WRITE_2
CMD_POST_WRITE_1 -> CMD_BEGIN:s [constraint="false"]
CMD_INNER_WRITE -> {CMD_INNER CMD_POST_DECODE_LITERALS}
[constraint="false"]
CMD_BEGIN:ne -> CMD_POST_DECODE_LITERALS [constraint="false"]
CMD_POST_WRAP_COPY -> CMD_BEGIN [constraint="false"]
CMD_POST_DECODE_LITERALS -> CMD_BEGIN:ne [constraint="false"]
CMD_POST_WRITE_2 -> CMD_POST_WRAP_COPY [constraint="false"]
{rank=same; CMD_BEGIN; CMD_INNER; CMD_POST_DECODE_LITERALS;
CMD_POST_WRAP_COPY}
{rank=same; CMD_INNER_WRITE; CMD_POST_WRITE_1; CMD_POST_WRITE_2}
{CMD_INNER CMD_POST_DECODE_LITERALS CMD_POST_WRAP_COPY} ->
_METABLOCK_DONE_PORT_ [style=invis]
{CMD_INNER_WRITE CMD_POST_WRITE_1} -> _METABLOCK_DONE_PORT_
[constraint="false" style=invis]
}
BEFORE_COMPRESSED_METABLOCK_HEADER:s -> HUFFMAN_CODE_0:n
HUFFMAN_CODE_0 -> HUFFMAN_CODE_1 -> HUFFMAN_CODE_2 -> HUFFMAN_CODE_3
HUFFMAN_CODE_0 -> METABLOCK_HEADER_2 -> CONTEXT_MODES -> CONTEXT_MAP_1
CONTEXT_MAP_1 -> CONTEXT_MAP_2 -> TREE_GROUP
TREE_GROUP -> BEFORE_COMPRESSED_METABLOCK_BODY:e
BEFORE_COMPRESSED_METABLOCK_BODY:s -> CMD_BEGIN:n
HUFFMAN_CODE_3:e -> HUFFMAN_CODE_0:ne [constraint="false"]
{rank=same; HUFFMAN_CODE_0; HUFFMAN_CODE_1; HUFFMAN_CODE_2; HUFFMAN_CODE_3}
{rank=same; METABLOCK_HEADER_2; CONTEXT_MODES; CONTEXT_MAP_1; CONTEXT_MAP_2;
TREE_GROUP}
}
METABLOCK_HEADER:e -> BEFORE_COMPRESSED_METABLOCK_HEADER:n
_METABLOCK_DONE_PORT_ -> METABLOCK_DONE:se
[constraint="false" ltail=cluster_command]
UNINITED [shape=Mdiamond];
DONE [shape=Msquare];
}
*/
typedef enum {
BROTLI_STATE_UNINITED,
BROTLI_STATE_LARGE_WINDOW_BITS,
@ -39,6 +128,7 @@ typedef enum {
BROTLI_STATE_METABLOCK_DONE,
BROTLI_STATE_COMMAND_POST_WRITE_1,
BROTLI_STATE_COMMAND_POST_WRITE_2,
BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_HEADER,
BROTLI_STATE_HUFFMAN_CODE_0,
BROTLI_STATE_HUFFMAN_CODE_1,
BROTLI_STATE_HUFFMAN_CODE_2,
@ -46,6 +136,7 @@ typedef enum {
BROTLI_STATE_CONTEXT_MAP_1,
BROTLI_STATE_CONTEXT_MAP_2,
BROTLI_STATE_TREE_GROUP,
BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_BODY,
BROTLI_STATE_DONE
} BrotliRunningState;
@ -98,6 +189,50 @@ typedef enum {
BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX
} BrotliRunningReadBlockLengthState;
/* Scratch state used while a compressed meta-block header is decoded
   (Huffman codes, tree groups, context maps).
   NB: this struct is a member of the "arena" union of the decoder state,
   together with BrotliMetablockBodyArena; its contents are overwritten once
   the body arena is filled. */
typedef struct BrotliMetablockHeaderArena {
/* Sub-states of the header decoding routines; reset to the corresponding
   *_NONE values before the header is decoded. */
BrotliRunningTreeGroupState substate_tree_group;
BrotliRunningContextMapState substate_context_map;
BrotliRunningHuffmanState substate_huffman;
uint32_t sub_loop_counter;
uint32_t repeat_code_len;
uint32_t prev_code_len;
/* For ReadHuffmanCode. */
uint32_t symbol;
uint32_t repeat;
uint32_t space;
/* Huffman table for "histograms". */
HuffmanCode table[32];
/* List of heads of symbol chains. Points inside symbols_lists_array, so that
   small negative indexes are addressable. */
uint16_t* symbol_lists;
/* Storage for symbol_lists. */
uint16_t symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1 +
BROTLI_NUM_COMMAND_SYMBOLS];
/* Tails of symbol chains. */
int next_symbol[32];
uint8_t code_length_code_lengths[BROTLI_CODE_LENGTH_CODES];
/* Population counts for the code lengths. */
uint16_t code_length_histo[16];
/* For HuffmanTreeGroupDecode. */
int htree_index;
HuffmanCode* next;
/* For DecodeContextMap. context_index and code are saved here when decoding
   has to be suspended until more input is available. */
uint32_t context_index;
uint32_t max_run_length_prefix;
uint32_t code;
HuffmanCode context_map_table[BROTLI_HUFFMAN_MAX_SIZE_272];
} BrotliMetablockHeaderArena;
/* Distance lookup tables used while a compressed meta-block body is decoded.
   Both tables are indexed by distance symbol and are filled by
   CalculateDistanceLut before command decoding starts.
   NOTE(review): 544 presumably is the maximal effective distance alphabet
   size (NPOSTFIX = 3, NDIRECT = 120, "large window" mode) - confirm. */
typedef struct BrotliMetablockBodyArena {
/* Number of extra bits to read for the given distance symbol. */
uint8_t dist_extra_bits[544];
/* Distance that corresponds to the given symbol when all extra bits are 0. */
uint32_t dist_offset[544];
} BrotliMetablockBodyArena;
struct BrotliDecoderStateStruct {
BrotliRunningState state;
@ -110,7 +245,8 @@ struct BrotliDecoderStateStruct {
brotli_free_func free_func;
void* memory_manager_opaque;
/* Temporary storage for remaining input. */
/* Temporary storage for remaining input. The Brotli stream format is designed
so that 64 bits are enough to make progress in decoding. */
union {
uint64_t u64;
uint8_t u8[8];
@ -125,7 +261,6 @@ struct BrotliDecoderStateStruct {
int dist_rb_idx;
int dist_rb[4];
int error_code;
uint32_t sub_loop_counter;
uint8_t* ringbuffer;
uint8_t* ringbuffer_end;
HuffmanCode* htree_command;
@ -153,13 +288,10 @@ struct BrotliDecoderStateStruct {
uint32_t block_type_rb[6];
uint32_t distance_postfix_bits;
uint32_t num_direct_distance_codes;
int distance_postfix_mask;
uint32_t num_dist_htrees;
uint8_t* dist_context_map;
HuffmanCode* literal_htree;
uint8_t dist_htree_index;
uint32_t repeat_code_len;
uint32_t prev_code_len;
int copy_length;
int distance_code;
@ -168,33 +300,6 @@ struct BrotliDecoderStateStruct {
size_t rb_roundtrips; /* how many times we went around the ring-buffer */
size_t partial_pos_out; /* how much output to the user in total */
/* For ReadHuffmanCode. */
uint32_t symbol;
uint32_t repeat;
uint32_t space;
HuffmanCode table[32];
/* List of heads of symbol chains. */
uint16_t* symbol_lists;
/* Storage from symbol_lists. */
uint16_t symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1 +
BROTLI_NUM_COMMAND_SYMBOLS];
/* Tails of symbol chains. */
int next_symbol[32];
uint8_t code_length_code_lengths[BROTLI_CODE_LENGTH_CODES];
/* Population counts for the code lengths. */
uint16_t code_length_histo[16];
/* For HuffmanTreeGroupDecode. */
int htree_index;
HuffmanCode* next;
/* For DecodeContextMap. */
uint32_t context_index;
uint32_t max_run_length_prefix;
uint32_t code;
HuffmanCode context_map_table[BROTLI_HUFFMAN_MAX_SIZE_272];
/* For InverseMoveToFrontTransform. */
uint32_t mtf_upper_bound;
uint32_t mtf[64 + 1];
@ -203,10 +308,7 @@ struct BrotliDecoderStateStruct {
/* States inside function calls. */
BrotliRunningMetablockHeaderState substate_metablock_header;
BrotliRunningTreeGroupState substate_tree_group;
BrotliRunningContextMapState substate_context_map;
BrotliRunningUncompressedState substate_uncompressed;
BrotliRunningHuffmanState substate_huffman;
BrotliRunningDecodeUint8State substate_decode_uint8;
BrotliRunningReadBlockLengthState substate_read_block_length;
@ -229,6 +331,11 @@ struct BrotliDecoderStateStruct {
const BrotliTransforms* transforms;
uint32_t trivial_literal_contexts[8]; /* 256 bits */
union {
BrotliMetablockHeaderArena header;
BrotliMetablockBodyArena body;
} arena;
};
typedef struct BrotliDecoderStateStruct BrotliDecoderStateInternal;
@ -241,8 +348,8 @@ BROTLI_INTERNAL void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s);
BROTLI_INTERNAL void BrotliDecoderStateCleanupAfterMetablock(
BrotliDecoderState* s);
BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(
BrotliDecoderState* s, HuffmanTreeGroup* group, uint32_t alphabet_size,
uint32_t max_symbol, uint32_t ntrees);
BrotliDecoderState* s, HuffmanTreeGroup* group, uint32_t alphabet_size_max,
uint32_t alphabet_size_limit, uint32_t ntrees);
#define BROTLI_DECODER_ALLOC(S, L) S->alloc_func(S->memory_manager_opaque, L)

View File

@ -9,6 +9,7 @@
#include "./backward_references.h"
#include "../common/constants.h"
#include "../common/context.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
#include <brotli/types.h>
@ -119,17 +120,17 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
#undef CAT
#undef EXPAND_CAT
void BrotliCreateBackwardReferences(
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask, const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
void BrotliCreateBackwardReferences(size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
ContextLut literal_context_lut, const BrotliEncoderParams* params,
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals) {
switch (params->hasher.type) {
#define CASE_(N) \
case N: \
CreateBackwardReferencesNH ## N( \
num_bytes, position, ringbuffer, \
ringbuffer_mask, params, hasher, dist_cache, \
CreateBackwardReferencesNH ## N(num_bytes, \
position, ringbuffer, ringbuffer_mask, \
literal_context_lut, params, hasher, dist_cache, \
last_insert_len, commands, num_commands, num_literals); \
return;
FOR_GENERIC_HASHERS(CASE_)

View File

@ -10,6 +10,7 @@
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
#include "../common/constants.h"
#include "../common/context.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
#include <brotli/types.h>
@ -27,8 +28,8 @@ extern "C" {
by this call. */
BROTLI_INTERNAL void BrotliCreateBackwardReferences(size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
ContextLut literal_context_lut, const BrotliEncoderParams* params,
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals);
#if defined(__cplusplus) || defined(c_plusplus)

View File

@ -11,6 +11,7 @@
#include <string.h> /* memcpy, memset */
#include "../common/constants.h"
#include "../common/context.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./command.h"
@ -26,6 +27,7 @@
extern "C" {
#endif
/* BrotliCalculateDistanceCodeLimit(BROTLI_MAX_ALLOWED_DISTANCE, 3, 120). */
#define BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE 544
static const float kInfinity = 1.7e38f; /* ~= 2 ^ 127 */
@ -86,14 +88,10 @@ typedef struct ZopfliCostModel {
static void InitZopfliCostModel(
MemoryManager* m, ZopfliCostModel* self, const BrotliDistanceParams* dist,
size_t num_bytes) {
uint32_t distance_histogram_size = dist->alphabet_size;
if (distance_histogram_size > BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE) {
distance_histogram_size = BROTLI_MAX_EFFECTIVE_DISTANCE_ALPHABET_SIZE;
}
self->num_bytes_ = num_bytes;
self->literal_costs_ = BROTLI_ALLOC(m, float, num_bytes + 2);
self->cost_dist_ = BROTLI_ALLOC(m, float, dist->alphabet_size);
self->distance_histogram_size = distance_histogram_size;
self->cost_dist_ = BROTLI_ALLOC(m, float, dist->alphabet_size_limit);
self->distance_histogram_size = dist->alphabet_size_limit;
if (BROTLI_IS_OOM(m)) return;
}
@ -408,9 +406,12 @@ static size_t UpdateNodes(
const int* starting_dist_cache, const size_t num_matches,
const BackwardMatch* matches, const ZopfliCostModel* model,
StartPosQueue* queue, ZopfliNode* nodes) {
const size_t stream_offset = params->stream_offset;
const size_t cur_ix = block_start + pos;
const size_t cur_ix_masked = cur_ix & ringbuffer_mask;
const size_t max_distance = BROTLI_MIN(size_t, cur_ix, max_backward_limit);
const size_t dictionary_start = BROTLI_MIN(size_t,
cur_ix + stream_offset, max_backward_limit);
const size_t max_len = num_bytes - pos;
const size_t max_zopfli_len = MaxZopfliLen(params);
const size_t max_iters = MaxZopfliCandidates(params);
@ -419,7 +420,7 @@ static size_t UpdateNodes(
size_t k;
size_t gap = 0;
EvaluateNode(block_start, pos, max_backward_limit, gap,
EvaluateNode(block_start + stream_offset, pos, max_backward_limit, gap,
starting_dist_cache, model, queue, nodes);
{
@ -453,7 +454,7 @@ static size_t UpdateNodes(
if (cur_ix_masked + best_len > ringbuffer_mask) {
break;
}
if (BROTLI_PREDICT_FALSE(backward > max_distance + gap)) {
if (BROTLI_PREDICT_FALSE(backward > dictionary_start + gap)) {
/* Word dictionary -> ignore. */
continue;
}
@ -472,6 +473,8 @@ static size_t UpdateNodes(
&ringbuffer[cur_ix_masked],
max_len);
} else {
/* "Gray" area. It is addressable by decoder, but this encoder
instance does not have that data -> should not touch it. */
continue;
}
{
@ -506,7 +509,7 @@ static size_t UpdateNodes(
BackwardMatch match = matches[j];
size_t dist = match.distance;
BROTLI_BOOL is_dictionary_match =
TO_BROTLI_BOOL(dist > max_distance + gap);
TO_BROTLI_BOOL(dist > dictionary_start + gap);
/* We already tried all possible last distance matches, so we can use
normal distance code here. */
size_t dist_code = dist + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
@ -569,6 +572,7 @@ void BrotliZopfliCreateCommands(const size_t num_bytes,
const size_t block_start, const ZopfliNode* nodes, int* dist_cache,
size_t* last_insert_len, const BrotliEncoderParams* params,
Command* commands, size_t* num_literals) {
const size_t stream_offset = params->stream_offset;
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
size_t pos = 0;
uint32_t offset = nodes[0].u.next;
@ -587,10 +591,10 @@ void BrotliZopfliCreateCommands(const size_t num_bytes,
{
size_t distance = ZopfliNodeCopyDistance(next);
size_t len_code = ZopfliNodeLengthCode(next);
size_t max_distance = BROTLI_MIN(size_t,
block_start + pos, max_backward_limit);
size_t dictionary_start = BROTLI_MIN(size_t,
block_start + pos + stream_offset, max_backward_limit);
BROTLI_BOOL is_dictionary =
TO_BROTLI_BOOL(distance > max_distance + gap);
TO_BROTLI_BOOL(distance > dictionary_start + gap);
size_t dist_code = ZopfliNodeDistanceCode(next);
InitCommand(&commands[i], &params->dist, insert_length,
copy_length, (int)len_code - (int)copy_length, dist_code);
@ -614,6 +618,7 @@ static size_t ZopfliIterate(size_t num_bytes, size_t position,
const BrotliEncoderParams* params, const size_t gap, const int* dist_cache,
const ZopfliCostModel* model, const uint32_t* num_matches,
const BackwardMatch* matches, ZopfliNode* nodes) {
const size_t stream_offset = params->stream_offset;
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
const size_t max_zopfli_len = MaxZopfliLen(params);
StartPosQueue queue;
@ -638,7 +643,7 @@ static size_t ZopfliIterate(size_t num_bytes, size_t position,
while (skip) {
i++;
if (i + 3 >= num_bytes) break;
EvaluateNode(position, i, max_backward_limit, gap,
EvaluateNode(position + stream_offset, i, max_backward_limit, gap,
dist_cache, model, &queue, nodes);
cur_match_pos += num_matches[i];
skip--;
@ -651,8 +656,9 @@ static size_t ZopfliIterate(size_t num_bytes, size_t position,
/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params,
const int* dist_cache, HasherHandle hasher, ZopfliNode* nodes) {
ContextLut literal_context_lut, const BrotliEncoderParams* params,
const int* dist_cache, Hasher* hasher, ZopfliNode* nodes) {
const size_t stream_offset = params->stream_offset;
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
const size_t max_zopfli_len = MaxZopfliLen(params);
ZopfliCostModel model;
@ -663,6 +669,7 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
size_t i;
size_t gap = 0;
size_t lz_matches_offset = 0;
BROTLI_UNUSED(literal_context_lut);
nodes[0].length = 0;
nodes[0].u.cost = 0;
InitZopfliCostModel(m, &model, &params->dist, num_bytes);
@ -673,12 +680,14 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; i++) {
const size_t pos = position + i;
const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
const size_t dictionary_start = BROTLI_MIN(size_t,
pos + stream_offset, max_backward_limit);
size_t skip;
size_t num_matches;
num_matches = FindAllMatchesH10(hasher,
num_matches = FindAllMatchesH10(&hasher->privat._H10,
&params->dictionary,
ringbuffer, ringbuffer_mask, pos, num_bytes - i, max_distance,
gap, params, &matches[lz_matches_offset]);
dictionary_start + gap, params, &matches[lz_matches_offset]);
if (num_matches > 0 &&
BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
matches[0] = matches[num_matches - 1];
@ -693,13 +702,14 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
}
if (skip > 1) {
/* Add the tail of the copy to the hasher. */
StoreRangeH10(hasher, ringbuffer, ringbuffer_mask, pos + 1, BROTLI_MIN(
StoreRangeH10(&hasher->privat._H10,
ringbuffer, ringbuffer_mask, pos + 1, BROTLI_MIN(
size_t, pos + skip, store_end));
skip--;
while (skip) {
i++;
if (i + HashTypeLengthH10() - 1 >= num_bytes) break;
EvaluateNode(position, i, max_backward_limit, gap,
EvaluateNode(position + stream_offset, i, max_backward_limit, gap,
dist_cache, &model, &queue, nodes);
skip--;
}
@ -711,15 +721,15 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
void BrotliCreateZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
ContextLut literal_context_lut, const BrotliEncoderParams* params,
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals) {
ZopfliNode* nodes;
nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
if (BROTLI_IS_OOM(m)) return;
BrotliInitZopfliNodes(nodes, num_bytes + 1);
*num_commands += BrotliZopfliComputeShortestPath(m, num_bytes,
position, ringbuffer, ringbuffer_mask, params,
position, ringbuffer, ringbuffer_mask, literal_context_lut, params,
dist_cache, hasher, nodes);
if (BROTLI_IS_OOM(m)) return;
BrotliZopfliCreateCommands(num_bytes, position, nodes, dist_cache,
@ -729,9 +739,10 @@ void BrotliCreateZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
ContextLut literal_context_lut, const BrotliEncoderParams* params,
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals) {
const size_t stream_offset = params->stream_offset;
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
uint32_t* num_matches = BROTLI_ALLOC(m, uint32_t, num_bytes);
size_t matches_size = 4 * num_bytes;
@ -748,10 +759,13 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
BackwardMatch* matches = BROTLI_ALLOC(m, BackwardMatch, matches_size);
size_t gap = 0;
size_t shadow_matches = 0;
BROTLI_UNUSED(literal_context_lut);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; ++i) {
const size_t pos = position + i;
size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
size_t dictionary_start = BROTLI_MIN(size_t,
pos + stream_offset, max_backward_limit);
size_t max_length = num_bytes - i;
size_t num_found_matches;
size_t cur_match_end;
@ -760,10 +774,10 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
cur_match_pos + MAX_NUM_MATCHES_H10 + shadow_matches);
if (BROTLI_IS_OOM(m)) return;
num_found_matches = FindAllMatchesH10(hasher,
num_found_matches = FindAllMatchesH10(&hasher->privat._H10,
&params->dictionary,
ringbuffer, ringbuffer_mask, pos, max_length,
max_distance, gap, params,
max_distance, dictionary_start + gap, params,
&matches[cur_match_pos + shadow_matches]);
cur_match_end = cur_match_pos + num_found_matches;
for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {
@ -778,7 +792,8 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
matches[cur_match_pos++] = matches[cur_match_end - 1];
num_matches[i] = 1;
/* Add the tail of the copy to the hasher. */
StoreRangeH10(hasher, ringbuffer, ringbuffer_mask, pos + 1,
StoreRangeH10(&hasher->privat._H10,
ringbuffer, ringbuffer_mask, pos + 1,
BROTLI_MIN(size_t, pos + match_len, store_end));
memset(&num_matches[i + 1], 0, skip * sizeof(num_matches[0]));
i += skip;

View File

@ -10,6 +10,7 @@
#define BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
#include "../common/constants.h"
#include "../common/context.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
#include <brotli/types.h>
@ -25,15 +26,15 @@ extern "C" {
BROTLI_INTERNAL void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
ContextLut literal_context_lut, const BrotliEncoderParams* params,
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals);
BROTLI_INTERNAL void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m,
size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
ContextLut literal_context_lut, const BrotliEncoderParams* params,
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals);
typedef struct ZopfliNode {
@ -79,8 +80,8 @@ BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(
MemoryManager* m, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params,
const int* dist_cache, HasherHandle hasher, ZopfliNode* nodes);
ContextLut literal_context_lut, const BrotliEncoderParams* params,
const int* dist_cache, Hasher* hasher, ZopfliNode* nodes);
BROTLI_INTERNAL void BrotliZopfliCreateCommands(
const size_t num_bytes, const size_t block_start, const ZopfliNode* nodes,

View File

@ -10,11 +10,13 @@
static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
size_t num_bytes, size_t position,
const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params,
HasherHandle hasher, int* dist_cache, size_t* last_insert_len,
ContextLut literal_context_lut, const BrotliEncoderParams* params,
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals) {
HASHER()* privat = &hasher->privat.FN(_);
/* Set maximum distance, see section 9.1. of the spec. */
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
const size_t position_offset = params->stream_offset;
const Command* const orig_commands = commands;
size_t insert_length = *last_insert_len;
@ -31,19 +33,23 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
/* Minimum score to accept a backward reference. */
const score_t kMinScore = BROTLI_SCORE_BASE + 100;
FN(PrepareDistanceCache)(hasher, dist_cache);
BROTLI_UNUSED(literal_context_lut);
FN(PrepareDistanceCache)(privat, dist_cache);
while (position + FN(HashTypeLength)() < pos_end) {
size_t max_length = pos_end - position;
size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
size_t dictionary_start = BROTLI_MIN(size_t,
position + position_offset, max_backward_limit);
HasherSearchResult sr;
sr.len = 0;
sr.len_code_delta = 0;
sr.distance = 0;
sr.score = kMinScore;
FN(FindLongestMatch)(hasher, &params->dictionary,
FN(FindLongestMatch)(privat, &params->dictionary,
ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
max_distance, gap, params->dist.max_distance, &sr);
max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
if (sr.score > kMinScore) {
/* Found a match. Let's look for something even better ahead. */
int delayed_backward_references_in_row = 0;
@ -57,10 +63,12 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
sr2.distance = 0;
sr2.score = kMinScore;
max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
FN(FindLongestMatch)(hasher,
dictionary_start = BROTLI_MIN(size_t,
position + 1 + position_offset, max_backward_limit);
FN(FindLongestMatch)(privat,
&params->dictionary,
ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
max_distance, gap, params->dist.max_distance,
max_distance, dictionary_start + gap, params->dist.max_distance,
&sr2);
if (sr2.score >= sr.score + cost_diff_lazy) {
/* Ok, let's just write one byte for now and start a match from the
@ -77,19 +85,19 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
}
apply_random_heuristics =
position + 2 * sr.len + random_heuristics_window_size;
max_distance = BROTLI_MIN(size_t,
position, max_backward_limit);
dictionary_start = BROTLI_MIN(size_t,
position + position_offset, max_backward_limit);
{
/* The first 16 codes are special short-codes,
and the minimum offset is 1. */
size_t distance_code = ComputeDistanceCode(
sr.distance, max_distance + gap, dist_cache);
if ((sr.distance <= (max_distance + gap)) && distance_code > 0) {
sr.distance, dictionary_start + gap, dist_cache);
if ((sr.distance <= (dictionary_start + gap)) && distance_code > 0) {
dist_cache[3] = dist_cache[2];
dist_cache[2] = dist_cache[1];
dist_cache[1] = dist_cache[0];
dist_cache[0] = (int)sr.distance;
FN(PrepareDistanceCache)(hasher, dist_cache);
FN(PrepareDistanceCache)(privat, dist_cache);
}
InitCommand(commands++, &params->dist, insert_length,
sr.len, sr.len_code_delta, distance_code);
@ -107,7 +115,7 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
range_start = BROTLI_MIN(size_t, range_end, BROTLI_MAX(size_t,
range_start, position + sr.len - (sr.distance << 2)));
}
FN(StoreRange)(hasher, ringbuffer, ringbuffer_mask, range_start,
FN(StoreRange)(privat, ringbuffer, ringbuffer_mask, range_start,
range_end);
}
position += sr.len;
@ -133,7 +141,7 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
size_t pos_jump =
BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
for (; position < pos_jump; position += 4) {
FN(Store)(hasher, ringbuffer, ringbuffer_mask, position);
FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
insert_length += 4;
}
} else {
@ -142,7 +150,7 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
size_t pos_jump =
BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
for (; position < pos_jump; position += 2) {
FN(Store)(hasher, ringbuffer, ringbuffer_mask, position);
FN(Store)(privat, ringbuffer, ringbuffer_mask, position);
insert_length += 2;
}
}

View File

@ -956,18 +956,16 @@ void BrotliStoreMetaBlock(MemoryManager* m,
size_t pos = start_pos;
size_t i;
uint32_t num_distance_symbols = params->dist.alphabet_size;
uint32_t num_effective_distance_symbols = num_distance_symbols;
uint32_t num_distance_symbols = params->dist.alphabet_size_max;
uint32_t num_effective_distance_symbols = params->dist.alphabet_size_limit;
HuffmanTree* tree;
ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
BlockEncoder literal_enc;
BlockEncoder command_enc;
BlockEncoder distance_enc;
const BrotliDistanceParams* dist = &params->dist;
if (params->large_window &&
num_effective_distance_symbols > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) {
num_effective_distance_symbols = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS;
}
BROTLI_DCHECK(
num_effective_distance_symbols <= BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS);
StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
@ -1163,7 +1161,7 @@ void BrotliStoreMetaBlockTrivial(MemoryManager* m,
uint8_t dist_depth[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
uint16_t dist_bits[MAX_SIMPLE_DISTANCE_ALPHABET_SIZE];
HuffmanTree* tree;
uint32_t num_distance_symbols = params->dist.alphabet_size;
uint32_t num_distance_symbols = params->dist.alphabet_size_max;
StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
@ -1206,7 +1204,7 @@ void BrotliStoreMetaBlockFast(MemoryManager* m,
BROTLI_BOOL is_last, const BrotliEncoderParams* params,
const Command* commands, size_t n_commands,
size_t* storage_ix, uint8_t* storage) {
uint32_t num_distance_symbols = params->dist.alphabet_size;
uint32_t num_distance_symbols = params->dist.alphabet_size_max;
uint32_t distance_alphabet_bits =
Log2FloorNonZero(num_distance_symbols - 1) + 1;

File diff suppressed because it is too large Load Diff

View File

@ -15,7 +15,8 @@
extern "C" {
#endif
extern const uint16_t kStaticDictionaryHash[32768];
extern const uint16_t kStaticDictionaryHashWords[32768];
extern const uint8_t kStaticDictionaryHashLengths[32768];
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */

View File

@ -54,12 +54,19 @@ typedef enum BrotliEncoderStreamState {
BROTLI_STREAM_METADATA_BODY = 4
} BrotliEncoderStreamState;
/* Progress of the "flint" workflow; the value is kept in the encoder state's
   flint_ field. Non-negative values cap how many input bytes are copied to the
   ring buffer in one step; positive values are decreased by the number of
   bytes actually copied. */
typedef enum BrotliEncoderFlintState {
/* Starting value when a non-zero stream offset is configured. */
BROTLI_FLINT_NEEDS_2_BYTES = 2,
BROTLI_FLINT_NEEDS_1_BYTE = 1,
/* No more bytes are needed; the next non-final encode step forces a flush. */
BROTLI_FLINT_WAITING_FOR_PROCESSING = 0,
/* A flush has been forced; left once the flush completes and the stream is
   back in the BROTLI_STREAM_PROCESSING state. */
BROTLI_FLINT_WAITING_FOR_FLUSHING = -1,
/* Default value: no flint is pending. */
BROTLI_FLINT_DONE = -2
} BrotliEncoderFlintState;
typedef struct BrotliEncoderStateStruct {
BrotliEncoderParams params;
MemoryManager memory_manager_;
HasherHandle hasher_;
uint64_t input_pos_;
RingBuffer ringbuffer_;
size_t cmd_alloc_size_;
@ -73,10 +80,17 @@ typedef struct BrotliEncoderStateStruct {
int saved_dist_cache_[4];
uint16_t last_bytes_;
uint8_t last_bytes_bits_;
/* "Flint" is a tiny uncompressed block emitted before the continuation
block to unwire literal context from previous data. Despite being int8_t,
field is actually BrotliEncoderFlintState enum. */
int8_t flint_;
uint8_t prev_byte_;
uint8_t prev_byte2_;
size_t storage_size_;
uint8_t* storage_;
Hasher hasher_;
/* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */
int small_table_[1 << 10]; /* 4KiB */
int* large_table_; /* Allocated only when needed */
@ -172,6 +186,11 @@ BROTLI_BOOL BrotliEncoderSetParameter(
state->params.dist.num_direct_distance_codes = value;
return BROTLI_TRUE;
case BROTLI_PARAM_STREAM_OFFSET:
if (value > (1u << 30)) return BROTLI_FALSE;
state->params.stream_offset = value;
return BROTLI_TRUE;
default: return BROTLI_FALSE;
}
}
@ -615,11 +634,7 @@ static void WriteMetaBlockInternal(MemoryManager* m,
/* The number of distance symbols effectively used for distance
histograms. It might be less than distance alphabet size
for "Large Window Brotli" (32-bit). */
uint32_t num_effective_dist_codes = block_params.dist.alphabet_size;
if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) {
num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS;
}
BrotliOptimizeHistograms(num_effective_dist_codes, &mb);
BrotliOptimizeHistograms(block_params.dist.alphabet_size_limit, &mb);
}
BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,
prev_byte, prev_byte2,
@ -676,12 +691,23 @@ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
s->last_bytes_bits_ = 0;
s->last_bytes_ = 0;
s->flint_ = BROTLI_FLINT_DONE;
s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
SanitizeParams(&s->params);
s->params.lgblock = ComputeLgBlock(&s->params);
ChooseDistanceParams(&s->params);
if (s->params.stream_offset != 0) {
s->flint_ = BROTLI_FLINT_NEEDS_2_BYTES;
/* Poison the distance cache. -16 +- 3 is still less than zero (invalid). */
s->dist_cache_[0] = -16;
s->dist_cache_[1] = -16;
s->dist_cache_[2] = -16;
s->dist_cache_[3] = -16;
memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
}
RingBufferSetup(&s->params, &s->ringbuffer_);
/* Initialize last byte with stream header. */
@ -691,8 +717,14 @@ static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
lgwin = BROTLI_MAX(int, lgwin, 18);
}
EncodeWindowBits(lgwin, s->params.large_window,
&s->last_bytes_, &s->last_bytes_bits_);
if (s->params.stream_offset == 0) {
EncodeWindowBits(lgwin, s->params.large_window,
&s->last_bytes_, &s->last_bytes_bits_);
} else {
/* Bigger values have the same effect, but could cause overflows. */
s->params.stream_offset = BROTLI_MIN(size_t,
s->params.stream_offset, BROTLI_MAX_BACKWARD_LIMIT(lgwin));
}
}
if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
@ -710,13 +742,15 @@ static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
params->quality = BROTLI_DEFAULT_QUALITY;
params->lgwin = BROTLI_DEFAULT_WINDOW;
params->lgblock = 0;
params->stream_offset = 0;
params->size_hint = 0;
params->disable_literal_context_modeling = BROTLI_FALSE;
BrotliInitEncoderDictionary(&params->dictionary);
params->dist.distance_postfix_bits = 0;
params->dist.num_direct_distance_codes = 0;
params->dist.alphabet_size =
params->dist.alphabet_size_max =
BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS);
params->dist.alphabet_size_limit = params->dist.alphabet_size_max;
params->dist.max_distance = BROTLI_MAX_DISTANCE;
}
@ -732,7 +766,7 @@ static void BrotliEncoderInitState(BrotliEncoderState* s) {
s->prev_byte2_ = 0;
s->storage_size_ = 0;
s->storage_ = 0;
s->hasher_ = NULL;
HasherInit(&s->hasher_);
s->large_table_ = NULL;
s->large_table_size_ = 0;
s->cmd_code_numbits_ = 0;
@ -900,6 +934,7 @@ static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
(*bytes)--;
(*wrapped_last_processed_pos)++;
}
} else {
}
/* The copy length is at most the metablock size, and thus expressible. */
GetLengthCode(last_command->insert_len_,
@ -932,6 +967,7 @@ static BROTLI_BOOL EncodeData(
uint32_t mask;
MemoryManager* m = &s->memory_manager_;
ContextType literal_context_mode;
ContextLut literal_context_lut;
data = s->ringbuffer_.buffer_;
mask = s->ringbuffer_.mask_;
@ -1022,6 +1058,7 @@ static BROTLI_BOOL EncodeData(
literal_context_mode = ChooseContextMode(
&s->params, data, WrapPosition(s->last_flush_pos_),
mask, (size_t)(s->input_pos_ - s->last_flush_pos_));
literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
@ -1032,20 +1069,23 @@ static BROTLI_BOOL EncodeData(
if (s->params.quality == ZOPFLIFICATION_QUALITY) {
BROTLI_DCHECK(s->params.hasher.type == 10);
BrotliCreateZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
data, mask, &s->params, s->hasher_, s->dist_cache_,
data, mask, literal_context_lut, &s->params,
&s->hasher_, s->dist_cache_,
&s->last_insert_len_, &s->commands_[s->num_commands_],
&s->num_commands_, &s->num_literals_);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
} else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
BROTLI_DCHECK(s->params.hasher.type == 10);
BrotliCreateHqZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
data, mask, &s->params, s->hasher_, s->dist_cache_,
data, mask, literal_context_lut, &s->params,
&s->hasher_, s->dist_cache_,
&s->last_insert_len_, &s->commands_[s->num_commands_],
&s->num_commands_, &s->num_literals_);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
} else {
BrotliCreateBackwardReferences(bytes, wrapped_last_processed_pos,
data, mask, &s->params, s->hasher_, s->dist_cache_,
data, mask, literal_context_lut, &s->params,
&s->hasher_, s->dist_cache_,
&s->last_insert_len_, &s->commands_[s->num_commands_],
&s->num_commands_, &s->num_literals_);
}
@ -1070,7 +1110,7 @@ static BROTLI_BOOL EncodeData(
s->num_commands_ < max_commands) {
/* Merge with next input block. Everything will happen later. */
if (UpdateLastProcessedPos(s)) {
HasherReset(s->hasher_);
HasherReset(&s->hasher_);
}
*out_size = 0;
return BROTLI_TRUE;
@ -1111,7 +1151,7 @@ static BROTLI_BOOL EncodeData(
s->last_bytes_bits_ = storage_ix & 7u;
s->last_flush_pos_ = s->input_pos_;
if (UpdateLastProcessedPos(s)) {
HasherReset(s->hasher_);
HasherReset(&s->hasher_);
}
if (s->last_flush_pos_ > 0) {
s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];
@ -1172,7 +1212,6 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
size_t total_out_size = 0;
uint16_t last_bytes;
uint8_t last_bytes_bits;
HasherHandle hasher = NULL;
const size_t hasher_eff_size = BROTLI_MIN(size_t,
input_size, BROTLI_MAX_BACKWARD_LIMIT(lgwin) + BROTLI_WINDOW_GAP);
@ -1188,6 +1227,9 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
uint8_t prev_byte = 0;
uint8_t prev_byte2 = 0;
Hasher hasher;
HasherInit(&hasher);
BrotliEncoderInitParams(&params);
params.quality = 10;
params.lgwin = lgwin;
@ -1224,6 +1266,7 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
ContextType literal_context_mode = ChooseContextMode(&params,
input_buffer, metablock_start, mask, metablock_end - metablock_start);
ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
size_t block_start;
for (block_start = metablock_start; block_start < metablock_end; ) {
@ -1234,10 +1277,10 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
size_t new_cmd_alloc_size;
if (BROTLI_IS_OOM(m)) goto oom;
BrotliInitZopfliNodes(nodes, block_size + 1);
StitchToPreviousBlockH10(hasher, block_size, block_start,
StitchToPreviousBlockH10(&hasher.privat._H10, block_size, block_start,
input_buffer, mask);
path_size = BrotliZopfliComputeShortestPath(m, block_size, block_start,
input_buffer, mask, &params, dist_cache, hasher,
input_buffer, mask, literal_context_lut, &params, dist_cache, &hasher,
nodes);
if (BROTLI_IS_OOM(m)) goto oom;
/* We allocate a command buffer in the first iteration of this loop that
@ -1316,11 +1359,7 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
/* The number of distance symbols effectively used for distance
histograms. It might be less than distance alphabet size
for "Large Window Brotli" (32-bit). */
uint32_t num_effective_dist_codes = block_params.dist.alphabet_size;
if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) {
num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS;
}
BrotliOptimizeHistograms(num_effective_dist_codes, &mb);
BrotliOptimizeHistograms(block_params.dist.alphabet_size_limit, &mb);
}
storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503);
if (BROTLI_IS_OOM(m)) goto oom;
@ -1784,6 +1823,10 @@ BROTLI_BOOL BrotliEncoderCompressStream(
}
while (BROTLI_TRUE) {
size_t remaining_block_size = RemainingInputBlockSize(s);
/* Shorten input to flint size. */
if (s->flint_ >= 0 && remaining_block_size > (size_t)s->flint_) {
remaining_block_size = (size_t)s->flint_;
}
if (remaining_block_size != 0 && *available_in != 0) {
size_t copy_input_size =
@ -1791,10 +1834,18 @@ BROTLI_BOOL BrotliEncoderCompressStream(
CopyInputToRingBuffer(s, copy_input_size, *next_in);
*next_in += copy_input_size;
*available_in -= copy_input_size;
if (s->flint_ > 0) s->flint_ = (int8_t)(s->flint_ - (int)copy_input_size);
continue;
}
if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
/* Exit the "emit flint" workflow. */
if (s->flint_ == BROTLI_FLINT_WAITING_FOR_FLUSHING) {
CheckFlushComplete(s);
if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
s->flint_ = BROTLI_FLINT_DONE;
}
}
continue;
}
@ -1808,6 +1859,11 @@ BROTLI_BOOL BrotliEncoderCompressStream(
BROTLI_BOOL force_flush = TO_BROTLI_BOOL(
(*available_in == 0) && op == BROTLI_OPERATION_FLUSH);
BROTLI_BOOL result;
/* Force emitting (uncompressed) piece containing flint. */
if (!is_last && s->flint_ == 0) {
s->flint_ = BROTLI_FLINT_WAITING_FOR_FLUSHING;
force_flush = BROTLI_TRUE;
}
UpdateSizeHint(s, *available_in);
result = EncodeData(s, is_last, force_flush,
&s->available_out_, &s->next_out_);

View File

@ -17,8 +17,10 @@ extern "C" {
void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict) {
dict->words = BrotliGetDictionary();
dict->num_transforms = (uint32_t)BrotliGetTransforms()->num_transforms;
dict->hash_table = kStaticDictionaryHash;
dict->hash_table_words = kStaticDictionaryHashWords;
dict->hash_table_lengths = kStaticDictionaryHashLengths;
dict->buckets = kStaticDictionaryBuckets;
dict->dict_words = kStaticDictionaryWords;

View File

@ -19,13 +19,15 @@ extern "C" {
/* Dictionary data (words and transforms) for 1 possible context */
typedef struct BrotliEncoderDictionary {
const BrotliDictionary* words;
uint32_t num_transforms;
/* cut off for fast encoder */
uint32_t cutoffTransformsCount;
uint64_t cutoffTransforms;
/* from dictionary_hash.h, for fast encoder */
const uint16_t* hash_table;
const uint16_t* hash_table_words;
const uint8_t* hash_table_lengths;
/* from static_dict_lut.h, for slow encoder */
const uint16_t* buckets;

View File

@ -27,34 +27,19 @@
extern "C" {
#endif
/* Pointer to hasher data.
*
* Excluding initialization and destruction, hasher can be passed as
* HasherHandle by value.
*
* Typically hasher data consists of 3 sections:
* * HasherCommon structure
* * private structured hasher data, depending on hasher type
* * private dynamic hasher data, depending on hasher type and parameters
*
* Using "define" instead of "typedef", because on MSVC __restrict does not work
* on typedef pointer types. */
#define HasherHandle uint8_t*
typedef struct {
/* Dynamically allocated area; first member for quickest access. */
void* extra;
size_t dict_num_lookups;
size_t dict_num_matches;
BrotliHasherParams params;
/* False if hasher needs to be "prepared" before use. */
BROTLI_BOOL is_prepared_;
size_t dict_num_lookups;
size_t dict_num_matches;
} HasherCommon;
static BROTLI_INLINE HasherCommon* GetHasherCommon(HasherHandle handle) {
return (HasherCommon*)handle;
}
#define score_t size_t
static const uint32_t kCutoffTransformsCount = 10;
@ -149,17 +134,13 @@ static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
}
static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
const BrotliEncoderDictionary* dictionary, size_t item,
const BrotliEncoderDictionary* dictionary, size_t len, size_t word_idx,
const uint8_t* data, size_t max_length, size_t max_backward,
size_t max_distance, HasherSearchResult* out) {
size_t len;
size_t word_idx;
size_t offset;
size_t matchlen;
size_t backward;
score_t score;
len = item & 0x1F;
word_idx = item >> 5;
offset = dictionary->words->offsets_by_length[len] + len * word_idx;
if (len > max_length) {
return BROTLI_FALSE;
@ -193,25 +174,24 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
static BROTLI_INLINE void SearchInStaticDictionary(
const BrotliEncoderDictionary* dictionary,
HasherHandle handle, const uint8_t* data, size_t max_length,
HasherCommon* common, const uint8_t* data, size_t max_length,
size_t max_backward, size_t max_distance,
HasherSearchResult* out, BROTLI_BOOL shallow) {
size_t key;
size_t i;
HasherCommon* self = GetHasherCommon(handle);
if (self->dict_num_matches < (self->dict_num_lookups >> 7)) {
if (common->dict_num_matches < (common->dict_num_lookups >> 7)) {
return;
}
key = Hash14(data) << 1;
for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {
size_t item = dictionary->hash_table[key];
self->dict_num_lookups++;
if (item != 0) {
common->dict_num_lookups++;
if (dictionary->hash_table_lengths[key] != 0) {
BROTLI_BOOL item_matches = TestStaticDictionaryItem(
dictionary, item, data,
dictionary, dictionary->hash_table_lengths[key],
dictionary->hash_table_words[key], data,
max_length, max_backward, max_distance, out);
if (item_matches) {
self->dict_num_matches++;
common->dict_num_matches++;
}
}
}
@ -260,37 +240,37 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
/* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */
#define MAX_NUM_MATCHES_H10 128
/* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
/* For BUCKET_SWEEP_BITS == 0, enabling the dictionary lookup makes compression
a little faster (0.5% - 1%) and it compresses 0.15% better on small text
and HTML inputs. */
#define HASHER() H2
#define BUCKET_BITS 16
#define BUCKET_SWEEP 1
#define BUCKET_SWEEP_BITS 0
#define HASH_LEN 5
#define USE_DICTIONARY 1
#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
#undef BUCKET_SWEEP
#undef BUCKET_SWEEP_BITS
#undef USE_DICTIONARY
#undef HASHER
#define HASHER() H3
#define BUCKET_SWEEP 2
#define BUCKET_SWEEP_BITS 1
#define USE_DICTIONARY 0
#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
#undef USE_DICTIONARY
#undef BUCKET_SWEEP
#undef BUCKET_SWEEP_BITS
#undef BUCKET_BITS
#undef HASHER
#define HASHER() H4
#define BUCKET_BITS 17
#define BUCKET_SWEEP 4
#define BUCKET_SWEEP_BITS 2
#define USE_DICTIONARY 1
#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
#undef USE_DICTIONARY
#undef HASH_LEN
#undef BUCKET_SWEEP
#undef BUCKET_SWEEP_BITS
#undef BUCKET_BITS
#undef HASHER
@ -334,13 +314,13 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
#define HASHER() H54
#define BUCKET_BITS 20
#define BUCKET_SWEEP 4
#define BUCKET_SWEEP_BITS 2
#define HASH_LEN 7
#define USE_DICTIONARY 0
#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
#undef USE_DICTIONARY
#undef HASH_LEN
#undef BUCKET_SWEEP
#undef BUCKET_SWEEP_BITS
#undef BUCKET_BITS
#undef HASHER
@ -393,97 +373,107 @@ static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
#undef CAT
#undef EXPAND_CAT
#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)\
H(35) H(55) H(65)
#define FOR_SIMPLE_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)
#define FOR_COMPOSITE_HASHERS(H) H(35) H(55) H(65)
#define FOR_GENERIC_HASHERS(H) FOR_SIMPLE_HASHERS(H) FOR_COMPOSITE_HASHERS(H)
#define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
static BROTLI_INLINE void DestroyHasher(
MemoryManager* m, HasherHandle* handle) {
if (*handle == NULL) return;
BROTLI_FREE(m, *handle);
/* Complete hasher state: the part shared by every hasher type, followed by
   the type-specific part. HasherSetup picks the union member that matches
   common.params.type. */
typedef struct {
  HasherCommon common;
  union {
    /* Expands to one member "_H<N>" of type "H<N>" per hasher type N. */
#define DECLARE_HASHER_MEMBER_(N) H ## N _H ## N;
    FOR_ALL_HASHERS(DECLARE_HASHER_MEMBER_)
#undef DECLARE_HASHER_MEMBER_
  } privat;
} Hasher;
/* Puts |hasher| into the "nothing allocated yet" state by clearing the pointer
   to its dynamically allocated area.
   MUST be invoked before any other method. */
static BROTLI_INLINE void HasherInit(Hasher* hasher) {
  HasherCommon* shared = &hasher->common;
  shared->extra = NULL;
}
static BROTLI_INLINE void HasherReset(HasherHandle handle) {
if (handle == NULL) return;
GetHasherCommon(handle)->is_prepared_ = BROTLI_FALSE;
/* Releases the dynamically allocated area of |hasher| through memory manager
   |m|. Does nothing when no area has been allocated. */
static BROTLI_INLINE void DestroyHasher(MemoryManager* m, Hasher* hasher) {
  if (hasher->common.extra != NULL) {
    BROTLI_FREE(m, hasher->common.extra);
  }
}
/* Marks |hasher| as not prepared, so that the next HasherSetup call runs the
   type-specific "Prepare" step again. */
static BROTLI_INLINE void HasherReset(Hasher* hasher) {
  HasherCommon* shared = &hasher->common;
  shared->is_prepared_ = BROTLI_FALSE;
}
static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params,
BROTLI_BOOL one_shot, const size_t input_size) {
size_t result = sizeof(HasherCommon);
switch (params->hasher.type) {
#define SIZE_(N) \
case N: \
result += HashMemAllocInBytesH ## N(params, one_shot, input_size); \
break;
#define SIZE_(N) \
case N: \
return HashMemAllocInBytesH ## N(params, one_shot, input_size);
FOR_ALL_HASHERS(SIZE_)
#undef SIZE_
default:
break;
}
return result;
return 0; /* Default case. */
}
static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle,
static BROTLI_INLINE void HasherSetup(MemoryManager* m, Hasher* hasher,
BrotliEncoderParams* params, const uint8_t* data, size_t position,
size_t input_size, BROTLI_BOOL is_last) {
HasherHandle self = NULL;
HasherCommon* common = NULL;
BROTLI_BOOL one_shot = (position == 0 && is_last);
if (*handle == NULL) {
if (hasher->common.extra == NULL) {
size_t alloc_size;
ChooseHasher(params, &params->hasher);
alloc_size = HasherSize(params, one_shot, input_size);
self = BROTLI_ALLOC(m, uint8_t, alloc_size);
hasher->common.extra = BROTLI_ALLOC(m, uint8_t, alloc_size);
if (BROTLI_IS_OOM(m)) return;
*handle = self;
common = GetHasherCommon(self);
common->params = params->hasher;
switch (common->params.type) {
#define INITIALIZE_(N) \
case N: \
InitializeH ## N(*handle, params); \
hasher->common.params = params->hasher;
switch (hasher->common.params.type) {
#define INITIALIZE_(N) \
case N: \
InitializeH ## N(&hasher->common, \
&hasher->privat._H ## N, params); \
break;
FOR_ALL_HASHERS(INITIALIZE_);
#undef INITIALIZE_
default:
break;
}
HasherReset(*handle);
HasherReset(hasher);
}
self = *handle;
common = GetHasherCommon(self);
if (!common->is_prepared_) {
switch (common->params.type) {
#define PREPARE_(N) \
case N: \
PrepareH ## N(self, one_shot, input_size, data); \
if (!hasher->common.is_prepared_) {
switch (hasher->common.params.type) {
#define PREPARE_(N) \
case N: \
PrepareH ## N( \
&hasher->privat._H ## N, \
one_shot, input_size, data); \
break;
FOR_ALL_HASHERS(PREPARE_)
#undef PREPARE_
default: break;
}
if (position == 0) {
common->dict_num_lookups = 0;
common->dict_num_matches = 0;
hasher->common.dict_num_lookups = 0;
hasher->common.dict_num_matches = 0;
}
common->is_prepared_ = BROTLI_TRUE;
hasher->common.is_prepared_ = BROTLI_TRUE;
}
}
static BROTLI_INLINE void InitOrStitchToPreviousBlock(
MemoryManager* m, HasherHandle* handle, const uint8_t* data, size_t mask,
MemoryManager* m, Hasher* hasher, const uint8_t* data, size_t mask,
BrotliEncoderParams* params, size_t position, size_t input_size,
BROTLI_BOOL is_last) {
HasherHandle self;
HasherSetup(m, handle, params, data, position, input_size, is_last);
HasherSetup(m, hasher, params, data, position, input_size, is_last);
if (BROTLI_IS_OOM(m)) return;
self = *handle;
switch (GetHasherCommon(self)->params.type) {
#define INIT_(N) \
case N: \
StitchToPreviousBlockH ## N(self, input_size, position, data, mask); \
switch (hasher->common.params.type) {
#define INIT_(N) \
case N: \
StitchToPreviousBlockH ## N( \
&hasher->privat._H ## N, \
input_size, position, data, mask); \
break;
FOR_ALL_HASHERS(INIT_)
#undef INIT_

View File

@ -28,20 +28,25 @@ static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
}
typedef struct HashComposite {
HasherHandle ha;
HasherHandle hb;
HASHER_A ha;
HASHER_B hb;
HasherCommon hb_common;
/* Shortcuts. */
void* extra;
HasherCommon* common;
BROTLI_BOOL fresh;
const BrotliEncoderParams* params;
} HashComposite;
static BROTLI_INLINE HashComposite* FN(Self)(HasherHandle handle) {
return (HashComposite*)&(GetHasherCommon(handle)[1]);
}
static void FN(Initialize)(HasherCommon* common,
HashComposite* BROTLI_RESTRICT self, const BrotliEncoderParams* params) {
self->common = common;
self->extra = common->extra;
static void FN(Initialize)(
HasherHandle handle, const BrotliEncoderParams* params) {
HashComposite* self = FN(Self)(handle);
self->ha = 0;
self->hb = 0;
self->hb_common = *self->common;
self->fresh = BROTLI_TRUE;
self->params = params;
/* TODO: Initialization of the hashers is deferred to Prepare (and params
remembered here) because we don't get the one_shot and input_size params
@ -49,87 +54,71 @@ static void FN(Initialize)(
those params to all hashers FN(Initialize) */
}
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* data) {
HashComposite* self = FN(Self)(handle);
if (!self->ha) {
HasherCommon* common_a;
HasherCommon* common_b;
static void FN(Prepare)(
HashComposite* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
if (self->fresh) {
self->fresh = BROTLI_FALSE;
self->hb_common.extra = (uint8_t*)self->extra +
FN_A(HashMemAllocInBytes)(self->params, one_shot, input_size);
self->ha = handle + sizeof(HasherCommon) + sizeof(HashComposite);
common_a = (HasherCommon*)self->ha;
common_a->params = self->params->hasher;
common_a->is_prepared_ = BROTLI_FALSE;
common_a->dict_num_lookups = 0;
common_a->dict_num_matches = 0;
FN_A(Initialize)(self->ha, self->params);
self->hb = self->ha + sizeof(HasherCommon) + FN_A(HashMemAllocInBytes)(
self->params, one_shot, input_size);
common_b = (HasherCommon*)self->hb;
common_b->params = self->params->hasher;
common_b->is_prepared_ = BROTLI_FALSE;
common_b->dict_num_lookups = 0;
common_b->dict_num_matches = 0;
FN_B(Initialize)(self->hb, self->params);
FN_A(Initialize)(self->common, &self->ha, self->params);
FN_B(Initialize)(&self->hb_common, &self->hb, self->params);
}
FN_A(Prepare)(self->ha, one_shot, input_size, data);
FN_B(Prepare)(self->hb, one_shot, input_size, data);
FN_A(Prepare)(&self->ha, one_shot, input_size, data);
FN_B(Prepare)(&self->hb, one_shot, input_size, data);
}
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
size_t input_size) {
return sizeof(HashComposite) + 2 * sizeof(HasherCommon) +
FN_A(HashMemAllocInBytes)(params, one_shot, input_size) +
return FN_A(HashMemAllocInBytes)(params, one_shot, input_size) +
FN_B(HashMemAllocInBytes)(params, one_shot, input_size);
}
static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
static BROTLI_INLINE void FN(Store)(HashComposite* BROTLI_RESTRICT self,
const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
HashComposite* self = FN(Self)(handle);
FN_A(Store)(self->ha, data, mask, ix);
FN_B(Store)(self->hb, data, mask, ix);
FN_A(Store)(&self->ha, data, mask, ix);
FN_B(Store)(&self->hb, data, mask, ix);
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
const uint8_t* data, const size_t mask, const size_t ix_start,
static BROTLI_INLINE void FN(StoreRange)(
HashComposite* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
const size_t mask, const size_t ix_start,
const size_t ix_end) {
HashComposite* self = FN(Self)(handle);
FN_A(StoreRange)(self->ha, data, mask, ix_start, ix_end);
FN_B(StoreRange)(self->hb, data, mask, ix_start, ix_end);
FN_A(StoreRange)(&self->ha, data, mask, ix_start, ix_end);
FN_B(StoreRange)(&self->hb, data, mask, ix_start, ix_end);
}
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
HashComposite* BROTLI_RESTRICT self,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ring_buffer_mask) {
HashComposite* self = FN(Self)(handle);
FN_A(StitchToPreviousBlock)(self->ha, num_bytes, position, ringbuffer,
ring_buffer_mask);
FN_B(StitchToPreviousBlock)(self->hb, num_bytes, position, ringbuffer,
ring_buffer_mask);
FN_A(StitchToPreviousBlock)(&self->ha, num_bytes, position,
ringbuffer, ring_buffer_mask);
FN_B(StitchToPreviousBlock)(&self->hb, num_bytes, position,
ringbuffer, ring_buffer_mask);
}
static BROTLI_INLINE void FN(PrepareDistanceCache)(
HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
HashComposite* self = FN(Self)(handle);
FN_A(PrepareDistanceCache)(self->ha, distance_cache);
FN_B(PrepareDistanceCache)(self->hb, distance_cache);
HashComposite* BROTLI_RESTRICT self, int* BROTLI_RESTRICT distance_cache) {
FN_A(PrepareDistanceCache)(&self->ha, distance_cache);
FN_B(PrepareDistanceCache)(&self->hb, distance_cache);
}
static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
static BROTLI_INLINE void FN(FindLongestMatch)(
HashComposite* BROTLI_RESTRICT self,
const BrotliEncoderDictionary* dictionary,
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
const size_t max_length, const size_t max_backward,
const size_t gap, const size_t max_distance,
const size_t dictionary_distance, const size_t max_distance,
HasherSearchResult* BROTLI_RESTRICT out) {
HashComposite* self = FN(Self)(handle);
FN_A(FindLongestMatch)(self->ha, dictionary, data, ring_buffer_mask,
distance_cache, cur_ix, max_length, max_backward, gap,
FN_A(FindLongestMatch)(&self->ha, dictionary, data, ring_buffer_mask,
distance_cache, cur_ix, max_length, max_backward, dictionary_distance,
max_distance, out);
FN_B(FindLongestMatch)(self->hb, dictionary, data, ring_buffer_mask,
distance_cache, cur_ix, max_length, max_backward, gap,
FN_B(FindLongestMatch)(&self->hb, dictionary, data, ring_buffer_mask,
distance_cache, cur_ix, max_length, max_backward, dictionary_distance,
max_distance, out);
}

View File

@ -28,7 +28,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
/* HashBytes is the function that chooses the bucket to place the address in.*/
static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* data) {
static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data) {
const uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
@ -45,28 +45,56 @@ typedef struct FN(Bank) {
} FN(Bank);
typedef struct HashForgetfulChain {
uint32_t addr[BUCKET_SIZE];
uint16_t head[BUCKET_SIZE];
/* Truncated hash used for quick rejection of "distance cache" candidates. */
uint8_t tiny_hash[65536];
FN(Bank) banks[NUM_BANKS];
uint16_t free_slot_idx[NUM_BANKS];
uint16_t free_slot_idx[NUM_BANKS]; /* Up to 1KiB. Move to dynamic? */
size_t max_hops;
/* Shortcuts. */
void* extra;
HasherCommon* common;
/* --- Dynamic size members --- */
/* uint32_t addr[BUCKET_SIZE]; */
/* uint16_t head[BUCKET_SIZE]; */
/* Truncated hash used for quick rejection of "distance cache" candidates. */
/* uint8_t tiny_hash[65536];*/
/* FN(Bank) banks[NUM_BANKS]; */
} HashForgetfulChain;
static BROTLI_INLINE HashForgetfulChain* FN(Self)(HasherHandle handle) {
return (HashForgetfulChain*)&(GetHasherCommon(handle)[1]);
/* Returns the addr table (uint32_t entries); it starts at the very beginning
   of the |extra| area. */
static uint32_t* FN(Addr)(void* extra) {
return (uint32_t*)extra;
}
/* Returns the head table (uint16_t entries); it is placed right after the
   BUCKET_SIZE entries of the addr table. */
static uint16_t* FN(Head)(void* extra) {
return (uint16_t*)(&FN(Addr)(extra)[BUCKET_SIZE]);
}
/* Returns the tiny_hash table (uint8_t entries); it is placed right after the
   BUCKET_SIZE entries of the head table. */
static uint8_t* FN(TinyHash)(void* extra) {
return (uint8_t*)(&FN(Head)(extra)[BUCKET_SIZE]);
}
/* Returns the banks array; it is placed right after the 65536 bytes of the
   tiny_hash table. */
static FN(Bank)* FN(Banks)(void* extra) {
return (FN(Bank)*)(&FN(TinyHash)(extra)[65536]);
}
static void FN(Initialize)(
HasherHandle handle, const BrotliEncoderParams* params) {
FN(Self)(handle)->max_hops =
(params->quality > 6 ? 7u : 8u) << (params->quality - 4);
HasherCommon* common, HashForgetfulChain* BROTLI_RESTRICT self,
const BrotliEncoderParams* params) {
self->common = common;
self->extra = common->extra;
self->max_hops = (params->quality > 6 ? 7u : 8u) << (params->quality - 4);
}
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* data) {
HashForgetfulChain* self = FN(Self)(handle);
static void FN(Prepare)(
HashForgetfulChain* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra);
uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra);
uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra);
/* Partial preparation is 100 times slower (per socket). */
size_t partial_prepare_threshold = BUCKET_SIZE >> 6;
if (one_shot && input_size <= partial_prepare_threshold) {
@ -74,17 +102,17 @@ static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
for (i = 0; i < input_size; ++i) {
size_t bucket = FN(HashBytes)(&data[i]);
/* See InitEmpty comment. */
self->addr[bucket] = 0xCCCCCCCC;
self->head[bucket] = 0xCCCC;
addr[bucket] = 0xCCCCCCCC;
head[bucket] = 0xCCCC;
}
} else {
/* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position
processed by hasher never reaches 3GB + 64M; this makes all new chains
to be terminated after the first node. */
memset(self->addr, 0xCC, sizeof(self->addr));
memset(self->head, 0, sizeof(self->head));
memset(addr, 0xCC, sizeof(uint32_t) * BUCKET_SIZE);
memset(head, 0, sizeof(uint16_t) * BUCKET_SIZE);
}
memset(self->tiny_hash, 0, sizeof(self->tiny_hash));
memset(tiny_hash, 0, sizeof(uint8_t) * 65536);
memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
}
@ -94,51 +122,58 @@ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
BROTLI_UNUSED(params);
BROTLI_UNUSED(one_shot);
BROTLI_UNUSED(input_size);
return sizeof(HashForgetfulChain);
return sizeof(uint32_t) * BUCKET_SIZE + sizeof(uint16_t) * BUCKET_SIZE +
sizeof(uint8_t) * 65536 + sizeof(FN(Bank)) * NUM_BANKS;
}
/* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
node to corresponding chain; also update tiny_hash for current position. */
static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
static BROTLI_INLINE void FN(Store)(HashForgetfulChain* BROTLI_RESTRICT self,
const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
HashForgetfulChain* self = FN(Self)(handle);
uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra);
uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra);
uint8_t* BROTLI_RESTRICT tiny_hash = FN(TinyHash)(self->extra);
FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra);
const size_t key = FN(HashBytes)(&data[ix & mask]);
const size_t bank = key & (NUM_BANKS - 1);
const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);
size_t delta = ix - self->addr[key];
self->tiny_hash[(uint16_t)ix] = (uint8_t)key;
size_t delta = ix - addr[key];
tiny_hash[(uint16_t)ix] = (uint8_t)key;
if (delta > 0xFFFF) delta = CAPPED_CHAINS ? 0 : 0xFFFF;
self->banks[bank].slots[idx].delta = (uint16_t)delta;
self->banks[bank].slots[idx].next = self->head[key];
self->addr[key] = (uint32_t)ix;
self->head[key] = (uint16_t)idx;
banks[bank].slots[idx].delta = (uint16_t)delta;
banks[bank].slots[idx].next = head[key];
addr[key] = (uint32_t)ix;
head[key] = (uint16_t)idx;
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
const uint8_t* data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
static BROTLI_INLINE void FN(StoreRange)(
HashForgetfulChain* BROTLI_RESTRICT self,
const uint8_t* BROTLI_RESTRICT data, const size_t mask,
const size_t ix_start, const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
FN(Store)(handle, data, mask, i);
FN(Store)(self, data, mask, i);
}
}
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
HashForgetfulChain* BROTLI_RESTRICT self,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ring_buffer_mask) {
if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
/* Prepare the hashes for three last bytes of the last write.
These could not be calculated before, since they require knowledge
of both the previous and the current block. */
FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 3);
FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 2);
FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 1);
FN(Store)(self, ringbuffer, ring_buffer_mask, position - 3);
FN(Store)(self, ringbuffer, ring_buffer_mask, position - 2);
FN(Store)(self, ringbuffer, ring_buffer_mask, position - 1);
}
}
static BROTLI_INLINE void FN(PrepareDistanceCache)(
HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
BROTLI_UNUSED(handle);
HashForgetfulChain* BROTLI_RESTRICT self,
int* BROTLI_RESTRICT distance_cache) {
BROTLI_UNUSED(self);
PrepareDistanceCache(distance_cache, NUM_LAST_DISTANCES_TO_CHECK);
}
@ -153,14 +188,18 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)(
Does not look for matches further away than max_backward.
Writes the best match into |out|.
|out|->score is updated only if a better match is found. */
static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
static BROTLI_INLINE void FN(FindLongestMatch)(
HashForgetfulChain* BROTLI_RESTRICT self,
const BrotliEncoderDictionary* dictionary,
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
const int* BROTLI_RESTRICT distance_cache,
const size_t cur_ix, const size_t max_length, const size_t max_backward,
const size_t gap, const size_t max_distance,
const size_t dictionary_distance, const size_t max_distance,
HasherSearchResult* BROTLI_RESTRICT out) {
HashForgetfulChain* self = FN(Self)(handle);
uint32_t* BROTLI_RESTRICT addr = FN(Addr)(self->extra);
uint16_t* BROTLI_RESTRICT head = FN(Head)(self->extra);
uint8_t* BROTLI_RESTRICT tiny_hashes = FN(TinyHash)(self->extra);
FN(Bank)* BROTLI_RESTRICT banks = FN(Banks)(self->extra);
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
/* Don't accept a short copy from far away. */
score_t min_score = out->score;
@ -176,7 +215,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
const size_t backward = (size_t)distance_cache[i];
size_t prev_ix = (cur_ix - backward);
/* For distance code 0 we want to consider 2-byte matches. */
if (i > 0 && self->tiny_hash[(uint16_t)prev_ix] != tiny_hash) continue;
if (i > 0 && tiny_hashes[(uint16_t)prev_ix] != tiny_hash) continue;
if (prev_ix >= cur_ix || backward > max_backward) {
continue;
}
@ -204,16 +243,16 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
const size_t bank = key & (NUM_BANKS - 1);
size_t backward = 0;
size_t hops = self->max_hops;
size_t delta = cur_ix - self->addr[key];
size_t slot = self->head[key];
size_t delta = cur_ix - addr[key];
size_t slot = head[key];
while (hops--) {
size_t prev_ix;
size_t last = slot;
backward += delta;
if (backward > max_backward || (CAPPED_CHAINS && !delta)) break;
prev_ix = (cur_ix - backward) & ring_buffer_mask;
slot = self->banks[bank].slots[last].next;
delta = self->banks[bank].slots[last].delta;
slot = banks[bank].slots[last].next;
delta = banks[bank].slots[last].delta;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
@ -238,11 +277,11 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
}
}
}
FN(Store)(handle, data, ring_buffer_mask, cur_ix);
FN(Store)(self, data, ring_buffer_mask, cur_ix);
}
if (out->score == min_score) {
SearchInStaticDictionary(dictionary,
handle, &data[cur_ix_masked], max_length, max_backward + gap,
self->common, &data[cur_ix_masked], max_length, dictionary_distance,
max_distance, out, BROTLI_FALSE);
}
}

View File

@ -20,7 +20,7 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
/* HashBytes is the function that chooses the bucket to place the address in. */
static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t* data,
static BROTLI_INLINE uint32_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data,
const uint64_t mask,
const int shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(data) & mask) * kHashMul64Long;
@ -42,43 +42,43 @@ typedef struct HashLongestMatch {
/* Mask for accessing entries in a block (in a ring-buffer manner). */
uint32_t block_mask_;
int block_bits_;
int num_last_distances_to_check_;
/* Shortcuts. */
HasherCommon* common_;
/* --- Dynamic size members --- */
/* Number of entries in a particular bucket. */
/* uint16_t num[bucket_size]; */
uint16_t* num_; /* uint16_t[bucket_size]; */
/* Buckets containing block_size_ of backward references. */
/* uint32_t* buckets[bucket_size * block_size]; */
uint32_t* buckets_; /* uint32_t[bucket_size * block_size]; */
} HashLongestMatch;
static BROTLI_INLINE HashLongestMatch* FN(Self)(HasherHandle handle) {
return (HashLongestMatch*)&(GetHasherCommon(handle)[1]);
}
static BROTLI_INLINE uint16_t* FN(Num)(HashLongestMatch* self) {
return (uint16_t*)(&self[1]);
}
static BROTLI_INLINE uint32_t* FN(Buckets)(HashLongestMatch* self) {
return (uint32_t*)(&FN(Num)(self)[self->bucket_size_]);
}
static void FN(Initialize)(
HasherHandle handle, const BrotliEncoderParams* params) {
HasherCommon* common = GetHasherCommon(handle);
HashLongestMatch* self = FN(Self)(handle);
HasherCommon* common, HashLongestMatch* BROTLI_RESTRICT self,
const BrotliEncoderParams* params) {
self->common_ = common;
BROTLI_UNUSED(params);
self->hash_shift_ = 64 - common->params.bucket_bits;
self->hash_mask_ = (~((uint64_t)0U)) >> (64 - 8 * common->params.hash_len);
self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
self->block_bits_ = common->params.block_bits;
self->block_size_ = (size_t)1 << common->params.block_bits;
self->block_mask_ = (uint32_t)(self->block_size_ - 1);
self->num_last_distances_to_check_ =
common->params.num_last_distances_to_check;
self->num_ = (uint16_t*)common->extra;
self->buckets_ = (uint32_t*)&self->num_[self->bucket_size_];
}
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* data) {
HashLongestMatch* self = FN(Self)(handle);
uint16_t* num = FN(Num)(self);
static void FN(Prepare)(
HashLongestMatch* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
uint16_t* BROTLI_RESTRICT num = self->num_;
/* Partial preparation is 100 times slower (per socket). */
size_t partial_prepare_threshold = self->bucket_size_ >> 6;
if (one_shot && input_size <= partial_prepare_threshold) {
@ -100,50 +100,52 @@ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
size_t block_size = (size_t)1 << params->hasher.block_bits;
BROTLI_UNUSED(one_shot);
BROTLI_UNUSED(input_size);
return sizeof(HashLongestMatch) + bucket_size * (2 + 4 * block_size);
return sizeof(uint16_t) * bucket_size +
sizeof(uint32_t) * bucket_size * block_size;
}
/* Look at the bytes at &data[ix & mask] (an 8-byte load masked by hash_mask_).
Compute a hash from these, and store the value of ix at that position. */
static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,
static BROTLI_INLINE void FN(Store)(
HashLongestMatch* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
const size_t mask, const size_t ix) {
HashLongestMatch* self = FN(Self)(handle);
uint16_t* num = FN(Num)(self);
uint16_t* BROTLI_RESTRICT num = self->num_;
uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_mask_,
self->hash_shift_);
const size_t minor_ix = num[key] & self->block_mask_;
const size_t offset =
minor_ix + (key << GetHasherCommon(handle)->params.block_bits);
FN(Buckets)(self)[offset] = (uint32_t)ix;
const size_t offset = minor_ix + (key << self->block_bits_);
buckets[offset] = (uint32_t)ix;
++num[key];
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
const uint8_t* data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* BROTLI_RESTRICT self,
const uint8_t* BROTLI_RESTRICT data, const size_t mask,
const size_t ix_start, const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
FN(Store)(handle, data, mask, i);
FN(Store)(self, data, mask, i);
}
}
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
HashLongestMatch* BROTLI_RESTRICT self,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask) {
if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
/* Prepare the hashes for three last bytes of the last write.
These could not be calculated before, since they require knowledge
of both the previous and the current block. */
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 3);
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 2);
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 1);
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
}
}
static BROTLI_INLINE void FN(PrepareDistanceCache)(
HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
PrepareDistanceCache(distance_cache,
GetHasherCommon(handle)->params.num_last_distances_to_check);
HashLongestMatch* BROTLI_RESTRICT self,
int* BROTLI_RESTRICT distance_cache) {
PrepareDistanceCache(distance_cache, self->num_last_distances_to_check_);
}
/* Find a longest backward match of &data[cur_ix] up to the length of
@ -157,17 +159,16 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)(
Does not look for matches further away than max_backward.
Writes the best match into |out|.
|out|->score is updated only if a better match is found. */
static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
static BROTLI_INLINE void FN(FindLongestMatch)(
HashLongestMatch* BROTLI_RESTRICT self,
const BrotliEncoderDictionary* dictionary,
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
const size_t max_length, const size_t max_backward,
const size_t gap, const size_t max_distance,
const size_t dictionary_distance, const size_t max_distance,
HasherSearchResult* BROTLI_RESTRICT out) {
HasherCommon* common = GetHasherCommon(handle);
HashLongestMatch* self = FN(Self)(handle);
uint16_t* num = FN(Num)(self);
uint32_t* buckets = FN(Buckets)(self);
uint16_t* BROTLI_RESTRICT num = self->num_;
uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
/* Don't accept a short copy from far away. */
score_t min_score = out->score;
@ -177,7 +178,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
out->len = 0;
out->len_code_delta = 0;
/* Try last distance first. */
for (i = 0; i < (size_t)common->params.num_last_distances_to_check; ++i) {
for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
const size_t backward = (size_t)distance_cache[i];
size_t prev_ix = (size_t)(cur_ix - backward);
if (prev_ix >= cur_ix) {
@ -218,8 +219,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
{
const uint32_t key = FN(HashBytes)(
&data[cur_ix_masked], self->hash_mask_, self->hash_shift_);
uint32_t* BROTLI_RESTRICT bucket =
&buckets[key << common->params.block_bits];
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
const size_t down =
(num[key] > self->block_size_) ?
(num[key] - self->block_size_) : 0u;
@ -259,7 +259,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
}
if (min_score == out->score) {
SearchInStaticDictionary(dictionary,
handle, &data[cur_ix_masked], max_length, max_backward + gap,
self->common_, &data[cur_ix_masked], max_length, dictionary_distance,
max_distance, out, BROTLI_FALSE);
}
}

View File

@ -20,7 +20,8 @@ static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
/* HashBytes is the function that chooses the bucket to place the address in. */
static uint32_t FN(HashBytes)(const uint8_t* data, const int shift) {
static uint32_t FN(HashBytes)(
const uint8_t* BROTLI_RESTRICT data, const int shift) {
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
@ -38,42 +39,46 @@ typedef struct HashLongestMatch {
/* Mask for accessing entries in a block (in a ring-buffer manner). */
uint32_t block_mask_;
int block_bits_;
int num_last_distances_to_check_;
/* Shortcuts. */
HasherCommon* common_;
/* --- Dynamic size members --- */
/* Number of entries in a particular bucket. */
/* uint16_t num[bucket_size]; */
uint16_t* num_; /* uint16_t[bucket_size]; */
/* Buckets containing block_size_ of backward references. */
/* uint32_t* buckets[bucket_size * block_size]; */
uint32_t* buckets_; /* uint32_t[bucket_size * block_size]; */
} HashLongestMatch;
static BROTLI_INLINE HashLongestMatch* FN(Self)(HasherHandle handle) {
return (HashLongestMatch*)&(GetHasherCommon(handle)[1]);
}
static BROTLI_INLINE uint16_t* FN(Num)(HashLongestMatch* self) {
return (uint16_t*)(&self[1]);
}
static BROTLI_INLINE uint32_t* FN(Buckets)(HashLongestMatch* self) {
return (uint32_t*)(&FN(Num)(self)[self->bucket_size_]);
/* Returns |extra| reinterpreted as a pointer to uint16_t, i.e. the same
   address that Initialize stores in num_. */
static BROTLI_INLINE uint16_t* FN(Num)(void* extra) {
return (uint16_t*)extra;
}
static void FN(Initialize)(
HasherHandle handle, const BrotliEncoderParams* params) {
HasherCommon* common = GetHasherCommon(handle);
HashLongestMatch* self = FN(Self)(handle);
HasherCommon* common, HashLongestMatch* BROTLI_RESTRICT self,
const BrotliEncoderParams* params) {
self->common_ = common;
BROTLI_UNUSED(params);
self->hash_shift_ = 32 - common->params.bucket_bits;
self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
self->block_size_ = (size_t)1 << common->params.block_bits;
self->block_mask_ = (uint32_t)(self->block_size_ - 1);
self->num_ = (uint16_t*)common->extra;
self->buckets_ = (uint32_t*)(&self->num_[self->bucket_size_]);
self->block_bits_ = common->params.block_bits;
self->num_last_distances_to_check_ =
common->params.num_last_distances_to_check;
}
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* data) {
HashLongestMatch* self = FN(Self)(handle);
uint16_t* num = FN(Num)(self);
static void FN(Prepare)(
HashLongestMatch* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
uint16_t* BROTLI_RESTRICT num = self->num_;
/* Partial preparation is 100 times slower (per socket). */
size_t partial_prepare_threshold = self->bucket_size_ >> 6;
if (one_shot && input_size <= partial_prepare_threshold) {
@ -94,49 +99,49 @@ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
size_t block_size = (size_t)1 << params->hasher.block_bits;
BROTLI_UNUSED(one_shot);
BROTLI_UNUSED(input_size);
return sizeof(HashLongestMatch) + bucket_size * (2 + 4 * block_size);
return sizeof(uint16_t) * bucket_size +
sizeof(uint32_t) * bucket_size * block_size;
}
/* Look at 4 bytes at &data[ix & mask].
Compute a hash from these, and store the value of ix at that position. */
static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,
static BROTLI_INLINE void FN(Store)(
HashLongestMatch* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
const size_t mask, const size_t ix) {
HashLongestMatch* self = FN(Self)(handle);
uint16_t* num = FN(Num)(self);
const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_shift_);
const size_t minor_ix = num[key] & self->block_mask_;
const size_t offset =
minor_ix + (key << GetHasherCommon(handle)->params.block_bits);
FN(Buckets)(self)[offset] = (uint32_t)ix;
++num[key];
const size_t minor_ix = self->num_[key] & self->block_mask_;
const size_t offset = minor_ix + (key << self->block_bits_);
self->buckets_[offset] = (uint32_t)ix;
++self->num_[key];
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
const uint8_t* data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* BROTLI_RESTRICT self,
const uint8_t* BROTLI_RESTRICT data, const size_t mask,
const size_t ix_start, const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
FN(Store)(handle, data, mask, i);
FN(Store)(self, data, mask, i);
}
}
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
HashLongestMatch* BROTLI_RESTRICT self,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask) {
if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
/* Prepare the hashes for three last bytes of the last write.
These could not be calculated before, since they require knowledge
of both the previous and the current block. */
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 3);
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 2);
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 1);
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
}
}
static BROTLI_INLINE void FN(PrepareDistanceCache)(
HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
PrepareDistanceCache(distance_cache,
GetHasherCommon(handle)->params.num_last_distances_to_check);
HashLongestMatch* BROTLI_RESTRICT self,
int* BROTLI_RESTRICT distance_cache) {
PrepareDistanceCache(distance_cache, self->num_last_distances_to_check_);
}
/* Find a longest backward match of &data[cur_ix] up to the length of
@ -150,17 +155,16 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)(
Does not look for matches further away than max_backward.
Writes the best match into |out|.
|out|->score is updated only if a better match is found. */
static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
static BROTLI_INLINE void FN(FindLongestMatch)(
HashLongestMatch* BROTLI_RESTRICT self,
const BrotliEncoderDictionary* dictionary,
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
const size_t max_length, const size_t max_backward,
const size_t gap, const size_t max_distance,
const size_t dictionary_distance, const size_t max_distance,
HasherSearchResult* BROTLI_RESTRICT out) {
HasherCommon* common = GetHasherCommon(handle);
HashLongestMatch* self = FN(Self)(handle);
uint16_t* num = FN(Num)(self);
uint32_t* buckets = FN(Buckets)(self);
uint16_t* BROTLI_RESTRICT num = self->num_;
uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
/* Don't accept a short copy from far away. */
score_t min_score = out->score;
@ -170,7 +174,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
out->len = 0;
out->len_code_delta = 0;
/* Try last distance first. */
for (i = 0; i < (size_t)common->params.num_last_distances_to_check; ++i) {
for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
const size_t backward = (size_t)distance_cache[i];
size_t prev_ix = (size_t)(cur_ix - backward);
if (prev_ix >= cur_ix) {
@ -211,8 +215,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
{
const uint32_t key =
FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
uint32_t* BROTLI_RESTRICT bucket =
&buckets[key << common->params.block_bits];
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
const size_t down =
(num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u;
for (i = num[key]; i > down;) {
@ -251,7 +254,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
}
if (min_score == out->score) {
SearchInStaticDictionary(dictionary,
handle, &data[cur_ix_masked], max_length, max_backward + gap,
self->common_, &data[cur_ix_masked], max_length, dictionary_distance,
max_distance, out, BROTLI_FALSE);
}
}

View File

@ -5,15 +5,16 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: FN, BUCKET_BITS, BUCKET_SWEEP, HASH_LEN,
/* template parameters: FN, BUCKET_BITS, BUCKET_SWEEP_BITS, HASH_LEN,
USE_DICTIONARY
*/
#define HashLongestMatchQuickly HASHER()
#define BUCKET_SIZE (1 << BUCKET_BITS)
#define HASH_MAP_SIZE (4 << BUCKET_BITS)
#define BUCKET_MASK (BUCKET_SIZE - 1)
#define BUCKET_SWEEP (1 << BUCKET_SWEEP_BITS)
#define BUCKET_SWEEP_MASK ((BUCKET_SWEEP - 1) << 3)
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
@ -32,39 +33,50 @@ static uint32_t FN(HashBytes)(const uint8_t* data) {
/* A (forgetful) hash table to the data seen by the compressor, to
help create backward references to previous data.
This is a hash map of fixed size (BUCKET_SIZE). Starting from the
given index, BUCKET_SWEEP buckets are used to store values of a key. */
This is a hash map of fixed size (BUCKET_SIZE). */
typedef struct HashLongestMatchQuickly {
uint32_t buckets_[BUCKET_SIZE + BUCKET_SWEEP];
/* Shortcuts. */
HasherCommon* common;
/* --- Dynamic size members --- */
uint32_t* buckets_; /* uint32_t[BUCKET_SIZE]; */
} HashLongestMatchQuickly;
static BROTLI_INLINE HashLongestMatchQuickly* FN(Self)(HasherHandle handle) {
return (HashLongestMatchQuickly*)&(GetHasherCommon(handle)[1]);
}
static void FN(Initialize)(
HasherHandle handle, const BrotliEncoderParams* params) {
BROTLI_UNUSED(handle);
HasherCommon* common, HashLongestMatchQuickly* BROTLI_RESTRICT self,
const BrotliEncoderParams* params) {
self->common = common;
BROTLI_UNUSED(params);
self->buckets_ = (uint32_t*)common->extra;
}
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* data) {
HashLongestMatchQuickly* self = FN(Self)(handle);
static void FN(Prepare)(
HashLongestMatchQuickly* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
/* Partial preparation is 100 times slower (per bucket). */
size_t partial_prepare_threshold = HASH_MAP_SIZE >> 7;
size_t partial_prepare_threshold = BUCKET_SIZE >> 5;
if (one_shot && input_size <= partial_prepare_threshold) {
size_t i;
for (i = 0; i < input_size; ++i) {
const uint32_t key = FN(HashBytes)(&data[i]);
memset(&self->buckets_[key], 0, BUCKET_SWEEP * sizeof(self->buckets_[0]));
if (BUCKET_SWEEP == 1) {
buckets[key] = 0;
} else {
uint32_t j;
for (j = 0; j < BUCKET_SWEEP; ++j) {
buckets[(key + (j << 3)) & BUCKET_MASK] = 0;
}
}
}
} else {
/* It is not strictly necessary to fill this buffer here, but
not filling will make the results of the compression stochastic
(but correct). This is because random data would cause the
system to find accidentally good backward references here and there. */
memset(&self->buckets_[0], 0, sizeof(self->buckets_));
memset(buckets, 0, sizeof(uint32_t) * BUCKET_SIZE);
}
}
@ -74,45 +86,53 @@ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
BROTLI_UNUSED(params);
BROTLI_UNUSED(one_shot);
BROTLI_UNUSED(input_size);
return sizeof(HashLongestMatchQuickly);
return sizeof(uint32_t) * BUCKET_SIZE;
}
/* Look at 5 bytes at &data[ix & mask].
Compute a hash from these, and store the value somewhere within
[ix .. ix+3]. */
static BROTLI_INLINE void FN(Store)(HasherHandle handle,
const uint8_t* data, const size_t mask, const size_t ix) {
static BROTLI_INLINE void FN(Store)(
HashLongestMatchQuickly* BROTLI_RESTRICT self,
const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
const uint32_t key = FN(HashBytes)(&data[ix & mask]);
/* Wiggle the value with the bucket sweep range. */
const uint32_t off = (ix >> 3) % BUCKET_SWEEP;
FN(Self)(handle)->buckets_[key + off] = (uint32_t)ix;
if (BUCKET_SWEEP == 1) {
self->buckets_[key] = (uint32_t)ix;
} else {
/* Wiggle the value with the bucket sweep range. */
const uint32_t off = ix & BUCKET_SWEEP_MASK;
self->buckets_[(key + off) & BUCKET_MASK] = (uint32_t)ix;
}
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
const uint8_t* data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
static BROTLI_INLINE void FN(StoreRange)(
HashLongestMatchQuickly* BROTLI_RESTRICT self,
const uint8_t* BROTLI_RESTRICT data, const size_t mask,
const size_t ix_start, const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
FN(Store)(handle, data, mask, i);
FN(Store)(self, data, mask, i);
}
}
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
HasherHandle handle, size_t num_bytes, size_t position,
HashLongestMatchQuickly* BROTLI_RESTRICT self,
size_t num_bytes, size_t position,
const uint8_t* ringbuffer, size_t ringbuffer_mask) {
if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
/* Prepare the hashes for three last bytes of the last write.
These could not be calculated before, since they require knowledge
of both the previous and the current block. */
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 3);
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 2);
FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 1);
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
}
}
static BROTLI_INLINE void FN(PrepareDistanceCache)(
HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
BROTLI_UNUSED(handle);
HashLongestMatchQuickly* BROTLI_RESTRICT self,
int* BROTLI_RESTRICT distance_cache) {
BROTLI_UNUSED(self);
BROTLI_UNUSED(distance_cache);
}
@ -125,17 +145,19 @@ static BROTLI_INLINE void FN(PrepareDistanceCache)(
Writes the best match into |out|.
|out|->score is updated only if a better match is found. */
static BROTLI_INLINE void FN(FindLongestMatch)(
HasherHandle handle, const BrotliEncoderDictionary* dictionary,
HashLongestMatchQuickly* BROTLI_RESTRICT self,
const BrotliEncoderDictionary* dictionary,
const uint8_t* BROTLI_RESTRICT data,
const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
const size_t cur_ix, const size_t max_length, const size_t max_backward,
const size_t gap, const size_t max_distance,
const size_t dictionary_distance, const size_t max_distance,
HasherSearchResult* BROTLI_RESTRICT out) {
HashLongestMatchQuickly* self = FN(Self)(handle);
uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
const size_t best_len_in = out->len;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
int compare_char = data[cur_ix_masked + best_len_in];
size_t key = FN(HashBytes)(&data[cur_ix_masked]);
size_t key_out;
score_t min_score = out->score;
score_t best_score = out->score;
size_t best_len = best_len_in;
@ -158,7 +180,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
out->score = best_score;
compare_char = data[cur_ix_masked + best_len];
if (BUCKET_SWEEP == 1) {
self->buckets_[key] = (uint32_t)cur_ix;
buckets[key] = (uint32_t)cur_ix;
return;
}
}
@ -169,8 +191,8 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
size_t backward;
size_t len;
/* Only one to look for, don't bother to prepare for a loop. */
prev_ix = self->buckets_[key];
self->buckets_[key] = (uint32_t)cur_ix;
prev_ix = buckets[key];
buckets[key] = (uint32_t)cur_ix;
backward = cur_ix - prev_ix;
prev_ix &= (uint32_t)ring_buffer_mask;
if (compare_char != data[prev_ix + best_len_in]) {
@ -192,12 +214,17 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
}
}
} else {
uint32_t* bucket = self->buckets_ + key;
int i;
prev_ix = *bucket++;
for (i = 0; i < BUCKET_SWEEP; ++i, prev_ix = *bucket++) {
const size_t backward = cur_ix - prev_ix;
size_t keys[BUCKET_SWEEP];
size_t i;
for (i = 0; i < BUCKET_SWEEP; ++i) {
keys[i] = (key + (i << 3)) & BUCKET_MASK;
}
key_out = keys[(cur_ix & BUCKET_SWEEP_MASK) >> 3];
for (i = 0; i < BUCKET_SWEEP; ++i) {
size_t len;
size_t backward;
prev_ix = buckets[keys[i]];
backward = cur_ix - prev_ix;
prev_ix &= (uint32_t)ring_buffer_mask;
if (compare_char != data[prev_ix + best_len]) {
continue;
@ -211,25 +238,29 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
if (len >= 4) {
const score_t score = BackwardReferenceScore(len, backward);
if (best_score < score) {
best_score = score;
best_len = len;
out->len = best_len;
out->distance = backward;
out->len = len;
compare_char = data[cur_ix_masked + len];
best_score = score;
out->score = score;
compare_char = data[cur_ix_masked + best_len];
out->distance = backward;
}
}
}
}
if (USE_DICTIONARY && min_score == out->score) {
SearchInStaticDictionary(dictionary,
handle, &data[cur_ix_masked], max_length, max_backward + gap,
self->common, &data[cur_ix_masked], max_length, dictionary_distance,
max_distance, out, BROTLI_TRUE);
}
self->buckets_[key + ((cur_ix >> 3) % BUCKET_SWEEP)] = (uint32_t)cur_ix;
if (BUCKET_SWEEP != 1) {
buckets[key_out] = (uint32_t)cur_ix;
}
}
#undef HASH_MAP_SIZE
#undef BUCKET_SWEEP_MASK
#undef BUCKET_SWEEP
#undef BUCKET_MASK
#undef BUCKET_SIZE
#undef HashLongestMatchQuickly

View File

@ -51,13 +51,9 @@ typedef struct HashRolling {
uint32_t factor_remove;
} HashRolling;
static BROTLI_INLINE HashRolling* FN(Self)(HasherHandle handle) {
return (HashRolling*)&(GetHasherCommon(handle)[1]);
}
static void FN(Initialize)(
HasherHandle handle, const BrotliEncoderParams* params) {
HashRolling* self = FN(Self)(handle);
HasherCommon* common, HashRolling* BROTLI_RESTRICT self,
const BrotliEncoderParams* params) {
size_t i;
self->state = 0;
self->next_ix = 0;
@ -71,7 +67,7 @@ static void FN(Initialize)(
self->factor_remove *= self->factor;
}
self->table = (uint32_t*)((HasherHandle)self + sizeof(HashRolling));
self->table = (uint32_t*)common->extra;
for (i = 0; i < NUMBUCKETS; i++) {
self->table[i] = FN(kInvalidPos);
}
@ -79,9 +75,8 @@ static void FN(Initialize)(
BROTLI_UNUSED(params);
}
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* data) {
HashRolling* self = FN(Self)(handle);
static void FN(Prepare)(HashRolling* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
size_t i;
/* Too small size, cannot use this hasher. */
if (input_size < CHUNKLEN) return;
@ -96,36 +91,36 @@ static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
size_t input_size) {
return sizeof(HashRolling) + NUMBUCKETS * sizeof(uint32_t);
return NUMBUCKETS * sizeof(uint32_t);
BROTLI_UNUSED(params);
BROTLI_UNUSED(one_shot);
BROTLI_UNUSED(input_size);
}
static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
static BROTLI_INLINE void FN(Store)(HashRolling* BROTLI_RESTRICT self,
const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
BROTLI_UNUSED(handle);
BROTLI_UNUSED(self);
BROTLI_UNUSED(data);
BROTLI_UNUSED(mask);
BROTLI_UNUSED(ix);
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
const uint8_t* data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
BROTLI_UNUSED(handle);
static BROTLI_INLINE void FN(StoreRange)(HashRolling* BROTLI_RESTRICT self,
const uint8_t* BROTLI_RESTRICT data, const size_t mask,
const size_t ix_start, const size_t ix_end) {
BROTLI_UNUSED(self);
BROTLI_UNUSED(data);
BROTLI_UNUSED(mask);
BROTLI_UNUSED(ix_start);
BROTLI_UNUSED(ix_end);
}
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
HashRolling* BROTLI_RESTRICT self,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ring_buffer_mask) {
/* In this case we must re-initialize the hasher from scratch from the
current position. */
HashRolling* self = FN(Self)(handle);
size_t position_masked;
size_t available = num_bytes;
if ((position & (JUMP - 1)) != 0) {
@ -139,26 +134,27 @@ static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
available = ring_buffer_mask - position_masked;
}
FN(Prepare)(handle, BROTLI_FALSE, available,
FN(Prepare)(self, BROTLI_FALSE, available,
ringbuffer + (position & ring_buffer_mask));
self->next_ix = position;
BROTLI_UNUSED(num_bytes);
}
static BROTLI_INLINE void FN(PrepareDistanceCache)(
HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
BROTLI_UNUSED(handle);
HashRolling* BROTLI_RESTRICT self,
int* BROTLI_RESTRICT distance_cache) {
BROTLI_UNUSED(self);
BROTLI_UNUSED(distance_cache);
}
static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
static BROTLI_INLINE void FN(FindLongestMatch)(
HashRolling* BROTLI_RESTRICT self,
const BrotliEncoderDictionary* dictionary,
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
const size_t max_length, const size_t max_backward,
const size_t gap, const size_t max_distance,
const size_t dictionary_distance, const size_t max_distance,
HasherSearchResult* BROTLI_RESTRICT out) {
HashRolling* self = FN(Self)(handle);
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
size_t pos = self->next_ix;
@ -209,7 +205,7 @@ static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
backup-hasher, the main hasher already searches in it. */
BROTLI_UNUSED(dictionary);
BROTLI_UNUSED(distance_cache);
BROTLI_UNUSED(gap);
BROTLI_UNUSED(dictionary_distance);
BROTLI_UNUSED(max_distance);
}

View File

@ -24,7 +24,7 @@ static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
return MAX_TREE_COMP_LENGTH;
}
static uint32_t FN(HashBytes)(const uint8_t* data) {
static uint32_t FN(HashBytes)(const uint8_t* BROTLI_RESTRICT data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
@ -38,7 +38,7 @@ typedef struct HashToBinaryTree {
/* Hash table that maps the 4-byte hashes of the sequence to the last
position where this hash was found, which is the root of the binary
tree of sequences that share this hash bucket. */
uint32_t buckets_[BUCKET_SIZE];
uint32_t* buckets_; /* uint32_t[BUCKET_SIZE]; */
/* A position used to mark a non-existent sequence, i.e. a tree is empty if
its root is at invalid_pos_ and a node is a leaf if both its children
@ -51,34 +51,30 @@ typedef struct HashToBinaryTree {
corresponding to a hash is a sequence starting at buckets_[hash] and
the left and right children of a sequence starting at pos are
forest_[2 * pos] and forest_[2 * pos + 1]. */
/* uint32_t forest[2 * num_nodes] */
uint32_t* forest_; /* uint32_t[2 * num_nodes] */
} HashToBinaryTree;
static BROTLI_INLINE HashToBinaryTree* FN(Self)(HasherHandle handle) {
return (HashToBinaryTree*)&(GetHasherCommon(handle)[1]);
}
static BROTLI_INLINE uint32_t* FN(Forest)(HashToBinaryTree* self) {
return (uint32_t*)(&self[1]);
}
static void FN(Initialize)(
HasherHandle handle, const BrotliEncoderParams* params) {
HashToBinaryTree* self = FN(Self)(handle);
HasherCommon* common, HashToBinaryTree* BROTLI_RESTRICT self,
const BrotliEncoderParams* params) {
self->buckets_ = (uint32_t*)common->extra;
self->forest_ = &self->buckets_[BUCKET_SIZE];
self->window_mask_ = (1u << params->lgwin) - 1u;
self->invalid_pos_ = (uint32_t)(0 - self->window_mask_);
}
static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* data) {
HashToBinaryTree* self = FN(Self)(handle);
static void FN(Prepare)
(HashToBinaryTree* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
uint32_t invalid_pos = self->invalid_pos_;
uint32_t i;
uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
BROTLI_UNUSED(data);
BROTLI_UNUSED(one_shot);
BROTLI_UNUSED(input_size);
for (i = 0; i < BUCKET_SIZE; i++) {
self->buckets_[i] = invalid_pos;
buckets[i] = invalid_pos;
}
}
@ -89,15 +85,17 @@ static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
if (one_shot && input_size < num_nodes) {
num_nodes = input_size;
}
return sizeof(HashToBinaryTree) + 2 * sizeof(uint32_t) * num_nodes;
return sizeof(uint32_t) * BUCKET_SIZE + 2 * sizeof(uint32_t) * num_nodes;
}
static BROTLI_INLINE size_t FN(LeftChildIndex)(HashToBinaryTree* self,
static BROTLI_INLINE size_t FN(LeftChildIndex)(
HashToBinaryTree* BROTLI_RESTRICT self,
const size_t pos) {
return 2 * (pos & self->window_mask_);
}
static BROTLI_INLINE size_t FN(RightChildIndex)(HashToBinaryTree* self,
static BROTLI_INLINE size_t FN(RightChildIndex)(
HashToBinaryTree* BROTLI_RESTRICT self,
const size_t pos) {
return 2 * (pos & self->window_mask_) + 1;
}
@ -113,7 +111,7 @@ static BROTLI_INLINE size_t FN(RightChildIndex)(HashToBinaryTree* self,
This function must be called with increasing cur_ix positions. */
static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
HashToBinaryTree* self, const uint8_t* const BROTLI_RESTRICT data,
HashToBinaryTree* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length,
const size_t max_backward, size_t* const BROTLI_RESTRICT best_len,
BackwardMatch* BROTLI_RESTRICT matches) {
@ -123,8 +121,9 @@ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
const BROTLI_BOOL should_reroot_tree =
TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH);
const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
uint32_t* forest = FN(Forest)(self);
size_t prev_ix = self->buckets_[key];
uint32_t* BROTLI_RESTRICT buckets = self->buckets_;
uint32_t* BROTLI_RESTRICT forest = self->forest_;
size_t prev_ix = buckets[key];
/* The forest index of the rightmost node of the left subtree of the new
root, updated as we traverse and re-root the tree of the hash bucket. */
size_t node_left = FN(LeftChildIndex)(self, cur_ix);
@ -139,7 +138,7 @@ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
size_t best_len_right = 0;
size_t depth_remaining;
if (should_reroot_tree) {
self->buckets_[key] = (uint32_t)cur_ix;
buckets[key] = (uint32_t)cur_ix;
}
for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) {
const size_t backward = cur_ix - prev_ix;
@ -199,11 +198,13 @@ static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)(
matches in matches[0] to matches[*num_matches - 1]. The matches will be
sorted by strictly increasing length and (non-strictly) increasing
distance. */
static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle,
const BrotliEncoderDictionary* dictionary, const uint8_t* data,
static BROTLI_INLINE size_t FN(FindAllMatches)(
HashToBinaryTree* BROTLI_RESTRICT self,
const BrotliEncoderDictionary* dictionary,
const uint8_t* BROTLI_RESTRICT data,
const size_t ring_buffer_mask, const size_t cur_ix,
const size_t max_length, const size_t max_backward,
const size_t gap, const BrotliEncoderParams* params,
const size_t dictionary_distance, const BrotliEncoderParams* params,
BackwardMatch* matches) {
BackwardMatch* const orig_matches = matches;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
@ -236,7 +237,7 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle,
}
}
if (best_len < max_length) {
matches = FN(StoreAndFindMatches)(FN(Self)(handle), data, cur_ix,
matches = FN(StoreAndFindMatches)(self, data, cur_ix,
ring_buffer_mask, max_length, max_backward, &best_len, matches);
}
for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) {
@ -252,7 +253,7 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle,
for (l = minlen; l <= maxlen; ++l) {
uint32_t dict_id = dict_matches[l];
if (dict_id < kInvalidMatch) {
size_t distance = max_backward + gap + (dict_id >> 5) + 1;
size_t distance = dictionary_distance + (dict_id >> 5) + 1;
if (distance <= params->dist.max_distance) {
InitDictionaryBackwardMatch(matches++, distance, l, dict_id & 31);
}
@ -266,18 +267,18 @@ static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle,
/* Stores the hash of the next 4 bytes and re-roots the binary tree at the
current sequence, without returning any matches.
REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */
static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,
static BROTLI_INLINE void FN(Store)(HashToBinaryTree* BROTLI_RESTRICT self,
const uint8_t* BROTLI_RESTRICT data,
const size_t mask, const size_t ix) {
HashToBinaryTree* self = FN(Self)(handle);
/* Maximum distance is window size - 16, see section 9.1. of the spec. */
const size_t max_backward = self->window_mask_ - BROTLI_WINDOW_GAP + 1;
FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH,
max_backward, NULL, NULL);
}
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
const uint8_t* data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
static BROTLI_INLINE void FN(StoreRange)(HashToBinaryTree* BROTLI_RESTRICT self,
const uint8_t* BROTLI_RESTRICT data, const size_t mask,
const size_t ix_start, const size_t ix_end) {
size_t i = ix_start;
size_t j = ix_start;
if (ix_start + 63 <= ix_end) {
@ -285,18 +286,18 @@ static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
}
if (ix_start + 512 <= i) {
for (; j < i; j += 8) {
FN(Store)(handle, data, mask, j);
FN(Store)(self, data, mask, j);
}
}
for (; i < ix_end; ++i) {
FN(Store)(handle, data, mask, i);
FN(Store)(self, data, mask, i);
}
}
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
HashToBinaryTree* BROTLI_RESTRICT self,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask) {
HashToBinaryTree* self = FN(Self)(handle);
if (num_bytes >= FN(HashTypeLength)() - 1 &&
position >= MAX_TREE_COMP_LENGTH) {
/* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher.

View File

@ -28,34 +28,30 @@ extern "C" {
void BrotliInitDistanceParams(BrotliEncoderParams* params,
uint32_t npostfix, uint32_t ndirect) {
BrotliDistanceParams* dist_params = &params->dist;
uint32_t alphabet_size, max_distance;
uint32_t alphabet_size_max;
uint32_t alphabet_size_limit;
uint32_t max_distance;
dist_params->distance_postfix_bits = npostfix;
dist_params->num_direct_distance_codes = ndirect;
alphabet_size = BROTLI_DISTANCE_ALPHABET_SIZE(
alphabet_size_max = BROTLI_DISTANCE_ALPHABET_SIZE(
npostfix, ndirect, BROTLI_MAX_DISTANCE_BITS);
alphabet_size_limit = alphabet_size_max;
max_distance = ndirect + (1U << (BROTLI_MAX_DISTANCE_BITS + npostfix + 2)) -
(1U << (npostfix + 2));
if (params->large_window) {
static const uint32_t bound[BROTLI_MAX_NPOSTFIX + 1] = {0, 4, 12, 28};
uint32_t postfix = 1U << npostfix;
alphabet_size = BROTLI_DISTANCE_ALPHABET_SIZE(
BrotliDistanceCodeLimit limit = BrotliCalculateDistanceCodeLimit(
BROTLI_MAX_ALLOWED_DISTANCE, npostfix, ndirect);
alphabet_size_max = BROTLI_DISTANCE_ALPHABET_SIZE(
npostfix, ndirect, BROTLI_LARGE_MAX_DISTANCE_BITS);
/* The maximum distance is set so that no distance symbol used can encode
a distance larger than BROTLI_MAX_ALLOWED_DISTANCE with all
its extra bits set. */
if (ndirect < bound[npostfix]) {
max_distance = BROTLI_MAX_ALLOWED_DISTANCE - (bound[npostfix] - ndirect);
} else if (ndirect >= bound[npostfix] + postfix) {
max_distance = (3U << 29) - 4 + (ndirect - bound[npostfix]);
} else {
max_distance = BROTLI_MAX_ALLOWED_DISTANCE;
}
alphabet_size_limit = limit.max_alphabet_size;
max_distance = limit.max_distance;
}
dist_params->alphabet_size = alphabet_size;
dist_params->alphabet_size_max = alphabet_size_max;
dist_params->alphabet_size_limit = alphabet_size_limit;
dist_params->max_distance = max_distance;
}

View File

@ -23,7 +23,8 @@ typedef struct BrotliHasherParams {
typedef struct BrotliDistanceParams {
uint32_t distance_postfix_bits;
uint32_t num_direct_distance_codes;
uint32_t alphabet_size;
uint32_t alphabet_size_max;
uint32_t alphabet_size_limit;
size_t max_distance;
} BrotliDistanceParams;
@ -33,6 +34,7 @@ typedef struct BrotliEncoderParams {
int quality;
int lgwin;
int lgblock;
size_t stream_offset;
size_t size_hint;
BROTLI_BOOL disable_literal_context_modeling;
BROTLI_BOOL large_window;

View File

@ -125,6 +125,9 @@ static BROTLI_INLINE void RingBufferWrite(
later when we copy the last two bytes to the first two positions. */
rb->buffer_[rb->size_ - 2] = 0;
rb->buffer_[rb->size_ - 1] = 0;
/* Initialize tail; might be touched by "best_len++" optimization when
ring buffer is "full". */
rb->buffer_[rb->size_] = 241;
}
{
const size_t masked_pos = rb->pos_ & rb->mask_;

View File

@ -16,8 +16,6 @@
extern "C" {
#endif
/*#define BIT_WRITER_DEBUG */
/* This function writes bits into bytes in increasing addresses, and within
a byte least-significant-bit first.
@ -28,7 +26,7 @@ extern "C" {
0000 0RRR 0000 0000 0000 0000
Now, we could write 5 or less bits in MSB by just sifting by 3
Now, we could write 5 or less bits in MSB by just shifting by 3
and OR'ing to BYTE-0.
For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
@ -37,37 +35,41 @@ static BROTLI_INLINE void BrotliWriteBits(size_t n_bits,
uint64_t bits,
size_t* BROTLI_RESTRICT pos,
uint8_t* BROTLI_RESTRICT array) {
BROTLI_LOG(("WriteBits %2d 0x%08x%08x %10d\n", (int)n_bits,
(uint32_t)(bits >> 32), (uint32_t)(bits & 0xFFFFFFFF),
(int)*pos));
BROTLI_DCHECK((bits >> n_bits) == 0);
BROTLI_DCHECK(n_bits <= 56);
#if defined(BROTLI_LITTLE_ENDIAN)
/* This branch of the code can write up to 56 bits at a time,
7 bits are lost by being perhaps already in *p and at least
1 bit is needed to initialize the bit-stream ahead (i.e. if 7
bits are in *p and we write 57 bits, then the next write will
access a byte that was never initialized). */
uint8_t* p = &array[*pos >> 3];
uint64_t v = (uint64_t)(*p); /* Zero-extend 8 to 64 bits. */
BROTLI_LOG(("WriteBits %2d 0x%08x%08x %10d\n", (int)n_bits,
(uint32_t)(bits >> 32), (uint32_t)(bits & 0xFFFFFFFF),
(int)*pos));
BROTLI_DCHECK((bits >> n_bits) == 0);
BROTLI_DCHECK(n_bits <= 56);
v |= bits << (*pos & 7);
BROTLI_UNALIGNED_STORE64LE(p, v); /* Set some bits. */
*pos += n_bits;
{
uint8_t* p = &array[*pos >> 3];
uint64_t v = (uint64_t)(*p); /* Zero-extend 8 to 64 bits. */
v |= bits << (*pos & 7);
BROTLI_UNALIGNED_STORE64LE(p, v); /* Set some bits. */
*pos += n_bits;
}
#else
/* implicit & 0xFF is assumed for uint8_t arithmetics */
uint8_t* array_pos = &array[*pos >> 3];
const size_t bits_reserved_in_first_byte = (*pos & 7);
size_t bits_left_to_write;
bits <<= bits_reserved_in_first_byte;
*array_pos++ |= (uint8_t)bits;
for (bits_left_to_write = n_bits + bits_reserved_in_first_byte;
bits_left_to_write >= 9;
bits_left_to_write -= 8) {
bits >>= 8;
*array_pos++ = (uint8_t)bits;
{
uint8_t* array_pos = &array[*pos >> 3];
const size_t bits_reserved_in_first_byte = (*pos & 7);
size_t bits_left_to_write;
bits <<= bits_reserved_in_first_byte;
*array_pos++ |= (uint8_t)bits;
for (bits_left_to_write = n_bits + bits_reserved_in_first_byte;
bits_left_to_write >= 9;
bits_left_to_write -= 8) {
bits >>= 8;
*array_pos++ = (uint8_t)bits;
}
*array_pos = 0;
*pos += n_bits;
}
*array_pos = 0;
*pos += n_bits;
#endif
}

View File

@ -201,7 +201,23 @@ typedef enum BrotliEncoderParameter {
*
* Range is from 0 to (15 << NPOSTFIX) in steps of (1 << NPOSTFIX).
*/
BROTLI_PARAM_NDIRECT = 8
BROTLI_PARAM_NDIRECT = 8,
/**
* Number of bytes of input stream already processed by a different instance.
*
* @note It is important to configure all the encoder instances with the same
* parameters (except this one) in order to allow all the encoded parts
* to obey the same restrictions implied by the header.
*
* If offset is not 0, then the stream header is omitted.
* In any case the output start is byte-aligned, so for proper stream stitching
* the "predecessor" stream must be flushed.
*
* Range is not artificially limited, but all the values greater than or equal
* to the maximal window size have the same effect. Values greater than 2**30
* are not allowed.
*/
BROTLI_PARAM_STREAM_OFFSET = 9
} BrotliEncoderParameter;
/**
@ -274,6 +290,11 @@ BROTLI_ENC_API size_t BrotliEncoderMaxCompressedSize(size_t input_size);
* @note If ::BrotliEncoderMaxCompressedSize(@p input_size) returns non-zero
* value, then output is guaranteed to be no longer than that.
*
* @note If @p lgwin is greater than ::BROTLI_MAX_WINDOW_BITS then the resulting
* stream might be incompatible with RFC 7932; to decode such streams, the
* decoder should be configured with
* ::BROTLI_DECODER_PARAM_LARGE_WINDOW = @c 1
*
* @param quality quality parameter value, e.g. ::BROTLI_DEFAULT_QUALITY
* @param lgwin lgwin parameter value, e.g. ::BROTLI_DEFAULT_WINDOW
* @param mode mode parameter value, e.g. ::BROTLI_DEFAULT_MODE

View File

@ -556,11 +556,17 @@ static void PrintHelp(const char* name, BROTLI_BOOL error) {
" -t, --test test compressed file integrity\n"
" -v, --verbose verbose mode\n");
fprintf(media,
" -w NUM, --lgwin=NUM set LZ77 window size (0, %d-%d)\n",
" -w NUM, --lgwin=NUM set LZ77 window size (0, %d-%d)\n"
" window size = 2**NUM - 16\n"
" 0 lets compressor choose the optimal value\n",
BROTLI_MIN_WINDOW_BITS, BROTLI_MAX_WINDOW_BITS);
fprintf(media,
" window size = 2**NUM - 16\n"
" 0 lets compressor choose the optimal value\n");
" --large_window=NUM use incompatible large-window brotli\n"
" bitstream with window size (0, %d-%d)\n"
" WARNING: this format is not compatible\n"
" with brotli RFC 7932 and may not be\n"
" decodable with regular brotli decoders\n",
BROTLI_MIN_WINDOW_BITS, BROTLI_LARGE_MAX_WINDOW_BITS);
fprintf(media,
" -S SUF, --suffix=SUF output file suffix (default:'%s')\n",
DEFAULT_SUFFIX);

28
compiler_config_setting.bzl Executable file
View File

@ -0,0 +1,28 @@
# Copyright 2018 Google Inc. All Rights Reserved.
#
# Distributed under MIT license.
# See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
"""Creates config_setting that allows selecting based on 'compiler' value."""
def create_msvc_config():
    """Declares the "msvc" config_setting, matching the "msvc-cl" compiler.

    Exactly one `native.config_setting` named "msvc" with public visibility is
    created; only the attribute used to match the compiler differs between
    Bazel versions.
    """

    # The "do_not_use_tools_cpp_compiler_present" attribute exists to
    # distinguish between older versions of Bazel that do not support
    # "@bazel_tools//tools/cpp:compiler" flag_value, and newer ones that do.
    # In the future, the only way to select on the compiler will be through
    # flag_values{"@bazel_tools//tools/cpp:compiler"} and the else branch can
    # be removed.
    if hasattr(cc_common, "do_not_use_tools_cpp_compiler_present"):
        compiler_match = {
            "flag_values": {
                "@bazel_tools//tools/cpp:compiler": "msvc-cl",
            },
        }
    else:
        compiler_match = {
            "values": {"compiler": "msvc-cl"},
        }

    native.config_setting(
        name = "msvc",
        visibility = ["//visibility:public"],
        **compiler_match
    )

View File

@ -298,12 +298,24 @@ Flag that determines if 'Large Window Brotli' is used\&.
\fB\fIBROTLI_PARAM_NPOSTFIX \fP\fP
Recommended number of postfix bits (NPOSTFIX)\&. Encoder may change this value\&.
.PP
Range is from 0 to ::BROTLI_MAX_NPOSTFIX\&.
Range is from 0 to \fBBROTLI_MAX_NPOSTFIX\fP\&.
.TP
\fB\fIBROTLI_PARAM_NDIRECT \fP\fP
Recommended number of direct distance codes (NDIRECT)\&. Encoder may change this value\&.
.PP
Range is from 0 to (15 << NPOSTFIX) in steps of (1 << NPOSTFIX)\&.
.TP
\fB\fIBROTLI_PARAM_STREAM_OFFSET \fP\fP
Number of bytes of input stream already processed by a different instance\&.
.PP
\fBNote:\fP
.RS 4
It is important to configure all the encoder instances with the same parameters (except this one), so that all the encoded parts obey the same restrictions implied by the header\&.
.RE
.PP
If offset is not 0, then stream header is omitted\&. In any case output start is byte aligned, so for proper streams stitching 'predecessor' stream must be flushed\&.
.PP
Range is not artificially limited, but all the values greater or equal to maximal window size have the same effect\&. Values greater than 2**30 are not allowed\&.
.SH "Function Documentation"
.PP
.SS "\fBBROTLI_BOOL\fP BrotliEncoderCompress (int quality, int lgwin, \fBBrotliEncoderMode\fP mode, size_t input_size, const uint8_t input_buffer[input_size], size_t * encoded_size, uint8_t encoded_buffer[*encoded_size])"
@ -314,6 +326,8 @@ Performs one-shot memory-to-memory compression\&. Compresses the data in \fCinpu
\fBNote:\fP
.RS 4
If \fBBrotliEncoderMaxCompressedSize\fP(\fCinput_size\fP) returns non-zero value, then output is guaranteed to be no longer than that\&.
.PP
If \fClgwin\fP is greater than \fBBROTLI_MAX_WINDOW_BITS\fP then resulting stream might be incompatible with RFC 7932; to decode such streams, decoder should be configured with \fBBROTLI_DECODER_PARAM_LARGE_WINDOW\fP = \fC1\fP
.RE
.PP
\fBParameters:\fP

View File

@ -5,6 +5,11 @@ package(default_visibility = ["//visibility:public"])
licenses(["notice"]) # MIT
TEST_DEPS = [
":dec",
"@junit_junit//jar",
]
java_library(
name = "dec",
srcs = glob(
@ -14,48 +19,40 @@ java_library(
proguard_specs = ["proguard.cfg"],
)
java_library(
name = "test_lib",
testonly = 1,
srcs = glob(["*Test*.java"]),
deps = [
":dec",
"@junit_junit//jar",
],
)
load(":build_defs.bzl", "brotli_java_test")
java_test(
brotli_java_test(
name = "BitReaderTest",
test_class = "org.brotli.dec.BitReaderTest",
runtime_deps = [":test_lib"],
srcs = ["BitReaderTest.java"],
deps = TEST_DEPS,
)
java_test(
brotli_java_test(
name = "DecodeTest",
test_class = "org.brotli.dec.DecodeTest",
runtime_deps = [":test_lib"],
srcs = ["DecodeTest.java"],
deps = TEST_DEPS,
)
java_test(
brotli_java_test(
name = "DictionaryTest",
test_class = "org.brotli.dec.DictionaryTest",
runtime_deps = [":test_lib"],
srcs = ["DictionaryTest.java"],
deps = TEST_DEPS,
)
java_test(
brotli_java_test(
name = "EagerStreamTest",
test_class = "org.brotli.dec.EagerStreamTest",
runtime_deps = [":test_lib"],
srcs = ["EagerStreamTest.java"],
deps = TEST_DEPS,
)
java_test(
brotli_java_test(
name = "SynthTest",
test_class = "org.brotli.dec.SynthTest",
runtime_deps = [":test_lib"],
srcs = ["SynthTest.java"],
deps = TEST_DEPS,
)
java_test(
brotli_java_test(
name = "TransformTest",
test_class = "org.brotli.dec.TransformTest",
runtime_deps = [":test_lib"],
srcs = ["TransformTest.java"],
deps = TEST_DEPS,
)

View File

@ -12,9 +12,14 @@ package org.brotli.dec;
final class BitReader {
// Possible values: {5, 6}. 5 corresponds to 32-bit build, 6 to 64-bit. This value is used for
// conditional compilation -> produced artifacts might be binary INCOMPATIBLE (JLS 13.2).
private static final int LOG_BITNESS = 6;
private static final int BITNESS = 1 << LOG_BITNESS;
// JIT conditional compilation.
private static final int LOG_BITNESS = Utils.getLogBintness();
// Not only the Java compiler prunes "if (const false)" code, but the JVM does as well.
// Code under "if (DEBUG != 0)" has zero performance impact (outside unit tests).
private static final int DEBUG = Utils.isDebugMode();
static final int BITNESS = 1 << LOG_BITNESS;
private static final int BYTENESS = BITNESS / 8;
private static final int CAPACITY = 4096;
@ -89,7 +94,16 @@ final class BitReader {
}
}
/**
 * Sanity check for the bit accumulator position.
 *
 * @param s decoder state whose {@code bitOffset} is inspected
 * @throws IllegalStateException if {@code s.bitOffset} has advanced past {@code BITNESS}
 */
static void assertAccumulatorHealthy(State s) {
  final int position = s.bitOffset;
  if (position <= BITNESS) {
    return;
  }
  throw new IllegalStateException("Accumulator underloaded: " + position);
}
static void fillBitWindow(State s) {
if (DEBUG != 0) {
assertAccumulatorHealthy(s);
}
if (s.bitOffset >= HALF_BITNESS) {
// Same as doFillBitWindow. JVM fails to inline it.
if (BITNESS == 64) {
@ -103,7 +117,10 @@ final class BitReader {
}
}
private static void doFillBitWindow(State s) {
static void doFillBitWindow(State s) {
if (DEBUG != 0) {
assertAccumulatorHealthy(s);
}
if (BITNESS == 64) {
s.accumulator64 = ((long) s.intBuffer[s.halfOffset++] << HALF_BITNESS)
| (s.accumulator64 >>> HALF_BITNESS);
@ -122,6 +139,12 @@ final class BitReader {
}
}
/**
* Fetches bits from accumulator.
*
* WARNING: accumulator MUST contain at least the specified amount of bits,
* otherwise BitReader will become broken.
*/
static int readFewBits(State s, int n) {
int val = peekBits(s) & ((1 << n) - 1);
s.bitOffset += n;

View File

@ -32,4 +32,23 @@ public class BitReaderTest {
}
fail("BrotliRuntimeException should have been thrown by BitReader.checkHealth");
}
/**
 * Reads 65 bits from an 8-byte input and expects the following
 * {@code BitReader.fillBitWindow} call to raise {@link IllegalStateException}.
 */
@Test
public void testAccumulatorUnderflowDetected() {
  State reader = new State();
  Decode.initState(reader, new ByteArrayInputStream(new byte[8]));
  // 5 reads of 13 bits = 65 bits; that is enough for both 32 and 64 bit systems.
  for (int round = 0; round < 5; ++round) {
    BitReader.readBits(reader, 13);
  }
  boolean underflowReported = false;
  try {
    BitReader.fillBitWindow(reader);
  } catch (IllegalStateException ex) {
    // This exception is expected.
    underflowReported = true;
  }
  if (!underflowReported) {
    fail("IllegalStateException should have been thrown by 'broken' BitReader");
  }
}
}

View File

@ -84,18 +84,12 @@ public class BrotliInputStream extends InputStream {
}
}
public void setEager(boolean eager) {
boolean isEager = (state.isEager != 0);
if (eager == isEager) {
/* Shortcut for no-op change. */
return;
}
if (eager) {
Decode.setEager(state);
} else {
/* Once decoder is "eager", there is no way back. */
throw new IllegalStateException("Brotli decoder has been already switched to eager mode");
}
/**
 * Switches the underlying decoder state to "eager" output mode.
 *
 * <p>Thin wrapper: forwards this stream's {@code state} to
 * {@code Decode.enableEagerOutput}. That method throws
 * {@link IllegalStateException} unless the state is freshly initialized.
 */
public void enableEagerOutput() {
  Decode.enableEagerOutput(state);
}
/**
 * Allows the underlying decoder to accept "large-window" brotli streams.
 *
 * <p>Thin wrapper: forwards this stream's {@code state} to
 * {@code Decode.enableLargeWindow}. That method throws
 * {@link IllegalStateException} unless the state is freshly initialized.
 */
public void enableLargeWindow() {
  Decode.enableLargeWindow(state);
}
/**

View File

@ -14,6 +14,11 @@ import java.io.InputStream;
*/
final class Decode {
static final int MIN_LARGE_WINDOW_BITS = 10;
/* Maximum was chosen to be 30 to allow efficient decoder implementation.
* Format allows bigger window, but Java does not support 2G+ arrays. */
static final int MAX_LARGE_WINDOW_BITS = 30;
//----------------------------------------------------------------------------
// RunningState
//----------------------------------------------------------------------------
@ -35,7 +40,7 @@ final class Decode {
private static final int DEFAULT_CODE_LENGTH = 8;
private static final int CODE_LENGTH_REPEAT_CODE = 16;
private static final int NUM_LITERAL_CODES = 256;
private static final int NUM_INSERT_AND_COPY_CODES = 704;
private static final int NUM_COMMAND_CODES = 704;
private static final int NUM_BLOCK_LENGTH_CODES = 26;
private static final int LITERAL_CONTEXT_BITS = 6;
private static final int DISTANCE_CONTEXT_BITS = 2;
@ -44,10 +49,19 @@ final class Decode {
private static final int HUFFMAN_TABLE_MASK = 0xFF;
/**
* Maximum possible Huffman table size for an alphabet size of 704, max code length 15 and root
* table bits 8.
* Maximum possible Huffman table size for an alphabet size of (index * 32),
* max code length 15 and root table bits 8.
* The biggest alphabet is "command" - 704 symbols. Though "distance" alphabet could theoretically
* outreach that limit (for 62 extra bit distances), practically it is limited by
* MAX_ALLOWED_DISTANCE and never gets bigger than 544 symbols.
*/
static final int HUFFMAN_TABLE_SIZE = 1080;
static final int[] MAX_HUFFMAN_TABLE_SIZE = {
256, 402, 436, 468, 500, 534, 566, 598, 630, 662, 694, 726, 758, 790, 822,
854, 886, 920, 952, 984, 1016, 1048, 1080
};
private static final int HUFFMAN_TABLE_SIZE_26 = 396;
private static final int HUFFMAN_TABLE_SIZE_258 = 632;
private static final int CODE_LENGTH_CODES = 18;
private static final int[] CODE_LENGTH_CODE_ORDER = {
@ -56,7 +70,7 @@ final class Decode {
private static final int NUM_DISTANCE_SHORT_CODES = 16;
private static final int[] DISTANCE_SHORT_CODE_INDEX_OFFSET = {
3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
0, 3, 2, 1, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3
};
private static final int[] DISTANCE_SHORT_CODE_VALUE_OFFSET = {
@ -86,6 +100,17 @@ final class Decode {
static final int MAX_TRANSFORMED_WORD_LENGTH = 5 + MAX_WORD_LENGTH + 8;
private static final int MAX_DISTANCE_BITS = 24;
private static final int MAX_LARGE_WINDOW_DISTANCE_BITS = 62;
/**
* Safe distance limit.
*
* Limit ((1 << 31) - 4) allows safe distance calculation without overflows,
* given the distance alphabet size is limited to corresponding size.
*/
private static final int MAX_ALLOWED_DISTANCE = 0x7FFFFFFC;
//----------------------------------------------------------------------------
// Prefix code LUT.
//----------------------------------------------------------------------------
@ -98,33 +123,103 @@ final class Decode {
2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 11, 12, 13, 24
};
static final int[] INSERT_LENGTH_OFFSET = {
0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210,
22594
static final short[] INSERT_LENGTH_N_BITS = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03,
0x04, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0C, 0x0E, 0x18
};
static final int[] INSERT_LENGTH_N_BITS = {
0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24
static final short[] COPY_LENGTH_N_BITS = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02,
0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x18
};
static final int[] COPY_LENGTH_OFFSET = {
2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094,
2118
};
// Each command is represented with 4x16-bit values:
// * [insertLenExtraBits, copyLenExtraBits]
// * insertLenOffset
// * copyLenOffset
// * distanceContext
static final short[] CMD_LOOKUP = new short[NUM_COMMAND_CODES * 4];
static final int[] COPY_LENGTH_N_BITS = {
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24
};
static {
unpackCommandLookupTable(CMD_LOOKUP);
}
static final int[] INSERT_RANGE_LUT = {
0, 0, 8, 8, 0, 16, 8, 16, 16
};
/**
 * Computes floor(log2(i)), treating {@code i} as an unsigned 32-bit value.
 *
 * @param i value to measure
 * @return index of the highest set bit of {@code i}, or -1 when {@code i} is 0
 */
private static int log2floor(int i) {
  int highestBit = -1;
  // Binary search over the bit position: try shifts of 16, 8, 4, 2 and 1.
  for (int shift = 16; shift > 0; shift >>= 1) {
    int upperPart = i >>> shift;
    if (upperPart != 0) {
      highestBit += shift;
      i = upperPart;
    }
  }
  // At this point i is either 0 or 1; adding it converts -1 to 0 for non-zero inputs.
  return highestBit + i;
}
static final int[] COPY_RANGE_LUT = {
0, 8, 0, 8, 16, 0, 16, 8, 16
};
/**
 * Computes the size of the distance alphabet for the given parameters.
 *
 * @param npostfix number of postfix bits
 * @param ndirect number of direct distance codes
 * @param maxndistbits maximal number of extra distance bits
 * @return short codes + direct codes + 2 codes per extra-bit count, per postfix value
 */
private static int calculateDistanceAlphabetSize(int npostfix, int ndirect, int maxndistbits) {
  int regularCodes = 2 * (maxndistbits << npostfix);
  return NUM_DISTANCE_SHORT_CODES + ndirect + regularCodes;
}
// TODO: add a correctness test for this function when
//       large-window and dictionary are implemented.
/**
 * Computes how many distance alphabet symbols are usable when distances are capped
 * at {@code maxDistance}.
 *
 * @param maxDistance largest distance to be representable
 * @param npostfix number of postfix bits
 * @param ndirect number of direct distance codes
 * @return the limit on the distance alphabet size
 * @throws IllegalArgumentException if {@code maxDistance < ndirect + (2 << npostfix)}
 */
private static int calculateDistanceAlphabetLimit(int maxDistance, int npostfix, int ndirect) {
  int smallestAccepted = ndirect + (2 << npostfix);
  if (maxDistance < smallestAccepted) {
    throw new IllegalArgumentException("maxDistance is too small");
  }
  int shifted = ((maxDistance - ndirect) >> npostfix) + 4;
  int extraBits = log2floor(shifted) - 1;
  int halfBit = (shifted >> extraBits) & 1;
  int groupIdx = ((extraBits - 1) << 1) | halfBit;
  int regularPart = ((groupIdx - 1) << npostfix) + (1 << npostfix);
  return regularPart + ndirect + NUM_DISTANCE_SHORT_CODES;
}
/**
 * Fills the command lookup table: 4 consecutive 16-bit entries per command code.
 *
 * <p>For command code {@code c} the entries at {@code c * 4 + k} are:
 * <ul>
 *   <li>k = 0: insert extra-bit count in the low byte, copy extra-bit count in the high byte;
 *   <li>k = 1: insert length offset;
 *   <li>k = 2: copy length offset;
 *   <li>k = 3: distance context (negative for command codes below 128).
 * </ul>
 *
 * @param cmdLookup output array; written at indices {@code 0 .. NUM_COMMAND_CODES * 4 - 1}
 */
private static void unpackCommandLookupTable(short[] cmdLookup) {
  short[] insertLengthOffsets = new short[24];
  short[] copyLengthOffsets = new short[24];
  // Insert offsets start at 0 (array default), copy offsets start at 2.
  copyLengthOffsets[0] = 2;
  // Each next offset is the previous one plus the range covered by its extra bits.
  for (int i = 0; i < 23; ++i) {
    insertLengthOffsets[i + 1] =
        (short) (insertLengthOffsets[i] + (1 << INSERT_LENGTH_N_BITS[i]));
    copyLengthOffsets[i + 1] =
        (short) (copyLengthOffsets[i] + (1 << COPY_LENGTH_N_BITS[i]));
  }

  for (int cmdCode = 0; cmdCode < NUM_COMMAND_CODES; ++cmdCode) {
    // Upper bits of the command code select the (insert, copy) code range pair.
    int rangeIdx = cmdCode >>> 6;
    /* -4 turns any regular distance code to negative. */
    int distanceContextOffset = -4;
    if (rangeIdx >= 2) {
      rangeIdx -= 2;
      distanceContextOffset = 0;
    }
    // 0x29850 packs nine 2-bit values {0, 0, 1, 1, 0, 2, 1, 2, 2} (lowest bits first);
    // the selected value becomes bits 3..4 of the insert code, cmdCode bits 3..5 the rest.
    int insertCode = (((0x29850 >>> (rangeIdx * 2)) & 0x3) << 3) | ((cmdCode >>> 3) & 7);
    // 0x26244 packs nine 2-bit values {0, 1, 0, 1, 2, 0, 2, 1, 2} (lowest bits first);
    // the selected value becomes bits 3..4 of the copy code, cmdCode bits 0..2 the rest.
    int copyCode = (((0x26244 >>> (rangeIdx * 2)) & 0x3) << 3) | (cmdCode & 7);
    short copyLengthOffset = copyLengthOffsets[copyCode];
    // Base context is (copy length offset - 2) for offsets 2..4, and 3 for anything longer.
    int distanceContext =
        distanceContextOffset + (copyLengthOffset > 4 ? 3 : copyLengthOffset - 2);
    int index = cmdCode * 4;
    cmdLookup[index + 0] =
        (short) (INSERT_LENGTH_N_BITS[insertCode] | (COPY_LENGTH_N_BITS[copyCode] << 8));
    cmdLookup[index + 1] = insertLengthOffsets[insertCode];
    cmdLookup[index + 2] = copyLengthOffsets[copyCode];
    cmdLookup[index + 3] = (short) distanceContext;
  }
}
/**
* Reads brotli stream header and parses "window bits".
*
* @param s initialized state, before any read is performed.
* @return -1 if header is invalid
*/
private static int decodeWindowBits(State s) {
/* Change the meaning of the flag. Before this step it means "decoder must be capable of
* reading a 'large-window' brotli stream". After this step it means that the "large-window"
* feature is actually detected. Even though the window size could be the same as before
* (lgwin = 10..24), encoded distances are allowed to be much greater, thus a bigger
* dictionary could be used. */
int largeWindowEnabled = s.isLargeWindow;
s.isLargeWindow = 0;
BitReader.fillBitWindow(s);
if (BitReader.readFewBits(s, 1) == 0) {
return 16;
@ -135,7 +230,25 @@ final class Decode {
}
n = BitReader.readFewBits(s, 3);
if (n != 0) {
return 8 + n;
if (n == 1) {
if (largeWindowEnabled == 0) {
/* Reserved value in regular brotli stream. */
return -1;
}
s.isLargeWindow = 1;
/* Check "reserved" bit for future (post-large-window) extensions. */
if (BitReader.readFewBits(s, 1) == 1) {
return -1;
}
n = BitReader.readFewBits(s, 6);
if (n < MIN_LARGE_WINDOW_BITS || n > MAX_LARGE_WINDOW_BITS) {
/* Encoded window bits value is too small or too big. */
return -1;
}
return n;
} else {
return 8 + n;
}
}
return 17;
}
@ -147,13 +260,20 @@ final class Decode {
*
* @param s initialized state, before any read is performed.
*/
static void setEager(State s) {
static void enableEagerOutput(State s) {
if (s.runningState != INITIALIZED) {
throw new IllegalStateException("State MUST be freshly initialized");
}
s.isEager = 1;
}
/**
 * Marks the decoder state as allowed to read "large-window" brotli streams.
 *
 * @param s initialized state, before any read is performed
 * @throws IllegalStateException if {@code s.runningState} is not {@code INITIALIZED}
 */
static void enableLargeWindow(State s) {
  if (s.runningState == INITIALIZED) {
    s.isLargeWindow = 1;
    return;
  }
  throw new IllegalStateException("State MUST be freshly initialized");
}
/**
* Associate input with decoder state.
*
@ -164,7 +284,13 @@ final class Decode {
if (s.runningState != UNINITIALIZED) {
throw new IllegalStateException("State MUST be uninitialized");
}
s.blockTrees = new int[6 * HUFFMAN_TABLE_SIZE];
/* 6 trees + 1 extra "offset" slot to simplify table decoding logic. */
s.blockTrees = new int[7 + 3 * (HUFFMAN_TABLE_SIZE_258 + HUFFMAN_TABLE_SIZE_26)];
s.blockTrees[0] = 7;
s.distRbIdx = 3;
int maxDistanceAlphabetLimit = calculateDistanceAlphabetLimit(MAX_ALLOWED_DISTANCE, 3, 15 << 3);
s.distExtraBits = new byte[maxDistanceAlphabetLimit];
s.distOffset = new int[maxDistanceAlphabetLimit];
s.input = input;
BitReader.initBitReader(s);
s.runningState = INITIALIZED;
@ -246,11 +372,12 @@ final class Decode {
/**
* Decodes the next Huffman code from bit-stream.
*/
private static int readSymbol(int[] table, int offset, State s) {
private static int readSymbol(int[] tableGroup, int tableIdx, State s) {
int offset = tableGroup[tableIdx];
int val = BitReader.peekBits(s);
offset += val & HUFFMAN_TABLE_MASK;
int bits = table[offset] >> 16;
int sym = table[offset] & 0xFFFF;
int bits = tableGroup[offset] >> 16;
int sym = tableGroup[offset] & 0xFFFF;
if (bits <= HUFFMAN_TABLE_BITS) {
s.bitOffset += bits;
return sym;
@ -258,27 +385,18 @@ final class Decode {
offset += sym;
int mask = (1 << bits) - 1;
offset += (val & mask) >>> HUFFMAN_TABLE_BITS;
s.bitOffset += ((table[offset] >> 16) + HUFFMAN_TABLE_BITS);
return table[offset] & 0xFFFF;
s.bitOffset += ((tableGroup[offset] >> 16) + HUFFMAN_TABLE_BITS);
return tableGroup[offset] & 0xFFFF;
}
private static int readBlockLength(int[] table, int offset, State s) {
private static int readBlockLength(int[] tableGroup, int tableIdx, State s) {
BitReader.fillBitWindow(s);
int code = readSymbol(table, offset, s);
int code = readSymbol(tableGroup, tableIdx, s);
int n = BLOCK_LENGTH_N_BITS[code];
BitReader.fillBitWindow(s);
return BLOCK_LENGTH_OFFSET[code] + BitReader.readBits(s, n);
}
private static int translateShortCodes(int code, int[] ringBuffer, int index) {
if (code < NUM_DISTANCE_SHORT_CODES) {
index += DISTANCE_SHORT_CODE_INDEX_OFFSET[code];
index &= 3;
return ringBuffer[index] + DISTANCE_SHORT_CODE_VALUE_OFFSET[code];
}
return code - NUM_DISTANCE_SHORT_CODES + 1;
}
private static void moveToFront(int[] v, int index) {
int value = v[index];
for (; index > 0; index--) {
@ -308,9 +426,9 @@ final class Decode {
int repeat = 0;
int repeatCodeLen = 0;
int space = 32768;
int[] table = new int[32];
Huffman.buildHuffmanTable(table, 0, 5, codeLengthCodeLengths, CODE_LENGTH_CODES);
int[] table = new int[32 + 1]; /* Speculative single entry table group. */
int tableIdx = table.length - 1;
Huffman.buildHuffmanTable(table, tableIdx, 5, codeLengthCodeLengths, CODE_LENGTH_CODES);
while (symbol < numSymbols && space > 0) {
BitReader.readMoreInput(s);
@ -361,85 +479,128 @@ final class Decode {
Utils.fillIntsWithZeroes(codeLengths, symbol, numSymbols);
}
static int checkDupes(int[] symbols, int length) {
private static void checkDupes(int[] symbols, int length) {
for (int i = 0; i < length - 1; ++i) {
for (int j = i + 1; j < length; ++j) {
if (symbols[i] == symbols[j]) {
return 0;
throw new BrotliRuntimeException("Duplicate simple Huffman code symbol"); // COV_NF_LINE
}
}
}
return 1;
}
// TODO: Use specialized versions for smaller tables.
static void readHuffmanCode(int alphabetSize, int[] table, int offset, State s) {
int ok = 1;
int simpleCodeOrSkip;
/**
 * Reads up to 4 symbols directly and applies predefined histograms.
 *
 * <p>Reads a 2-bit symbol count (1..4), then that many fixed-width symbols, assigns each
 * symbol a code length from a fixed pattern, and builds the lookup table from those lengths.
 *
 * @param alphabetSizeMax alphabet size that determines the bit width of each coded symbol
 * @param alphabetSizeLimit number of symbols actually allowed; larger symbol values are rejected
 * @param tableGroup array passed through to {@code Huffman.buildHuffmanTable}
 * @param tableIdx index passed through to {@code Huffman.buildHuffmanTable}
 * @param s decoder state to read bits from
 * @return the value returned by {@code Huffman.buildHuffmanTable}
 * @throws BrotliRuntimeException if a decoded symbol is not below {@code alphabetSizeLimit}
 */
private static int readSimpleHuffmanCode(int alphabetSizeMax, int alphabetSizeLimit,
    int[] tableGroup, int tableIdx, State s) {
  // TODO: Avoid allocation?
  int[] codeLengths = new int[alphabetSizeLimit];
  int[] symbols = new int[4];

  // Bit width needed to represent the value (alphabetSizeMax - 1).
  int maxBits = 1 + log2floor(alphabetSizeMax - 1);

  // NOTE(review): no fillBitWindow before this read; presumably the caller has already
  // filled the bit window - confirm against readHuffmanCode.
  int numSymbols = BitReader.readFewBits(s, 2) + 1;
  for (int i = 0; i < numSymbols; i++) {
    BitReader.fillBitWindow(s);
    int symbol = BitReader.readFewBits(s, maxBits);
    if (symbol >= alphabetSizeLimit) {
      throw new BrotliRuntimeException("Can't readHuffmanCode"); // COV_NF_LINE
    }
    symbols[i] = symbol;
  }
  // Validates that the symbols read above are pairwise distinct.
  checkDupes(symbols, numSymbols);

  // For 1..3 symbols the histogram is chosen by the count alone; for 4 symbols one more
  // bit selects between histogram 4 and histogram 5.
  int histogramId = numSymbols;
  if (numSymbols == 4) {
    histogramId += BitReader.readFewBits(s, 1);
  }

  switch (histogramId) {
    case 1:
      codeLengths[symbols[0]] = 1;
      break;

    case 2:
      codeLengths[symbols[0]] = 1;
      codeLengths[symbols[1]] = 1;
      break;

    case 3:
      codeLengths[symbols[0]] = 1;
      codeLengths[symbols[1]] = 2;
      codeLengths[symbols[2]] = 2;
      break;

    case 4:  // uniform 4-symbol histogram
      codeLengths[symbols[0]] = 2;
      codeLengths[symbols[1]] = 2;
      codeLengths[symbols[2]] = 2;
      codeLengths[symbols[3]] = 2;
      break;

    case 5:  // prioritized 4-symbol histogram
      codeLengths[symbols[0]] = 1;
      codeLengths[symbols[1]] = 2;
      codeLengths[symbols[2]] = 3;
      codeLengths[symbols[3]] = 3;
      break;

    default:
      break;
  }

  // TODO: Use specialized version?
  return Huffman.buildHuffmanTable(
      tableGroup, tableIdx, HUFFMAN_TABLE_BITS, codeLengths, alphabetSizeLimit);
}
/**
 * Decodes a Huffman code whose code lengths are themselves Huffman-coded.
 *
 * <p>First reads the lengths of the "code length" codes (starting at position {@code skip}
 * of {@code CODE_LENGTH_CODE_ORDER}), then uses them to read the symbol code lengths, and
 * finally builds the lookup table.
 *
 * @param alphabetSizeLimit number of symbols to read code lengths for
 * @param skip index of the first entry of {@code CODE_LENGTH_CODE_ORDER} that is read
 * @param tableGroup array passed through to {@code Huffman.buildHuffmanTable}
 * @param tableIdx index passed through to {@code Huffman.buildHuffmanTable}
 * @param s decoder state to read bits from
 * @return the value returned by {@code Huffman.buildHuffmanTable}
 * @throws BrotliRuntimeException if the code length histogram is inconsistent
 */
private static int readComplexHuffmanCode(int alphabetSizeLimit, int skip,
    int[] tableGroup, int tableIdx, State s) {
  // TODO: Avoid allocation?
  int[] symbolLengths = new int[alphabetSizeLimit];
  int[] lengthCodeLengths = new int[CODE_LENGTH_CODES];

  int remainingSpace = 32;
  int nonZeroLengths = 0;
  int position = skip;
  while (position < CODE_LENGTH_CODES && remainingSpace > 0) {
    int slot = CODE_LENGTH_CODE_ORDER[position];
    BitReader.fillBitWindow(s);
    // TODO: Demultiplex FIXED_TABLE.
    // Each entry carries the bit count in the upper half and the value in the lower half.
    int entry = FIXED_TABLE[BitReader.peekBits(s) & 15];
    s.bitOffset += entry >> 16;
    int length = entry & 0xFFFF;
    lengthCodeLengths[slot] = length;
    if (length != 0) {
      remainingSpace -= (32 >> length);
      nonZeroLengths++;
    }
    position++;
  }

  // Valid only if the code space is used up exactly, or there is a single non-zero length.
  boolean acceptable = (remainingSpace == 0) || (nonZeroLengths == 1);
  if (!acceptable) {
    throw new BrotliRuntimeException("Corrupted Huffman code histogram"); // COV_NF_LINE
  }

  readHuffmanCodeLengths(lengthCodeLengths, alphabetSizeLimit, symbolLengths, s);

  return Huffman.buildHuffmanTable(
      tableGroup, tableIdx, HUFFMAN_TABLE_BITS, symbolLengths, alphabetSizeLimit);
}
/**
* Decodes Huffman table from bit-stream.
*
* @return number of slots used by resulting Huffman table
*/
private static int readHuffmanCode(int alphabetSizeMax, int alphabetSizeLimit,
int[] tableGroup, int tableIdx, State s) {
BitReader.readMoreInput(s);
// TODO: Avoid allocation.
int[] codeLengths = new int[alphabetSize];
BitReader.fillBitWindow(s);
simpleCodeOrSkip = BitReader.readFewBits(s, 2);
if (simpleCodeOrSkip == 1) { // Read symbols, codes & code lengths directly.
int maxBitsCounter = alphabetSize - 1;
int maxBits = 0;
int[] symbols = new int[4];
int numSymbols = BitReader.readFewBits(s, 2) + 1;
while (maxBitsCounter != 0) {
maxBitsCounter >>= 1;
maxBits++;
}
// TODO: uncomment when codeLengths is reused.
// Utils.fillWithZeroes(codeLengths, 0, alphabetSize);
for (int i = 0; i < numSymbols; i++) {
BitReader.fillBitWindow(s);
symbols[i] = BitReader.readFewBits(s, maxBits) % alphabetSize;
codeLengths[symbols[i]] = 2;
}
codeLengths[symbols[0]] = 1;
switch (numSymbols) {
case 2:
codeLengths[symbols[1]] = 1;
break;
case 4:
if (BitReader.readFewBits(s, 1) == 1) {
codeLengths[symbols[2]] = 3;
codeLengths[symbols[3]] = 3;
} else {
codeLengths[symbols[0]] = 2;
}
break;
default:
break;
}
ok = checkDupes(symbols, numSymbols);
} else { // Decode Huffman-coded code lengths.
int[] codeLengthCodeLengths = new int[CODE_LENGTH_CODES];
int space = 32;
int numCodes = 0;
for (int i = simpleCodeOrSkip; i < CODE_LENGTH_CODES && space > 0; i++) {
int codeLenIdx = CODE_LENGTH_CODE_ORDER[i];
BitReader.fillBitWindow(s);
int p = BitReader.peekBits(s) & 15;
// TODO: Demultiplex FIXED_TABLE.
s.bitOffset += FIXED_TABLE[p] >> 16;
int v = FIXED_TABLE[p] & 0xFFFF;
codeLengthCodeLengths[codeLenIdx] = v;
if (v != 0) {
space -= (32 >> v);
numCodes++;
}
}
if (space != 0 && numCodes != 1) {
ok = 0;
}
readHuffmanCodeLengths(codeLengthCodeLengths, alphabetSize, codeLengths, s);
int simpleCodeOrSkip = BitReader.readFewBits(s, 2);
if (simpleCodeOrSkip == 1) {
return readSimpleHuffmanCode(alphabetSizeMax, alphabetSizeLimit, tableGroup, tableIdx, s);
} else {
return readComplexHuffmanCode(alphabetSizeLimit, simpleCodeOrSkip, tableGroup, tableIdx, s);
}
if (ok == 0) {
throw new BrotliRuntimeException("Can't readHuffmanCode"); // COV_NF_LINE
}
Huffman.buildHuffmanTable(table, offset, HUFFMAN_TABLE_BITS, codeLengths, alphabetSize);
}
private static int decodeContextMap(int contextMapSize, byte[] contextMap, State s) {
@ -457,12 +618,16 @@ final class Decode {
if (useRleForZeros != 0) {
maxRunLengthPrefix = BitReader.readFewBits(s, 4) + 1;
}
int[] table = new int[HUFFMAN_TABLE_SIZE];
readHuffmanCode(numTrees + maxRunLengthPrefix, table, 0, s);
int alphabetSize = numTrees + maxRunLengthPrefix;
int tableSize = MAX_HUFFMAN_TABLE_SIZE[(alphabetSize + 31) >> 5];
/* Speculative single entry table group. */
int[] table = new int[tableSize + 1];
int tableIdx = table.length - 1;
readHuffmanCode(alphabetSize, alphabetSize, table, tableIdx, s);
for (int i = 0; i < contextMapSize; ) {
BitReader.readMoreInput(s);
BitReader.fillBitWindow(s);
int code = readSymbol(table, 0, s);
int code = readSymbol(table, tableIdx, s);
if (code == 0) {
contextMap[i] = 0;
i++;
@ -493,8 +658,8 @@ final class Decode {
final int[] ringBuffers = s.rings;
final int offset = 4 + treeType * 2;
BitReader.fillBitWindow(s);
int blockType = readSymbol(s.blockTrees, treeType * HUFFMAN_TABLE_SIZE, s);
int result = readBlockLength(s.blockTrees, (treeType + 3) * HUFFMAN_TABLE_SIZE, s);
int blockType = readSymbol(s.blockTrees, 2 * treeType, s);
int result = readBlockLength(s.blockTrees, 2 * treeType + 1, s);
if (blockType == 1) {
blockType = ringBuffers[offset + 1] + 1;
@ -515,8 +680,7 @@ final class Decode {
s.literalBlockLength = decodeBlockTypeAndLength(s, 0, s.numLiteralBlockTypes);
int literalBlockType = s.rings[5];
s.contextMapSlice = literalBlockType << LITERAL_CONTEXT_BITS;
s.literalTreeIndex = s.contextMap[s.contextMapSlice] & 0xFF;
s.literalTree = s.hGroup0[s.literalTreeIndex];
s.literalTreeIdx = s.contextMap[s.contextMapSlice] & 0xFF;
int contextMode = s.contextModes[literalBlockType];
s.contextLookupOffset1 = contextMode << 9;
s.contextLookupOffset2 = s.contextLookupOffset1 + 256;
@ -524,7 +688,7 @@ final class Decode {
private static void decodeCommandBlockSwitch(State s) {
s.commandBlockLength = decodeBlockTypeAndLength(s, 1, s.numCommandBlockTypes);
s.treeCommandOffset = s.hGroup1[s.rings[7]];
s.commandTreeIdx = s.rings[7];
}
private static void decodeDistanceBlockSwitch(State s) {
@ -563,9 +727,9 @@ final class Decode {
return;
}
// TODO: Reset? Do we need this?
s.hGroup0 = new int[0];
s.hGroup1 = new int[0];
s.hGroup2 = new int[0];
s.literalTreeGroup = new int[0];
s.commandTreeGroup = new int[0];
s.distanceTreeGroup = new int[0];
BitReader.readMoreInput(s);
decodeMetaBlockLength(s);
@ -592,12 +756,57 @@ final class Decode {
}
private static int readMetablockPartition(State s, int treeType, int numBlockTypes) {
int offset = s.blockTrees[2 * treeType];
if (numBlockTypes <= 1) {
s.blockTrees[2 * treeType + 1] = offset;
s.blockTrees[2 * treeType + 2] = offset;
return 1 << 28;
}
readHuffmanCode(numBlockTypes + 2, s.blockTrees, treeType * HUFFMAN_TABLE_SIZE, s);
readHuffmanCode(NUM_BLOCK_LENGTH_CODES, s.blockTrees, (treeType + 3) * HUFFMAN_TABLE_SIZE, s);
return readBlockLength(s.blockTrees, (treeType + 3) * HUFFMAN_TABLE_SIZE, s);
int blockTypeAlphabetSize = numBlockTypes + 2;
offset += readHuffmanCode(
blockTypeAlphabetSize, blockTypeAlphabetSize, s.blockTrees, 2 * treeType, s);
s.blockTrees[2 * treeType + 1] = offset;
int blockLengthAlphabetSize = NUM_BLOCK_LENGTH_CODES;
offset += readHuffmanCode(
blockLengthAlphabetSize, blockLengthAlphabetSize, s.blockTrees, 2 * treeType + 1, s);
s.blockTrees[2 * treeType + 2] = offset;
return readBlockLength(s.blockTrees, 2 * treeType + 1, s);
}
/**
 * Fills the per-symbol distance lookup arrays of the state.
 *
 * <p>For every distance symbol from {@code NUM_DISTANCE_SHORT_CODES} up to (at least)
 * {@code alphabetSizeLimit}, stores the number of extra bits in {@code s.distExtraBits} and
 * the base offset in {@code s.distOffset}. Entries for short codes are left untouched.
 *
 * @param s decoder state; reads {@code distancePostfixBits} and {@code numDirectDistanceCodes},
 *     writes into {@code distExtraBits} and {@code distOffset}
 * @param alphabetSizeLimit symbol index at which filling stops (rounded up to a whole group)
 */
private static void calculateDistanceLut(State s, int alphabetSizeLimit) {
  byte[] extraBitsLut = s.distExtraBits;
  int[] offsetLut = s.distOffset;
  int npostfix = s.distancePostfixBits;
  int ndirect = s.numDirectDistanceCodes;
  int groupSize = 1 << npostfix;

  /* Skip short codes. */
  int pos = NUM_DISTANCE_SHORT_CODES;

  /* Fill direct codes: no extra bits, offsets 1..ndirect. */
  for (int direct = 1; direct <= ndirect; ++direct) {
    extraBitsLut[pos] = 0;
    offsetLut[pos] = direct;
    ++pos;
  }

  /* Fill regular distance codes. */
  int nbits = 1;
  int upperHalf = 0;
  while (pos < alphabetSizeLimit) {
    int groupBase = ndirect + ((((2 + upperHalf) << nbits) - 4) << npostfix) + 1;
    /* Always fill the complete group. */
    int groupEnd = pos + groupSize;
    for (int value = groupBase; pos < groupEnd; ++value) {
      extraBitsLut[pos] = (byte) nbits;
      offsetLut[pos] = value;
      ++pos;
    }
    /* The extra-bit count grows after every second group. */
    nbits += upperHalf;
    upperHalf ^= 1;
  }
}
private static void readMetablockHuffmanCodesAndContextMaps(State s) {
@ -611,10 +820,8 @@ final class Decode {
BitReader.readMoreInput(s);
BitReader.fillBitWindow(s);
s.distancePostfixBits = BitReader.readFewBits(s, 2);
s.numDirectDistanceCodes =
NUM_DISTANCE_SHORT_CODES + (BitReader.readFewBits(s, 4) << s.distancePostfixBits);
s.numDirectDistanceCodes = BitReader.readFewBits(s, 4) << s.distancePostfixBits;
s.distancePostfixMask = (1 << s.distancePostfixBits) - 1;
int numDistanceCodes = s.numDirectDistanceCodes + (48 << s.distancePostfixBits);
// TODO: Reuse?
s.contextModes = new byte[s.numLiteralBlockTypes];
for (int i = 0; i < s.numLiteralBlockTypes;) {
@ -622,7 +829,7 @@ final class Decode {
int limit = Math.min(i + 96, s.numLiteralBlockTypes);
for (; i < limit; ++i) {
BitReader.fillBitWindow(s);
s.contextModes[i] = (byte) (BitReader.readFewBits(s, 2));
s.contextModes[i] = (byte) BitReader.readFewBits(s, 2);
}
BitReader.readMoreInput(s);
}
@ -644,18 +851,29 @@ final class Decode {
int numDistTrees = decodeContextMap(s.numDistanceBlockTypes << DISTANCE_CONTEXT_BITS,
s.distContextMap, s);
s.hGroup0 = decodeHuffmanTreeGroup(NUM_LITERAL_CODES, numLiteralTrees, s);
s.hGroup1 =
decodeHuffmanTreeGroup(NUM_INSERT_AND_COPY_CODES, s.numCommandBlockTypes, s);
s.hGroup2 = decodeHuffmanTreeGroup(numDistanceCodes, numDistTrees, s);
s.literalTreeGroup = decodeHuffmanTreeGroup(NUM_LITERAL_CODES, NUM_LITERAL_CODES,
numLiteralTrees, s);
s.commandTreeGroup = decodeHuffmanTreeGroup(NUM_COMMAND_CODES, NUM_COMMAND_CODES,
s.numCommandBlockTypes, s);
int distanceAlphabetSizeMax = calculateDistanceAlphabetSize(
s.distancePostfixBits, s.numDirectDistanceCodes, MAX_DISTANCE_BITS);
int distanceAlphabetSizeLimit = distanceAlphabetSizeMax;
if (s.isLargeWindow == 1) {
distanceAlphabetSizeMax = calculateDistanceAlphabetSize(
s.distancePostfixBits, s.numDirectDistanceCodes, MAX_LARGE_WINDOW_DISTANCE_BITS);
distanceAlphabetSizeLimit = calculateDistanceAlphabetLimit(
MAX_ALLOWED_DISTANCE, s.distancePostfixBits, s.numDirectDistanceCodes);
}
s.distanceTreeGroup = decodeHuffmanTreeGroup(distanceAlphabetSizeMax, distanceAlphabetSizeLimit,
numDistTrees, s);
calculateDistanceLut(s, distanceAlphabetSizeLimit);
s.contextMapSlice = 0;
s.distContextMapSlice = 0;
s.contextLookupOffset1 = (int) (s.contextModes[0]) << 9;
s.contextLookupOffset1 = s.contextModes[0] * 512;
s.contextLookupOffset2 = s.contextLookupOffset1 + 256;
s.literalTreeIndex = 0;
s.literalTree = s.hGroup0[0];
s.treeCommandOffset = s.hGroup1[0];
s.literalTreeIdx = 0;
s.commandTreeIdx = 0;
s.rings[4] = 1;
s.rings[5] = 0;
@ -706,13 +924,14 @@ final class Decode {
}
}
private static int[] decodeHuffmanTreeGroup(int alphabetSize, int n, State s) {
int[] group = new int[n + (n * HUFFMAN_TABLE_SIZE)];
private static int[] decodeHuffmanTreeGroup(int alphabetSizeMax, int alphabetSizeLimit,
int n, State s) {
int maxTableSize = MAX_HUFFMAN_TABLE_SIZE[(alphabetSizeLimit + 31) >> 5];
int[] group = new int[n + n * maxTableSize];
int next = n;
for (int i = 0; i < n; i++) {
for (int i = 0; i < n; ++i) {
group[i] = next;
Decode.readHuffmanCode(alphabetSize, group, next, s);
next += HUFFMAN_TABLE_SIZE;
next += readHuffmanCode(alphabetSizeMax, alphabetSizeLimit, group, i, s);
}
return group;
}
@ -738,7 +957,7 @@ final class Decode {
}
if (s.runningState == INITIALIZED) {
int windowBits = decodeWindowBits(s);
if (windowBits == 9) { /* Reserved case for future expansion. */
if (windowBits == -1) { /* Reserved case for future expansion. */
throw new BrotliRuntimeException("Invalid 'windowBits' code");
}
s.maxRingBufferSize = 1 << windowBits;
@ -780,23 +999,21 @@ final class Decode {
}
s.commandBlockLength--;
BitReader.fillBitWindow(s);
int cmdCode = readSymbol(s.hGroup1, s.treeCommandOffset, s);
int rangeIdx = cmdCode >>> 6;
s.distanceCode = 0;
if (rangeIdx >= 2) {
rangeIdx -= 2;
s.distanceCode = -1;
int cmdCode = readSymbol(s.commandTreeGroup, s.commandTreeIdx, s) << 2;
short insertAndCopyExtraBits = CMD_LOOKUP[cmdCode];
int insertLengthOffset = CMD_LOOKUP[cmdCode + 1];
int copyLengthOffset = CMD_LOOKUP[cmdCode + 2];
s.distanceCode = CMD_LOOKUP[cmdCode + 3];
BitReader.fillBitWindow(s);
{
int extraBits = insertAndCopyExtraBits & 0xFF;
s.insertLength = insertLengthOffset + BitReader.readBits(s, extraBits);
}
int insertCode = INSERT_RANGE_LUT[rangeIdx] + ((cmdCode >>> 3) & 7);
BitReader.fillBitWindow(s);
int insertBits = INSERT_LENGTH_N_BITS[insertCode];
int insertExtra = BitReader.readBits(s, insertBits);
s.insertLength = INSERT_LENGTH_OFFSET[insertCode] + insertExtra;
int copyCode = COPY_RANGE_LUT[rangeIdx] + (cmdCode & 7);
BitReader.fillBitWindow(s);
int copyBits = COPY_LENGTH_N_BITS[copyCode];
int copyExtra = BitReader.readBits(s, copyBits);
s.copyLength = COPY_LENGTH_OFFSET[copyCode] + copyExtra;
{
int extraBits = insertAndCopyExtraBits >> 8;
s.copyLength = copyLengthOffset + BitReader.readBits(s, extraBits);
}
s.j = 0;
s.runningState = INSERT_LOOP;
@ -811,8 +1028,7 @@ final class Decode {
}
s.literalBlockLength--;
BitReader.fillBitWindow(s);
ringBuffer[s.pos] =
(byte) readSymbol(s.hGroup0, s.literalTree, s);
ringBuffer[s.pos] = (byte) readSymbol(s.literalTreeGroup, s.literalTreeIdx, s);
s.pos++;
s.j++;
if (s.pos >= fence) {
@ -829,14 +1045,13 @@ final class Decode {
if (s.literalBlockLength == 0) {
decodeLiteralBlockSwitch(s);
}
int literalTreeIndex = s.contextMap[s.contextMapSlice
+ (Context.LOOKUP[s.contextLookupOffset1 + prevByte1]
| Context.LOOKUP[s.contextLookupOffset2 + prevByte2])] & 0xFF;
int literalContext = Context.LOOKUP[s.contextLookupOffset1 + prevByte1]
| Context.LOOKUP[s.contextLookupOffset2 + prevByte2];
int literalTreeIdx = s.contextMap[s.contextMapSlice + literalContext] & 0xFF;
s.literalBlockLength--;
prevByte2 = prevByte1;
BitReader.fillBitWindow(s);
prevByte1 = readSymbol(
s.hGroup0, s.hGroup0[literalTreeIndex], s);
prevByte1 = readSymbol(s.literalTreeGroup, literalTreeIdx, s);
ringBuffer[s.pos] = (byte) prevByte1;
s.pos++;
s.j++;
@ -855,36 +1070,38 @@ final class Decode {
s.runningState = MAIN_LOOP;
continue;
}
if (s.distanceCode < 0) {
int distanceCode = s.distanceCode;
if (distanceCode < 0) {
// distanceCode is untouched; assigning it 0 won't affect distance ring buffer rolling.
s.distance = s.rings[s.distRbIdx];
} else {
BitReader.readMoreInput(s);
if (s.distanceBlockLength == 0) {
decodeDistanceBlockSwitch(s);
}
s.distanceBlockLength--;
BitReader.fillBitWindow(s);
s.distanceCode = readSymbol(s.hGroup2, s.hGroup2[
s.distContextMap[s.distContextMapSlice
+ (s.copyLength > 4 ? 3 : s.copyLength - 2)] & 0xFF], s);
if (s.distanceCode >= s.numDirectDistanceCodes) {
s.distanceCode -= s.numDirectDistanceCodes;
int postfix = s.distanceCode & s.distancePostfixMask;
s.distanceCode >>>= s.distancePostfixBits;
int n = (s.distanceCode >>> 1) + 1;
int offset = ((2 + (s.distanceCode & 1)) << n) - 4;
BitReader.fillBitWindow(s);
int distanceExtra = BitReader.readBits(s, n);
s.distanceCode = s.numDirectDistanceCodes + postfix
+ ((offset + distanceExtra) << s.distancePostfixBits);
int distTreeIdx = s.distContextMap[s.distContextMapSlice + distanceCode] & 0xFF;
distanceCode = readSymbol(s.distanceTreeGroup, distTreeIdx, s);
if (distanceCode < NUM_DISTANCE_SHORT_CODES) {
int index = (s.distRbIdx + DISTANCE_SHORT_CODE_INDEX_OFFSET[distanceCode]) & 0x3;
s.distance = s.rings[index] + DISTANCE_SHORT_CODE_VALUE_OFFSET[distanceCode];
if (s.distance < 0) {
throw new BrotliRuntimeException("Negative distance"); // COV_NF_LINE
}
} else {
int extraBits = s.distExtraBits[distanceCode];
int bits;
if (s.bitOffset + extraBits <= BitReader.BITNESS) {
bits = BitReader.readFewBits(s, extraBits);
} else {
BitReader.fillBitWindow(s);
bits = BitReader.readBits(s, extraBits);
}
s.distance = s.distOffset[distanceCode] + (bits << s.distancePostfixBits);
}
}
// Convert the distance code to the actual distance by possibly looking up past distances
// from the ringBuffer.
s.distance = translateShortCodes(s.distanceCode, s.rings, s.distRbIdx);
if (s.distance < 0) {
throw new BrotliRuntimeException("Negative distance"); // COV_NF_LINE
}
if (s.maxDistance != s.maxBackwardDistance
&& s.pos < s.maxBackwardDistance) {
s.maxDistance = s.pos;
@ -897,9 +1114,9 @@ final class Decode {
continue;
}
if (s.distanceCode > 0) {
s.rings[s.distRbIdx & 3] = s.distance;
s.distRbIdx++;
if (distanceCode > 0) {
s.distRbIdx = (s.distRbIdx + 1) & 0x3;
s.rings[s.distRbIdx] = s.distance;
}
if (s.copyLength > s.metaBlockLength) {
@ -916,7 +1133,10 @@ final class Decode {
int dstEnd = dst + copyLength;
if ((srcEnd < ringBufferMask) && (dstEnd < ringBufferMask)) {
if (copyLength < 12 || (srcEnd > dst && dstEnd > src)) {
for (int k = 0; k < copyLength; ++k) {
for (int k = 0; k < copyLength; k += 4) {
ringBuffer[dst++] = ringBuffer[src++];
ringBuffer[dst++] = ringBuffer[src++];
ringBuffer[dst++] = ringBuffer[src++];
ringBuffer[dst++] = ringBuffer[src++];
}
} else {
@ -945,6 +1165,10 @@ final class Decode {
continue;
case TRANSFORM:
// This check is done here to unburden the hot loop.
if (s.distance > MAX_ALLOWED_DISTANCE) {
throw new BrotliRuntimeException("Invalid backward reference"); // COV_NF_LINE
}
if (s.copyLength >= MIN_WORD_LENGTH
&& s.copyLength <= MAX_WORD_LENGTH) {
int offset = DICTIONARY_OFFSETS_BY_LENGTH[s.copyLength];

View File

@ -375,7 +375,7 @@ public class EagerStreamTest {
ps = new ProxyStream(new ByteArrayInputStream(DATA));
reader = new BrotliInputStream(ps, 1);
reader.setEager(true);
reader.enableEagerOutput();
reader.read(buffer);
reader.close();
int eagerReadBytes = ps.readBytes;

View File

@ -58,9 +58,12 @@ final class Huffman {
/**
* Builds Huffman lookup table assuming code lengths are in symbol order.
*
* @return number of slots used by resulting Huffman table
*/
static void buildHuffmanTable(int[] rootTable, int tableOffset, int rootBits, int[] codeLengths,
static int buildHuffmanTable(int[] tableGroup, int tableIdx, int rootBits, int[] codeLengths,
int codeLengthsSize) {
int tableOffset = tableGroup[tableIdx];
int key; // Reversed prefix code.
int[] sorted = new int[codeLengthsSize]; // Symbols sorted by code length.
// TODO: fill with zeroes?
@ -93,9 +96,9 @@ final class Huffman {
// Special case code with only one value.
if (offset[MAX_LENGTH] == 1) {
for (key = 0; key < totalSize; key++) {
rootTable[tableOffset + key] = sorted[0];
tableGroup[tableOffset + key] = sorted[0];
}
return;
return totalSize;
}
// Fill in root table.
@ -103,7 +106,8 @@ final class Huffman {
symbol = 0;
for (int len = 1, step = 2; len <= rootBits; len++, step <<= 1) {
for (; count[len] > 0; count[len]--) {
replicateValue(rootTable, tableOffset + key, step, tableSize, len << 16 | sorted[symbol++]);
replicateValue(tableGroup, tableOffset + key, step, tableSize,
len << 16 | sorted[symbol++]);
key = getNextKey(key, len);
}
}
@ -120,13 +124,14 @@ final class Huffman {
tableSize = 1 << tableBits;
totalSize += tableSize;
low = key & mask;
rootTable[tableOffset + low] =
tableGroup[tableOffset + low] =
(tableBits + rootBits) << 16 | (currentOffset - tableOffset - low);
}
replicateValue(rootTable, currentOffset + (key >> rootBits), step, tableSize,
replicateValue(tableGroup, currentOffset + (key >> rootBits), step, tableSize,
(len - rootBits) << 16 | sorted[symbol++]);
key = getNextKey(key, len);
}
}
return totalSize;
}
}

View File

@ -13,6 +13,7 @@ final class State {
byte[] contextModes;
byte[] contextMap;
byte[] distContextMap;
byte[] distExtraBits;
byte[] output;
byte[] byteBuffer; // BitReader
@ -21,9 +22,10 @@ final class State {
int[] intBuffer; // BitReader
int[] rings;
int[] blockTrees;
int[] hGroup0;
int[] hGroup1;
int[] hGroup2;
int[] literalTreeGroup;
int[] commandTreeGroup;
int[] distanceTreeGroup;
int[] distOffset;
long accumulator64; // BitReader: pre-fetched bits.
@ -48,15 +50,14 @@ final class State {
int maxDistance;
int distRbIdx;
int trivialLiteralContext;
int literalTreeIndex;
int literalTree;
int literalTreeIdx;
int commandTreeIdx;
int j;
int insertLength;
int contextMapSlice;
int distContextMapSlice;
int contextLookupOffset1;
int contextLookupOffset2;
int treeCommandOffset;
int distanceCode;
int numDirectDistanceCodes;
int distancePostfixMask;
@ -73,6 +74,7 @@ final class State {
int ringBufferBytesWritten;
int ringBufferBytesReady;
int isEager;
int isLargeWindow;
InputStream input; // BitReader

View File

@ -12,6 +12,7 @@ import static org.junit.Assert.fail;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@ -624,7 +625,7 @@ public class SynthTest {
* // one ins/copy and dist block type
* vlq_blocktypes: 1
* vlq_blocktypes: 1
* ndirect: 0 0
* ndirect: 0, 0
* // two MSB6 literal context modes
* bits: "00", "00"
* // two literal prefix codes
@ -680,7 +681,7 @@ public class SynthTest {
* // one ins/copy and dist block type
* vlq_blocktypes: 1
* vlq_blocktypes: 1
* ndirect: 0 0
* ndirect: 0, 0
* // two MSB6 literal context modes
* bits: "00", "00"
* // two literal prefix codes
@ -983,6 +984,46 @@ public class SynthTest {
);
}
/**
 * Feeds a hand-assembled stream to checkSynth and expects "abcabcdefdef".
 *
 * Per the stream description below, the second meta-block declares 3 direct
 * distance codes and a single-symbol distance tree (symbol 18, the third direct
 * distance); the trailing empty meta-block is there to make sure that no extra
 * distance bits are read for that symbol.
 */
@Test
public void testDistanceLut() {
byte[] compressed = {
(byte) 0x8b, (byte) 0x02, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x80,
(byte) 0xe3, (byte) 0xb4, (byte) 0x0d, (byte) 0x00, (byte) 0x00, (byte) 0x07, (byte) 0x5b,
(byte) 0x26, (byte) 0x31, (byte) 0x40, (byte) 0x02, (byte) 0x00, (byte) 0xe0, (byte) 0x4e,
(byte) 0x1b, (byte) 0x99, (byte) 0x86, (byte) 0x46, (byte) 0xc6, (byte) 0x22, (byte) 0x14,
(byte) 0x00, (byte) 0x00, (byte) 0x03, (byte) 0x00, (byte) 0x00, (byte) 0x1c, (byte) 0xa7,
(byte) 0x6d, (byte) 0x00, (byte) 0x00, (byte) 0x38, (byte) 0xd8, (byte) 0x32, (byte) 0x89,
(byte) 0x01, (byte) 0x12, (byte) 0x21, (byte) 0x91, (byte) 0x69, (byte) 0x62, (byte) 0x6a,
(byte) 0x36
};
checkSynth(
/*
* main_header
* metablock_header_easy: 6, 0 // implicit ndirect: 0, 0
* command_easy: 3, "abc", 3 // Insert "abc", copy "abc"
* metablock_header_begin: 0, 0, 6, 0
* vlq_blocktypes: 1 // num literal block types
* vlq_blocktypes: 1 // num command block types
* vlq_blocktypes: 1 // num distance block types
* ndirect: 3, 0
* bits: "00" // literal context modes
* vlq_blocktypes: 1 // num literal Huffman trees
* // command has no context -> num trees == num block types
* vlq_blocktypes: 1 // num distance Huffman trees
* huffman_fixed: 256
* huffman_fixed: 704
* huffman_simple: 0,1,67, 18
* command_inscopy_easy: 3, 3 // Insert 3, copy 3
* command_literals_easy: "def"
* // 0-bit Huffman code : dcode = 18 -> third direct distance
* metablock_lastempty // make sure that no extra distance bits are read
*/
compressed,
true,
"abcabcdefdef"
);
}
@Test
public void testEmpty() {
byte[] compressed = {
@ -1264,7 +1305,7 @@ public class SynthTest {
* // one ins/copy and dist block type
* vlq_blocktypes: 1
* vlq_blocktypes: 1
* ndirect: 0 0
* ndirect: 0, 0
* // two MSB6 literal context modes
* bits: "00", "00"
* // two literal prefix codes
@ -2706,6 +2747,87 @@ public class SynthTest {
}
*/
/**
 * Feeds a hand-assembled stream to checkSynth that, per the description below,
 * encodes one distance with a 15-bit prefix code followed by 22 extra bits.
 *
 * The first meta-block produces "abc" followed by 8388602 'c' bytes, so that the
 * later 3-byte copy can reach back to the leading "abc"; an uncompressed
 * meta-block with three repetitions of the 16 hex digits follows.
 */
@Test
public void testStressReadDistanceExtraBits() {
byte[] compressed = {
(byte) 0x4f, (byte) 0xfe, (byte) 0xff, (byte) 0x3f, (byte) 0x00, (byte) 0x00, (byte) 0x00,
(byte) 0x80, (byte) 0xe3, (byte) 0xb4, (byte) 0x0d, (byte) 0x00, (byte) 0x00, (byte) 0x07,
(byte) 0x5b, (byte) 0x26, (byte) 0x31, (byte) 0x40, (byte) 0x02, (byte) 0x00, (byte) 0xe0,
(byte) 0x4e, (byte) 0x9b, (byte) 0xf6, (byte) 0x69, (byte) 0xef, (byte) 0xff, (byte) 0x0c,
(byte) 0x8d, (byte) 0x8c, (byte) 0x05, (byte) 0x10, (byte) 0x00, (byte) 0x00, (byte) 0x00,
(byte) 0x00, (byte) 0x00, (byte) 0x38, (byte) 0x4e, (byte) 0xdb, (byte) 0x00, (byte) 0x00,
(byte) 0x70, (byte) 0xb0, (byte) 0x65, (byte) 0x12, (byte) 0x03, (byte) 0x24, (byte) 0xa8,
(byte) 0xaa, (byte) 0xef, (byte) 0xab, (byte) 0xaa, (byte) 0x7f, (byte) 0x24, (byte) 0x16,
(byte) 0x35, (byte) 0x8f, (byte) 0xac, (byte) 0x9e, (byte) 0x3d, (byte) 0xf7, (byte) 0xf3,
(byte) 0xe3, (byte) 0x0a, (byte) 0xfc, (byte) 0xff, (byte) 0x03, (byte) 0x00, (byte) 0x00,
(byte) 0x78, (byte) 0x01, (byte) 0x08, (byte) 0x30, (byte) 0x31, (byte) 0x32, (byte) 0x33,
(byte) 0x34, (byte) 0x35, (byte) 0x36, (byte) 0x37, (byte) 0x38, (byte) 0x39, (byte) 0x41,
(byte) 0x42, (byte) 0x43, (byte) 0x44, (byte) 0x45, (byte) 0x46, (byte) 0x30, (byte) 0x31,
(byte) 0x32, (byte) 0x33, (byte) 0x34, (byte) 0x35, (byte) 0x36, (byte) 0x37, (byte) 0x38,
(byte) 0x39, (byte) 0x41, (byte) 0x42, (byte) 0x43, (byte) 0x44, (byte) 0x45, (byte) 0x46,
(byte) 0x30, (byte) 0x31, (byte) 0x32, (byte) 0x33, (byte) 0x34, (byte) 0x35, (byte) 0x36,
(byte) 0x37, (byte) 0x38, (byte) 0x39, (byte) 0x41, (byte) 0x42, (byte) 0x43, (byte) 0x44,
(byte) 0x45, (byte) 0x46, (byte) 0x03
};
/* This line is added manually. */
char[] stub = new char[8388602]; Arrays.fill(stub, 'c'); String hex = "0123456789ABCDEF";
checkSynth(
/*
* main_header: 24
* metablock_header_easy: 8388605, 0 // 2^23 - 3 = shortest 22-bit distance
* command_easy: 8388602, "abc", 1
* metablock_header_begin: 0, 0, 3, 0
* vlq_blocktypes: 1 // num literal block types
* vlq_blocktypes: 1 // num command block types
* vlq_blocktypes: 1 // num distance block types
* ndirect: 0, 0
* bits: "00" // literal context modes
* vlq_blocktypes: 1 // num literal Huffman trees
* // command has no context -> num trees == num block types
* vlq_blocktypes: 1 // num distance Huffman trees
* huffman_fixed: 256
* huffman_fixed: 704
* // Begin of distance Huffman tree. First 15 codes have lengths 1 to 15.
* // Symbol that corresponds to first half of 22-bit distance range is also
* // 15. All other symbols are 0.
* hskip: 0
* clcl_ordered: 4,4,4,4, 4,4,4,4, 4,4,4,4, 4,4, 5,5,5,5
* set_prefix_cl_rle: "0000", "0001", "0010", "0011", \
* "0100", "0101", "0110", "0111", \
* "1000", "1001", "1010", "1011", \
* "1100", "1101", \
* "11100", "11101", "11110", "11111"
* cl_rle: 1
* cl_rle: 2
* cl_rle: 3
* cl_rle: 4
* cl_rle: 5
* cl_rle: 6
* cl_rle: 7
* cl_rle: 8
* cl_rle: 9
* cl_rle: 10
* cl_rle: 11
* cl_rle: 12
* cl_rle: 13
* cl_rle: 14
* cl_rle: 15
* cl_rle_rep_0: 43
* cl_rle: 15 // literal number 97, that is, the letter 'a'
* // end of literal Huffman tree
* command_inscopy_easy: 0, 3 // Insert 0, copy 3
* // 15 bits of distance code plus 22 extra bits
* command_dist_bits: "111111111111111", "0000000000000000000000"
* metablock_uncompressed: "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"
* metablock_lastempty
*/
compressed,
true,
/* This line is modified manually. */
"abc" + new String(stub) + "abc" + hex + hex + hex
);
}
@Test
public void testTooManySymbolsRepeated() {
byte[] compressed = {
@ -2785,6 +2907,34 @@ public class SynthTest {
);
}
/**
 * Feeds a hand-assembled stream to checkSynth whose literal alphabet, per the
 * description below, has a single symbol (42, i.e. '*'), so each of the
 * 16777216 inserted literals takes 0 bits; expects 16777216 '*' characters.
 */
@Test
public void testZeroCostLiterals() {
byte[] compressed = {
(byte) 0x9b, (byte) 0xff, (byte) 0xff, (byte) 0xff, (byte) 0x00, (byte) 0x20, (byte) 0x54,
(byte) 0x00, (byte) 0x00, (byte) 0x38, (byte) 0xd8, (byte) 0x32, (byte) 0x89, (byte) 0x01,
(byte) 0x12, (byte) 0x00, (byte) 0x00, (byte) 0x77, (byte) 0xda, (byte) 0xcc, (byte) 0xe1,
(byte) 0x7b, (byte) 0xfa, (byte) 0x0f
};
/* This line is added manually. */
char[] expected = new char[16777216]; Arrays.fill(expected, '*');
checkSynth(
/*
* main_header
* metablock_header_begin: 1, 0, 16777216, 0
* metablock_header_trivial_context
* huffman_simple: 0,1,256, 42 // Single symbol alphabet
* huffman_fixed: 704
* huffman_fixed: 64
* command_inscopy_easy: 16777216, 0
* // 16777216 times 0 bits
*/
compressed,
true,
/* This line is modified manually. */
new String(expected)
);
}
/* GENERATED CODE END */
}

View File

@ -88,4 +88,15 @@ final class Utils {
static void flipBuffer(Buffer buffer) {
buffer.flip();
}
/**
 * Reports whether the "BROTLI_ENABLE_ASSERTS" system property is switched on.
 *
 * @return 1 when the property value parses as boolean true, 0 otherwise
 *     (including when the property is not set)
 */
static int isDebugMode() {
  String flag = System.getProperty("BROTLI_ENABLE_ASSERTS");
  if (Boolean.parseBoolean(flag)) {
    return 1;
  }
  return 0;
}
/**
 * Selects the value described at BitReader.LOG_BITNESS from the
 * "BROTLI_32_BIT_CPU" system property.
 *
 * @return 5 when the property value parses as boolean true, 6 otherwise
 *     (including when the property is not set); presumably the log2 of the
 *     bit-reader word width - confirm against BitReader.LOG_BITNESS
 */
static int getLogBintness() {
  String flag = System.getProperty("BROTLI_32_BIT_CPU");
  if (Boolean.parseBoolean(flag)) {
    return 5;
  }
  return 6;
}
}

View File

@ -0,0 +1,34 @@
"""Utilities for Java brotli tests."""
# JVM flags that brotli_java_test appends to the caller-supplied jvm_flags of
# every test target it declares.
_TEST_JVM_FLAGS = [
"-DBROTLI_ENABLE_ASSERTS=true",
]
def brotli_java_test(name, main_class = None, jvm_flags = None, **kwargs):
    """Declares a pair of java_test targets: `<name>_32` and `<name>_64`.

    Both targets share every attribute except the BROTLI_32_BIT_CPU system
    property, which is "true" for the `_32` target and "false" for the `_64` one.

    Args:
      name: base name; also the simple class name used to derive test_class.
      main_class: optional main class; when given, test_class is left unset.
      jvm_flags: optional list of extra JVM flags; _TEST_JVM_FLAGS are appended.
      **kwargs: forwarded unchanged to both native.java_test calls.
    """
    caller_flags = [] if jvm_flags == None else jvm_flags
    common_flags = caller_flags + _TEST_JVM_FLAGS

    # Derive the fully-qualified test class from the package path unless the
    # caller picked an explicit entry point.
    package_prefix = native.package_name().replace("/", ".")
    test_class = None if main_class != None else package_prefix + "." + name

    variants = [
        ("_32", "-DBROTLI_32_BIT_CPU=true"),
        ("_64", "-DBROTLI_32_BIT_CPU=false"),
    ]
    for suffix, cpu_flag in variants:
        native.java_test(
            name = name + suffix,
            main_class = main_class,
            test_class = test_class,
            jvm_flags = common_flags + [cpu_flag],
            **kwargs
        )

View File

@ -40,6 +40,16 @@
</testExcludes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.0.0-M3</version>
<configuration>
<systemPropertyVariables>
<BROTLI_ENABLE_ASSERTS>true</BROTLI_ENABLE_ASSERTS>
</systemPropertyVariables>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>

View File

@ -35,7 +35,6 @@ java_library(
java_test(
name = "SetZeroDictionaryTest",
test_class = "org.brotli.wrapper.common.SetZeroDictionaryTest",
size = "small",
data = [
":brotli_jni_no_dictionary_data", # Bazel JNI workaround
@ -43,12 +42,12 @@ java_test(
jvm_flags = [
"-DBROTLI_JNI_LIBRARY=$(location :brotli_jni_no_dictionary_data)",
],
test_class = "org.brotli.wrapper.common.SetZeroDictionaryTest",
runtime_deps = [":test_lib"],
)
java_test(
name = "SetRfcDictionaryTest",
test_class = "org.brotli.wrapper.common.SetRfcDictionaryTest",
size = "small",
data = [
":brotli_jni_no_dictionary_data", # Bazel JNI workaround
@ -56,5 +55,6 @@ java_test(
jvm_flags = [
"-DBROTLI_JNI_LIBRARY=$(location :brotli_jni_no_dictionary_data)",
],
test_class = "org.brotli.wrapper.common.SetRfcDictionaryTest",
runtime_deps = [":test_lib"],
)

View File

@ -39,7 +39,6 @@ filegroup(
java_test(
name = "BrotliDecoderChannelTest",
test_class = "org.brotli.wrapper.dec.BrotliDecoderChannelTest",
size = "large",
data = [
":brotli_jni", # Bazel JNI workaround
@ -49,12 +48,12 @@ java_test(
"-DBROTLI_JNI_LIBRARY=$(location :brotli_jni)",
"-DTEST_BUNDLE=$(location :test_bundle)",
],
test_class = "org.brotli.wrapper.dec.BrotliDecoderChannelTest",
runtime_deps = [":test_lib"],
)
java_test(
name = "BrotliInputStreamTest",
test_class = "org.brotli.wrapper.dec.BrotliInputStreamTest",
size = "large",
data = [
":brotli_jni", # Bazel JNI workaround
@ -64,12 +63,12 @@ java_test(
"-DBROTLI_JNI_LIBRARY=$(location :brotli_jni)",
"-DTEST_BUNDLE=$(location :test_bundle)",
],
test_class = "org.brotli.wrapper.dec.BrotliInputStreamTest",
runtime_deps = [":test_lib"],
)
java_test(
name = "DecoderTest",
test_class = "org.brotli.wrapper.dec.DecoderTest",
size = "large",
data = [
":brotli_jni", # Bazel JNI workaround
@ -79,5 +78,6 @@ java_test(
"-DBROTLI_JNI_LIBRARY=$(location :brotli_jni)",
"-DTEST_BUNDLE=$(location :test_bundle)",
],
test_class = "org.brotli.wrapper.dec.DecoderTest",
runtime_deps = [":test_lib"],
)

View File

@ -34,8 +34,8 @@ public class BrotliInputStream extends InputStream {
this(source, DEFAULT_BUFFER_SIZE);
}
public void setEager(boolean eager) {
decoder.setEager(eager);
public void enableEagerOutput() {
decoder.enableEagerOutput();
}
@Override

View File

@ -50,8 +50,8 @@ public class Decoder {
throw new IOException(message);
}
public void setEager(boolean eager) {
this.eager = eager;
public void enableEagerOutput() {
this.eager = true;
}
/**

View File

@ -30,6 +30,7 @@ public class DecoderJNI {
private final long[] context = new long[3];
private final ByteBuffer inputBuffer;
private Status lastStatus = Status.NEEDS_MORE_INPUT;
private boolean fresh = true;
public Wrapper(int inputBufferSize) throws IOException {
this.context[1] = inputBufferSize;
@ -52,6 +53,7 @@ public class DecoderJNI {
if (lastStatus == Status.OK && length != 0) {
throw new IllegalStateException("pushing input to decoder in OK state");
}
fresh = false;
nativePush(context, length);
parseStatus();
}
@ -90,6 +92,7 @@ public class DecoderJNI {
if (lastStatus != Status.NEEDS_MORE_OUTPUT && !hasOutput()) {
throw new IllegalStateException("pulling output from decoder in " + lastStatus + " state");
}
fresh = false;
ByteBuffer result = nativePull(context);
parseStatus();
return result;

View File

@ -56,7 +56,7 @@ public class EagerStreamTest extends BrotliJniTestBase {
}
};
BrotliInputStream reader = new BrotliInputStream(source);
reader.setEager(true);
reader.enableEagerOutput();
int count = 0;
while (true) {
log.append("^").append(count);

View File

@ -40,7 +40,6 @@ filegroup(
java_test(
name = "BrotliEncoderChannelTest",
test_class = "org.brotli.wrapper.enc.BrotliEncoderChannelTest",
size = "large",
data = [
":brotli_jni", # Bazel JNI workaround
@ -51,12 +50,12 @@ java_test(
"-DTEST_BUNDLE=$(location :test_bundle)",
],
shard_count = 15,
test_class = "org.brotli.wrapper.enc.BrotliEncoderChannelTest",
runtime_deps = [":test_lib"],
)
java_test(
name = "BrotliOutputStreamTest",
test_class = "org.brotli.wrapper.enc.BrotliOutputStreamTest",
size = "large",
data = [
":brotli_jni", # Bazel JNI workaround
@ -67,12 +66,12 @@ java_test(
"-DTEST_BUNDLE=$(location :test_bundle)",
],
shard_count = 15,
test_class = "org.brotli.wrapper.enc.BrotliOutputStreamTest",
runtime_deps = [":test_lib"],
)
java_test(
name = "EncoderTest",
test_class = "org.brotli.wrapper.enc.EncoderTest",
size = "large",
data = [
":brotli_jni", # Bazel JNI workaround
@ -83,5 +82,6 @@ java_test(
"-DTEST_BUNDLE=$(location :test_bundle)",
],
shard_count = 15,
test_class = "org.brotli.wrapper.enc.EncoderTest",
runtime_deps = [":test_lib"],
)

View File

@ -27,9 +27,13 @@ class EncoderJNI {
static class Wrapper {
protected final long[] context = new long[5];
private final ByteBuffer inputBuffer;
private boolean fresh = true;
Wrapper(int inputBufferSize, int quality, int lgwin)
throws IOException {
if (inputBufferSize <= 0) {
throw new IOException("buffer size must be positive");
}
this.context[1] = inputBufferSize;
this.context[2] = quality;
this.context[3] = lgwin;
@ -56,6 +60,7 @@ class EncoderJNI {
throw new IllegalStateException("pushing input to encoder over previous input");
}
context[1] = op.ordinal();
fresh = false;
nativePush(context, length);
}
@ -86,6 +91,7 @@ class EncoderJNI {
if (!isSuccess() || !hasMoreOutput()) {
throw new IllegalStateException("pulling while data is not ready");
}
fresh = false;
return nativePull(context);
}

View File

@ -1,15 +1,11 @@
workspace(name = "org_brotli_js")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
http_archive(
git_repository(
name = "io_bazel_rules_closure",
sha256 = "b29a8bc2cb10513c864cb1084d6f38613ef14a143797cea0af0f91cd385f5e8c",
strip_prefix = "rules_closure-0.8.0",
urls = [
"https://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/0.8.0.tar.gz",
"https://github.com/bazelbuild/rules_closure/archive/0.8.0.tar.gz",
],
commit = "a176ec89a1b251bb5442ba569d47cee3c053e633",
remote = "https://github.com/bazelbuild/rules_closure.git",
)
load("@io_bazel_rules_closure//closure:defs.bzl", "closure_repositories")

1155
js/decode.js Normal file → Executable file

File diff suppressed because it is too large Load Diff

3
js/decode.min.js vendored

File diff suppressed because one or more lines are too long

View File

@ -1,3 +1,4 @@
#include <climits>
#include <cstddef>
#include <cstdio>
#include <cstring>
@ -8,6 +9,16 @@
#include "./durchschlag.h"
#include "./sieve.h"
/* Long-form spellings of the options that take an additional "=#" integer
 * parameter, like "--foo=20" or "--foo=32K". This isn't a definitive list of
 * "--foo" arguments: flag-only options are matched by exact string compare.
 * Each literal keeps its trailing '=', so std::strlen() of the macro is the
 * offset at which the numeric value starts.
 */
#define LONG_ARG_BLOCK_LEN "--block_len="
#define LONG_ARG_SLICE_LEN "--slice_len="
#define LONG_ARG_TARGET_DICT_LEN "--target_dict_len="
#define LONG_ARG_MIN_SLICE_POP "--min_slice_pop="
#define LONG_ARG_CHUNK_LEN "--chunk_len="
#define LONG_ARG_OVERLAP_LEN "--overlap_len="
#define METHOD_DM 0
#define METHOD_SIEVE 1
#define METHOD_DURCHSCHLAG 2
@ -93,11 +104,20 @@ static void printHelp(const char* name) {
" --dsh use 'durchschlag' engine (default)\n"
" --purify rewrite samples; unique text parts are zeroed out\n"
" --sieve use 'sieve' engine\n"
" -b# set block length for 'durchschlag'; default: 1024\n"
" -s# set slice length for 'distill', 'durchschlag', 'purify'\n"
" -b#, --block_len=#\n"
" set block length for 'durchschlag'; default: 1024\n"
" -s#, --slice_len=#\n"
" set slice length for 'distill', 'durchschlag', 'purify'\n"
" and 'sieve'; default: 16\n"
" -t# set target dictionary size (limit); default: 16K\n"
" -u# set minimum slice population (for rewrites); default: 2\n"
" -t#, --target_dict_len=#\n"
" set target dictionary length (limit); default: 16K\n"
" -u#, --min_slice_pop=#\n"
" set minimum slice population (for rewrites); default: 2\n"
" -c#, --chunk_len=#\n"
" if positive, samples are cut into chunks of this length;\n"
" default: 0; cannot mix with 'rewrite samples'\n"
" -o#, --overlap_len=#\n"
" set chunk overlap length; default 0\n"
"# is a decimal number with optional k/K/m/M suffix.\n"
"WARNING: 'distill' and 'purify' will overwrite original samples!\n"
" Completely unique samples might become empty files.\n\n");
@ -110,6 +130,8 @@ int main(int argc, char const* argv[]) {
size_t targetSize = 16 << 10;
size_t blockSize = 1024;
size_t minimumPopulation = 2;
size_t chunkLen = 0;
size_t overlapLen = 0;
std::vector<uint8_t> data;
std::vector<size_t> sizes;
@ -119,66 +141,115 @@ int main(int argc, char const* argv[]) {
if (argv[i] == nullptr) {
continue;
}
if (argv[i][0] == '-') {
if (argv[i][1] == '-') {
char arg1 = argv[i][1];
const char* arg2 = arg1 ? &argv[i][2] : nullptr;
if (arg1 == '-') {
if (dictionaryArg != -1) {
fprintf(stderr,
"Method should be specified before dictionary / sample '%s'\n",
argv[i]);
exit(1);
}
if (std::strcmp("--sieve", argv[i]) == 0) {
/* Look for "--long_arg" via exact match. */
if (std::strcmp(argv[i], "--sieve") == 0) {
method = METHOD_SIEVE;
continue;
}
if (std::strcmp("--dm", argv[i]) == 0) {
if (std::strcmp(argv[i], "--dm") == 0) {
method = METHOD_DM;
continue;
}
if (std::strcmp("--dsh", argv[i]) == 0) {
if (std::strcmp(argv[i], "--dsh") == 0) {
method = METHOD_DURCHSCHLAG;
continue;
}
if (std::strcmp("--distill", argv[i]) == 0) {
if (std::strcmp(argv[i], "--distill") == 0) {
method = METHOD_DISTILL;
continue;
}
if (std::strcmp("--purify", argv[i]) == 0) {
if (std::strcmp(argv[i], "--purify") == 0) {
method = METHOD_PURIFY;
continue;
}
printHelp(fileName(argv[0]));
fprintf(stderr, "Invalid option '%s'\n", argv[i]);
exit(1);
/* Look for "--long_arg=#" via prefix match. */
if (std::strncmp(argv[i], LONG_ARG_BLOCK_LEN,
std::strlen(LONG_ARG_BLOCK_LEN)) == 0) {
arg1 = 'b';
arg2 = &argv[i][std::strlen(LONG_ARG_BLOCK_LEN)];
} else if (std::strncmp(argv[i], LONG_ARG_SLICE_LEN,
std::strlen(LONG_ARG_SLICE_LEN)) == 0) {
arg1 = 's';
arg2 = &argv[i][std::strlen(LONG_ARG_SLICE_LEN)];
} else if (std::strncmp(argv[i], LONG_ARG_TARGET_DICT_LEN,
std::strlen(LONG_ARG_TARGET_DICT_LEN)) == 0) {
arg1 = 't';
arg2 = &argv[i][std::strlen(LONG_ARG_TARGET_DICT_LEN)];
} else if (std::strncmp(argv[i], LONG_ARG_MIN_SLICE_POP,
std::strlen(LONG_ARG_MIN_SLICE_POP)) == 0) {
arg1 = 'u';
arg2 = &argv[i][std::strlen(LONG_ARG_MIN_SLICE_POP)];
} else if (std::strncmp(argv[i], LONG_ARG_CHUNK_LEN,
std::strlen(LONG_ARG_CHUNK_LEN)) == 0) {
arg1 = 'c';
arg2 = &argv[i][std::strlen(LONG_ARG_CHUNK_LEN)];
} else if (std::strncmp(argv[i], LONG_ARG_OVERLAP_LEN,
std::strlen(LONG_ARG_OVERLAP_LEN)) == 0) {
arg1 = 'o';
arg2 = &argv[i][std::strlen(LONG_ARG_OVERLAP_LEN)];
} else {
printHelp(fileName(argv[0]));
fprintf(stderr, "Invalid option '%s'\n", argv[i]);
exit(1);
}
}
if (argv[i][1] == 'b') {
blockSize = readInt(&argv[i][2]);
/* Look for "-f" short args or "--foo=#" long args. */
if (arg1 == 'b') {
blockSize = readInt(arg2);
if (blockSize < 16 || blockSize > 65536) {
printHelp(fileName(argv[0]));
fprintf(stderr, "Invalid option '%s'\n", argv[i]);
exit(1);
}
} else if (argv[i][1] == 's') {
sliceLen = readInt(&argv[i][2]);
} else if (arg1 == 's') {
sliceLen = readInt(arg2);
if (sliceLen < 4 || sliceLen > 256) {
printHelp(fileName(argv[0]));
fprintf(stderr, "Invalid option '%s'\n", argv[i]);
exit(1);
}
} else if (argv[i][1] == 't') {
targetSize = readInt(&argv[i][2]);
} else if (arg1 == 't') {
targetSize = readInt(arg2);
if (targetSize < 256 || targetSize > (1 << 25)) {
printHelp(fileName(argv[0]));
fprintf(stderr, "Invalid option '%s'\n", argv[i]);
exit(1);
}
} else if (argv[i][1] == 'u') {
minimumPopulation = readInt(&argv[i][2]);
} else if (arg1 == 'u') {
minimumPopulation = readInt(arg2);
if (minimumPopulation < 256 || minimumPopulation > 65536) {
printHelp(fileName(argv[0]));
fprintf(stderr, "Invalid option '%s'\n", argv[i]);
exit(1);
}
} else if (arg1 == 'c') {
chunkLen = readInt(arg2);
if (chunkLen < 0 || chunkLen > INT_MAX) {
printHelp(fileName(argv[0]));
fprintf(stderr, "Invalid option '%s'\n", argv[i]);
exit(1);
}
} else if (arg1 == 'o') {
overlapLen = readInt(arg2);
if (overlapLen < 0 || overlapLen > INT_MAX) {
printHelp(fileName(argv[0]));
fprintf(stderr, "Invalid option '%s'\n", argv[i]);
exit(1);
}
} else {
printHelp(fileName(argv[0]));
fprintf(stderr, "Unrecognized option '%s'\n", argv[i]);
@ -186,21 +257,44 @@ int main(int argc, char const* argv[]) {
}
continue;
}
if (dictionaryArg == -1) {
if (method != METHOD_DISTILL && method != METHOD_PURIFY) {
dictionaryArg = i;
continue;
}
}
std::string content = readFile(argv[i]);
data.insert(data.end(), content.begin(), content.end());
total += content.size();
pathArgs.push_back(i);
sizes.push_back(content.size());
if (chunkLen == 0) {
pathArgs.push_back(i);
data.insert(data.end(), content.begin(), content.end());
total += content.size();
sizes.push_back(content.size());
continue;
} else if (chunkLen <= overlapLen) {
printHelp(fileName(argv[0]));
fprintf(stderr, "Invalid chunkLen - overlapLen combination\n");
exit(1);
}
for (size_t chunkStart = 0;
chunkStart < content.size();
chunkStart += chunkLen - overlapLen) {
std::string chunk = content.substr(chunkStart, chunkLen);
data.insert(data.end(), chunk.begin(), chunk.end());
total += chunk.size();
sizes.push_back(chunk.size());
}
}
bool wantDictionary = (dictionaryArg == -1);
if (method == METHOD_DISTILL || method == METHOD_PURIFY) {
wantDictionary = false;
if (chunkLen != 0) {
printHelp(fileName(argv[0]));
fprintf(stderr, "Cannot mix 'rewrite samples' with positive chunk_len\n");
exit(1);
}
}
if (wantDictionary || total == 0) {
printHelp(fileName(argv[0]));