mirror of
https://github.com/google/brotli.git
synced 2024-11-25 13:00:06 +00:00
4b2b2d4f83
Update: * Bazel: fix MSVC configuration * C: common: extended documentation and helpers around distance codes * C: common: enable BROTLI_DCHECK in "debug" builds * C: common: fix implicit trailing zero in `kPrefixSuffix` * C: dec: fix possible bit reader discharge for "large-window" mode * C: dec: simplify distance decoding via lookup table * C: dec: reuse decoder state members memory via union with lookup table * C: dec: add decoder state diagram * C: enc: clarify access to static dictionary * C: enc: improve static dictionary hash * C: enc: add "stream offset" parameter for parallel encoding * C: enc: reorganize hasher; now Q2-Q3 require exactly 256KiB to avoid global TCMalloc lock * C: enc: fix rare access to uninitialized data in ring-buffer * C: enc: reorganize logging / checks in `write_bits.h` * Java: dec: add "large-window" support * Java: dec: improve speed * Java: dec: debug and 32-bit mode are now activated via system properties * Java: dec: demystify some state variables (use better names) * Dictionary generator: add single input mode * Java: dec: modernize tests * Bazel: js: pick working commit for closure rules
366 lines
11 KiB
C
366 lines
11 KiB
C
/* Copyright 2015 Google Inc. All Rights Reserved.
|
|
|
|
Distributed under MIT license.
|
|
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
|
*/
|
|
|
|
/* Brotli state for partial streaming decoding. */
|
|
|
|
#ifndef BROTLI_DEC_STATE_H_
|
|
#define BROTLI_DEC_STATE_H_
|
|
|
|
#include "../common/constants.h"
|
|
#include "../common/dictionary.h"
|
|
#include "../common/platform.h"
|
|
#include "../common/transform.h"
|
|
#include <brotli/types.h>
|
|
#include "./bit_reader.h"
|
|
#include "./huffman.h"
|
|
|
|
#if defined(__cplusplus) || defined(c_plusplus)
|
|
extern "C" {
|
|
#endif
|
|
|
|
/* Graphviz diagram that describes state transitions:
|
|
|
|
digraph States {
|
|
graph [compound=true]
|
|
concentrate=true
|
|
node [shape="box"]
|
|
|
|
UNINITED -> {LARGE_WINDOW_BITS -> INITIALIZE}
|
|
subgraph cluster_metablock_workflow {
|
|
style="rounded"
|
|
label=< <B>METABLOCK CYCLE</B> >
|
|
METABLOCK_BEGIN -> METABLOCK_HEADER
|
|
METABLOCK_HEADER:sw -> METADATA
|
|
METABLOCK_HEADER:s -> UNCOMPRESSED
|
|
METABLOCK_HEADER:se -> METABLOCK_DONE:ne
|
|
METADATA:s -> METABLOCK_DONE:w
|
|
UNCOMPRESSED:s -> METABLOCK_DONE:n
|
|
METABLOCK_DONE:e -> METABLOCK_BEGIN:e [constraint="false"]
|
|
}
|
|
INITIALIZE -> METABLOCK_BEGIN
|
|
METABLOCK_DONE -> DONE
|
|
|
|
subgraph cluster_compressed_metablock {
|
|
style="rounded"
|
|
label=< <B>COMPRESSED METABLOCK</B> >
|
|
|
|
subgraph cluster_command {
|
|
style="rounded"
|
|
label=< <B>HOT LOOP</B> >
|
|
|
|
_METABLOCK_DONE_PORT_ [shape=point style=invis]
|
|
|
|
{
|
|
// Set different shape for nodes returning from "compressed metablock".
|
|
node [shape=invhouse]; CMD_INNER CMD_POST_DECODE_LITERALS;
|
|
CMD_POST_WRAP_COPY; CMD_INNER_WRITE; CMD_POST_WRITE_1;
|
|
}
|
|
|
|
CMD_BEGIN -> CMD_INNER -> CMD_POST_DECODE_LITERALS -> CMD_POST_WRAP_COPY
|
|
|
|
// IO ("write") nodes are not in the hot loop!
|
|
CMD_INNER_WRITE [style=dashed]
|
|
CMD_INNER -> CMD_INNER_WRITE
|
|
CMD_POST_WRITE_1 [style=dashed]
|
|
CMD_POST_DECODE_LITERALS -> CMD_POST_WRITE_1
|
|
CMD_POST_WRITE_2 [style=dashed]
|
|
CMD_POST_WRAP_COPY -> CMD_POST_WRITE_2
|
|
|
|
CMD_POST_WRITE_1 -> CMD_BEGIN:s [constraint="false"]
|
|
CMD_INNER_WRITE -> {CMD_INNER CMD_POST_DECODE_LITERALS}
|
|
[constraint="false"]
|
|
CMD_BEGIN:ne -> CMD_POST_DECODE_LITERALS [constraint="false"]
|
|
CMD_POST_WRAP_COPY -> CMD_BEGIN [constraint="false"]
|
|
CMD_POST_DECODE_LITERALS -> CMD_BEGIN:ne [constraint="false"]
|
|
CMD_POST_WRITE_2 -> CMD_POST_WRAP_COPY [constraint="false"]
|
|
{rank=same; CMD_BEGIN; CMD_INNER; CMD_POST_DECODE_LITERALS;
|
|
CMD_POST_WRAP_COPY}
|
|
{rank=same; CMD_INNER_WRITE; CMD_POST_WRITE_1; CMD_POST_WRITE_2}
|
|
|
|
{CMD_INNER CMD_POST_DECODE_LITERALS CMD_POST_WRAP_COPY} ->
|
|
_METABLOCK_DONE_PORT_ [style=invis]
|
|
{CMD_INNER_WRITE CMD_POST_WRITE_1} -> _METABLOCK_DONE_PORT_
|
|
[constraint="false" style=invis]
|
|
}
|
|
|
|
BEFORE_COMPRESSED_METABLOCK_HEADER:s -> HUFFMAN_CODE_0:n
|
|
HUFFMAN_CODE_0 -> HUFFMAN_CODE_1 -> HUFFMAN_CODE_2 -> HUFFMAN_CODE_3
|
|
HUFFMAN_CODE_0 -> METABLOCK_HEADER_2 -> CONTEXT_MODES -> CONTEXT_MAP_1
|
|
CONTEXT_MAP_1 -> CONTEXT_MAP_2 -> TREE_GROUP
|
|
TREE_GROUP -> BEFORE_COMPRESSED_METABLOCK_BODY:e
|
|
BEFORE_COMPRESSED_METABLOCK_BODY:s -> CMD_BEGIN:n
|
|
|
|
HUFFMAN_CODE_3:e -> HUFFMAN_CODE_0:ne [constraint="false"]
|
|
{rank=same; HUFFMAN_CODE_0; HUFFMAN_CODE_1; HUFFMAN_CODE_2; HUFFMAN_CODE_3}
|
|
{rank=same; METABLOCK_HEADER_2; CONTEXT_MODES; CONTEXT_MAP_1; CONTEXT_MAP_2;
|
|
TREE_GROUP}
|
|
}
|
|
METABLOCK_HEADER:e -> BEFORE_COMPRESSED_METABLOCK_HEADER:n
|
|
|
|
_METABLOCK_DONE_PORT_ -> METABLOCK_DONE:se
|
|
[constraint="false" ltail=cluster_command]
|
|
|
|
UNINITED [shape=Mdiamond];
|
|
DONE [shape=Msquare];
|
|
}
|
|
|
|
|
|
*/
|
|
|
|
typedef enum {
|
|
BROTLI_STATE_UNINITED,
|
|
BROTLI_STATE_LARGE_WINDOW_BITS,
|
|
BROTLI_STATE_INITIALIZE,
|
|
BROTLI_STATE_METABLOCK_BEGIN,
|
|
BROTLI_STATE_METABLOCK_HEADER,
|
|
BROTLI_STATE_METABLOCK_HEADER_2,
|
|
BROTLI_STATE_CONTEXT_MODES,
|
|
BROTLI_STATE_COMMAND_BEGIN,
|
|
BROTLI_STATE_COMMAND_INNER,
|
|
BROTLI_STATE_COMMAND_POST_DECODE_LITERALS,
|
|
BROTLI_STATE_COMMAND_POST_WRAP_COPY,
|
|
BROTLI_STATE_UNCOMPRESSED,
|
|
BROTLI_STATE_METADATA,
|
|
BROTLI_STATE_COMMAND_INNER_WRITE,
|
|
BROTLI_STATE_METABLOCK_DONE,
|
|
BROTLI_STATE_COMMAND_POST_WRITE_1,
|
|
BROTLI_STATE_COMMAND_POST_WRITE_2,
|
|
BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_HEADER,
|
|
BROTLI_STATE_HUFFMAN_CODE_0,
|
|
BROTLI_STATE_HUFFMAN_CODE_1,
|
|
BROTLI_STATE_HUFFMAN_CODE_2,
|
|
BROTLI_STATE_HUFFMAN_CODE_3,
|
|
BROTLI_STATE_CONTEXT_MAP_1,
|
|
BROTLI_STATE_CONTEXT_MAP_2,
|
|
BROTLI_STATE_TREE_GROUP,
|
|
BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_BODY,
|
|
BROTLI_STATE_DONE
|
|
} BrotliRunningState;
|
|
|
|
typedef enum {
|
|
BROTLI_STATE_METABLOCK_HEADER_NONE,
|
|
BROTLI_STATE_METABLOCK_HEADER_EMPTY,
|
|
BROTLI_STATE_METABLOCK_HEADER_NIBBLES,
|
|
BROTLI_STATE_METABLOCK_HEADER_SIZE,
|
|
BROTLI_STATE_METABLOCK_HEADER_UNCOMPRESSED,
|
|
BROTLI_STATE_METABLOCK_HEADER_RESERVED,
|
|
BROTLI_STATE_METABLOCK_HEADER_BYTES,
|
|
BROTLI_STATE_METABLOCK_HEADER_METADATA
|
|
} BrotliRunningMetablockHeaderState;
|
|
|
|
typedef enum {
|
|
BROTLI_STATE_UNCOMPRESSED_NONE,
|
|
BROTLI_STATE_UNCOMPRESSED_WRITE
|
|
} BrotliRunningUncompressedState;
|
|
|
|
typedef enum {
|
|
BROTLI_STATE_TREE_GROUP_NONE,
|
|
BROTLI_STATE_TREE_GROUP_LOOP
|
|
} BrotliRunningTreeGroupState;
|
|
|
|
typedef enum {
|
|
BROTLI_STATE_CONTEXT_MAP_NONE,
|
|
BROTLI_STATE_CONTEXT_MAP_READ_PREFIX,
|
|
BROTLI_STATE_CONTEXT_MAP_HUFFMAN,
|
|
BROTLI_STATE_CONTEXT_MAP_DECODE,
|
|
BROTLI_STATE_CONTEXT_MAP_TRANSFORM
|
|
} BrotliRunningContextMapState;
|
|
|
|
typedef enum {
|
|
BROTLI_STATE_HUFFMAN_NONE,
|
|
BROTLI_STATE_HUFFMAN_SIMPLE_SIZE,
|
|
BROTLI_STATE_HUFFMAN_SIMPLE_READ,
|
|
BROTLI_STATE_HUFFMAN_SIMPLE_BUILD,
|
|
BROTLI_STATE_HUFFMAN_COMPLEX,
|
|
BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS
|
|
} BrotliRunningHuffmanState;
|
|
|
|
typedef enum {
|
|
BROTLI_STATE_DECODE_UINT8_NONE,
|
|
BROTLI_STATE_DECODE_UINT8_SHORT,
|
|
BROTLI_STATE_DECODE_UINT8_LONG
|
|
} BrotliRunningDecodeUint8State;
|
|
|
|
typedef enum {
|
|
BROTLI_STATE_READ_BLOCK_LENGTH_NONE,
|
|
BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX
|
|
} BrotliRunningReadBlockLengthState;
|
|
|
|
typedef struct BrotliMetablockHeaderArena {
|
|
BrotliRunningTreeGroupState substate_tree_group;
|
|
BrotliRunningContextMapState substate_context_map;
|
|
BrotliRunningHuffmanState substate_huffman;
|
|
|
|
uint32_t sub_loop_counter;
|
|
|
|
uint32_t repeat_code_len;
|
|
uint32_t prev_code_len;
|
|
|
|
/* For ReadHuffmanCode. */
|
|
uint32_t symbol;
|
|
uint32_t repeat;
|
|
uint32_t space;
|
|
|
|
/* Huffman table for "histograms". */
|
|
HuffmanCode table[32];
|
|
/* List of heads of symbol chains. */
|
|
uint16_t* symbol_lists;
|
|
/* Storage from symbol_lists. */
|
|
uint16_t symbols_lists_array[BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1 +
|
|
BROTLI_NUM_COMMAND_SYMBOLS];
|
|
/* Tails of symbol chains. */
|
|
int next_symbol[32];
|
|
uint8_t code_length_code_lengths[BROTLI_CODE_LENGTH_CODES];
|
|
/* Population counts for the code lengths. */
|
|
uint16_t code_length_histo[16];
|
|
|
|
/* For HuffmanTreeGroupDecode. */
|
|
int htree_index;
|
|
HuffmanCode* next;
|
|
|
|
/* For DecodeContextMap. */
|
|
uint32_t context_index;
|
|
uint32_t max_run_length_prefix;
|
|
uint32_t code;
|
|
HuffmanCode context_map_table[BROTLI_HUFFMAN_MAX_SIZE_272];
|
|
} BrotliMetablockHeaderArena;
|
|
|
|
typedef struct BrotliMetablockBodyArena {
|
|
uint8_t dist_extra_bits[544];
|
|
uint32_t dist_offset[544];
|
|
} BrotliMetablockBodyArena;
|
|
|
|
struct BrotliDecoderStateStruct {
|
|
BrotliRunningState state;
|
|
|
|
/* This counter is reused for several disjoint loops. */
|
|
int loop_counter;
|
|
|
|
BrotliBitReader br;
|
|
|
|
brotli_alloc_func alloc_func;
|
|
brotli_free_func free_func;
|
|
void* memory_manager_opaque;
|
|
|
|
/* Temporary storage for remaining input. Brotli stream format is designed in
|
|
a way, that 64 bits are enough to make progress in decoding. */
|
|
union {
|
|
uint64_t u64;
|
|
uint8_t u8[8];
|
|
} buffer;
|
|
uint32_t buffer_length;
|
|
|
|
int pos;
|
|
int max_backward_distance;
|
|
int max_distance;
|
|
int ringbuffer_size;
|
|
int ringbuffer_mask;
|
|
int dist_rb_idx;
|
|
int dist_rb[4];
|
|
int error_code;
|
|
uint8_t* ringbuffer;
|
|
uint8_t* ringbuffer_end;
|
|
HuffmanCode* htree_command;
|
|
const uint8_t* context_lookup;
|
|
uint8_t* context_map_slice;
|
|
uint8_t* dist_context_map_slice;
|
|
|
|
/* This ring buffer holds a few past copy distances that will be used by
|
|
some special distance codes. */
|
|
HuffmanTreeGroup literal_hgroup;
|
|
HuffmanTreeGroup insert_copy_hgroup;
|
|
HuffmanTreeGroup distance_hgroup;
|
|
HuffmanCode* block_type_trees;
|
|
HuffmanCode* block_len_trees;
|
|
/* This is true if the literal context map histogram type always matches the
|
|
block type. It is then not needed to keep the context (faster decoding). */
|
|
int trivial_literal_context;
|
|
/* Distance context is actual after command is decoded and before distance is
|
|
computed. After distance computation it is used as a temporary variable. */
|
|
int distance_context;
|
|
int meta_block_remaining_len;
|
|
uint32_t block_length_index;
|
|
uint32_t block_length[3];
|
|
uint32_t num_block_types[3];
|
|
uint32_t block_type_rb[6];
|
|
uint32_t distance_postfix_bits;
|
|
uint32_t num_direct_distance_codes;
|
|
uint32_t num_dist_htrees;
|
|
uint8_t* dist_context_map;
|
|
HuffmanCode* literal_htree;
|
|
uint8_t dist_htree_index;
|
|
|
|
int copy_length;
|
|
int distance_code;
|
|
|
|
/* For partial write operations. */
|
|
size_t rb_roundtrips; /* how many times we went around the ring-buffer */
|
|
size_t partial_pos_out; /* how much output to the user in total */
|
|
|
|
/* For InverseMoveToFrontTransform. */
|
|
uint32_t mtf_upper_bound;
|
|
uint32_t mtf[64 + 1];
|
|
|
|
/* Less used attributes are at the end of this struct. */
|
|
|
|
/* States inside function calls. */
|
|
BrotliRunningMetablockHeaderState substate_metablock_header;
|
|
BrotliRunningUncompressedState substate_uncompressed;
|
|
BrotliRunningDecodeUint8State substate_decode_uint8;
|
|
BrotliRunningReadBlockLengthState substate_read_block_length;
|
|
|
|
unsigned int is_last_metablock : 1;
|
|
unsigned int is_uncompressed : 1;
|
|
unsigned int is_metadata : 1;
|
|
unsigned int should_wrap_ringbuffer : 1;
|
|
unsigned int canny_ringbuffer_allocation : 1;
|
|
unsigned int large_window : 1;
|
|
unsigned int size_nibbles : 8;
|
|
uint32_t window_bits;
|
|
|
|
int new_ringbuffer_size;
|
|
|
|
uint32_t num_literal_htrees;
|
|
uint8_t* context_map;
|
|
uint8_t* context_modes;
|
|
|
|
const BrotliDictionary* dictionary;
|
|
const BrotliTransforms* transforms;
|
|
|
|
uint32_t trivial_literal_contexts[8]; /* 256 bits */
|
|
|
|
union {
|
|
BrotliMetablockHeaderArena header;
|
|
BrotliMetablockBodyArena body;
|
|
} arena;
|
|
};
|
|
|
|
typedef struct BrotliDecoderStateStruct BrotliDecoderStateInternal;
|
|
#define BrotliDecoderState BrotliDecoderStateInternal
|
|
|
|
BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
|
|
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
|
|
BROTLI_INTERNAL void BrotliDecoderStateCleanup(BrotliDecoderState* s);
|
|
BROTLI_INTERNAL void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s);
|
|
BROTLI_INTERNAL void BrotliDecoderStateCleanupAfterMetablock(
|
|
BrotliDecoderState* s);
|
|
BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(
|
|
BrotliDecoderState* s, HuffmanTreeGroup* group, uint32_t alphabet_size_max,
|
|
uint32_t alphabet_size_limit, uint32_t ntrees);
|
|
|
|
#define BROTLI_DECODER_ALLOC(S, L) S->alloc_func(S->memory_manager_opaque, L)
|
|
|
|
#define BROTLI_DECODER_FREE(S, X) { \
|
|
S->free_func(S->memory_manager_opaque, X); \
|
|
X = NULL; \
|
|
}
|
|
|
|
#if defined(__cplusplus) || defined(c_plusplus)
|
|
} /* extern "C" */
|
|
#endif
|
|
|
|
#endif /* BROTLI_DEC_STATE_H_ */
|