From 0454ab4ec0cfe0dee125269a5ee065f7774d6a9d Mon Sep 17 00:00:00 2001 From: Zoltan Szabadka Date: Fri, 14 Feb 2014 15:04:23 +0100 Subject: [PATCH] Updates to Brotli compression format, decoder and encoder This commit contains a batch of changes that were made to the Brotli compression algorithm in the last month. Most important changes: * Fixes to the spec. * Change of code length code order. * Use a 2-level Huffman lookup table in the decoder. * Faster uncompressed meta-block decoding. * Optimized encoding of the Huffman code. * Detection of UTF-8 input encoding. * UTF-8 based literal cost modeling for improved backward reference selection. --- brotlispec.txt | 25 +- dec/bit_reader.c | 4 +- dec/bit_reader.h | 31 ++- dec/decode.c | 538 ++++++++++++++++++++----------------- dec/decode.h | 2 + dec/huffman.c | 309 ++++++++------------- dec/huffman.h | 49 +--- enc/backward_references.cc | 152 +++++++---- enc/bit_cost.h | 2 +- enc/block_splitter.cc | 6 +- enc/encode.cc | 189 +++++++++---- enc/entropy_encode.cc | 119 +++++++- enc/entropy_encode.h | 2 +- enc/hash.h | 13 +- enc/literal_cost.cc | 99 +++++++ enc/literal_cost.h | 7 +- 16 files changed, 912 insertions(+), 635 deletions(-) diff --git a/brotlispec.txt b/brotlispec.txt index 23de497..087c939 100644 --- a/brotlispec.txt +++ b/brotlispec.txt @@ -498,11 +498,11 @@ Abstract Symbol Code ------ ---- 0 00 - 1 1010 - 2 100 - 3 11 - 4 01 - 5 1011 + 1 1110 + 2 110 + 3 01 + 4 10 + 5 1111 We can now define the format of the complex Huffman code as follows: @@ -513,7 +513,7 @@ Abstract Code lengths for symbols in the code length alphabet given - just above, in the order: 1, 2, 3, 4, 0, 17, 5, 6, 16, 7, + just above, in the order: 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15 The code lengths of code length symbols are between 0 and @@ -572,7 +572,7 @@ Abstract 6: last distance - 2 7: last distance + 2 8: last distance - 3 - 9: last disatnce + 3 + 9: last distance + 3 10: second last distance - 1 11: second 
last distance + 1 12: second last distance - 2 @@ -647,7 +647,7 @@ Abstract ---- ---- ------ ---- ---- ------- ---- ---- ------- 0 0 0 8 2 10-13 16 6 130-193 1 0 1 9 2 14-17 17 7 194-321 - 2 0 2 10 3 18-25 18 8 322-527 + 2 0 2 10 3 18-25 18 8 322-577 3 0 3 11 3 26-33 19 9 578-1089 4 0 4 12 4 34-49 20 10 1090-2113 5 0 5 13 4 50-65 21 12 2114-6209 @@ -681,7 +681,7 @@ Abstract | | | +---------+---------+---------+ | | | | - 0-7 | 128-191 | 192-255 | 383-447 | + 0-7 | 128-191 | 192-255 | 384-447 | | | | | +---------+---------+---------+ | | | | @@ -689,7 +689,7 @@ Abstract | | | | +---------+---------+---------+ | | | | - 16-23 | 448-551 | 576-639 | 640-703 | + 16-23 | 448-511 | 576-639 | 640-703 | | | | | +---------+---------+---------+ @@ -1008,9 +1008,10 @@ Abstract 1 bit: ISEMPTY, set to 1 if the meta-block is empty, this field is only present if ISLAST bit is set, since only the last meta-block can be empty - 2 bits: MNIBBLES, (# of nibbles to represent the length) - 4 + 2 bits: MNIBBLES - 4, where MNIBBLES is # of nibbles to represent + the length - (MNIBBLES + 4) x 4 bits: MLEN - 1, where MLEN is the length + MNIBBLES x 4 bits: MLEN - 1, where MLEN is the length of the meta-block in the input data in bytes 1 bit: ISUNCOMPRESSED, if set to 1, any bits of input up to diff --git a/dec/bit_reader.c b/dec/bit_reader.c index 25e33e3..981dccf 100644 --- a/dec/bit_reader.c +++ b/dec/bit_reader.c @@ -33,7 +33,7 @@ int BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input) { br->val_ = 0; br->pos_ = 0; br->bit_pos_ = 0; - br->bits_left_ = 64; + br->bit_end_pos_ = 0; br->eos_ = 0; if (!BrotliReadMoreInput(br)) { return 0; @@ -42,7 +42,7 @@ int BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input) { br->val_ |= ((uint64_t)br->buf_[br->pos_]) << (8 * i); ++br->pos_; } - return (br->bits_left_ > 64); + return (br->bit_end_pos_ > 0); } #if defined(__cplusplus) || defined(c_plusplus) diff --git a/dec/bit_reader.h b/dec/bit_reader.h index 551cc14..3221cdd 
100644 --- a/dec/bit_reader.h +++ b/dec/bit_reader.h @@ -31,7 +31,7 @@ extern "C" { #define BROTLI_IBUF_SIZE (2 * BROTLI_READ_SIZE + 32) #define BROTLI_IBUF_MASK (2 * BROTLI_READ_SIZE - 1) -#define UNALIGNED_COPY64(dst, src) *(uint64_t*)(dst) = *(const uint64_t*)(src) +#define UNALIGNED_COPY64(dst, src) memcpy(dst, src, 8) static const uint32_t kBitMask[BROTLI_MAX_NUM_BIT_READ] = { 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767, @@ -42,13 +42,13 @@ typedef struct { /* Input byte buffer, consist of a ringbuffer and a "slack" region where */ /* bytes from the start of the ringbuffer are copied. */ uint8_t buf_[BROTLI_IBUF_SIZE]; - uint8_t* buf_ptr_; /* next input will write here */ - BrotliInput input_; /* input callback */ - uint64_t val_; /* pre-fetched bits */ - uint32_t pos_; /* byte position in stream */ - uint32_t bit_pos_; /* current bit-reading position in val_ */ - uint32_t bits_left_; /* how many valid bits left */ - int eos_; /* input stream is finished */ + uint8_t* buf_ptr_; /* next input will write here */ + BrotliInput input_; /* input callback */ + uint64_t val_; /* pre-fetched bits */ + uint32_t pos_; /* byte position in stream */ + uint32_t bit_pos_; /* current bit-reading position in val_ */ + uint32_t bit_end_pos_; /* bit-reading end position from LSB of val_ */ + int eos_; /* input stream is finished */ } BrotliBitReader; int BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input); @@ -65,7 +65,7 @@ static BROTLI_INLINE void BrotliSetBitPos(BrotliBitReader* const br, #ifdef BROTLI_DECODE_DEBUG uint32_t n_bits = val - br->bit_pos_; const uint32_t bval = (uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits]; - printf("[BrotliReadBits] %010ld %2d val: %6x\n", + printf("[BrotliReadBits] %010d %2d val: %6x\n", (br->pos_ << 3) + br->bit_pos_ - 64, n_bits, bval); #endif br->bit_pos_ = val; @@ -78,7 +78,7 @@ static BROTLI_INLINE void ShiftBytes(BrotliBitReader* const br) { br->val_ |= ((uint64_t)br->buf_[br->pos_ 
& BROTLI_IBUF_MASK]) << 56; ++br->pos_; br->bit_pos_ -= 8; - br->bits_left_ -= 8; + br->bit_end_pos_ -= 8; } } @@ -95,10 +95,10 @@ static BROTLI_INLINE void ShiftBytes(BrotliBitReader* const br) { every 32 bytes of input is read. */ static BROTLI_INLINE int BrotliReadMoreInput(BrotliBitReader* const br) { - if (br->bits_left_ > 320) { + if (br->bit_end_pos_ > 256) { return 1; } else if (br->eos_) { - return br->bit_pos_ <= br->bits_left_; + return br->bit_pos_ <= br->bit_end_pos_; } else { uint8_t* dst = br->buf_ptr_; int bytes_read = BrotliRead(br->input_, dst, BROTLI_READ_SIZE); @@ -131,7 +131,7 @@ static BROTLI_INLINE int BrotliReadMoreInput(BrotliBitReader* const br) { } else { br->buf_ptr_ = br->buf_; } - br->bits_left_ += ((uint32_t)bytes_read << 3); + br->bit_end_pos_ += ((uint32_t)bytes_read << 3); return 1; } } @@ -147,7 +147,7 @@ static BROTLI_INLINE void BrotliFillBitWindow(BrotliBitReader* const br) { br->buf_ + (br->pos_ & BROTLI_IBUF_MASK)) << 24; br->pos_ += 5; br->bit_pos_ -= 40; - br->bits_left_ -= 40; + br->bit_end_pos_ -= 40; #else ShiftBytes(br); #endif @@ -155,14 +155,13 @@ static BROTLI_INLINE void BrotliFillBitWindow(BrotliBitReader* const br) { } /* Reads the specified number of bits from Read Buffer. */ -/* Requires that n_bits is positive. 
*/ static BROTLI_INLINE uint32_t BrotliReadBits( BrotliBitReader* const br, int n_bits) { uint32_t val; BrotliFillBitWindow(br); val = (uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits]; #ifdef BROTLI_DECODE_DEBUG - printf("[BrotliReadBits] %010ld %2d val: %6x\n", + printf("[BrotliReadBits] %010d %2d val: %6x\n", (br->pos_ << 3) + br->bit_pos_ - 64, n_bits, val); #endif br->bit_pos_ += (uint32_t)n_bits; diff --git a/dec/decode.c b/dec/decode.c index a8e41ab..070abf1 100644 --- a/dec/decode.c +++ b/dec/decode.c @@ -46,9 +46,14 @@ static const int kNumBlockLengthCodes = 26; static const int kLiteralContextBits = 6; static const int kDistanceContextBits = 2; +#define HUFFMAN_TABLE_BITS 8 +#define HUFFMAN_TABLE_MASK 0xff +/* This is a rough estimate, not an exact bound. */ +#define HUFFMAN_MAX_TABLE_SIZE 2048 + #define CODE_LENGTH_CODES 18 static const uint8_t kCodeLengthCodeOrder[CODE_LENGTH_CODES] = { - 1, 2, 3, 4, 0, 17, 5, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15, }; #define NUM_DISTANCE_SHORT_CODES 16 @@ -104,36 +109,19 @@ static void DecodeMetaBlockLength(BrotliBitReader* br, } /* Decodes the next Huffman code from bit-stream. */ -static BROTLI_INLINE int ReadSymbol(const HuffmanTree* tree, +static BROTLI_INLINE int ReadSymbol(const HuffmanCode* table, BrotliBitReader* br) { - uint32_t bits; - uint32_t bitpos; - int lut_ix; - uint8_t lut_bits; - const HuffmanTreeNode* node = tree->root_; + int nbits; BrotliFillBitWindow(br); - bits = BrotliPrefetchBits(br); - bitpos = br->bit_pos_; - /* Check if we find the bit combination from the Huffman lookup table. 
*/ - lut_ix = bits & (HUFF_LUT - 1); - lut_bits = tree->lut_bits_[lut_ix]; - if (lut_bits <= HUFF_LUT_BITS) { - BrotliSetBitPos(br, bitpos + lut_bits); - return tree->lut_symbol_[lut_ix]; + table += (int)(br->val_ >> br->bit_pos_) & HUFFMAN_TABLE_MASK; + nbits = table->bits - HUFFMAN_TABLE_BITS; + if (nbits > 0) { + br->bit_pos_ += HUFFMAN_TABLE_BITS; + table += table->value; + table += (int)(br->val_ >> br->bit_pos_) & ((1 << nbits) - 1); } - node += tree->lut_jump_[lut_ix]; - bitpos += HUFF_LUT_BITS; - bits >>= HUFF_LUT_BITS; - - /* Decode the value from a binary tree. */ - assert(node != NULL); - do { - node = HuffmanTreeNextNode(node, bits & 1); - bits >>= 1; - ++bitpos; - } while (HuffmanTreeNodeIsNotLeaf(node)); - BrotliSetBitPos(br, bitpos); - return node->symbol_; + br->bit_pos_ += table->bits; + return table->value; } static void PrintUcharVector(const uint8_t* v, int len) { @@ -145,47 +133,34 @@ static int ReadHuffmanCodeLengths( const uint8_t* code_length_code_lengths, int num_symbols, uint8_t* code_lengths, BrotliBitReader* br) { - int ok = 0; - int symbol; + int symbol = 0; uint8_t prev_code_len = kDefaultCodeLength; int repeat = 0; - uint8_t repeat_length = 0; + uint8_t repeat_code_len = 0; int space = 32768; - HuffmanTree tree; + HuffmanCode table[32]; - if (!BrotliHuffmanTreeBuildImplicit(&tree, code_length_code_lengths, - CODE_LENGTH_CODES)) { + if (!BrotliBuildHuffmanTable(table, 5, + code_length_code_lengths, + CODE_LENGTH_CODES)) { printf("[ReadHuffmanCodeLengths] Building code length tree failed: "); PrintUcharVector(code_length_code_lengths, CODE_LENGTH_CODES); return 0; } - if (!BrotliReadMoreInput(br)) { - printf("[ReadHuffmanCodeLengths] Unexpected end of input.\n"); - return 0; - } - - symbol = 0; - while (symbol + repeat < num_symbols && space > 0) { + while (symbol < num_symbols && space > 0) { + const HuffmanCode* p = table; uint8_t code_len; if (!BrotliReadMoreInput(br)) { printf("[ReadHuffmanCodeLengths] Unexpected end of input.\n"); 
- goto End; - } - code_len = (uint8_t)ReadSymbol(&tree, br); - BROTLI_LOG_UINT(symbol); - BROTLI_LOG_UINT(repeat); - BROTLI_LOG_UINT(repeat_length); - BROTLI_LOG_UINT(code_len); - if ((code_len < kCodeLengthRepeatCode) || - (code_len == kCodeLengthRepeatCode && repeat_length == 0) || - (code_len > kCodeLengthRepeatCode && repeat_length > 0)) { - while (repeat > 0) { - code_lengths[symbol++] = repeat_length; - --repeat; - } + return 0; } + BrotliFillBitWindow(br); + p += (br->val_ >> br->bit_pos_) & 31; + br->bit_pos_ += p->bits; + code_len = (uint8_t)p->value; if (code_len < kCodeLengthRepeatCode) { + repeat = 0; code_lengths[symbol++] = code_len; if (code_len != 0) { prev_code_len = code_len; @@ -193,47 +168,46 @@ static int ReadHuffmanCodeLengths( } } else { const int extra_bits = code_len - 14; - int i = repeat; + int old_repeat; + int repeat_delta; + uint8_t new_len = 0; + if (code_len == kCodeLengthRepeatCode) { + new_len = prev_code_len; + } + if (repeat_code_len != new_len) { + repeat = 0; + repeat_code_len = new_len; + } + old_repeat = repeat; if (repeat > 0) { repeat -= 2; repeat <<= extra_bits; } repeat += (int)BrotliReadBits(br, extra_bits) + 3; - if (repeat + symbol > num_symbols) { - goto End; + repeat_delta = repeat - old_repeat; + if (symbol + repeat_delta > num_symbols) { + return 0; } - if (code_len == kCodeLengthRepeatCode) { - repeat_length = prev_code_len; - for (; i < repeat; ++i) { - space -= 32768 >> repeat_length; - } - } else { - repeat_length = 0; + memset(&code_lengths[symbol], repeat_code_len, (size_t)repeat_delta); + symbol += repeat_delta; + if (repeat_code_len != 0) { + space -= repeat_delta << (15 - repeat_code_len); } } } if (space != 0) { printf("[ReadHuffmanCodeLengths] space = %d\n", space); - goto End; + return 0; } - if (symbol + repeat > num_symbols) { - printf("[ReadHuffmanCodeLengths] symbol + repeat > num_symbols " - "(%d + %d vs %d)\n", symbol, repeat, num_symbols); - goto End; - } - while (repeat-- > 0) 
code_lengths[symbol++] = repeat_length; - while (symbol < num_symbols) code_lengths[symbol++] = 0; - ok = 1; - - End: - BrotliHuffmanTreeRelease(&tree); - return ok; + memset(&code_lengths[symbol], 0, (size_t)(num_symbols - symbol)); + return 1; } static int ReadHuffmanCode(int alphabet_size, - HuffmanTree* tree, + HuffmanCode* table, BrotliBitReader* br) { int ok = 1; + int table_size = 0; int simple_code_or_skip; uint8_t* code_lengths = NULL; @@ -290,109 +264,49 @@ static int ReadHuffmanCode(int alphabet_size, int i; uint8_t code_length_code_lengths[CODE_LENGTH_CODES] = { 0 }; int space = 32; - for (i = simple_code_or_skip; - i < CODE_LENGTH_CODES && space > 0; ++i) { - int code_len_idx = kCodeLengthCodeOrder[i]; - uint8_t v = (uint8_t)BrotliReadBits(br, 2); - if (v == 1) { - v = (uint8_t)BrotliReadBits(br, 1); - if (v == 0) { - v = 2; - } else { - v = (uint8_t)BrotliReadBits(br, 1); - if (v == 0) { - v = 1; - } else { - v = 5; - } - } - } else if (v == 2) { - v = 4; - } + /* Static Huffman code for the code length code lengths */ + static const HuffmanCode huff[16] = { + {2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 1}, + {2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 5}, + }; + for (i = simple_code_or_skip; i < CODE_LENGTH_CODES && space > 0; ++i) { + const int code_len_idx = kCodeLengthCodeOrder[i]; + const HuffmanCode* p = huff; + uint8_t v; + BrotliFillBitWindow(br); + p += (br->val_ >> br->bit_pos_) & 15; + br->bit_pos_ += p->bits; + v = (uint8_t)p->value; code_length_code_lengths[code_len_idx] = v; BROTLI_LOG_ARRAY_INDEX(code_length_code_lengths, code_len_idx); if (v != 0) { space -= (32 >> v); } } - ok = ReadHuffmanCodeLengths(code_length_code_lengths, alphabet_size, - code_lengths, br); + ok = ReadHuffmanCodeLengths(code_length_code_lengths, + alphabet_size, code_lengths, br); } if (ok) { - ok = BrotliHuffmanTreeBuildImplicit(tree, code_lengths, alphabet_size); - if (!ok) { - printf("[ReadHuffmanCode] HuffmanTreeBuildImplicit 
failed: "); + table_size = BrotliBuildHuffmanTable(table, HUFFMAN_TABLE_BITS, + code_lengths, alphabet_size); + if (table_size == 0) { + printf("[ReadHuffmanCode] BuildHuffmanTable failed: "); PrintUcharVector(code_lengths, alphabet_size); } } free(code_lengths); - return ok; + return table_size; } -static int ReadCopyDistance(const HuffmanTree* tree, - int num_direct_codes, - int postfix_bits, - int postfix_mask, - BrotliBitReader* br) { +static BROTLI_INLINE int ReadBlockLength(const HuffmanCode* table, + BrotliBitReader* br) { int code; int nbits; - int postfix; - int offset; - code = ReadSymbol(tree, br); - if (code < num_direct_codes) { - return code; - } - code -= num_direct_codes; - postfix = code & postfix_mask; - code >>= postfix_bits; - nbits = (code >> 1) + 1; - offset = ((2 + (code & 1)) << nbits) - 4; - return (num_direct_codes + - ((offset + (int)BrotliReadBits(br, nbits)) << postfix_bits) + - postfix); -} - -static int ReadBlockLength(const HuffmanTree* tree, BrotliBitReader* br) { - int code; - int nbits; - code = ReadSymbol(tree, br); + code = ReadSymbol(table, br); nbits = kBlockLengthPrefixCode[code].nbits; return kBlockLengthPrefixCode[code].offset + (int)BrotliReadBits(br, nbits); } -static void ReadInsertAndCopy(const HuffmanTree* tree, - int* insert_len, - int* copy_len, - int* copy_dist, - BrotliBitReader* br) { - int code; - int range_idx; - int insert_code; - int insert_extra_bits; - int copy_code; - int copy_extra_bits; - code = ReadSymbol(tree, br); - range_idx = code >> 6; - if (range_idx >= 2) { - range_idx -= 2; - *copy_dist = -1; - } else { - *copy_dist = 0; - } - insert_code = kInsertRangeLut[range_idx] + ((code >> 3) & 7); - copy_code = kCopyRangeLut[range_idx] + (code & 7); - *insert_len = kInsertLengthPrefixCode[insert_code].offset; - insert_extra_bits = kInsertLengthPrefixCode[insert_code].nbits; - if (insert_extra_bits > 0) { - *insert_len += (int)BrotliReadBits(br, insert_extra_bits); - } - *copy_len = 
kCopyLengthPrefixCode[copy_code].offset; - copy_extra_bits = kCopyLengthPrefixCode[copy_code].nbits; - if (copy_extra_bits > 0) { - *copy_len += (int)BrotliReadBits(br, copy_extra_bits); - } -} - static int TranslateShortCodes(int code, int* ringbuffer, int index) { int val; if (code < NUM_DISTANCE_SHORT_CODES) { @@ -429,24 +343,22 @@ static void InverseMoveToFrontTransform(uint8_t* v, int v_len) { typedef struct { int alphabet_size; int num_htrees; - HuffmanTree* htrees; + HuffmanCode* codes; + HuffmanCode** htrees; } HuffmanTreeGroup; static void HuffmanTreeGroupInit(HuffmanTreeGroup* group, int alphabet_size, int ntrees) { - int i; group->alphabet_size = alphabet_size; group->num_htrees = ntrees; - group->htrees = (HuffmanTree*)malloc(sizeof(HuffmanTree) * (size_t)ntrees); - for (i = 0; i < ntrees; ++i) { - group->htrees[i].root_ = NULL; - } + group->codes = (HuffmanCode*)malloc( + sizeof(HuffmanCode) * (size_t)(ntrees * HUFFMAN_MAX_TABLE_SIZE)); + group->htrees = (HuffmanCode**)malloc(sizeof(HuffmanCode*) * (size_t)ntrees); } static void HuffmanTreeGroupRelease(HuffmanTreeGroup* group) { - int i; - for (i = 0; i < group->num_htrees; ++i) { - BrotliHuffmanTreeRelease(&group->htrees[i]); + if (group->codes) { + free(group->codes); } if (group->htrees) { free(group->htrees); @@ -456,8 +368,13 @@ static void HuffmanTreeGroupRelease(HuffmanTreeGroup* group) { static int HuffmanTreeGroupDecode(HuffmanTreeGroup* group, BrotliBitReader* br) { int i; + int table_size; + HuffmanCode* next = group->codes; for (i = 0; i < group->num_htrees; ++i) { - if (!ReadHuffmanCode(group->alphabet_size, &group->htrees[i], br)) { + group->htrees[i] = next; + table_size = ReadHuffmanCode(group->alphabet_size, next, br); + next += table_size; + if (table_size == 0) { return 0; } } @@ -469,6 +386,10 @@ static int DecodeContextMap(int context_map_size, uint8_t** context_map, BrotliBitReader* br) { int ok = 1; + int use_rle_for_zeros; + int max_run_length_prefix = 0; + HuffmanCode* table; + 
int i; if (!BrotliReadMoreInput(br)) { printf("[DecodeContextMap] Unexpected end of input.\n"); return 0; @@ -487,55 +408,54 @@ static int DecodeContextMap(int context_map_size, return 1; } - { - HuffmanTree tree_index_htree; - int use_rle_for_zeros = (int)BrotliReadBits(br, 1); - int max_run_length_prefix = 0; - int i; - if (use_rle_for_zeros) { - max_run_length_prefix = (int)BrotliReadBits(br, 4) + 1; + use_rle_for_zeros = (int)BrotliReadBits(br, 1); + if (use_rle_for_zeros) { + max_run_length_prefix = (int)BrotliReadBits(br, 4) + 1; + } + table = (HuffmanCode*)malloc(HUFFMAN_MAX_TABLE_SIZE * sizeof(*table)); + if (table == NULL) { + return 0; + } + if (!ReadHuffmanCode(*num_htrees + max_run_length_prefix, table, br)) { + ok = 0; + goto End; + } + for (i = 0; i < context_map_size;) { + int code; + if (!BrotliReadMoreInput(br)) { + printf("[DecodeContextMap] Unexpected end of input.\n"); + ok = 0; + goto End; } - if (!ReadHuffmanCode(*num_htrees + max_run_length_prefix, - &tree_index_htree, br)) { - return 0; - } - for (i = 0; i < context_map_size;) { - int code; - if (!BrotliReadMoreInput(br)) { - printf("[DecodeContextMap] Unexpected end of input.\n"); - ok = 0; - goto End; - } - code = ReadSymbol(&tree_index_htree, br); - if (code == 0) { + code = ReadSymbol(table, br); + if (code == 0) { + (*context_map)[i] = 0; + ++i; + } else if (code <= max_run_length_prefix) { + int reps = 1 + (1 << code) + (int)BrotliReadBits(br, code); + while (--reps) { + if (i >= context_map_size) { + ok = 0; + goto End; + } (*context_map)[i] = 0; ++i; - } else if (code <= max_run_length_prefix) { - int reps = 1 + (1 << code) + (int)BrotliReadBits(br, code); - while (--reps) { - if (i >= context_map_size) { - ok = 0; - goto End; - } - (*context_map)[i] = 0; - ++i; - } - } else { - (*context_map)[i] = (uint8_t)(code - max_run_length_prefix); - ++i; } + } else { + (*context_map)[i] = (uint8_t)(code - max_run_length_prefix); + ++i; } - End: - BrotliHuffmanTreeRelease(&tree_index_htree); } 
if (BrotliReadBits(br, 1)) { InverseMoveToFrontTransform(*context_map, context_map_size); } +End: + free(table); return ok; } static BROTLI_INLINE void DecodeBlockType(const int max_block_type, - const HuffmanTree* trees, + const HuffmanCode* trees, int tree_type, int* block_types, int* ringbuffers, @@ -543,7 +463,7 @@ static BROTLI_INLINE void DecodeBlockType(const int max_block_type, BrotliBitReader* br) { int* ringbuffer = ringbuffers + tree_type * 2; int* index = indexes + tree_type; - int type_code = ReadSymbol(trees + tree_type, br); + int type_code = ReadSymbol(&trees[tree_type * HUFFMAN_MAX_TABLE_SIZE], br); int block_type; if (type_code == 0) { block_type = ringbuffer[*index & 1]; @@ -608,6 +528,92 @@ static BROTLI_INLINE void IncrementalCopyFastPath( } } +int CopyUncompressedBlockToOutput(BrotliOutput output, int len, int pos, + uint8_t* ringbuffer, int ringbuffer_mask, + BrotliBitReader* br) { + const int rb_size = ringbuffer_mask + 1; + uint8_t* ringbuffer_end = ringbuffer + rb_size; + int rb_pos = pos & ringbuffer_mask; + int br_pos = br->pos_ & BROTLI_IBUF_MASK; + int nbytes; + + /* For short lengths copy byte-by-byte */ + if (len < 8 || br->bit_pos_ + (uint32_t)(len << 3) < br->bit_end_pos_) { + while (len-- > 0) { + if (!BrotliReadMoreInput(br)) { + return 0; + } + ringbuffer[rb_pos++]= (uint8_t)BrotliReadBits(br, 8); + if (rb_pos == rb_size) { + if (BrotliWrite(output, ringbuffer, (size_t)rb_size) < rb_size) { + return 0; + } + rb_pos = 0; + } + } + return 1; + } + + if (br->bit_end_pos_ < 64) { + return 0; + } + + /* Copy remaining 0-8 bytes from br->val_ to ringbuffer. */ + while (br->bit_pos_ < 64) { + ringbuffer[rb_pos] = (uint8_t)(br->val_ >> br->bit_pos_); + br->bit_pos_ += 8; + ++rb_pos; + --len; + } + + /* Copy remaining bytes from br->buf_ to ringbuffer. 
*/ + nbytes = (int)(br->bit_end_pos_ - br->bit_pos_) >> 3; + if (br_pos + nbytes > BROTLI_IBUF_MASK) { + int tail = BROTLI_IBUF_MASK + 1 - br_pos; + memcpy(&ringbuffer[rb_pos], &br->buf_[br_pos], (size_t)tail); + nbytes -= tail; + rb_pos += tail; + len -= tail; + br_pos = 0; + } + memcpy(&ringbuffer[rb_pos], &br->buf_[br_pos], (size_t)nbytes); + rb_pos += nbytes; + len -= nbytes; + + /* If we wrote past the logical end of the ringbuffer, copy the tail of the + ringbuffer to its beginning and flush the ringbuffer to the output. */ + if (rb_pos >= rb_size) { + if (BrotliWrite(output, ringbuffer, (size_t)rb_size) < rb_size) { + return 0; + } + rb_pos -= rb_size; + memcpy(ringbuffer, ringbuffer_end, (size_t)rb_pos); + } + + /* If we have more to copy than the remaining size of the ringbuffer, then we + first fill the ringbuffer from the input and then flush the ringbuffer to + the output */ + while (rb_pos + len >= rb_size) { + nbytes = rb_size - rb_pos; + if (BrotliRead(br->input_, &ringbuffer[rb_pos], (size_t)nbytes) < nbytes || + BrotliWrite(output, ringbuffer, (size_t)rb_size) < nbytes) { + return 0; + } + len -= nbytes; + rb_pos = 0; + } + + /* Copy straight from the input onto the ringbuffer. The ringbuffer will be + flushed to the output at a later time. */ + if (BrotliRead(br->input_, &ringbuffer[rb_pos], (size_t)len) < len) { + return 0; + } + + /* Restore the state of the bit reader. */ + BrotliInitBitReader(br, br->input_); + return 1; +} + int BrotliDecompressedSize(size_t encoded_size, const uint8_t* encoded_buffer, size_t* decoded_size) { @@ -662,11 +668,15 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) { uint8_t prev_byte1 = 0; uint8_t prev_byte2 = 0; HuffmanTreeGroup hgroup[3]; + HuffmanCode* block_type_trees = NULL; + HuffmanCode* block_len_trees = NULL; BrotliBitReader br; - /* 16 bytes would be enough, but we add some more slack for transforms */ - /* to work at the end of the ringbuffer. 
*/ - static const int kRingBufferWriteAheadSlack = 128; + /* We need the slack region for the following reasons: + - always doing two 8-byte copies for fast backward copying + - transforms + - flushing the input ringbuffer when decoding uncompressed blocks */ + static const int kRingBufferWriteAheadSlack = 128 + BROTLI_READ_SIZE; static const int kMaxDictionaryWordLength = 0; @@ -688,6 +698,16 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) { } ringbuffer_end = ringbuffer + ringbuffer_size; + if (ok) { + block_type_trees = (HuffmanCode*)malloc( + 3 * HUFFMAN_MAX_TABLE_SIZE * sizeof(HuffmanCode)); + block_len_trees = (HuffmanCode*)malloc( + 3 * HUFFMAN_MAX_TABLE_SIZE * sizeof(HuffmanCode)); + if (block_type_trees == NULL || block_len_trees == NULL) { + ok = 0; + } + } + while (!input_end && ok) { int meta_block_remaining_len = 0; int is_uncompressed; @@ -696,8 +716,6 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) { int num_block_types[3] = { 1, 1, 1 }; int block_type_rb[6] = { 0, 1, 0, 1, 0, 1 }; int block_type_rb_index[3] = { 0 }; - HuffmanTree block_type_trees[3]; - HuffmanTree block_len_trees[3]; int distance_postfix_bits; int num_direct_distance_codes; int distance_postfix_mask; @@ -716,12 +734,11 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) { int context_lookup_offset1 = 0; int context_lookup_offset2 = 0; uint8_t context_mode; + HuffmanCode* htree_command; for (i = 0; i < 3; ++i) { - hgroup[i].num_htrees = 0; + hgroup[i].codes = NULL; hgroup[i].htrees = NULL; - block_type_trees[i].root_ = NULL; - block_len_trees[i].root_ = NULL; } if (!BrotliReadMoreInput(&br)) { @@ -738,31 +755,25 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) { } if (is_uncompressed) { BrotliSetBitPos(&br, (br.bit_pos_ + 7) & (uint32_t)(~7UL)); - while (meta_block_remaining_len) { - ringbuffer[pos & ringbuffer_mask] = (uint8_t)BrotliReadBits(&br, 8); - if ((pos & ringbuffer_mask) == ringbuffer_mask) { - if 
(BrotliWrite(output, ringbuffer, (size_t)ringbuffer_size) < 0) { - ok = 0; - goto End; - } - } - ++pos; - --meta_block_remaining_len; - } + ok = CopyUncompressedBlockToOutput(output, meta_block_remaining_len, pos, + ringbuffer, ringbuffer_mask, &br); + pos += meta_block_remaining_len; goto End; } for (i = 0; i < 3; ++i) { - block_type_trees[i].root_ = NULL; - block_len_trees[i].root_ = NULL; num_block_types[i] = DecodeVarLenUint8(&br) + 1; if (num_block_types[i] >= 2) { - if (!ReadHuffmanCode( - num_block_types[i] + 2, &block_type_trees[i], &br) || - !ReadHuffmanCode(kNumBlockLengthCodes, &block_len_trees[i], &br)) { + if (!ReadHuffmanCode(num_block_types[i] + 2, + &block_type_trees[i * HUFFMAN_MAX_TABLE_SIZE], + &br) || + !ReadHuffmanCode(kNumBlockLengthCodes, + &block_len_trees[i * HUFFMAN_MAX_TABLE_SIZE], + &br)) { ok = 0; goto End; } - block_length[i] = ReadBlockLength(&block_len_trees[i], &br); + block_length[i] = ReadBlockLength( + &block_len_trees[i * HUFFMAN_MAX_TABLE_SIZE], &br); block_type_rb_index[i] = 1; } } @@ -822,8 +833,13 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) { context_mode = context_modes[block_type[0]]; context_lookup_offset1 = kContextLookupOffsets[context_mode]; context_lookup_offset2 = kContextLookupOffsets[context_mode + 1]; + htree_command = hgroup[1].htrees[0]; while (meta_block_remaining_len > 0) { + int cmd_code; + int range_idx; + int insert_code; + int copy_code; int insert_length; int copy_length; int distance_code; @@ -841,11 +857,25 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) { DecodeBlockType(num_block_types[1], block_type_trees, 1, block_type, block_type_rb, block_type_rb_index, &br); - block_length[1] = ReadBlockLength(&block_len_trees[1], &br); + block_length[1] = ReadBlockLength( + &block_len_trees[HUFFMAN_MAX_TABLE_SIZE], &br); + htree_command = hgroup[1].htrees[block_type[1]]; } --block_length[1]; - ReadInsertAndCopy(&hgroup[1].htrees[block_type[1]], - &insert_length, ©_length, 
&distance_code, &br); + cmd_code = ReadSymbol(htree_command, &br); + range_idx = cmd_code >> 6; + if (range_idx >= 2) { + range_idx -= 2; + distance_code = -1; + } else { + distance_code = 0; + } + insert_code = kInsertRangeLut[range_idx] + ((cmd_code >> 3) & 7); + copy_code = kCopyRangeLut[range_idx] + (cmd_code & 7); + insert_length = kInsertLengthPrefixCode[insert_code].offset + + (int)BrotliReadBits(&br, kInsertLengthPrefixCode[insert_code].nbits); + copy_length = kCopyLengthPrefixCode[copy_code].offset + + (int)BrotliReadBits(&br, kCopyLengthPrefixCode[copy_code].nbits); BROTLI_LOG_UINT(insert_length); BROTLI_LOG_UINT(copy_length); BROTLI_LOG_UINT(distance_code); @@ -859,7 +889,7 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) { DecodeBlockType(num_block_types[0], block_type_trees, 0, block_type, block_type_rb, block_type_rb_index, &br); - block_length[0] = ReadBlockLength(&block_len_trees[0], &br); + block_length[0] = ReadBlockLength(block_len_trees, &br); context_offset = block_type[0] << kLiteralContextBits; context_map_slice = context_map + context_offset; context_mode = context_modes[block_type[0]]; @@ -872,7 +902,7 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) { literal_htree_index = context_map_slice[context]; --block_length[0]; prev_byte2 = prev_byte1; - prev_byte1 = (uint8_t)ReadSymbol(&hgroup[0].htrees[literal_htree_index], + prev_byte1 = (uint8_t)ReadSymbol(hgroup[0].htrees[literal_htree_index], &br); ringbuffer[pos & ringbuffer_mask] = prev_byte1; BROTLI_LOG_UINT(literal_htree_index); @@ -899,7 +929,8 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) { DecodeBlockType(num_block_types[2], block_type_trees, 2, block_type, block_type_rb, block_type_rb_index, &br); - block_length[2] = ReadBlockLength(&block_len_trees[2], &br); + block_length[2] = ReadBlockLength( + &block_len_trees[2 * HUFFMAN_MAX_TABLE_SIZE], &br); dist_htree_index = (uint8_t)block_type[2]; dist_context_offset = block_type[2] << 
kDistanceContextBits; dist_context_map_slice = dist_context_map + dist_context_offset; @@ -907,11 +938,20 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) { --block_length[2]; context = (uint8_t)(copy_length > 4 ? 3 : copy_length - 2); dist_htree_index = dist_context_map_slice[context]; - distance_code = ReadCopyDistance(&hgroup[2].htrees[dist_htree_index], - num_direct_distance_codes, - distance_postfix_bits, - distance_postfix_mask, - &br); + distance_code = ReadSymbol(hgroup[2].htrees[dist_htree_index], &br); + if (distance_code >= num_direct_distance_codes) { + int nbits; + int postfix; + int offset; + distance_code -= num_direct_distance_codes; + postfix = distance_code & distance_postfix_mask; + distance_code >>= distance_postfix_bits; + nbits = (distance_code >> 1) + 1; + offset = ((2 + (distance_code & 1)) << nbits) - 4; + distance_code = num_direct_distance_codes + + ((offset + (int)BrotliReadBits(&br, nbits)) << + distance_postfix_bits) + postfix; + } } /* Convert the distance code to the actual distance by possibly looking */ @@ -1004,8 +1044,6 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) { } for (i = 0; i < 3; ++i) { HuffmanTreeGroupRelease(&hgroup[i]); - BrotliHuffmanTreeRelease(&block_type_trees[i]); - BrotliHuffmanTreeRelease(&block_len_trees[i]); } } @@ -1015,6 +1053,12 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) { } free(ringbuffer); } + if (block_type_trees != 0) { + free(block_type_trees); + } + if (block_len_trees != 0) { + free(block_len_trees); + } return ok; } diff --git a/dec/decode.h b/dec/decode.h index 9182438..ec6d65e 100644 --- a/dec/decode.h +++ b/dec/decode.h @@ -26,6 +26,8 @@ extern "C" { #endif /* Sets *decoded_size to the decompressed size of the given encoded stream. */ +/* This function only works if the encoded buffer has a single meta block, */ +/* and this meta block must have the "is last" bit set. */ /* Returns 1 on success, 0 on failure. 
*/ int BrotliDecompressedSize(size_t encoded_size, const uint8_t* encoded_buffer, diff --git a/dec/huffman.c b/dec/huffman.c index 20e6223..12493a9 100644 --- a/dec/huffman.c +++ b/dec/huffman.c @@ -12,11 +12,12 @@ See the License for the specific language governing permissions and limitations under the License. - Utilities for building and looking up Huffman trees. + Utilities for building Huffman decoding tables. */ #include #include +#include #include #include "./huffman.h" #include "./safe_malloc.h" @@ -25,231 +26,137 @@ extern "C" { #endif -#define NON_EXISTENT_SYMBOL (-1) -#define MAX_ALLOWED_CODE_LENGTH 15 +#define MAX_LENGTH 15 -static void TreeNodeInit(HuffmanTreeNode* const node) { - node->children_ = -1; /* means: 'unassigned so far' */ -} - -static int NodeIsEmpty(const HuffmanTreeNode* const node) { - return (node->children_ < 0); -} - -static int IsFull(const HuffmanTree* const tree) { - return (tree->num_nodes_ == tree->max_nodes_); -} - -static void AssignChildren(HuffmanTree* const tree, - HuffmanTreeNode* const node) { - HuffmanTreeNode* const children = tree->root_ + tree->num_nodes_; - node->children_ = (int)(children - node); - assert(children - node == (int)(children - node)); - tree->num_nodes_ += 2; - TreeNodeInit(children + 0); - TreeNodeInit(children + 1); -} - -static int TreeInit(HuffmanTree* const tree, int num_leaves) { - assert(tree != NULL); - tree->root_ = NULL; - if (num_leaves == 0) return 0; - /* We allocate maximum possible nodes in the tree at once. */ - /* Note that a Huffman tree is a full binary tree; and in a full binary */ - /* tree with L leaves, the total number of nodes N = 2 * L - 1. */ - tree->max_nodes_ = 2 * num_leaves - 1; - assert(tree->max_nodes_ < (1 << 16)); /* limit for the lut_jump_ table */ - tree->root_ = (HuffmanTreeNode*)BrotliSafeMalloc((uint64_t)tree->max_nodes_, - sizeof(*tree->root_)); - if (tree->root_ == NULL) return 0; - TreeNodeInit(tree->root_); /* Initialize root. 
*/ - tree->num_nodes_ = 1; - memset(tree->lut_bits_, 255, sizeof(tree->lut_bits_)); - memset(tree->lut_jump_, 0, sizeof(tree->lut_jump_)); - return 1; -} - -void BrotliHuffmanTreeRelease(HuffmanTree* const tree) { - if (tree != NULL) { - if (tree->root_ != NULL) { - free(tree->root_); - } - tree->root_ = NULL; - tree->max_nodes_ = 0; - tree->num_nodes_ = 0; +/* Returns reverse(reverse(key, len) + 1, len), where reverse(key, len) is the + bit-wise reversal of the len least significant bits of key. */ +static BROTLI_INLINE int GetNextKey(int key, int len) { + int step = 1 << (len - 1); + while (key & step) { + step >>= 1; } + return (key & (step - 1)) + step; } -/* Utility: converts Huffman code lengths to corresponding Huffman codes. */ -/* 'huff_codes' should be pre-allocated. */ -/* Returns false in case of error (memory allocation, invalid codes). */ -static int HuffmanCodeLengthsToCodes(const uint8_t* const code_lengths, - int code_lengths_size, - int* const huff_codes) { - int symbol; - int code_len; - int code_length_hist[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 }; - int curr_code; - int next_codes[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 }; - int max_code_length = 0; +/* Stores code in table[0], table[step], table[2*step], ..., table[end] */ +/* Assumes that end is an integer multiple of step */ +static BROTLI_INLINE void ReplicateValue(HuffmanCode* table, + int step, int end, + HuffmanCode code) { + do { + end -= step; + table[end] = code; + } while (end > 0); +} - assert(code_lengths != NULL); - assert(code_lengths_size > 0); - assert(huff_codes != NULL); - - /* Calculate max code length. */ - for (symbol = 0; symbol < code_lengths_size; ++symbol) { - if (code_lengths[symbol] > max_code_length) { - max_code_length = code_lengths[symbol]; - } +/* Returns the table width of the next 2nd level table. 
count is the histogram + of bit lengths for the remaining symbols, len is the code length of the next + processed symbol */ +static BROTLI_INLINE int NextTableBitSize(const int* const count, + int len, int root_bits) { + int left = 1 << (len - root_bits); + while (len < MAX_LENGTH) { + left -= count[len]; + if (left <= 0) break; + ++len; + left <<= 1; } - if (max_code_length > MAX_ALLOWED_CODE_LENGTH) return 0; + return len - root_bits; +} - /* Calculate code length histogram. */ - for (symbol = 0; symbol < code_lengths_size; ++symbol) { - ++code_length_hist[code_lengths[symbol]]; - } - code_length_hist[0] = 0; +int BrotliBuildHuffmanTable(HuffmanCode* root_table, + int root_bits, + const uint8_t* const code_lengths, + int code_lengths_size) { + HuffmanCode code; /* current table entry */ + HuffmanCode* table; /* next available space in table */ + int len; /* current code length */ + int symbol; /* symbol index in original or sorted table */ + int key; /* reversed prefix code */ + int step; /* step size to replicate values in current table */ + int low; /* low bits for current root entry */ + int mask; /* mask for low bits */ + int table_bits; /* key length of current table */ + int table_size; /* size of current table */ + int total_size; /* sum of root table size and 2nd level table sizes */ + int* sorted; /* symbols sorted by code length */ + int count[MAX_LENGTH + 1] = { 0 }; /* number of codes of each length */ + int offset[MAX_LENGTH + 1]; /* offsets in sorted table for each length */ - /* Calculate the initial values of 'next_codes' for each code length. */ - /* next_codes[code_len] denotes the code to be assigned to the next symbol */ - /* of code length 'code_len'. */ - curr_code = 0; - next_codes[0] = -1; /* Unused, as code length = 0 implies */ - /* code doesn't exist. 
*/ - for (code_len = 1; code_len <= max_code_length; ++code_len) { - curr_code = (curr_code + code_length_hist[code_len - 1]) << 1; - next_codes[code_len] = curr_code; + sorted = (int*)malloc((size_t)code_lengths_size * sizeof(*sorted)); + if (sorted == NULL) { + return 0; } - /* Get symbols. */ - for (symbol = 0; symbol < code_lengths_size; ++symbol) { - if (code_lengths[symbol] > 0) { - huff_codes[symbol] = next_codes[code_lengths[symbol]]++; - } else { - huff_codes[symbol] = NON_EXISTENT_SYMBOL; - } + /* build histogram of code lengths */ + for (symbol = 0; symbol < code_lengths_size; symbol++) { + count[code_lengths[symbol]]++; } - return 1; -} -static const uint8_t kReverse7[128] = { - 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, - 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, - 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, - 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, - 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, - 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, - 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, - 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127 -}; - -static int ReverseBitsShort(int bits, int num_bits) { - return kReverse7[bits] >> (7 - num_bits); -} - -static int TreeAddSymbol(HuffmanTree* const tree, - int symbol, int code, int code_length) { - int step = HUFF_LUT_BITS; - int base_code; - HuffmanTreeNode* node = tree->root_; - const HuffmanTreeNode* const max_node = tree->root_ + tree->max_nodes_; - assert(symbol == (int16_t)symbol); - if (code_length <= HUFF_LUT_BITS) { - int i = 1 << (HUFF_LUT_BITS - code_length); - base_code = ReverseBitsShort(code, code_length); - do { - int idx; - --i; - idx = base_code | (i << code_length); - tree->lut_symbol_[idx] = (int16_t)symbol; - tree->lut_bits_[idx] = (uint8_t)code_length; - } while (i > 0); - } else { - base_code = ReverseBitsShort((code 
>> (code_length - HUFF_LUT_BITS)), - HUFF_LUT_BITS); + /* generate offsets into sorted symbol table by code length */ + offset[1] = 0; + for (len = 1; len < MAX_LENGTH; len++) { + offset[len + 1] = offset[len] + count[len]; } - while (code_length-- > 0) { - if (node >= max_node) { - return 0; - } - if (NodeIsEmpty(node)) { - if (IsFull(tree)) return 0; /* error: too many symbols. */ - AssignChildren(tree, node); - } else if (!HuffmanTreeNodeIsNotLeaf(node)) { - return 0; /* leaf is already occupied. */ - } - node += node->children_ + ((code >> code_length) & 1); - if (--step == 0) { - tree->lut_jump_[base_code] = (int16_t)(node - tree->root_); - } - } - if (NodeIsEmpty(node)) { - node->children_ = 0; /* turn newly created node into a leaf. */ - } else if (HuffmanTreeNodeIsNotLeaf(node)) { - return 0; /* trying to assign a symbol to already used code. */ - } - node->symbol_ = symbol; /* Add symbol in this node. */ - return 1; -} -int BrotliHuffmanTreeBuildImplicit(HuffmanTree* const tree, - const uint8_t* const code_lengths, - int code_lengths_size) { - int symbol; - int num_symbols = 0; - int root_symbol = 0; - - assert(tree != NULL); - assert(code_lengths != NULL); - - /* Find out number of symbols and the root symbol. */ - for (symbol = 0; symbol < code_lengths_size; ++symbol) { - if (code_lengths[symbol] > 0) { - /* Note: code length = 0 indicates non-existent symbol. */ - ++num_symbols; - root_symbol = symbol; + /* sort symbols by length, by symbol order within each length */ + for (symbol = 0; symbol < code_lengths_size; symbol++) { + if (code_lengths[symbol] != 0) { + sorted[offset[code_lengths[symbol]]++] = symbol; } } - /* Initialize the tree. Will fail for num_symbols = 0 */ - if (!TreeInit(tree, num_symbols)) return 0; + table = root_table; + table_bits = root_bits; + table_size = 1 << table_bits; + total_size = table_size; - /* Build tree. */ - if (num_symbols == 1) { /* Trivial case. 
*/ - const int max_symbol = code_lengths_size; - if (root_symbol < 0 || root_symbol >= max_symbol) { - BrotliHuffmanTreeRelease(tree); - return 0; + /* special case code with only one value */ + if (offset[MAX_LENGTH] == 1) { + code.bits = 0; + code.value = (uint16_t)sorted[0]; + for (key = 0; key < total_size; ++key) { + table[key] = code; } - return TreeAddSymbol(tree, root_symbol, 0, 0); - } else { /* Normal case. */ - int ok = 0; + free(sorted); + return total_size; + } - /* Get Huffman codes from the code lengths. */ - int* const codes = - (int*)BrotliSafeMalloc((uint64_t)code_lengths_size, sizeof(*codes)); - if (codes == NULL) goto End; - - if (!HuffmanCodeLengthsToCodes(code_lengths, code_lengths_size, codes)) { - goto End; + /* fill in root table */ + key = 0; + symbol = 0; + for (len = 1, step = 2; len <= root_bits; ++len, step <<= 1) { + for (; count[len] > 0; --count[len]) { + code.bits = (uint8_t)(len); + code.value = (uint16_t)sorted[symbol++]; + ReplicateValue(&table[key], step, table_size, code); + key = GetNextKey(key, len); } + } - /* Add symbols one-by-one. 
*/ - for (symbol = 0; symbol < code_lengths_size; ++symbol) { - if (code_lengths[symbol] > 0) { - if (!TreeAddSymbol(tree, symbol, codes[symbol], code_lengths[symbol])) { - goto End; - } + /* fill in 2nd level tables and add pointers to root table */ + mask = total_size - 1; + low = -1; + for (len = root_bits + 1, step = 2; len <= MAX_LENGTH; ++len, step <<= 1) { + for (; count[len] > 0; --count[len]) { + if ((key & mask) != low) { + table += table_size; + table_bits = NextTableBitSize(count, len, root_bits); + table_size = 1 << table_bits; + total_size += table_size; + low = key & mask; + root_table[low].bits = (uint8_t)(table_bits + root_bits); + root_table[low].value = (uint16_t)((table - root_table) - low); } + code.bits = (uint8_t)(len - root_bits); + code.value = (uint16_t)sorted[symbol++]; + ReplicateValue(&table[key >> root_bits], step, table_size, code); + key = GetNextKey(key, len); } - ok = 1; - End: - free(codes); - ok = ok && IsFull(tree); - if (!ok) BrotliHuffmanTreeRelease(tree); - return ok; } + + free(sorted); + return total_size; } #if defined(__cplusplus) || defined(c_plusplus) diff --git a/dec/huffman.h b/dec/huffman.h index fbd0744..834b316 100644 --- a/dec/huffman.h +++ b/dec/huffman.h @@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. - Utilities for building and looking up Huffman trees. + Utilities for building Huffman decoding tables. */ #ifndef BROTLI_DEC_HUFFMAN_H_ @@ -25,48 +25,17 @@ extern "C" { #endif -/* A node of a Huffman tree. */ typedef struct { - int symbol_; - int children_; /* delta offset to both children (contiguous) or 0 if leaf. */ -} HuffmanTreeNode; + uint8_t bits; /* number of bits used for this symbol */ + uint16_t value; /* symbol value or table offset */ +} HuffmanCode; -/* Huffman Tree. 
*/ -#define HUFF_LUT_BITS 7 -#define HUFF_LUT (1U << HUFF_LUT_BITS) -typedef struct HuffmanTree HuffmanTree; -struct HuffmanTree { - /* Fast lookup for short bit lengths. */ - uint8_t lut_bits_[HUFF_LUT]; - int16_t lut_symbol_[HUFF_LUT]; - int16_t lut_jump_[HUFF_LUT]; - /* Complete tree for lookups. */ - HuffmanTreeNode* root_; /* all the nodes, starting at root. */ - int max_nodes_; /* max number of nodes */ - int num_nodes_; /* number of currently occupied nodes */ -}; - -/* Returns true if the given node is not a leaf of the Huffman tree. */ -static BROTLI_INLINE int HuffmanTreeNodeIsNotLeaf( - const HuffmanTreeNode* const node) { - return node->children_; -} - -/* Go down one level. Most critical function. 'right_child' must be 0 or 1. */ -static BROTLI_INLINE const HuffmanTreeNode* HuffmanTreeNextNode( - const HuffmanTreeNode* node, int right_child) { - return node + node->children_ + right_child; -} - -/* Releases the nodes of the Huffman tree. */ -/* Note: It does NOT free 'tree' itself. */ -void BrotliHuffmanTreeRelease(HuffmanTree* const tree); - -/* Builds Huffman tree assuming code lengths are implicitly in symbol order. */ +/* Builds Huffman lookup table assuming code lengths are in symbol order. */ /* Returns false in case of error (invalid tree or memory error). 
*/ -int BrotliHuffmanTreeBuildImplicit(HuffmanTree* const tree, - const uint8_t* const code_lengths, - int code_lengths_size); +int BrotliBuildHuffmanTable(HuffmanCode* root_table, + int root_bits, + const uint8_t* const code_lengths, + int code_lengths_size); #if defined(__cplusplus) || defined(c_plusplus) } /* extern "C" */ diff --git a/enc/backward_references.cc b/enc/backward_references.cc index 0e7f89b..f76d7d4 100644 --- a/enc/backward_references.cc +++ b/enc/backward_references.cc @@ -45,66 +45,97 @@ void CreateBackwardReferences(size_t num_bytes, average_cost /= num_bytes; hasher->set_average_cost(average_cost); + // M1 match is for considering for two repeated copies, if moving + // one literal from the previous copy to the current one allows the + // current copy to be more efficient (because of the way static dictionary + // codes words). M1 matching improves text compression density by ~0.15 %. + bool match_found_M1 = false; + size_t best_len_M1 = 0; + size_t best_len_code_M1 = 0; + size_t best_dist_M1 = 0; + double best_score_M1 = 0; while (i + 2 < i_end) { size_t best_len = 0; size_t best_len_code = 0; size_t best_dist = 0; double best_score = 0; size_t max_distance = std::min(i + i_diff, max_backward_limit); + bool in_dictionary; hasher->set_insert_length(insert_length); bool match_found = hasher->FindLongestMatch( ringbuffer, literal_cost, ringbuffer_mask, i + i_diff, i_end - i, max_distance, - &best_len, &best_len_code, &best_dist, &best_score); + &best_len, &best_len_code, &best_dist, &best_score, &in_dictionary); + bool best_in_dictionary = in_dictionary; if (match_found) { - // Found a match. Let's look for something even better ahead.
- int delayed_backward_references_in_row = 0; - while (i + 4 < i_end && - delayed_backward_references_in_row < 4) { - size_t best_len_2 = 0; - size_t best_len_code_2 = 0; - size_t best_dist_2 = 0; - double best_score_2 = 0; - max_distance = std::min(i + i_diff + 1, max_backward_limit); + if (match_found_M1 && best_score_M1 > best_score) { + // Two copies after each other. Take the last literal from the + // last copy, and use it as the first of this one. + (commands->rbegin())->copy_length_ -= 1; + (commands->rbegin())->copy_length_code_ -= 1; hasher->Store(ringbuffer + i, i + i_diff); - match_found = hasher->FindLongestMatch( - ringbuffer, literal_cost, ringbuffer_mask, - i + i_diff + 1, i_end - i - 1, max_distance, - &best_len_2, &best_len_code_2, &best_dist_2, &best_score_2); - double cost_diff_lazy = 0; - if (best_len >= 4) { - cost_diff_lazy += - literal_cost[(i + 4) & ringbuffer_mask] - average_cost; - } - { - const int tail_length = best_len_2 - best_len + 1; - for (int k = 0; k < tail_length; ++k) { - cost_diff_lazy -= - literal_cost[(i + best_len + k) & ringbuffer_mask] - - average_cost; + --i; + best_len = best_len_M1; + best_len_code = best_len_code_M1; + best_dist = best_dist_M1; + best_score = best_score_M1; + // in_dictionary doesn't need to be correct, but it is the only + // reason why M1 matching should be beneficial here. Setting it here + // will only disable further M1 matching against this copy. + best_in_dictionary = true; + in_dictionary = true; + } else { + // Found a match. Let's look for something even better ahead. 
+ int delayed_backward_references_in_row = 0; + while (i + 4 < i_end && + delayed_backward_references_in_row < 4) { + size_t best_len_2 = 0; + size_t best_len_code_2 = 0; + size_t best_dist_2 = 0; + double best_score_2 = 0; + max_distance = std::min(i + i_diff + 1, max_backward_limit); + hasher->Store(ringbuffer + i, i + i_diff); + match_found = hasher->FindLongestMatch( + ringbuffer, literal_cost, ringbuffer_mask, + i + i_diff + 1, i_end - i - 1, max_distance, + &best_len_2, &best_len_code_2, &best_dist_2, &best_score_2, + &in_dictionary); + double cost_diff_lazy = 0; + if (best_len >= 4) { + cost_diff_lazy += + literal_cost[(i + 4) & ringbuffer_mask] - average_cost; } - } - // If we are not inserting any symbols, inserting one is more - // expensive than if we were inserting symbols anyways. - if (insert_length < 1) { - cost_diff_lazy += 0.97; - } - // Add bias to slightly avoid lazy matching. - cost_diff_lazy += 2.0 + delayed_backward_references_in_row * 0.2; - cost_diff_lazy += 0.04 * literal_cost[i & ringbuffer_mask]; + { + const int tail_length = best_len_2 - best_len + 1; + for (int k = 0; k < tail_length; ++k) { + cost_diff_lazy -= + literal_cost[(i + best_len + k) & ringbuffer_mask] - + average_cost; + } + } + // If we are not inserting any symbols, inserting one is more + // expensive than if we were inserting symbols anyways. + if (insert_length < 1) { + cost_diff_lazy += 0.97; + } + // Add bias to slightly avoid lazy matching. + cost_diff_lazy += 2.0 + delayed_backward_references_in_row * 0.2; + cost_diff_lazy += 0.04 * literal_cost[i & ringbuffer_mask]; - if (match_found && best_score_2 >= best_score + cost_diff_lazy) { - // Ok, let's just write one byte for now and start a match from the - // next byte. 
- ++insert_length; - ++delayed_backward_references_in_row; - best_len = best_len_2; - best_len_code = best_len_code_2; - best_dist = best_dist_2; - best_score = best_score_2; - i++; - } else { - break; + if (match_found && best_score_2 >= best_score + cost_diff_lazy) { + // Ok, let's just write one byte for now and start a match from the + // next byte. + ++insert_length; + ++delayed_backward_references_in_row; + best_len = best_len_2; + best_len_code = best_len_code_2; + best_dist = best_dist_2; + best_score = best_score_2; + best_in_dictionary = in_dictionary; + i++; + } else { + break; + } } } Command cmd; @@ -117,13 +148,40 @@ void CreateBackwardReferences(size_t num_bytes, insert_length = 0; ++i; - for (int j = 1; j < best_len; ++j) { + // Copy all copied literals to the hasher, except the last one. + // We cannot store the last one yet, otherwise we couldn't find + // the possible M1 match. + for (int j = 1; j < best_len - 1; ++j) { if (i + 2 < i_end) { hasher->Store(ringbuffer + i, i + i_diff); } ++i; } + // Prepare M1 match. + if (best_len >= 4 && i + 20 < i_end && !best_in_dictionary) { + max_distance = std::min(i + i_diff, max_backward_limit); + match_found_M1 = hasher->FindLongestMatch( + ringbuffer, literal_cost, ringbuffer_mask, + i + i_diff, i_end - i, max_distance, + &best_len_M1, &best_len_code_M1, &best_dist_M1, &best_score_M1, + &in_dictionary); + } else { + match_found_M1 = false; + in_dictionary = false; + } + // This byte is just moved from the previous copy to the current, + // that is no gain. + best_score_M1 -= literal_cost[i & ringbuffer_mask]; + // Adjust for losing the opportunity for lazy matching. + best_score_M1 -= 3.75; + + // Store the last one of the match. 
+ if (i + 2 < i_end) { + hasher->Store(ringbuffer + i, i + i_diff); + } + ++i; } else { + match_found_M1 = false; ++insert_length; hasher->Store(ringbuffer + i, i + i_diff); ++i; diff --git a/enc/bit_cost.h b/enc/bit_cost.h index c769455..c2fd3e4 100644 --- a/enc/bit_cost.h +++ b/enc/bit_cost.h @@ -93,7 +93,7 @@ static inline int HuffmanBitCost(const uint8_t* depth, int length) { cost[17] += 3; int tree_size = 0; - int bits = 6 + 3 * max_depth; // huffman tree of huffman tree cost + int bits = 6 + 2 * max_depth; // huffman tree of huffman tree cost for (int i = 0; i < kCodeLengthCodes; ++i) { bits += histogram[i] * cost[i]; // huffman tree bit cost tree_size += histogram[i]; diff --git a/enc/block_splitter.cc b/enc/block_splitter.cc index 34363c4..57c1e90 100644 --- a/enc/block_splitter.cc +++ b/enc/block_splitter.cc @@ -31,16 +31,16 @@ namespace brotli { -static const int kMaxLiteralHistograms = 48; +static const int kMaxLiteralHistograms = 100; static const int kMaxCommandHistograms = 50; static const double kLiteralBlockSwitchCost = 26; static const double kCommandBlockSwitchCost = 13.5; static const double kDistanceBlockSwitchCost = 14.6; static const int kLiteralStrideLength = 70; static const int kCommandStrideLength = 40; -static const int kSymbolsPerLiteralHistogram = 550; +static const int kSymbolsPerLiteralHistogram = 544; static const int kSymbolsPerCommandHistogram = 530; -static const int kSymbolsPerDistanceHistogram = 550; +static const int kSymbolsPerDistanceHistogram = 544; static const int kMinLengthForBlockSplitting = 128; static const int kIterMulForRefining = 2; static const int kMinItersForRefining = 100; diff --git a/enc/encode.cc b/enc/encode.cc index b492421..6603eec 100644 --- a/enc/encode.cc +++ b/enc/encode.cc @@ -77,6 +77,68 @@ void EncodeVarLenUint8(int n, int* storage_ix, uint8_t* storage) { } } +int ParseAsUTF8(int* symbol, const uint8_t* input, int size) { + // ASCII + if ((input[0] & 0x80) == 0) { + *symbol = input[0]; + if (*symbol 
> 0) { + return 1; + } + } + // 2-byte UTF8 + if (size > 1 && + (input[0] & 0xe0) == 0xc0 && + (input[1] & 0xc0) == 0x80) { + *symbol = (((input[0] & 0x1f) << 6) | + (input[1] & 0x3f)); + if (*symbol > 0x7f) { + return 2; + } + } + // 3-byte UTF8 + if (size > 2 && + (input[0] & 0xf0) == 0xe0 && + (input[1] & 0xc0) == 0x80 && + (input[2] & 0xc0) == 0x80) { + *symbol = (((input[0] & 0x0f) << 12) | + ((input[1] & 0x3f) << 6) | + (input[2] & 0x3f)); + if (*symbol > 0x7ff) { + return 3; + } + } + // 4-byte UTF8 + if (size > 3 && + (input[0] & 0xf8) == 0xf0 && + (input[1] & 0xc0) == 0x80 && + (input[2] & 0xc0) == 0x80 && + (input[3] & 0xc0) == 0x80) { + *symbol = (((input[0] & 0x07) << 18) | + ((input[1] & 0x3f) << 12) | + ((input[2] & 0x3f) << 6) | + (input[3] & 0x3f)); + if (*symbol > 0xffff && *symbol <= 0x10ffff) { + return 4; + } + } + // Not UTF8, emit a special symbol above the UTF8-code space + *symbol = 0x110000 | input[0]; + return 1; +} + +// Returns true if at least min_fraction of the data is UTF8-encoded.
+bool IsMostlyUTF8(const uint8_t* data, size_t length, double min_fraction) { + size_t size_utf8 = 0; + size_t pos = 0; + while (pos < length) { + int symbol; + int bytes_read = ParseAsUTF8(&symbol, data + pos, length - pos); + pos += bytes_read; + if (symbol < 0x110000) size_utf8 += bytes_read; + } + return size_utf8 > min_fraction * length; +} + void EncodeMetaBlockLength(size_t meta_block_size, bool is_last, bool is_uncompressed, @@ -118,7 +180,7 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask( const uint8_t* code_length_bitdepth, int* storage_ix, uint8_t* storage) { static const uint8_t kStorageOrder[kCodeLengthCodes] = { - 1, 2, 3, 4, 0, 17, 5, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15, }; // Throw away trailing zeros: int codes_to_store = kCodeLengthCodes; @@ -147,7 +209,7 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask( WriteBits(2, skip_some, storage_ix, storage); for (int i = skip_some; i < codes_to_store; ++i) { uint8_t len[] = { 2, 4, 3, 2, 2, 4 }; - uint8_t bits[] = { 0, 5, 1, 3, 2, 13 }; + uint8_t bits[] = { 0, 7, 3, 2, 1, 15 }; int v = code_length_bitdepth[kStorageOrder[i]]; WriteBits(len[v], bits[v], storage_ix, storage); } @@ -175,54 +237,49 @@ void StoreHuffmanTreeToBitMask( } template -void StoreHuffmanCode(const EntropyCode& code, int alphabet_size, - int* storage_ix, uint8_t* storage) { +void StoreHuffmanCodeSimple( + const EntropyCode& code, int alphabet_size, + int max_bits, + int* storage_ix, uint8_t* storage) { const uint8_t *depth = &code.depth_[0]; - int max_bits_counter = alphabet_size - 1; - int max_bits = 0; - while (max_bits_counter) { - max_bits_counter >>= 1; - ++max_bits; + int symbols[4]; + // Quadratic sort. 
+ int k, j; + for (k = 0; k < code.count_; ++k) { + symbols[k] = code.symbols_[k]; } - if (code.count_ == 0) { // emit minimal tree for empty cases - // bits: small tree marker: 1, count-1: 0, max_bits-sized encoding for 0 - WriteBits(4 + max_bits, 0x1, storage_ix, storage); - return; - } - if (code.count_ <= 4) { - int symbols[4]; - // Quadratic sort. - int k, j; - for (k = 0; k < code.count_; ++k) { - symbols[k] = code.symbols_[k]; - } - for (k = 0; k < code.count_; ++k) { - for (j = k + 1; j < code.count_; ++j) { - if (depth[symbols[j]] < depth[symbols[k]]) { - int t = symbols[k]; - symbols[k] = symbols[j]; - symbols[j] = t; - } + for (k = 0; k < code.count_; ++k) { + for (j = k + 1; j < code.count_; ++j) { + if (depth[symbols[j]] < depth[symbols[k]]) { + int t = symbols[k]; + symbols[k] = symbols[j]; + symbols[j] = t; } } - // Small tree marker to encode 1-4 symbols. - WriteBits(2, 1, storage_ix, storage); - WriteBits(2, code.count_ - 1, storage_ix, storage); - for (int i = 0; i < code.count_; ++i) { - WriteBits(max_bits, symbols[i], storage_ix, storage); - } - if (code.count_ == 4) { - if (depth[symbols[0]] == 2 && - depth[symbols[1]] == 2 && - depth[symbols[2]] == 2 && - depth[symbols[3]] == 2) { - WriteBits(1, 0, storage_ix, storage); - } else { - WriteBits(1, 1, storage_ix, storage); - } - } - return; } + // Small tree marker to encode 1-4 symbols. 
+ WriteBits(2, 1, storage_ix, storage); + WriteBits(2, code.count_ - 1, storage_ix, storage); + for (int i = 0; i < code.count_; ++i) { + WriteBits(max_bits, symbols[i], storage_ix, storage); + } + if (code.count_ == 4) { + if (depth[symbols[0]] == 2 && + depth[symbols[1]] == 2 && + depth[symbols[2]] == 2 && + depth[symbols[3]] == 2) { + WriteBits(1, 0, storage_ix, storage); + } else { + WriteBits(1, 1, storage_ix, storage); + } + } +} + +template +void StoreHuffmanCodeComplex( + const EntropyCode& code, int alphabet_size, + int* storage_ix, uint8_t* storage) { + const uint8_t *depth = &code.depth_[0]; uint8_t huffman_tree[kSize]; uint8_t huffman_tree_extra_bits[kSize]; int huffman_tree_size = 0; @@ -246,6 +303,31 @@ void StoreHuffmanCode(const EntropyCode& code, int alphabet_size, storage_ix, storage); } + +template +void StoreHuffmanCode(const EntropyCode& code, int alphabet_size, + int* storage_ix, uint8_t* storage) { + int max_bits_counter = alphabet_size - 1; + int max_bits = 0; + while (max_bits_counter) { + max_bits_counter >>= 1; + ++max_bits; + } + if (code.count_ == 0) { + // Emit a minimal tree for empty cases. 
+ // bits: small tree marker: 1, count-1: 0, max_bits-sized encoding for 0 + WriteBits(4 + max_bits, 0x1, storage_ix, storage); + } else if (code.count_ <= 4) { + StoreHuffmanCodeSimple( + code, alphabet_size, max_bits, + storage_ix, storage); + } else { + StoreHuffmanCodeComplex( + code, alphabet_size, + storage_ix, storage); + } +} + template void StoreHuffmanCodes(const std::vector >& codes, int alphabet_size, @@ -798,12 +880,23 @@ void BrotliCompressor::WriteMetaBlock(const size_t input_size, const bool is_last, size_t* encoded_size, uint8_t* encoded_buffer) { + static const double kMinUTF8Ratio = 0.75; + bool utf8_mode = false; std::vector commands; if (input_size > 0) { ringbuffer_.Write(input_buffer, input_size); - EstimateBitCostsForLiterals(input_pos_, input_size, - kRingBufferMask, ringbuffer_.start(), - &literal_cost_[0]); + utf8_mode = IsMostlyUTF8( + &ringbuffer_.start()[input_pos_ & kRingBufferMask], + input_size, kMinUTF8Ratio); + if (utf8_mode) { + EstimateBitCostsForLiteralsUTF8(input_pos_, input_size, + kRingBufferMask, ringbuffer_.start(), + &literal_cost_[0]); + } else { + EstimateBitCostsForLiterals(input_pos_, input_size, + kRingBufferMask, ringbuffer_.start(), + &literal_cost_[0]); + } CreateBackwardReferences(input_size, input_pos_, ringbuffer_.start(), &literal_cost_[0], diff --git a/enc/entropy_encode.cc b/enc/entropy_encode.cc index e4c6b20..1ec50f1 100644 --- a/enc/entropy_encode.cc +++ b/enc/entropy_encode.cc @@ -182,6 +182,12 @@ void WriteHuffmanTreeRepetitions( ++(*tree_size); --repetitions; } + if (repetitions == 7) { + tree[*tree_size] = value; + extra_bits[*tree_size] = 0; + ++(*tree_size); + --repetitions; + } if (repetitions < 3) { for (int i = 0; i < repetitions; ++i) { tree[*tree_size] = value; @@ -208,6 +214,12 @@ void WriteHuffmanTreeRepetitionsZeros( uint8_t* tree, uint8_t* extra_bits, int* tree_size) { + if (repetitions == 11) { + tree[*tree_size] = 0; + extra_bits[*tree_size] = 0; + ++(*tree_size); + --repetitions; + } if 
(repetitions < 3) { for (int i = 0; i < repetitions; ++i) { tree[*tree_size] = 0; @@ -230,11 +242,6 @@ void WriteHuffmanTreeRepetitionsZeros( } -// Heuristics for selecting the stride ranges to collapse. -int ValuesShouldBeCollapsedToStrideAverage(int a, int b) { - return abs(a - b) < 4; -} - int OptimizeHuffmanCountsForRle(int length, int* counts) { int stride; int limit; @@ -251,6 +258,35 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) { break; } } + { + int nonzeros = 0; + int smallest_nonzero = 1 << 30; + for (i = 0; i < length; ++i) { + if (counts[i] != 0) { + ++nonzeros; + if (smallest_nonzero > counts[i]) { + smallest_nonzero = counts[i]; + } + } + } + if (nonzeros < 5) { + // Small histogram will model it well. + return 1; + } + int zeros = length - nonzeros; + if (smallest_nonzero < 4) { + if (zeros < 6) { + for (i = 1; i < length - 1; ++i) { + if (counts[i - 1] != 0 && counts[i] == 0 && counts[i + 1] != 0) { + counts[i] = 1; + } + } + } + } + if (nonzeros < 28) { + return 1; + } + } // 2) Let's mark all population counts that already can be encoded // with an rle code. good_for_rle = (uint8_t*)calloc(length, 1); @@ -282,13 +318,15 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) { } } // 3) Let's replace those population counts that lead to more rle codes. + // Math here is in 24.8 fixed point representation. + const int streak_limit = 1240; stride = 0; - limit = (counts[0] + counts[1] + counts[2]) / 3 + 1; + limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420; sum = 0; for (i = 0; i < length + 1; ++i) { if (i == length || good_for_rle[i] || (i != 0 && good_for_rle[i - 1]) || - !ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) { + abs(256 * counts[i] - limit) >= streak_limit) { if (stride >= 4 || (stride >= 3 && sum == 0)) { int k; // The stride must end, collapse what we have, if we have enough (4). 
@@ -311,9 +349,9 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) { if (i < length - 2) { // All interesting strides have a count of at least 4, // at least when non-zeros. - limit = (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 1; + limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420; } else if (i < length) { - limit = counts[i]; + limit = 256 * counts[i]; } else { limit = 0; } @@ -322,7 +360,10 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) { if (i != length) { sum += counts[i]; if (stride >= 4) { - limit = (sum + stride / 2) / stride; + limit = (256 * sum + stride / 2) / stride; + } + if (stride == 4) { + limit += 120; } } } @@ -331,16 +372,70 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) { } +static void DecideOverRleUse(const uint8_t* depth, const int length, + bool *use_rle_for_non_zero, + bool *use_rle_for_zero) { + int total_reps_zero = 0; + int total_reps_non_zero = 0; + int count_reps_zero = 0; + int count_reps_non_zero = 0; + int new_length = length; + for (int i = 0; i < length; ++i) { + if (depth[length - i - 1] == 0) { + --new_length; + } else { + break; + } + } + for (uint32_t i = 0; i < new_length;) { + const int value = depth[i]; + int reps = 1; + // Find rle coding for longer codes. + // Shorter codes seem not to benefit from rle. 
+ for (uint32_t k = i + 1; k < new_length && depth[k] == value; ++k) { + ++reps; + } + if (reps >= 3 && value == 0) { + total_reps_zero += reps; + ++count_reps_zero; + } + if (reps >= 4 && value != 0) { + total_reps_non_zero += reps; + ++count_reps_non_zero; + } + i += reps; + } + total_reps_non_zero -= count_reps_non_zero * 2; + total_reps_zero -= count_reps_zero * 2; + *use_rle_for_non_zero = total_reps_non_zero > 2; + *use_rle_for_zero = total_reps_zero > 2; +} + + void WriteHuffmanTree(const uint8_t* depth, const int length, uint8_t* tree, uint8_t* extra_bits_data, int* huffman_tree_size) { int previous_value = 8; + + // First gather statistics on if it is a good idea to do rle. + bool use_rle_for_non_zero; + bool use_rle_for_zero; + DecideOverRleUse(depth, length, &use_rle_for_non_zero, &use_rle_for_zero); + + // Actual rle coding. for (uint32_t i = 0; i < length;) { const int value = depth[i]; int reps = 1; - for (uint32_t k = i + 1; k < length && depth[k] == value; ++k) { - ++reps; + if (length > 50) { + // Find rle coding for longer codes. + // Shorter codes seem not to benefit from rle. 
+ if ((value != 0 && use_rle_for_non_zero) || + (value == 0 && use_rle_for_zero)) { + for (uint32_t k = i + 1; k < length && depth[k] == value; ++k) { + ++reps; + } + } } if (value == 0) { WriteHuffmanTreeRepetitionsZeros(reps, tree, extra_bits_data, diff --git a/enc/entropy_encode.h b/enc/entropy_encode.h index 89c3e1a..aabb9a5 100644 --- a/enc/entropy_encode.h +++ b/enc/entropy_encode.h @@ -86,7 +86,7 @@ void BuildEntropyCode(const Histogram<kSize>& histogram, ++code->count_; } } - if (code->count_ >= 64) { + if (alphabet_size >= 50 && code->count_ >= 16) { int counts[kSize]; memcpy(counts, &histogram.data_[0], sizeof(counts[0]) * kSize); OptimizeHuffmanCountsForRle(alphabet_size, counts); diff --git a/enc/hash.h b/enc/hash.h index cb38e8f..920c88b 100644 --- a/enc/hash.h +++ b/enc/hash.h @@ -150,7 +150,10 @@ class HashLongestMatch { size_t * __restrict best_len_out, size_t * __restrict best_len_code_out, size_t * __restrict best_distance_out, - double * __restrict best_score_out) { + double * __restrict best_score_out, + bool * __restrict in_dictionary) { + *in_dictionary = true; + *best_len_code_out = 0; const size_t cur_ix_masked = cur_ix & ring_buffer_mask; const double start_cost4 = literal_cost == NULL ? 20 : literal_cost[cur_ix_masked] + @@ -166,9 +169,9 @@ class HashLongestMatch { literal_cost[(cur_ix + 1) & ring_buffer_mask] + 1.2; bool match_found = false; // Don't accept a short copy from far away.
- double best_score = 8.25; + double best_score = 8.11; if (insert_length_ < 4) { - double cost_diff[4] = { 0.20, 0.09, 0.05, 0.03 }; + double cost_diff[4] = { 0.10, 0.04, 0.02, 0.01 }; best_score += cost_diff[insert_length_]; } size_t best_len = *best_len_out; @@ -235,6 +238,7 @@ class HashLongestMatch { *best_distance_out = best_ix; *best_score_out = best_score; match_found = true; + *in_dictionary = backward > max_backward; } } } @@ -257,7 +261,7 @@ class HashLongestMatch { continue; } int len = 2; - const double score = start_cost2 - 1.70 * Log2Floor(backward); + const double score = start_cost2 - 2.3 * Log2Floor(backward); if (best_score < score) { best_score = score; @@ -309,6 +313,7 @@ class HashLongestMatch { *best_distance_out = best_ix; *best_score_out = best_score; match_found = true; + *in_dictionary = false; } } } diff --git a/enc/literal_cost.cc b/enc/literal_cost.cc index bf05a98..a944599 100644 --- a/enc/literal_cost.cc +++ b/enc/literal_cost.cc @@ -22,6 +22,104 @@ namespace brotli { +static int UTF8Position(int last, int c, int clamp) { + if (c < 128) { + return 0; // Next one is the 'Byte 1' again. + } else if (c >= 192) { + return std::min(1, clamp); // Next one is the 'Byte 2' of utf-8 encoding. + } else { + // Let's decide over the last byte if this ends the sequence. + if (last < 0xe0) { + return 0; // Completed two or three byte coding. + } else { + return std::min(2, clamp); // Next one is the 'Byte 3' of utf-8 encoding. + } + } +} + +static int DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask, + const uint8_t *data) { + int counts[3] = { 0 }; + int max_utf8 = 1; // should be 2, but 1 compresses better. 
+ int last_c = 0; + int utf8_pos = 0; + for (int i = 0; i < len; ++i) { + int c = data[(pos + i) & mask]; + utf8_pos = UTF8Position(last_c, c, 2); + ++counts[utf8_pos]; + last_c = c; + } + if (counts[2] < 500) { + max_utf8 = 1; + } + if (counts[1] + counts[2] < 25) { + max_utf8 = 0; + } + return max_utf8; +} + +void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask, + const uint8_t *data, float *cost) { + + // max_utf8 is 0 (normal ascii single byte modeling), + // 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling). + const int max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data); + int histogram[3][256] = { { 0 } }; + int window_half = 495; + int in_window = std::min(static_cast<size_t>(window_half), len); + int in_window_utf8[3] = { 0 }; + + // Bootstrap histograms. + int last_c = 0; + int utf8_pos = 0; + for (int i = 0; i < in_window; ++i) { + int c = data[(pos + i) & mask]; + ++histogram[utf8_pos][c]; + ++in_window_utf8[utf8_pos]; + utf8_pos = UTF8Position(last_c, c, max_utf8); + last_c = c; + } + + // Compute bit costs with sliding window. + for (int i = 0; i < len; ++i) { + if (i - window_half >= 0) { + // Remove a byte in the past. + int c = (i - window_half - 1) < 0 ? + 0 : data[(pos + i - window_half - 1) & mask]; + int last_c = (i - window_half - 2) < 0 ? + 0 : data[(pos + i - window_half - 2) & mask]; + int utf8_pos2 = UTF8Position(last_c, c, max_utf8); + --histogram[utf8_pos2][data[(pos + i - window_half) & mask]]; + --in_window_utf8[utf8_pos2]; + } + if (i + window_half < len) { + // Add a byte in the future. + int c = (i + window_half - 1) < 0 ? + 0 : data[(pos + i + window_half - 1) & mask]; + int last_c = (i + window_half - 2) < 0 ? + 0 : data[(pos + i + window_half - 2) & mask]; + int utf8_pos2 = UTF8Position(last_c, c, max_utf8); + ++histogram[utf8_pos2][data[(pos + i + window_half) & mask]]; + ++in_window_utf8[utf8_pos2]; + } + int c = i < 1 ? 0 : data[(pos + i - 1) & mask]; + int last_c = i < 2 ?
0 : data[(pos + i - 2) & mask]; + int utf8_pos = UTF8Position(last_c, c, max_utf8); + int masked_pos = (pos + i) & mask; + int histo = histogram[utf8_pos][data[masked_pos]]; + if (histo == 0) { + histo = 1; + } + cost[masked_pos] = log2(static_cast<double>(in_window_utf8[utf8_pos]) + / histo); + cost[masked_pos] += 0.02905; + if (cost[masked_pos] < 1.0) { + cost[masked_pos] *= 0.5; + cost[masked_pos] += 0.5; + } + } +} + void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask, const uint8_t *data, float *cost) { int histogram[256] = { 0 }; @@ -59,4 +157,5 @@ void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask, } } + } // namespace brotli diff --git a/enc/literal_cost.h b/enc/literal_cost.h index fd7f325..ca39a4e 100644 --- a/enc/literal_cost.h +++ b/enc/literal_cost.h @@ -26,7 +26,12 @@ namespace brotli { // ringbuffer (data, mask) will take entropy coded and writes these estimates // to the ringbuffer (cost, mask). void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask, - const uint8_t *data, float *cost); + const uint8_t *data, + float *cost); + +void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask, + const uint8_t *data, + float *cost); } // namespace brotli