Brotli format change: small improvement to the encoding of Huffman codes

Combine the HSKIP and the simple/complex Huffman code type bits.
This commit is contained in:
Zoltan Szabadka 2014-01-08 12:28:28 +01:00
parent e60dbdb10b
commit 2bcd58bb5a
4 changed files with 32 additions and 25 deletions

View File

@ -412,16 +412,16 @@ Abstract
3.4. Simple Huffman codes
The first bit of the compressed representation of each Huffman
The first two bits of the compressed representation of each Huffman
code distinguishes between simple and complex Huffman codes. If
the first bit is 1, then a simple, otherwise a complex Huffman
code follows.
this value is 1, then a simple Huffman code follows. Otherwise
the value indicates the number of leading zeros.
A simple Huffman code can have only up to four symbols with non-
zero code length. The format of the simple Huffman code is as
follows:
1 bit: 1, indicating a simple Huffman code
2 bits: value of 1 indicates a simple Huffman code
2 bits: NSYM - 1, where NSYM = # of symbols with non-zero
code length
@ -507,8 +507,10 @@ Abstract
We can now define the format of the complex Huffman code as
follows:
1 bit: 0, indicating a complex Huffman code
1 bit : HSKIP, if 1, skip over first two code length codes
2 bits: HSKIP, values of 0, 2 or 3 represent the respective
number of leading zeros. (Value of 1 indicates the
Simple Huffman code.)
Code lengths for symbols in the code length alphabet given
just above, in the order: 1, 2, 3, 4, 0, 17, 5, 6, 16, 7,
@ -519,9 +521,9 @@ Abstract
the static Huffman code above. A code length of 0 means
the corresponding code length symbol is not used.
If HSKIP is 1, code lengths of code length symbols 1 and
2 are implicit zeros and are not present in the code
lengths sequence above. If there are at least two non-
If HSKIP is 2 or 3, a respective number of leading code
lengths are implicit zeros and are not present in the
code lengths sequence above. If there are at least two non-
zero code lengths, any trailing zero code lengths are
omitted, i.e. the last code length in the sequence must
be non-zero. In this case the sum of (32 >> code length)

View File

@ -234,7 +234,7 @@ static int ReadHuffmanCode(int alphabet_size,
HuffmanTree* tree,
BrotliBitReader* br) {
int ok = 1;
int simple_code;
int simple_code_or_skip;
uint8_t* code_lengths = NULL;
code_lengths =
@ -247,9 +247,12 @@ static int ReadHuffmanCode(int alphabet_size,
printf("[ReadHuffmanCode] Unexpected end of input.\n");
return 0;
}
simple_code = BrotliReadBits(br, 1);
BROTLI_LOG_UINT(simple_code);
if (simple_code) { /* Read symbols, codes & code lengths directly. */
/* simple_code_or_skip is used as follows:
1 for simple code;
0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */
simple_code_or_skip = BrotliReadBits(br, 2);
BROTLI_LOG_UINT(simple_code_or_skip);
if (simple_code_or_skip == 1) { /* Read symbols, codes & code lengths directly. */
int i;
int max_bits_counter = alphabet_size - 1;
int max_bits = 0;
@ -286,7 +289,7 @@ static int ReadHuffmanCode(int alphabet_size,
int i;
uint8_t code_length_code_lengths[CODE_LENGTH_CODES] = { 0 };
int space = 32;
for (i = BrotliReadBits(br, 1) * 2;
for (i = simple_code_or_skip;
i < CODE_LENGTH_CODES && space > 0; ++i) {
int code_len_idx = kCodeLengthCodeOrder[i];
int v = BrotliReadBits(br, 2);

View File

@ -136,12 +136,16 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
if (num_codes == 1) {
codes_to_store = kCodeLengthCodes;
}
const int skip_two_first =
code_length_bitdepth[kStorageOrder[0]] == 0 &&
code_length_bitdepth[kStorageOrder[1]] == 0;
WriteBits(1, skip_two_first, storage_ix, storage);
for (int i = skip_two_first * 2; i < codes_to_store; ++i) {
int skip_some = 0; // skips none.
if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
code_length_bitdepth[kStorageOrder[1]] == 0) {
skip_some = 2; // skips two.
if (code_length_bitdepth[kStorageOrder[2]] == 0) {
skip_some = 3; // skips three.
}
}
WriteBits(2, skip_some, storage_ix, storage);
for (int i = skip_some; i < codes_to_store; ++i) {
uint8_t len[] = { 2, 4, 3, 2, 2, 4 };
uint8_t bits[] = { 0, 5, 1, 3, 2, 13 };
int v = code_length_bitdepth[kStorageOrder[i]];
@ -182,7 +186,7 @@ void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
}
if (code.count_ == 0) { // emit minimal tree for empty cases
// bits: small tree marker: 1, count-1: 0, max_bits-sized encoding for 0
WriteBits(3 + max_bits, 0x01, storage_ix, storage);
WriteBits(4 + max_bits, 0x1, storage_ix, storage);
return;
}
if (code.count_ <= 4) {
@ -202,7 +206,7 @@ void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
}
}
// Small tree marker to encode 1-4 symbols.
WriteBits(1, 1, storage_ix, storage);
WriteBits(2, 1, storage_ix, storage);
WriteBits(2, code.count_ - 1, storage_ix, storage);
for (int i = 0; i < code.count_; ++i) {
WriteBits(max_bits, symbols[i], storage_ix, storage);
@ -219,8 +223,6 @@ void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
}
return;
}
WriteBits(1, 0, storage_ix, storage);
uint8_t huffman_tree[kSize];
uint8_t huffman_tree_extra_bits[kSize];
int huffman_tree_size = 0;

View File

@ -51,7 +51,7 @@ void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
histo = 1;
}
cost[masked_pos] = log2(static_cast<double>(in_window) / histo);
cost[masked_pos] += 0.03;
cost[masked_pos] += 0.029;
if (cost[masked_pos] < 1.0) {
cost[masked_pos] *= 0.5;
cost[masked_pos] += 0.5;