mirror of
https://github.com/google/brotli.git
synced 2024-11-25 04:50:05 +00:00
Updates to Brotli compression format, decoder and encoder
This commit contains a batch of changes that were made to the Brotli compression algorithm in the last month. Most important changes: * Format change: don't push distances representing static dictionary words to the distance cache. * Fix decoder invalid memory access bug caused by building a non-complete Huffman tree. * Add a mode parameter to the encoder interface. * Use different hashers for text and font mode. * Add a heuristics to the hasher for skipping non-compressible data. * Exhaustive search of static dictionary during backward reference search.
This commit is contained in:
parent
cddab4adef
commit
e7650080a8
@ -589,7 +589,9 @@ Abstract
|
|||||||
in the sequence of distances) is not pushed to the ring-buffer of
|
in the sequence of distances) is not pushed to the ring-buffer of
|
||||||
last distances, in other words, the expression "(second, third,
|
last distances, in other words, the expression "(second, third,
|
||||||
fourth) last distance" means the (second, third, fourth) last
|
fourth) last distance" means the (second, third, fourth) last
|
||||||
distance that was not represented by a 0 distance code.
|
distance that was not represented by a 0 distance code. Similarly,
|
||||||
|
distances that represent static dictionary words (see Section 8.) are
|
||||||
|
not pushed to the ringbuffer of last distances.
|
||||||
|
|
||||||
The next NDIRECT distance codes, from 16 to 15 + NDIRECT, represent
|
The next NDIRECT distance codes, from 16 to 15 + NDIRECT, represent
|
||||||
distances from 1 to NDIRECT. Neither the distance short codes, nor
|
distances from 1 to NDIRECT. Neither the distance short codes, nor
|
||||||
|
25
dec/decode.c
25
dec/decode.c
@ -247,12 +247,23 @@ static int ReadHuffmanCode(int alphabet_size,
|
|||||||
code_lengths[symbols[0]] = 1;
|
code_lengths[symbols[0]] = 1;
|
||||||
switch (num_symbols) {
|
switch (num_symbols) {
|
||||||
case 1:
|
case 1:
|
||||||
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
|
ok = ((symbols[0] != symbols[1]) &&
|
||||||
|
(symbols[0] != symbols[2]) &&
|
||||||
|
(symbols[1] != symbols[2]));
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
|
ok = (symbols[0] != symbols[1]);
|
||||||
code_lengths[symbols[1]] = 1;
|
code_lengths[symbols[1]] = 1;
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
|
ok = ((symbols[0] != symbols[1]) &&
|
||||||
|
(symbols[0] != symbols[2]) &&
|
||||||
|
(symbols[0] != symbols[3]) &&
|
||||||
|
(symbols[1] != symbols[2]) &&
|
||||||
|
(symbols[1] != symbols[3]) &&
|
||||||
|
(symbols[2] != symbols[3]));
|
||||||
if (BrotliReadBits(br, 1)) {
|
if (BrotliReadBits(br, 1)) {
|
||||||
code_lengths[symbols[2]] = 3;
|
code_lengths[symbols[2]] = 3;
|
||||||
code_lengths[symbols[3]] = 3;
|
code_lengths[symbols[3]] = 3;
|
||||||
@ -266,6 +277,7 @@ static int ReadHuffmanCode(int alphabet_size,
|
|||||||
int i;
|
int i;
|
||||||
uint8_t code_length_code_lengths[CODE_LENGTH_CODES] = { 0 };
|
uint8_t code_length_code_lengths[CODE_LENGTH_CODES] = { 0 };
|
||||||
int space = 32;
|
int space = 32;
|
||||||
|
int num_codes = 0;
|
||||||
/* Static Huffman code for the code length code lengths */
|
/* Static Huffman code for the code length code lengths */
|
||||||
static const HuffmanCode huff[16] = {
|
static const HuffmanCode huff[16] = {
|
||||||
{2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 1},
|
{2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 1},
|
||||||
@ -283,9 +295,11 @@ static int ReadHuffmanCode(int alphabet_size,
|
|||||||
BROTLI_LOG_ARRAY_INDEX(code_length_code_lengths, code_len_idx);
|
BROTLI_LOG_ARRAY_INDEX(code_length_code_lengths, code_len_idx);
|
||||||
if (v != 0) {
|
if (v != 0) {
|
||||||
space -= (32 >> v);
|
space -= (32 >> v);
|
||||||
|
++num_codes;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ok = ReadHuffmanCodeLengths(code_length_code_lengths,
|
ok = (num_codes == 1 || space == 0) &&
|
||||||
|
ReadHuffmanCodeLengths(code_length_code_lengths,
|
||||||
alphabet_size, code_lengths, br);
|
alphabet_size, code_lengths, br);
|
||||||
}
|
}
|
||||||
if (ok) {
|
if (ok) {
|
||||||
@ -961,10 +975,6 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
|
|||||||
ok = 0;
|
ok = 0;
|
||||||
goto End;
|
goto End;
|
||||||
}
|
}
|
||||||
if (distance_code > 0) {
|
|
||||||
dist_rb[dist_rb_idx & 3] = distance;
|
|
||||||
++dist_rb_idx;
|
|
||||||
}
|
|
||||||
BROTLI_LOG_UINT(distance);
|
BROTLI_LOG_UINT(distance);
|
||||||
|
|
||||||
if (pos < max_backward_distance &&
|
if (pos < max_backward_distance &&
|
||||||
@ -1016,6 +1026,11 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
|
|||||||
goto End;
|
goto End;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
if (distance_code > 0) {
|
||||||
|
dist_rb[dist_rb_idx & 3] = distance;
|
||||||
|
++dist_rb_idx;
|
||||||
|
}
|
||||||
|
|
||||||
if (copy_length > meta_block_remaining_len) {
|
if (copy_length > meta_block_remaining_len) {
|
||||||
printf("Invalid backward reference. pos: %d distance: %d "
|
printf("Invalid backward reference. pos: %d distance: %d "
|
||||||
"len: %d bytes left: %d\n", pos, distance, copy_length,
|
"len: %d bytes left: %d\n", pos, distance, copy_length,
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
|
|
||||||
namespace brotli {
|
namespace brotli {
|
||||||
|
|
||||||
|
template<typename Hasher>
|
||||||
void CreateBackwardReferences(size_t num_bytes,
|
void CreateBackwardReferences(size_t num_bytes,
|
||||||
size_t position,
|
size_t position,
|
||||||
const uint8_t* ringbuffer,
|
const uint8_t* ringbuffer,
|
||||||
@ -38,6 +39,9 @@ void CreateBackwardReferences(size_t num_bytes,
|
|||||||
const int i_diff = position - i;
|
const int i_diff = position - i;
|
||||||
const size_t i_end = i + num_bytes;
|
const size_t i_end = i + num_bytes;
|
||||||
|
|
||||||
|
const int random_heuristics_window_size = 512;
|
||||||
|
int apply_random_heuristics = i + random_heuristics_window_size;
|
||||||
|
|
||||||
double average_cost = 0.0;
|
double average_cost = 0.0;
|
||||||
for (int k = position; k < position + num_bytes; ++k) {
|
for (int k = position; k < position + num_bytes; ++k) {
|
||||||
average_cost += literal_cost[k & ringbuffer_mask];
|
average_cost += literal_cost[k & ringbuffer_mask];
|
||||||
@ -65,7 +69,8 @@ void CreateBackwardReferences(size_t num_bytes,
|
|||||||
bool match_found = hasher->FindLongestMatch(
|
bool match_found = hasher->FindLongestMatch(
|
||||||
ringbuffer, literal_cost, ringbuffer_mask,
|
ringbuffer, literal_cost, ringbuffer_mask,
|
||||||
i + i_diff, i_end - i, max_distance,
|
i + i_diff, i_end - i, max_distance,
|
||||||
&best_len, &best_len_code, &best_dist, &best_score, &in_dictionary);
|
&best_len, &best_len_code, &best_dist, &best_score,
|
||||||
|
&in_dictionary);
|
||||||
bool best_in_dictionary = in_dictionary;
|
bool best_in_dictionary = in_dictionary;
|
||||||
if (match_found) {
|
if (match_found) {
|
||||||
if (match_found_M1 && best_score_M1 > best_score) {
|
if (match_found_M1 && best_score_M1 > best_score) {
|
||||||
@ -138,16 +143,20 @@ void CreateBackwardReferences(size_t num_bytes,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
apply_random_heuristics =
|
||||||
|
i + 2 * best_len + random_heuristics_window_size;
|
||||||
Command cmd;
|
Command cmd;
|
||||||
cmd.insert_length_ = insert_length;
|
cmd.insert_length_ = insert_length;
|
||||||
cmd.copy_length_ = best_len;
|
cmd.copy_length_ = best_len;
|
||||||
cmd.copy_length_code_ = best_len_code;
|
cmd.copy_length_code_ = best_len_code;
|
||||||
cmd.copy_distance_ = best_dist;
|
cmd.copy_distance_ = best_dist;
|
||||||
commands->push_back(cmd);
|
commands->push_back(cmd);
|
||||||
hasher->set_last_distance(best_dist);
|
|
||||||
|
|
||||||
insert_length = 0;
|
insert_length = 0;
|
||||||
++i;
|
++i;
|
||||||
|
if (best_dist <= std::min(i + i_diff, max_backward_limit)) {
|
||||||
|
hasher->set_last_distance(best_dist);
|
||||||
|
}
|
||||||
|
|
||||||
// Copy all copied literals to the hasher, except the last one.
|
// Copy all copied literals to the hasher, except the last one.
|
||||||
// We cannot store the last one yet, otherwise we couldn't find
|
// We cannot store the last one yet, otherwise we couldn't find
|
||||||
// the possible M1 match.
|
// the possible M1 match.
|
||||||
@ -158,7 +167,8 @@ void CreateBackwardReferences(size_t num_bytes,
|
|||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
// Prepare M1 match.
|
// Prepare M1 match.
|
||||||
if (best_len >= 4 && i + 20 < i_end && !best_in_dictionary) {
|
if (hasher->HasStaticDictionary() &&
|
||||||
|
best_len >= 4 && i + 20 < i_end && !best_in_dictionary) {
|
||||||
max_distance = std::min(i + i_diff, max_backward_limit);
|
max_distance = std::min(i + i_diff, max_backward_limit);
|
||||||
match_found_M1 = hasher->FindLongestMatch(
|
match_found_M1 = hasher->FindLongestMatch(
|
||||||
ringbuffer, literal_cost, ringbuffer_mask,
|
ringbuffer, literal_cost, ringbuffer_mask,
|
||||||
@ -185,6 +195,32 @@ void CreateBackwardReferences(size_t num_bytes,
|
|||||||
++insert_length;
|
++insert_length;
|
||||||
hasher->Store(ringbuffer + i, i + i_diff);
|
hasher->Store(ringbuffer + i, i + i_diff);
|
||||||
++i;
|
++i;
|
||||||
|
// If we have not seen matches for a long time, we can skip some
|
||||||
|
// match lookups. Unsuccessful match lookups are very very expensive
|
||||||
|
// and this kind of a heuristic speeds up compression quite
|
||||||
|
// a lot.
|
||||||
|
if (i > apply_random_heuristics) {
|
||||||
|
// Going through uncompressible data, jump.
|
||||||
|
if (i > apply_random_heuristics + 4 * random_heuristics_window_size) {
|
||||||
|
// It is quite a long time since we saw a copy, so we assume
|
||||||
|
// that this data is not compressible, and store hashes less
|
||||||
|
// often. Hashes of non compressible data are less likely to
|
||||||
|
// turn out to be useful in the future, too, so we store less of
|
||||||
|
// them to not to flood out the hash table of good compressible
|
||||||
|
// data.
|
||||||
|
int i_jump = std::min(i + 16, i_end - 4);
|
||||||
|
for (; i < i_jump; i += 4) {
|
||||||
|
hasher->Store(ringbuffer + i, i + i_diff);
|
||||||
|
insert_length += 4;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int i_jump = std::min(i + 8, i_end - 2);
|
||||||
|
for (; i < i_jump; i += 2) {
|
||||||
|
hasher->Store(ringbuffer + i, i + i_diff);
|
||||||
|
insert_length += 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
insert_length += (i_end - i);
|
insert_length += (i_end - i);
|
||||||
@ -198,4 +234,34 @@ void CreateBackwardReferences(size_t num_bytes,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CreateBackwardReferences(size_t num_bytes,
|
||||||
|
size_t position,
|
||||||
|
const uint8_t* ringbuffer,
|
||||||
|
const float* literal_cost,
|
||||||
|
size_t ringbuffer_mask,
|
||||||
|
const size_t max_backward_limit,
|
||||||
|
Hashers* hashers,
|
||||||
|
Hashers::Type hash_type,
|
||||||
|
std::vector<Command>* commands) {
|
||||||
|
switch (hash_type) {
|
||||||
|
case Hashers::HASH_15_8_4:
|
||||||
|
CreateBackwardReferences(
|
||||||
|
num_bytes, position, ringbuffer, literal_cost,
|
||||||
|
ringbuffer_mask, max_backward_limit,
|
||||||
|
hashers->hash_15_8_4.get(),
|
||||||
|
commands);
|
||||||
|
break;
|
||||||
|
case Hashers::HASH_15_8_2:
|
||||||
|
CreateBackwardReferences(
|
||||||
|
num_bytes, position, ringbuffer, literal_cost,
|
||||||
|
ringbuffer_mask, max_backward_limit,
|
||||||
|
hashers->hash_15_8_2.get(),
|
||||||
|
commands);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
} // namespace brotli
|
} // namespace brotli
|
||||||
|
@ -31,7 +31,8 @@ void CreateBackwardReferences(size_t num_bytes,
|
|||||||
const float* literal_cost,
|
const float* literal_cost,
|
||||||
size_t ringbuffer_mask,
|
size_t ringbuffer_mask,
|
||||||
const size_t max_backward_limit,
|
const size_t max_backward_limit,
|
||||||
Hasher* hasher,
|
Hashers* hashers,
|
||||||
|
Hashers::Type hash_type,
|
||||||
std::vector<Command>* commands);
|
std::vector<Command>* commands);
|
||||||
|
|
||||||
} // namespace brotli
|
} // namespace brotli
|
||||||
|
@ -46,6 +46,7 @@ static inline int HuffmanBitCost(const uint8_t* depth, int length) {
|
|||||||
int max_depth = 1;
|
int max_depth = 1;
|
||||||
int histogram[kCodeLengthCodes] = { 0 };
|
int histogram[kCodeLengthCodes] = { 0 };
|
||||||
int tail_start = 0;
|
int tail_start = 0;
|
||||||
|
int prev_value = 8;
|
||||||
// compute histogram of compacted huffman tree
|
// compute histogram of compacted huffman tree
|
||||||
for (int i = 0; i < length;) {
|
for (int i = 0; i < length;) {
|
||||||
const int value = depth[i];
|
const int value = depth[i];
|
||||||
@ -57,29 +58,32 @@ static inline int HuffmanBitCost(const uint8_t* depth, int length) {
|
|||||||
++reps;
|
++reps;
|
||||||
}
|
}
|
||||||
i += reps;
|
i += reps;
|
||||||
|
if (i == length && value == 0)
|
||||||
|
break;
|
||||||
if (value == 0) {
|
if (value == 0) {
|
||||||
if (reps < 3) {
|
if (reps < 3) {
|
||||||
histogram[0] += reps;
|
histogram[0] += reps;
|
||||||
} else {
|
} else {
|
||||||
reps -= 3;
|
reps -= 2;
|
||||||
while (reps >= 0) {
|
while (reps > 0) {
|
||||||
++histogram[17];
|
++histogram[17];
|
||||||
reps >>= 3;
|
reps >>= 3;
|
||||||
--reps;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
tail_start = i;
|
tail_start = i;
|
||||||
|
if (value != prev_value) {
|
||||||
++histogram[value];
|
++histogram[value];
|
||||||
--reps;
|
--reps;
|
||||||
|
}
|
||||||
|
prev_value = value;
|
||||||
if (reps < 3) {
|
if (reps < 3) {
|
||||||
histogram[value] += reps;
|
histogram[value] += reps;
|
||||||
} else {
|
} else {
|
||||||
reps -= 3;
|
reps -= 2;
|
||||||
while (reps >= 0) {
|
while (reps > 0) {
|
||||||
++histogram[16];
|
++histogram[16];
|
||||||
reps >>= 2;
|
reps >>= 2;
|
||||||
--reps;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -93,7 +97,7 @@ static inline int HuffmanBitCost(const uint8_t* depth, int length) {
|
|||||||
cost[17] += 3;
|
cost[17] += 3;
|
||||||
|
|
||||||
int tree_size = 0;
|
int tree_size = 0;
|
||||||
int bits = 6 + 2 * max_depth; // huffman tree of huffman tree cost
|
int bits = 18 + 2 * max_depth; // huffman tree of huffman tree cost
|
||||||
for (int i = 0; i < kCodeLengthCodes; ++i) {
|
for (int i = 0; i < kCodeLengthCodes; ++i) {
|
||||||
bits += histogram[i] * cost[i]; // huffman tree bit cost
|
bits += histogram[i] * cost[i]; // huffman tree bit cost
|
||||||
tree_size += histogram[i];
|
tree_size += histogram[i];
|
||||||
|
@ -348,6 +348,7 @@ void SplitBlock(const std::vector<Command>& cmds,
|
|||||||
&insert_and_copy_codes,
|
&insert_and_copy_codes,
|
||||||
&distance_prefixes);
|
&distance_prefixes);
|
||||||
|
|
||||||
|
|
||||||
SplitByteVector<HistogramLiteral>(
|
SplitByteVector<HistogramLiteral>(
|
||||||
literals,
|
literals,
|
||||||
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
|
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
|
||||||
|
@ -24,9 +24,13 @@ namespace brotli {
|
|||||||
// Command holds a sequence of literals and a backward reference copy.
|
// Command holds a sequence of literals and a backward reference copy.
|
||||||
class Command {
|
class Command {
|
||||||
public:
|
public:
|
||||||
|
// distance_code_ is initialized to 17 because it refers to the distance
|
||||||
|
// code of a backward distance of 1, this way the last insert-only command
|
||||||
|
// won't use the last-distance short code, and accordingly distance_prefix_ is
|
||||||
|
// set to 16
|
||||||
Command() : insert_length_(0), copy_length_(0), copy_length_code_(0),
|
Command() : insert_length_(0), copy_length_(0), copy_length_code_(0),
|
||||||
copy_distance_(0), distance_code_(0),
|
copy_distance_(0), distance_code_(17),
|
||||||
distance_prefix_(0), command_prefix_(0),
|
distance_prefix_(16), command_prefix_(0),
|
||||||
distance_extra_bits_(0), distance_extra_bits_value_(0) {}
|
distance_extra_bits_(0), distance_extra_bits_value_(0) {}
|
||||||
|
|
||||||
uint32_t insert_length_;
|
uint32_t insert_length_;
|
||||||
|
232
enc/encode.cc
232
enc/encode.cc
@ -168,15 +168,6 @@ void EncodeMetaBlockLength(size_t meta_block_size,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int kSize>
|
|
||||||
void EntropyEncode(int val, const EntropyCode<kSize>& code,
|
|
||||||
int* storage_ix, uint8_t* storage) {
|
|
||||||
if (code.count_ <= 1) {
|
|
||||||
return;
|
|
||||||
};
|
|
||||||
WriteBits(code.depth_[val], code.bits_[val], storage_ix, storage);
|
|
||||||
}
|
|
||||||
|
|
||||||
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
|
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
|
||||||
const uint8_t* code_length_bitdepth,
|
const uint8_t* code_length_bitdepth,
|
||||||
int* storage_ix, uint8_t* storage) {
|
int* storage_ix, uint8_t* storage) {
|
||||||
@ -225,7 +216,9 @@ void StoreHuffmanTreeToBitMask(
|
|||||||
for (int i = 0; i < huffman_tree_size; ++i) {
|
for (int i = 0; i < huffman_tree_size; ++i) {
|
||||||
const int ix = huffman_tree[i];
|
const int ix = huffman_tree[i];
|
||||||
const int extra_bits = huffman_tree_extra_bits[i];
|
const int extra_bits = huffman_tree_extra_bits[i];
|
||||||
EntropyEncode(ix, entropy, storage_ix, storage);
|
if (entropy.count_ > 1) {
|
||||||
|
WriteBits(entropy.depth_[ix], entropy.bits_[ix], storage_ix, storage);
|
||||||
|
}
|
||||||
switch (ix) {
|
switch (ix) {
|
||||||
case 16:
|
case 16:
|
||||||
WriteBits(2, extra_bits, storage_ix, storage);
|
WriteBits(2, extra_bits, storage_ix, storage);
|
||||||
@ -240,8 +233,7 @@ void StoreHuffmanTreeToBitMask(
|
|||||||
template<int kSize>
|
template<int kSize>
|
||||||
void StoreHuffmanCodeSimple(
|
void StoreHuffmanCodeSimple(
|
||||||
const EntropyCode<kSize>& code, int alphabet_size,
|
const EntropyCode<kSize>& code, int alphabet_size,
|
||||||
int max_bits,
|
int max_bits, int* storage_ix, uint8_t* storage) {
|
||||||
int* storage_ix, uint8_t* storage) {
|
|
||||||
const uint8_t *depth = &code.depth_[0];
|
const uint8_t *depth = &code.depth_[0];
|
||||||
int symbols[4];
|
int symbols[4];
|
||||||
// Quadratic sort.
|
// Quadratic sort.
|
||||||
@ -304,37 +296,66 @@ void StoreHuffmanCodeComplex(
|
|||||||
storage_ix, storage);
|
storage_ix, storage);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<int kSize>
|
template<int kSize>
|
||||||
void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
|
void BuildAndStoreEntropyCode(const Histogram<kSize>& histogram,
|
||||||
|
const int tree_limit,
|
||||||
|
const int alphabet_size,
|
||||||
|
EntropyCode<kSize>* code,
|
||||||
int* storage_ix, uint8_t* storage) {
|
int* storage_ix, uint8_t* storage) {
|
||||||
|
memset(code->depth_, 0, sizeof(code->depth_));
|
||||||
|
memset(code->bits_, 0, sizeof(code->bits_));
|
||||||
|
memset(code->symbols_, 0, sizeof(code->symbols_));
|
||||||
|
code->count_ = 0;
|
||||||
|
|
||||||
int max_bits_counter = alphabet_size - 1;
|
int max_bits_counter = alphabet_size - 1;
|
||||||
int max_bits = 0;
|
int max_bits = 0;
|
||||||
while (max_bits_counter) {
|
while (max_bits_counter) {
|
||||||
max_bits_counter >>= 1;
|
max_bits_counter >>= 1;
|
||||||
++max_bits;
|
++max_bits;
|
||||||
}
|
}
|
||||||
if (code.count_ == 0) {
|
|
||||||
// Emit a minimal tree for empty cases.
|
for (size_t i = 0; i < alphabet_size; i++) {
|
||||||
// bits: small tree marker: 1, count-1: 0, max_bits-sized encoding for 0
|
if (histogram.data_[i] > 0) {
|
||||||
WriteBits(4 + max_bits, 0x1, storage_ix, storage);
|
if (code->count_ < 4) code->symbols_[code->count_] = i;
|
||||||
} else if (code.count_ <= 4) {
|
++code->count_;
|
||||||
StoreHuffmanCodeSimple(
|
}
|
||||||
code, alphabet_size, max_bits,
|
}
|
||||||
storage_ix, storage);
|
|
||||||
|
if (code->count_ <= 1) {
|
||||||
|
WriteBits(2, 1, storage_ix, storage);
|
||||||
|
WriteBits(2, 0, storage_ix, storage);
|
||||||
|
WriteBits(max_bits, code->symbols_[0], storage_ix, storage);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (alphabet_size >= 50 && code->count_ >= 16) {
|
||||||
|
std::vector<int> counts(alphabet_size);
|
||||||
|
memcpy(&counts[0], histogram.data_, sizeof(counts[0]) * alphabet_size);
|
||||||
|
OptimizeHuffmanCountsForRle(alphabet_size, &counts[0]);
|
||||||
|
CreateHuffmanTree(&counts[0], alphabet_size, tree_limit, code->depth_);
|
||||||
} else {
|
} else {
|
||||||
StoreHuffmanCodeComplex(
|
CreateHuffmanTree(histogram.data_, alphabet_size, tree_limit, code->depth_);
|
||||||
code, alphabet_size,
|
}
|
||||||
storage_ix, storage);
|
ConvertBitDepthsToSymbols(code->depth_, alphabet_size, code->bits_);
|
||||||
|
|
||||||
|
if (code->count_ <= 4) {
|
||||||
|
StoreHuffmanCodeSimple(*code, alphabet_size, max_bits, storage_ix, storage);
|
||||||
|
} else {
|
||||||
|
StoreHuffmanCodeComplex(*code, alphabet_size, storage_ix, storage);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int kSize>
|
template<int kSize>
|
||||||
void StoreHuffmanCodes(const std::vector<EntropyCode<kSize> >& codes,
|
void BuildAndStoreEntropyCodes(
|
||||||
|
const std::vector<Histogram<kSize> >& histograms,
|
||||||
int alphabet_size,
|
int alphabet_size,
|
||||||
|
std::vector<EntropyCode<kSize> >* entropy_codes,
|
||||||
int* storage_ix, uint8_t* storage) {
|
int* storage_ix, uint8_t* storage) {
|
||||||
for (int i = 0; i < codes.size(); ++i) {
|
entropy_codes->resize(histograms.size());
|
||||||
StoreHuffmanCode(codes[i], alphabet_size, storage_ix, storage);
|
for (int i = 0; i < histograms.size(); ++i) {
|
||||||
|
BuildAndStoreEntropyCode(histograms[i], 15, alphabet_size,
|
||||||
|
&(*entropy_codes)[i],
|
||||||
|
storage_ix, storage);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -342,7 +363,7 @@ void EncodeCommand(const Command& cmd,
|
|||||||
const EntropyCodeCommand& entropy,
|
const EntropyCodeCommand& entropy,
|
||||||
int* storage_ix, uint8_t* storage) {
|
int* storage_ix, uint8_t* storage) {
|
||||||
int code = cmd.command_prefix_;
|
int code = cmd.command_prefix_;
|
||||||
EntropyEncode(code, entropy, storage_ix, storage);
|
WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
|
||||||
if (code >= 128) {
|
if (code >= 128) {
|
||||||
code -= 128;
|
code -= 128;
|
||||||
}
|
}
|
||||||
@ -364,13 +385,15 @@ void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
|
|||||||
int code = cmd.distance_prefix_;
|
int code = cmd.distance_prefix_;
|
||||||
int extra_bits = cmd.distance_extra_bits_;
|
int extra_bits = cmd.distance_extra_bits_;
|
||||||
uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
|
uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
|
||||||
EntropyEncode(code, entropy, storage_ix, storage);
|
WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
|
||||||
if (extra_bits > 0) {
|
if (extra_bits > 0) {
|
||||||
WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
|
WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ComputeDistanceShortCodes(std::vector<Command>* cmds,
|
void ComputeDistanceShortCodes(std::vector<Command>* cmds,
|
||||||
|
size_t pos,
|
||||||
|
const size_t max_backward,
|
||||||
int* dist_ringbuffer,
|
int* dist_ringbuffer,
|
||||||
size_t* ringbuffer_idx) {
|
size_t* ringbuffer_idx) {
|
||||||
static const int kIndexOffset[16] = {
|
static const int kIndexOffset[16] = {
|
||||||
@ -380,10 +403,13 @@ void ComputeDistanceShortCodes(std::vector<Command>* cmds,
|
|||||||
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
|
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
|
||||||
};
|
};
|
||||||
for (int i = 0; i < cmds->size(); ++i) {
|
for (int i = 0; i < cmds->size(); ++i) {
|
||||||
|
pos += (*cmds)[i].insert_length_;
|
||||||
|
size_t max_distance = std::min(pos, max_backward);
|
||||||
int cur_dist = (*cmds)[i].copy_distance_;
|
int cur_dist = (*cmds)[i].copy_distance_;
|
||||||
if (cur_dist == 0) break;
|
|
||||||
int dist_code = cur_dist + 16;
|
int dist_code = cur_dist + 16;
|
||||||
int limits[16] = { 0, 4, 10, 11,
|
if (cur_dist <= max_distance) {
|
||||||
|
if (cur_dist == 0) break;
|
||||||
|
int limits[16] = { 0, 0, 0, 0,
|
||||||
6, 6, 11, 11,
|
6, 6, 11, 11,
|
||||||
11, 11, 11, 11,
|
11, 11, 11, 11,
|
||||||
12, 12, 12, 12 };
|
12, 12, 12, 12 };
|
||||||
@ -405,6 +431,13 @@ void ComputeDistanceShortCodes(std::vector<Command>* cmds,
|
|||||||
dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
|
dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
|
||||||
++(*ringbuffer_idx);
|
++(*ringbuffer_idx);
|
||||||
}
|
}
|
||||||
|
pos += (*cmds)[i].copy_length_;
|
||||||
|
} else {
|
||||||
|
int word_idx = cur_dist - max_distance - 1;
|
||||||
|
const std::string word =
|
||||||
|
GetTransformedDictionaryWord((*cmds)[i].copy_length_code_, word_idx);
|
||||||
|
pos += word.size();
|
||||||
|
}
|
||||||
(*cmds)[i].distance_code_ = dist_code;
|
(*cmds)[i].distance_code_ = dist_code;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -558,18 +591,20 @@ void EncodeContextMap(const std::vector<int>& context_map,
|
|||||||
for (int i = 0; i < rle_symbols.size(); ++i) {
|
for (int i = 0; i < rle_symbols.size(); ++i) {
|
||||||
symbol_histogram.Add(rle_symbols[i]);
|
symbol_histogram.Add(rle_symbols[i]);
|
||||||
}
|
}
|
||||||
EntropyCodeContextMap symbol_code;
|
|
||||||
BuildEntropyCode(symbol_histogram, 15, num_clusters + max_run_length_prefix,
|
|
||||||
&symbol_code);
|
|
||||||
bool use_rle = max_run_length_prefix > 0;
|
bool use_rle = max_run_length_prefix > 0;
|
||||||
WriteBits(1, use_rle, storage_ix, storage);
|
WriteBits(1, use_rle, storage_ix, storage);
|
||||||
if (use_rle) {
|
if (use_rle) {
|
||||||
WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
|
WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
|
||||||
}
|
}
|
||||||
StoreHuffmanCode(symbol_code, num_clusters + max_run_length_prefix,
|
EntropyCodeContextMap symbol_code;
|
||||||
|
BuildAndStoreEntropyCode(symbol_histogram, 15,
|
||||||
|
num_clusters + max_run_length_prefix,
|
||||||
|
&symbol_code,
|
||||||
storage_ix, storage);
|
storage_ix, storage);
|
||||||
for (int i = 0; i < rle_symbols.size(); ++i) {
|
for (int i = 0; i < rle_symbols.size(); ++i) {
|
||||||
EntropyEncode(rle_symbols[i], symbol_code, storage_ix, storage);
|
WriteBits(symbol_code.depth_[rle_symbols[i]],
|
||||||
|
symbol_code.bits_[rle_symbols[i]],
|
||||||
|
storage_ix, storage);
|
||||||
if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
|
if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
|
||||||
WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
|
WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
|
||||||
}
|
}
|
||||||
@ -577,16 +612,6 @@ void EncodeContextMap(const std::vector<int>& context_map,
|
|||||||
WriteBits(1, 1, storage_ix, storage); // use move-to-front
|
WriteBits(1, 1, storage_ix, storage); // use move-to-front
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int kSize>
|
|
||||||
void BuildEntropyCodes(const std::vector<Histogram<kSize> >& histograms,
|
|
||||||
int alphabet_size,
|
|
||||||
std::vector<EntropyCode<kSize> >* entropy_codes) {
|
|
||||||
entropy_codes->resize(histograms.size());
|
|
||||||
for (int i = 0; i < histograms.size(); ++i) {
|
|
||||||
BuildEntropyCode(histograms[i], 15, alphabet_size, &(*entropy_codes)[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
struct BlockSplitCode {
|
struct BlockSplitCode {
|
||||||
EntropyCodeBlockType block_type_code;
|
EntropyCodeBlockType block_type_code;
|
||||||
EntropyCodeBlockLength block_len_code;
|
EntropyCodeBlockLength block_len_code;
|
||||||
@ -598,8 +623,8 @@ void EncodeBlockLength(const EntropyCodeBlockLength& entropy,
|
|||||||
int len_code = BlockLengthPrefix(length);
|
int len_code = BlockLengthPrefix(length);
|
||||||
int extra_bits = BlockLengthExtraBits(len_code);
|
int extra_bits = BlockLengthExtraBits(len_code);
|
||||||
int extra_bits_value = length - BlockLengthOffset(len_code);
|
int extra_bits_value = length - BlockLengthOffset(len_code);
|
||||||
EntropyEncode(len_code, entropy, storage_ix, storage);
|
WriteBits(entropy.depth_[len_code], entropy.bits_[len_code],
|
||||||
|
storage_ix, storage);
|
||||||
if (extra_bits > 0) {
|
if (extra_bits > 0) {
|
||||||
WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
|
WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
|
||||||
}
|
}
|
||||||
@ -632,25 +657,24 @@ void BuildAndEncodeBlockSplitCode(const BlockSplit& split,
|
|||||||
BlockSplitCode* code,
|
BlockSplitCode* code,
|
||||||
int* storage_ix, uint8_t* storage) {
|
int* storage_ix, uint8_t* storage) {
|
||||||
EncodeVarLenUint8(split.num_types_ - 1, storage_ix, storage);
|
EncodeVarLenUint8(split.num_types_ - 1, storage_ix, storage);
|
||||||
|
|
||||||
if (split.num_types_ == 1) {
|
if (split.num_types_ == 1) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
HistogramBlockType type_histo;
|
HistogramBlockType type_histo;
|
||||||
for (int i = 0; i < split.type_codes_.size(); ++i) {
|
for (int i = 1; i < split.type_codes_.size(); ++i) {
|
||||||
type_histo.Add(split.type_codes_[i]);
|
type_histo.Add(split.type_codes_[i]);
|
||||||
}
|
}
|
||||||
BuildEntropyCode(type_histo, 15, split.num_types_ + 2,
|
|
||||||
&code->block_type_code);
|
|
||||||
HistogramBlockLength length_histo;
|
HistogramBlockLength length_histo;
|
||||||
for (int i = 0; i < split.lengths_.size(); ++i) {
|
for (int i = 0; i < split.lengths_.size(); ++i) {
|
||||||
length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
|
length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
|
||||||
}
|
}
|
||||||
BuildEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
|
BuildAndStoreEntropyCode(type_histo, 15, split.num_types_ + 2,
|
||||||
&code->block_len_code);
|
&code->block_type_code,
|
||||||
StoreHuffmanCode(code->block_type_code, split.num_types_ + 2,
|
|
||||||
storage_ix, storage);
|
storage_ix, storage);
|
||||||
StoreHuffmanCode(code->block_len_code, kNumBlockLenPrefixes,
|
BuildAndStoreEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
|
||||||
|
&code->block_len_code,
|
||||||
storage_ix, storage);
|
storage_ix, storage);
|
||||||
EncodeBlockLength(code->block_len_code, split.lengths_[0],
|
EncodeBlockLength(code->block_len_code, split.lengths_[0],
|
||||||
storage_ix, storage);
|
storage_ix, storage);
|
||||||
@ -664,7 +688,9 @@ void MoveAndEncode(const BlockSplitCode& code,
|
|||||||
it->type_ = it->split_.types_[it->idx_];
|
it->type_ = it->split_.types_[it->idx_];
|
||||||
it->length_ = it->split_.lengths_[it->idx_];
|
it->length_ = it->split_.lengths_[it->idx_];
|
||||||
int type_code = it->split_.type_codes_[it->idx_];
|
int type_code = it->split_.type_codes_[it->idx_];
|
||||||
EntropyEncode(type_code, code.block_type_code, storage_ix, storage);
|
WriteBits(code.block_type_code.depth_[type_code],
|
||||||
|
code.block_type_code.bits_[type_code],
|
||||||
|
storage_ix, storage);
|
||||||
EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
|
EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
|
||||||
}
|
}
|
||||||
--it->length_;
|
--it->length_;
|
||||||
@ -773,10 +799,8 @@ void StoreMetaBlock(const MetaBlock& mb,
|
|||||||
int* storage_ix, uint8_t* storage) {
|
int* storage_ix, uint8_t* storage) {
|
||||||
size_t length = MetaBlockLength(mb.cmds);
|
size_t length = MetaBlockLength(mb.cmds);
|
||||||
const size_t end_pos = *pos + length;
|
const size_t end_pos = *pos + length;
|
||||||
EncodeMetaBlockLength(length,
|
EncodeMetaBlockLength(length, is_last, false, storage_ix, storage);
|
||||||
is_last,
|
|
||||||
false,
|
|
||||||
storage_ix, storage);
|
|
||||||
if (length == 0) {
|
if (length == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -792,26 +816,27 @@ void StoreMetaBlock(const MetaBlock& mb,
|
|||||||
WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
|
WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
|
||||||
WriteBits(4,
|
WriteBits(4,
|
||||||
mb.params.num_direct_distance_codes >>
|
mb.params.num_direct_distance_codes >>
|
||||||
mb.params.distance_postfix_bits, storage_ix, storage);
|
mb.params.distance_postfix_bits,
|
||||||
|
storage_ix, storage);
|
||||||
int num_distance_codes =
|
int num_distance_codes =
|
||||||
kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
|
kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
|
||||||
(48 << mb.params.distance_postfix_bits);
|
(48 << mb.params.distance_postfix_bits);
|
||||||
for (int i = 0; i < mb.literal_split.num_types_; ++i) {
|
for (int i = 0; i < mb.literal_split.num_types_; ++i) {
|
||||||
WriteBits(2, mb.literal_context_modes[i], storage_ix, storage);
|
WriteBits(2, mb.literal_context_modes[i], storage_ix, storage);
|
||||||
}
|
}
|
||||||
EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(), storage_ix, storage);
|
EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(),
|
||||||
EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(), storage_ix, storage);
|
storage_ix, storage);
|
||||||
|
EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(),
|
||||||
|
storage_ix, storage);
|
||||||
std::vector<EntropyCodeLiteral> literal_codes;
|
std::vector<EntropyCodeLiteral> literal_codes;
|
||||||
std::vector<EntropyCodeCommand> command_codes;
|
std::vector<EntropyCodeCommand> command_codes;
|
||||||
std::vector<EntropyCodeDistance> distance_codes;
|
std::vector<EntropyCodeDistance> distance_codes;
|
||||||
BuildEntropyCodes(mb.literal_histograms, 256, &literal_codes);
|
BuildAndStoreEntropyCodes(mb.literal_histograms, 256, &literal_codes,
|
||||||
BuildEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
|
storage_ix, storage);
|
||||||
&command_codes);
|
BuildAndStoreEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
|
||||||
BuildEntropyCodes(mb.distance_histograms, num_distance_codes,
|
&command_codes, storage_ix, storage);
|
||||||
&distance_codes);
|
BuildAndStoreEntropyCodes(mb.distance_histograms, num_distance_codes,
|
||||||
StoreHuffmanCodes(literal_codes, 256, storage_ix, storage);
|
&distance_codes, storage_ix, storage);
|
||||||
StoreHuffmanCodes(command_codes, kNumCommandPrefixes, storage_ix, storage);
|
|
||||||
StoreHuffmanCodes(distance_codes, num_distance_codes, storage_ix, storage);
|
|
||||||
BlockSplitIterator literal_it(mb.literal_split);
|
BlockSplitIterator literal_it(mb.literal_split);
|
||||||
BlockSplitIterator command_it(mb.command_split);
|
BlockSplitIterator command_it(mb.command_split);
|
||||||
BlockSplitIterator distance_it(mb.distance_split);
|
BlockSplitIterator distance_it(mb.distance_split);
|
||||||
@ -828,8 +853,10 @@ void StoreMetaBlock(const MetaBlock& mb,
|
|||||||
Context(prev_byte, prev_byte2,
|
Context(prev_byte, prev_byte2,
|
||||||
mb.literal_context_modes[literal_it.type_]));
|
mb.literal_context_modes[literal_it.type_]));
|
||||||
histogram_idx = mb.literal_context_map[context];
|
histogram_idx = mb.literal_context_map[context];
|
||||||
EntropyEncode(ringbuffer[*pos & mask],
|
int literal = ringbuffer[*pos & mask];
|
||||||
literal_codes[histogram_idx], storage_ix, storage);
|
WriteBits(literal_codes[histogram_idx].depth_[literal],
|
||||||
|
literal_codes[histogram_idx].bits_[literal],
|
||||||
|
storage_ix, storage);
|
||||||
++(*pos);
|
++(*pos);
|
||||||
}
|
}
|
||||||
if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
|
if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
|
||||||
@ -845,9 +872,10 @@ void StoreMetaBlock(const MetaBlock& mb,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
BrotliCompressor::BrotliCompressor()
|
BrotliCompressor::BrotliCompressor(BrotliParams params)
|
||||||
: window_bits_(kWindowBits),
|
: params_(params),
|
||||||
hasher_(new Hasher),
|
window_bits_(kWindowBits),
|
||||||
|
hashers_(new Hashers()),
|
||||||
dist_ringbuffer_idx_(0),
|
dist_ringbuffer_idx_(0),
|
||||||
input_pos_(0),
|
input_pos_(0),
|
||||||
ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
|
ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
|
||||||
@ -859,29 +887,42 @@ BrotliCompressor::BrotliCompressor()
|
|||||||
dist_ringbuffer_[2] = 11;
|
dist_ringbuffer_[2] = 11;
|
||||||
dist_ringbuffer_[3] = 4;
|
dist_ringbuffer_[3] = 4;
|
||||||
storage_[0] = 0;
|
storage_[0] = 0;
|
||||||
|
switch (params.mode) {
|
||||||
|
case BrotliParams::MODE_TEXT: hash_type_ = Hashers::HASH_15_8_4; break;
|
||||||
|
case BrotliParams::MODE_FONT: hash_type_ = Hashers::HASH_15_8_2; break;
|
||||||
|
default: break;
|
||||||
|
}
|
||||||
|
hashers_->Init(hash_type_);
|
||||||
|
if (params.mode == BrotliParams::MODE_TEXT) {
|
||||||
StoreDictionaryWordHashes();
|
StoreDictionaryWordHashes();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
BrotliCompressor::~BrotliCompressor() {
|
BrotliCompressor::~BrotliCompressor() {
|
||||||
delete hasher_;
|
|
||||||
delete[] storage_;
|
delete[] storage_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
StaticDictionary *BrotliCompressor::static_dictionary_ = NULL;
|
||||||
|
|
||||||
void BrotliCompressor::StoreDictionaryWordHashes() {
|
void BrotliCompressor::StoreDictionaryWordHashes() {
|
||||||
for (int t = kNumTransforms - 1; t >= 0; --t) {
|
const int num_transforms = kNumTransforms;
|
||||||
|
if (static_dictionary_ == NULL) {
|
||||||
|
static_dictionary_ = new StaticDictionary;
|
||||||
|
for (int t = num_transforms - 1; t >= 0; --t) {
|
||||||
for (int i = kMaxDictionaryWordLength; i >= 3; --i) {
|
for (int i = kMaxDictionaryWordLength; i >= 3; --i) {
|
||||||
const int num_words = 1 << kBrotliDictionarySizeBitsByLength[i];
|
const int num_words = 1 << kBrotliDictionarySizeBitsByLength[i];
|
||||||
for (int j = num_words - 1; j >= 0; --j) {
|
for (int j = num_words - 1; j >= 0; --j) {
|
||||||
int word_id = t * num_words + j;
|
int word_id = t * num_words + j;
|
||||||
std::string word = GetTransformedDictionaryWord(i, word_id);
|
std::string word = GetTransformedDictionaryWord(i, word_id);
|
||||||
if (word.size() >= 3) {
|
if (word.size() >= 3) {
|
||||||
hasher_->Store(reinterpret_cast<const uint8_t*>(&word[0]),
|
static_dictionary_->Insert(word, i, word_id);
|
||||||
(-1) * ((i << 20) + word_id + 1));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
hashers_->SetStaticDictionary(static_dictionary_);
|
||||||
|
}
|
||||||
|
|
||||||
void BrotliCompressor::WriteStreamHeader() {
|
void BrotliCompressor::WriteStreamHeader() {
|
||||||
// Encode window size.
|
// Encode window size.
|
||||||
@ -908,25 +949,30 @@ void BrotliCompressor::WriteMetaBlock(const size_t input_size,
|
|||||||
input_size, kMinUTF8Ratio);
|
input_size, kMinUTF8Ratio);
|
||||||
if (utf8_mode) {
|
if (utf8_mode) {
|
||||||
EstimateBitCostsForLiteralsUTF8(input_pos_, input_size,
|
EstimateBitCostsForLiteralsUTF8(input_pos_, input_size,
|
||||||
kRingBufferMask, ringbuffer_.start(),
|
kRingBufferMask, kRingBufferMask,
|
||||||
&literal_cost_[0]);
|
ringbuffer_.start(), &literal_cost_[0]);
|
||||||
} else {
|
} else {
|
||||||
EstimateBitCostsForLiterals(input_pos_, input_size,
|
EstimateBitCostsForLiterals(input_pos_, input_size,
|
||||||
kRingBufferMask, ringbuffer_.start(),
|
kRingBufferMask, kRingBufferMask,
|
||||||
&literal_cost_[0]);
|
ringbuffer_.start(), &literal_cost_[0]);
|
||||||
}
|
}
|
||||||
CreateBackwardReferences(input_size, input_pos_,
|
CreateBackwardReferences(
|
||||||
|
input_size, input_pos_,
|
||||||
ringbuffer_.start(),
|
ringbuffer_.start(),
|
||||||
&literal_cost_[0],
|
&literal_cost_[0],
|
||||||
kRingBufferMask, kMaxBackwardDistance,
|
kRingBufferMask, kMaxBackwardDistance,
|
||||||
hasher_,
|
hashers_.get(),
|
||||||
|
hash_type_,
|
||||||
&commands);
|
&commands);
|
||||||
ComputeDistanceShortCodes(&commands, dist_ringbuffer_,
|
ComputeDistanceShortCodes(&commands, input_pos_, kMaxBackwardDistance,
|
||||||
|
dist_ringbuffer_,
|
||||||
&dist_ringbuffer_idx_);
|
&dist_ringbuffer_idx_);
|
||||||
}
|
}
|
||||||
EncodingParams params;
|
EncodingParams params;
|
||||||
params.num_direct_distance_codes = 12;
|
params.num_direct_distance_codes =
|
||||||
params.distance_postfix_bits = 1;
|
params_.mode == BrotliParams::MODE_FONT ? 12 : 0;
|
||||||
|
params.distance_postfix_bits =
|
||||||
|
params_.mode == BrotliParams::MODE_FONT ? 1 : 0;
|
||||||
params.literal_context_mode = CONTEXT_SIGNED;
|
params.literal_context_mode = CONTEXT_SIGNED;
|
||||||
const int storage_ix0 = storage_ix_;
|
const int storage_ix0 = storage_ix_;
|
||||||
MetaBlock mb;
|
MetaBlock mb;
|
||||||
@ -935,6 +981,7 @@ void BrotliCompressor::WriteMetaBlock(const size_t input_size,
|
|||||||
StoreMetaBlock(mb, is_last, ringbuffer_.start(), kRingBufferMask,
|
StoreMetaBlock(mb, is_last, ringbuffer_.start(), kRingBufferMask,
|
||||||
&input_pos_, &storage_ix_, storage_);
|
&input_pos_, &storage_ix_, storage_);
|
||||||
size_t output_size = is_last ? ((storage_ix_ + 7) >> 3) : (storage_ix_ >> 3);
|
size_t output_size = is_last ? ((storage_ix_ + 7) >> 3) : (storage_ix_ >> 3);
|
||||||
|
output_size -= (storage_ix0 >> 3);
|
||||||
if (input_size + 4 < output_size) {
|
if (input_size + 4 < output_size) {
|
||||||
storage_ix_ = storage_ix0;
|
storage_ix_ = storage_ix0;
|
||||||
storage_[storage_ix_ >> 3] &= (1 << (storage_ix_ & 7)) - 1;
|
storage_[storage_ix_ >> 3] &= (1 << (storage_ix_ & 7)) - 1;
|
||||||
@ -968,7 +1015,8 @@ void BrotliCompressor::FinishStream(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int BrotliCompressBuffer(size_t input_size,
|
int BrotliCompressBuffer(BrotliParams params,
|
||||||
|
size_t input_size,
|
||||||
const uint8_t* input_buffer,
|
const uint8_t* input_buffer,
|
||||||
size_t* encoded_size,
|
size_t* encoded_size,
|
||||||
uint8_t* encoded_buffer) {
|
uint8_t* encoded_buffer) {
|
||||||
@ -978,7 +1026,7 @@ int BrotliCompressBuffer(size_t input_size,
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
BrotliCompressor compressor;
|
BrotliCompressor compressor(params);
|
||||||
compressor.WriteStreamHeader();
|
compressor.WriteStreamHeader();
|
||||||
|
|
||||||
const int max_block_size = 1 << kMetaBlockSizeBits;
|
const int max_block_size = 1 << kMetaBlockSizeBits;
|
||||||
|
21
enc/encode.h
21
enc/encode.h
@ -23,12 +23,23 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include "./hash.h"
|
#include "./hash.h"
|
||||||
#include "./ringbuffer.h"
|
#include "./ringbuffer.h"
|
||||||
|
#include "./static_dict.h"
|
||||||
|
|
||||||
namespace brotli {
|
namespace brotli {
|
||||||
|
|
||||||
|
struct BrotliParams {
|
||||||
|
enum Mode {
|
||||||
|
MODE_TEXT = 0,
|
||||||
|
MODE_FONT = 1,
|
||||||
|
};
|
||||||
|
Mode mode;
|
||||||
|
|
||||||
|
BrotliParams() : mode(MODE_TEXT) {}
|
||||||
|
};
|
||||||
|
|
||||||
class BrotliCompressor {
|
class BrotliCompressor {
|
||||||
public:
|
public:
|
||||||
BrotliCompressor();
|
explicit BrotliCompressor(BrotliParams params);
|
||||||
~BrotliCompressor();
|
~BrotliCompressor();
|
||||||
|
|
||||||
// Writes the stream header into the internal output buffer.
|
// Writes the stream header into the internal output buffer.
|
||||||
@ -53,8 +64,10 @@ class BrotliCompressor {
|
|||||||
// Initializes the hasher with the hashes of dictionary words.
|
// Initializes the hasher with the hashes of dictionary words.
|
||||||
void StoreDictionaryWordHashes();
|
void StoreDictionaryWordHashes();
|
||||||
|
|
||||||
|
BrotliParams params_;
|
||||||
int window_bits_;
|
int window_bits_;
|
||||||
Hasher* hasher_;
|
std::unique_ptr<Hashers> hashers_;
|
||||||
|
Hashers::Type hash_type_;
|
||||||
int dist_ringbuffer_[4];
|
int dist_ringbuffer_[4];
|
||||||
size_t dist_ringbuffer_idx_;
|
size_t dist_ringbuffer_idx_;
|
||||||
size_t input_pos_;
|
size_t input_pos_;
|
||||||
@ -62,12 +75,14 @@ class BrotliCompressor {
|
|||||||
std::vector<float> literal_cost_;
|
std::vector<float> literal_cost_;
|
||||||
int storage_ix_;
|
int storage_ix_;
|
||||||
uint8_t* storage_;
|
uint8_t* storage_;
|
||||||
|
static StaticDictionary *static_dictionary_;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Compresses the data in input_buffer into encoded_buffer, and sets
|
// Compresses the data in input_buffer into encoded_buffer, and sets
|
||||||
// *encoded_size to the compressed length.
|
// *encoded_size to the compressed length.
|
||||||
// Returns 0 if there was an error and 1 otherwise.
|
// Returns 0 if there was an error and 1 otherwise.
|
||||||
int BrotliCompressBuffer(size_t input_size,
|
int BrotliCompressBuffer(BrotliParams params,
|
||||||
|
size_t input_size,
|
||||||
const uint8_t* input_buffer,
|
const uint8_t* input_buffer,
|
||||||
size_t* encoded_size,
|
size_t* encoded_size,
|
||||||
uint8_t* encoded_buffer);
|
uint8_t* encoded_buffer);
|
||||||
|
195
enc/hash.h
195
enc/hash.h
@ -24,12 +24,14 @@
|
|||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "./transform.h"
|
#include "./transform.h"
|
||||||
#include "./fast_log.h"
|
#include "./fast_log.h"
|
||||||
#include "./find_match_length.h"
|
#include "./find_match_length.h"
|
||||||
#include "./port.h"
|
#include "./port.h"
|
||||||
|
#include "./static_dict.h"
|
||||||
|
|
||||||
namespace brotli {
|
namespace brotli {
|
||||||
|
|
||||||
@ -41,11 +43,21 @@ namespace brotli {
|
|||||||
// * The number has been tuned heuristically against compression benchmarks.
|
// * The number has been tuned heuristically against compression benchmarks.
|
||||||
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
||||||
|
|
||||||
inline uint32_t Hash3Bytes(const uint8_t *data, const int bits) {
|
template<int kShiftBits, int kMinLength>
|
||||||
|
inline uint32_t Hash(const uint8_t *data) {
|
||||||
|
if (kMinLength <= 3) {
|
||||||
|
// If kMinLength is 2 or 3, we hash the first 3 bytes of data.
|
||||||
uint32_t h = (BROTLI_UNALIGNED_LOAD32(data) & 0xffffff) * kHashMul32;
|
uint32_t h = (BROTLI_UNALIGNED_LOAD32(data) & 0xffffff) * kHashMul32;
|
||||||
// The higher bits contain more mixture from the multiplication,
|
// The higher bits contain more mixture from the multiplication,
|
||||||
// so we take our results from there.
|
// so we take our results from there.
|
||||||
return h >> (32 - bits);
|
return h >> (32 - kShiftBits);
|
||||||
|
} else {
|
||||||
|
// If kMinLength is at least 4, we hash the first 4 bytes of data.
|
||||||
|
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
|
||||||
|
// The higher bits contain more mixture from the multiplication,
|
||||||
|
// so we take our results from there.
|
||||||
|
return h >> (32 - kShiftBits);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Usually, we always choose the longest backward reference. This function
|
// Usually, we always choose the longest backward reference. This function
|
||||||
@ -67,32 +79,35 @@ inline double BackwardReferenceScore(double average_cost,
|
|||||||
double start_cost3,
|
double start_cost3,
|
||||||
double start_cost2,
|
double start_cost2,
|
||||||
int copy_length,
|
int copy_length,
|
||||||
int backward_reference_offset,
|
int backward_reference_offset) {
|
||||||
int last_distance1,
|
|
||||||
int last_distance2,
|
|
||||||
int last_distance3,
|
|
||||||
int last_distance4) {
|
|
||||||
double retval = 0;
|
double retval = 0;
|
||||||
switch (copy_length) {
|
switch (copy_length) {
|
||||||
case 2: retval = start_cost2; break;
|
case 2: retval = start_cost2; break;
|
||||||
case 3: retval = start_cost3; break;
|
case 3: retval = start_cost3; break;
|
||||||
default: retval = start_cost4 + (copy_length - 4) * average_cost; break;
|
default: retval = start_cost4 + (copy_length - 4) * average_cost; break;
|
||||||
}
|
}
|
||||||
int diff_last1 = abs(backward_reference_offset - last_distance1);
|
|
||||||
int diff_last2 = abs(backward_reference_offset - last_distance2);
|
|
||||||
if (diff_last1 == 0) {
|
|
||||||
retval += 0.6;
|
|
||||||
} else if (diff_last1 < 4) {
|
|
||||||
retval -= 0.9 + 0.03 * diff_last1;
|
|
||||||
} else if (diff_last2 < 4) {
|
|
||||||
retval -= 0.95 + 0.1 * diff_last2;
|
|
||||||
} else if (backward_reference_offset == last_distance3) {
|
|
||||||
retval -= 1.17;
|
|
||||||
} else if (backward_reference_offset == last_distance4) {
|
|
||||||
retval -= 1.27;
|
|
||||||
} else {
|
|
||||||
retval -= 1.20 * Log2Floor(backward_reference_offset);
|
retval -= 1.20 * Log2Floor(backward_reference_offset);
|
||||||
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline double BackwardReferenceScoreUsingLastDistance(double average_cost,
|
||||||
|
double start_cost4,
|
||||||
|
double start_cost3,
|
||||||
|
double start_cost2,
|
||||||
|
int copy_length,
|
||||||
|
int distance_short_code) {
|
||||||
|
double retval = 0;
|
||||||
|
switch (copy_length) {
|
||||||
|
case 2: retval = start_cost2; break;
|
||||||
|
case 3: retval = start_cost3; break;
|
||||||
|
default: retval = start_cost4 + (copy_length - 4) * average_cost; break;
|
||||||
|
}
|
||||||
|
static const double kDistanceShortCodeBitCost[16] = {
|
||||||
|
-0.6, 0.95, 1.17, 1.27,
|
||||||
|
0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
|
||||||
|
1.05, 1.05, 1.15, 1.15, 1.25, 1.25
|
||||||
|
};
|
||||||
|
retval -= kDistanceShortCodeBitCost[distance_short_code];
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -102,7 +117,7 @@ inline double BackwardReferenceScore(double average_cost,
|
|||||||
// This is a hash map of fixed size (kBucketSize) to a ring buffer of
|
// This is a hash map of fixed size (kBucketSize) to a ring buffer of
|
||||||
// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
|
// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
|
||||||
// index positions of the given hash key in the compressed data.
|
// index positions of the given hash key in the compressed data.
|
||||||
template <int kBucketBits, int kBlockBits>
|
template <int kBucketBits, int kBlockBits, int kMinLength>
|
||||||
class HashLongestMatch {
|
class HashLongestMatch {
|
||||||
public:
|
public:
|
||||||
HashLongestMatch()
|
HashLongestMatch()
|
||||||
@ -111,17 +126,24 @@ class HashLongestMatch {
|
|||||||
last_distance3_(15),
|
last_distance3_(15),
|
||||||
last_distance4_(16),
|
last_distance4_(16),
|
||||||
insert_length_(0),
|
insert_length_(0),
|
||||||
average_cost_(5.4) {
|
average_cost_(5.4),
|
||||||
|
static_dict_(NULL) {
|
||||||
Reset();
|
Reset();
|
||||||
}
|
}
|
||||||
void Reset() {
|
void Reset() {
|
||||||
std::fill(&num_[0], &num_[sizeof(num_) / sizeof(num_[0])], 0);
|
std::fill(&num_[0], &num_[sizeof(num_) / sizeof(num_[0])], 0);
|
||||||
}
|
}
|
||||||
|
void SetStaticDictionary(const StaticDictionary *dict) {
|
||||||
|
static_dict_ = dict;
|
||||||
|
}
|
||||||
|
bool HasStaticDictionary() const {
|
||||||
|
return static_dict_ != NULL;
|
||||||
|
}
|
||||||
|
|
||||||
// Look at 3 bytes at data.
|
// Look at 3 bytes at data.
|
||||||
// Compute a hash from these, and store the value of ix at that position.
|
// Compute a hash from these, and store the value of ix at that position.
|
||||||
inline void Store(const uint8_t *data, const int ix) {
|
inline void Store(const uint8_t *data, const int ix) {
|
||||||
const uint32_t key = Hash3Bytes(data, kBucketBits);
|
const uint32_t key = Hash<kBucketBits, kMinLength>(data);
|
||||||
const int minor_ix = num_[key] & kBlockMask;
|
const int minor_ix = num_[key] & kBlockMask;
|
||||||
buckets_[key][minor_ix] = ix;
|
buckets_[key][minor_ix] = ix;
|
||||||
++num_[key];
|
++num_[key];
|
||||||
@ -218,19 +240,17 @@ class HashLongestMatch {
|
|||||||
const size_t len =
|
const size_t len =
|
||||||
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
||||||
max_length);
|
max_length);
|
||||||
if (len >= 3 || (len == 2 && i < 2)) {
|
if (len >= std::max(kMinLength, 3) ||
|
||||||
|
(kMinLength == 2 && len == 2 && i < 2)) {
|
||||||
// Comparing for >= 2 does not change the semantics, but just saves for
|
// Comparing for >= 2 does not change the semantics, but just saves for
|
||||||
// a few unnecessary binary logarithms in backward reference score,
|
// a few unnecessary binary logarithms in backward reference score,
|
||||||
// since we are not interested in such short matches.
|
// since we are not interested in such short matches.
|
||||||
const double score = BackwardReferenceScore(average_cost_,
|
const double score = BackwardReferenceScoreUsingLastDistance(
|
||||||
|
average_cost_,
|
||||||
start_cost4,
|
start_cost4,
|
||||||
start_cost3,
|
start_cost3,
|
||||||
start_cost2,
|
start_cost2,
|
||||||
len, backward,
|
len, i);
|
||||||
last_distance1_,
|
|
||||||
last_distance2_,
|
|
||||||
last_distance3_,
|
|
||||||
last_distance4_);
|
|
||||||
if (best_score < score) {
|
if (best_score < score) {
|
||||||
best_score = score;
|
best_score = score;
|
||||||
best_len = len;
|
best_len = len;
|
||||||
@ -244,12 +264,9 @@ class HashLongestMatch {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const uint32_t key = Hash3Bytes(&data[cur_ix_masked], kBucketBits);
|
if (kMinLength == 2) {
|
||||||
const int * __restrict const bucket = &buckets_[key][0];
|
|
||||||
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
|
|
||||||
int stop = int(cur_ix) - 64;
|
int stop = int(cur_ix) - 64;
|
||||||
if (stop < 0) { stop = 0; }
|
if (stop < 0) { stop = 0; }
|
||||||
|
|
||||||
start_cost2 -= 1.0;
|
start_cost2 -= 1.0;
|
||||||
for (int i = cur_ix - 1; i > stop; --i) {
|
for (int i = cur_ix - 1; i > stop; --i) {
|
||||||
size_t prev_ix = i;
|
size_t prev_ix = i;
|
||||||
@ -275,45 +292,13 @@ class HashLongestMatch {
|
|||||||
match_found = true;
|
match_found = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
const uint32_t key = Hash<kBucketBits, kMinLength>(&data[cur_ix_masked]);
|
||||||
|
const int * __restrict const bucket = &buckets_[key][0];
|
||||||
|
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
|
||||||
for (int i = num_[key] - 1; i >= down; --i) {
|
for (int i = num_[key] - 1; i >= down; --i) {
|
||||||
int prev_ix = bucket[i & kBlockMask];
|
int prev_ix = bucket[i & kBlockMask];
|
||||||
if (prev_ix < 0) {
|
if (prev_ix >= 0) {
|
||||||
prev_ix *= -1;
|
|
||||||
prev_ix -= 1;
|
|
||||||
int copy_len_code = prev_ix >> 20;
|
|
||||||
int word_id = prev_ix & 0xfffff;
|
|
||||||
std::string word = GetTransformedDictionaryWord(copy_len_code, word_id);
|
|
||||||
int len = word.size();
|
|
||||||
const size_t backward = max_backward + word_id + 1;
|
|
||||||
bool word_matched = (len >= 3 && len <= max_length);
|
|
||||||
for (int k = 0; k < len && word_matched; ++k) {
|
|
||||||
if ((uint8_t)(word[k]) != data[cur_ix_masked + k]) {
|
|
||||||
word_matched = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (word_matched) {
|
|
||||||
const double score = BackwardReferenceScore(average_cost_,
|
|
||||||
start_cost4,
|
|
||||||
start_cost3,
|
|
||||||
start_cost2,
|
|
||||||
len, backward,
|
|
||||||
last_distance1_,
|
|
||||||
last_distance2_,
|
|
||||||
last_distance3_,
|
|
||||||
last_distance4_);
|
|
||||||
if (best_score < score) {
|
|
||||||
best_score = score;
|
|
||||||
best_len = len;
|
|
||||||
best_ix = backward;
|
|
||||||
*best_len_out = best_len;
|
|
||||||
*best_len_code_out = copy_len_code;
|
|
||||||
*best_distance_out = best_ix;
|
|
||||||
*best_score_out = best_score;
|
|
||||||
match_found = true;
|
|
||||||
*in_dictionary = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const size_t backward = cur_ix - prev_ix;
|
const size_t backward = cur_ix - prev_ix;
|
||||||
if (PREDICT_FALSE(backward > max_backward)) {
|
if (PREDICT_FALSE(backward > max_backward)) {
|
||||||
break;
|
break;
|
||||||
@ -327,7 +312,7 @@ class HashLongestMatch {
|
|||||||
const size_t len =
|
const size_t len =
|
||||||
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
||||||
max_length);
|
max_length);
|
||||||
if (len >= 3) {
|
if (len >= std::max(kMinLength, 3)) {
|
||||||
// Comparing for >= 3 does not change the semantics, but just saves
|
// Comparing for >= 3 does not change the semantics, but just saves
|
||||||
// for a few unnecessary binary logarithms in backward reference
|
// for a few unnecessary binary logarithms in backward reference
|
||||||
// score, since we are not interested in such short matches.
|
// score, since we are not interested in such short matches.
|
||||||
@ -335,11 +320,7 @@ class HashLongestMatch {
|
|||||||
start_cost4,
|
start_cost4,
|
||||||
start_cost3,
|
start_cost3,
|
||||||
start_cost2,
|
start_cost2,
|
||||||
len, backward,
|
len, backward);
|
||||||
last_distance1_,
|
|
||||||
last_distance2_,
|
|
||||||
last_distance3_,
|
|
||||||
last_distance4_);
|
|
||||||
if (best_score < score) {
|
if (best_score < score) {
|
||||||
best_score = score;
|
best_score = score;
|
||||||
best_len = len;
|
best_len = len;
|
||||||
@ -354,6 +335,36 @@ class HashLongestMatch {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (static_dict_ != NULL) {
|
||||||
|
// We decide based on first 4 bytes how many bytes to test for.
|
||||||
|
int prefix = BROTLI_UNALIGNED_LOAD32(&data[cur_ix_masked]);
|
||||||
|
int maxlen = static_dict_->GetLength(prefix);
|
||||||
|
for (int len = std::min<size_t>(maxlen, max_length);
|
||||||
|
len > best_len && len >= 4; --len) {
|
||||||
|
std::string snippet((const char *)&data[cur_ix_masked], len);
|
||||||
|
int copy_len_code;
|
||||||
|
int word_id;
|
||||||
|
if (static_dict_->Get(snippet, ©_len_code, &word_id)) {
|
||||||
|
const size_t backward = max_backward + word_id + 1;
|
||||||
|
const double score = BackwardReferenceScore(average_cost_,
|
||||||
|
start_cost4,
|
||||||
|
start_cost3,
|
||||||
|
start_cost2,
|
||||||
|
len, backward);
|
||||||
|
if (best_score < score) {
|
||||||
|
best_score = score;
|
||||||
|
best_len = len;
|
||||||
|
best_ix = backward;
|
||||||
|
*best_len_out = best_len;
|
||||||
|
*best_len_code_out = copy_len_code;
|
||||||
|
*best_distance_out = best_ix;
|
||||||
|
*best_score_out = best_score;
|
||||||
|
match_found = true;
|
||||||
|
*in_dictionary = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return match_found;
|
return match_found;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -399,9 +410,37 @@ class HashLongestMatch {
|
|||||||
int insert_length_;
|
int insert_length_;
|
||||||
|
|
||||||
double average_cost_;
|
double average_cost_;
|
||||||
|
|
||||||
|
const StaticDictionary *static_dict_;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef HashLongestMatch<13, 11> Hasher;
|
struct Hashers {
|
||||||
|
enum Type {
|
||||||
|
HASH_15_8_4 = 0,
|
||||||
|
HASH_15_8_2 = 1,
|
||||||
|
};
|
||||||
|
|
||||||
|
void Init(Type type) {
|
||||||
|
switch (type) {
|
||||||
|
case HASH_15_8_4:
|
||||||
|
hash_15_8_4.reset(new HashLongestMatch<15, 8, 4>());
|
||||||
|
break;
|
||||||
|
case HASH_15_8_2:
|
||||||
|
hash_15_8_2.reset(new HashLongestMatch<15, 8, 2>());
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetStaticDictionary(const StaticDictionary *dict) {
|
||||||
|
if (hash_15_8_4.get() != NULL) hash_15_8_4->SetStaticDictionary(dict);
|
||||||
|
if (hash_15_8_2.get() != NULL) hash_15_8_2->SetStaticDictionary(dict);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<HashLongestMatch<15, 8, 4> > hash_15_8_4;
|
||||||
|
std::unique_ptr<HashLongestMatch<15, 8, 2> > hash_15_8_2;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace brotli
|
} // namespace brotli
|
||||||
|
|
||||||
|
@ -59,7 +59,8 @@ static int DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
||||||
const uint8_t *data, float *cost) {
|
size_t cost_mask, const uint8_t *data,
|
||||||
|
float *cost) {
|
||||||
|
|
||||||
// max_utf8 is 0 (normal ascii single byte modeling),
|
// max_utf8 is 0 (normal ascii single byte modeling),
|
||||||
// 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
|
// 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
|
||||||
@ -110,18 +111,20 @@ void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
|||||||
if (histo == 0) {
|
if (histo == 0) {
|
||||||
histo = 1;
|
histo = 1;
|
||||||
}
|
}
|
||||||
cost[masked_pos] = log2(static_cast<double>(in_window_utf8[utf8_pos])
|
float lit_cost = log2(static_cast<double>(in_window_utf8[utf8_pos])
|
||||||
/ histo);
|
/ histo);
|
||||||
cost[masked_pos] += 0.02905;
|
lit_cost += 0.02905;
|
||||||
if (cost[masked_pos] < 1.0) {
|
if (lit_cost < 1.0) {
|
||||||
cost[masked_pos] *= 0.5;
|
lit_cost *= 0.5;
|
||||||
cost[masked_pos] += 0.5;
|
lit_cost += 0.5;
|
||||||
}
|
}
|
||||||
|
cost[(pos + i) & cost_mask] = lit_cost;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
||||||
const uint8_t *data, float *cost) {
|
size_t cost_mask, const uint8_t *data,
|
||||||
|
float *cost) {
|
||||||
int histogram[256] = { 0 };
|
int histogram[256] = { 0 };
|
||||||
int window_half = 2000;
|
int window_half = 2000;
|
||||||
int in_window = std::min(static_cast<size_t>(window_half), len);
|
int in_window = std::min(static_cast<size_t>(window_half), len);
|
||||||
@ -143,17 +146,17 @@ void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
|||||||
++histogram[data[(pos + i + window_half) & mask]];
|
++histogram[data[(pos + i + window_half) & mask]];
|
||||||
++in_window;
|
++in_window;
|
||||||
}
|
}
|
||||||
int masked_pos = (pos + i) & mask;
|
int histo = histogram[data[(pos + i) & mask]];
|
||||||
int histo = histogram[data[masked_pos]];
|
|
||||||
if (histo == 0) {
|
if (histo == 0) {
|
||||||
histo = 1;
|
histo = 1;
|
||||||
}
|
}
|
||||||
cost[masked_pos] = log2(static_cast<double>(in_window) / histo);
|
float lit_cost = log2(static_cast<double>(in_window) / histo);
|
||||||
cost[masked_pos] += 0.029;
|
lit_cost += 0.029;
|
||||||
if (cost[masked_pos] < 1.0) {
|
if (lit_cost < 1.0) {
|
||||||
cost[masked_pos] *= 0.5;
|
lit_cost *= 0.5;
|
||||||
cost[masked_pos] += 0.5;
|
lit_cost += 0.5;
|
||||||
}
|
}
|
||||||
|
cost[(pos + i) & cost_mask] = lit_cost;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -26,11 +26,11 @@ namespace brotli {
|
|||||||
// ringbuffer (data, mask) will take entropy coded and writes these estimates
|
// ringbuffer (data, mask) will take entropy coded and writes these estimates
|
||||||
// to the ringbuffer (cost, mask).
|
// to the ringbuffer (cost, mask).
|
||||||
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
||||||
const uint8_t *data,
|
size_t cost_mask, const uint8_t *data,
|
||||||
float *cost);
|
float *cost);
|
||||||
|
|
||||||
void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
||||||
const uint8_t *data,
|
size_t cost_mask, const uint8_t *data,
|
||||||
float *cost);
|
float *cost);
|
||||||
|
|
||||||
} // namespace brotli
|
} // namespace brotli
|
||||||
|
@ -90,7 +90,7 @@ int CopyLengthPrefix(int length) {
|
|||||||
|
|
||||||
int CommandPrefix(int insert_length, int copy_length) {
|
int CommandPrefix(int insert_length, int copy_length) {
|
||||||
if (copy_length == 0) {
|
if (copy_length == 0) {
|
||||||
copy_length = 3;
|
copy_length = 4;
|
||||||
}
|
}
|
||||||
int insert_prefix = InsertLengthPrefix(insert_length);
|
int insert_prefix = InsertLengthPrefix(insert_length);
|
||||||
int copy_prefix = CopyLengthPrefix(copy_length);
|
int copy_prefix = CopyLengthPrefix(copy_length);
|
||||||
|
68
enc/static_dict.h
Normal file
68
enc/static_dict.h
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Class to model the static dictionary.
|
||||||
|
|
||||||
|
#ifndef BROTLI_ENC_STATIC_DICT_H_
|
||||||
|
#define BROTLI_ENC_STATIC_DICT_H_
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace brotli {
|
||||||
|
|
||||||
|
class StaticDictionary {
|
||||||
|
public:
|
||||||
|
StaticDictionary() {}
|
||||||
|
void Insert(const std::string &str, int len, int dist) {
|
||||||
|
int ix = (dist << 6) + len;
|
||||||
|
std::unordered_map<std::string, int>::const_iterator it = map_.find(str);
|
||||||
|
if (it != map_.end() && ix >= it->second) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
map_[str] = ix;
|
||||||
|
int v = 0;
|
||||||
|
for (int i = 0; i < 4 && i < str.size(); ++i) {
|
||||||
|
v += str[i] << (8 * i);
|
||||||
|
}
|
||||||
|
if (prefix_map_[v] < str.size()) {
|
||||||
|
prefix_map_[v] = str.size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int GetLength(int v) const {
|
||||||
|
std::unordered_map<int, int>::const_iterator it = prefix_map_.find(v);
|
||||||
|
if (it == prefix_map_.end()) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return it->second;
|
||||||
|
}
|
||||||
|
bool Get(const std::string &str, int *len, int *dist) const {
|
||||||
|
std::unordered_map<std::string, int>::const_iterator it = map_.find(str);
|
||||||
|
if (it == map_.end()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
int v = it->second;
|
||||||
|
*len = v & 63;
|
||||||
|
*dist = v >> 6;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
std::unordered_map<std::string, int> map_;
|
||||||
|
std::unordered_map<int, int> prefix_map_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace brotli
|
||||||
|
|
||||||
|
#endif // BROTLI_ENC_STATIC_DICT_H_
|
Loading…
Reference in New Issue
Block a user