mirror of
https://github.com/google/brotli.git
synced 2024-11-25 04:50:05 +00:00
Merge pull request #48 from szabadka/master
Add a faster but less dense compression mode.
This commit is contained in:
commit
8bda995564
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
include ../shared.mk
|
include ../shared.mk
|
||||||
|
|
||||||
OBJS = backward_references.o block_splitter.o brotli_bit_stream.o encode.o entropy_encode.o histogram.o literal_cost.o
|
OBJS = backward_references.o block_splitter.o brotli_bit_stream.o encode.o entropy_encode.o histogram.o literal_cost.o metablock.o
|
||||||
|
|
||||||
all : $(OBJS)
|
all : $(OBJS)
|
||||||
|
|
||||||
|
@ -24,6 +24,31 @@
|
|||||||
|
|
||||||
namespace brotli {
|
namespace brotli {
|
||||||
|
|
||||||
|
static inline double BitsEntropy(const int *population, int size) {
|
||||||
|
int sum = 0;
|
||||||
|
double retval = 0;
|
||||||
|
const int *population_end = population + size;
|
||||||
|
int p;
|
||||||
|
if (size & 1) {
|
||||||
|
goto odd_number_of_elements_left;
|
||||||
|
}
|
||||||
|
while (population < population_end) {
|
||||||
|
p = *population++;
|
||||||
|
sum += p;
|
||||||
|
retval -= p * FastLog2(p);
|
||||||
|
odd_number_of_elements_left:
|
||||||
|
p = *population++;
|
||||||
|
sum += p;
|
||||||
|
retval -= p * FastLog2(p);
|
||||||
|
}
|
||||||
|
if (sum) retval -= sum * log(sum);
|
||||||
|
if (retval < sum) {
|
||||||
|
// At least one bit per literal is needed.
|
||||||
|
retval = sum;
|
||||||
|
}
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
static const int kHuffmanExtraBits[kCodeLengthCodes] = {
|
static const int kHuffmanExtraBits[kCodeLengthCodes] = {
|
||||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3,
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3,
|
||||||
};
|
};
|
||||||
|
@ -284,21 +284,21 @@ void ClusterBlocks(const DataType* data, const size_t length,
|
|||||||
void BuildBlockSplit(const std::vector<uint8_t>& block_ids, BlockSplit* split) {
|
void BuildBlockSplit(const std::vector<uint8_t>& block_ids, BlockSplit* split) {
|
||||||
int cur_id = block_ids[0];
|
int cur_id = block_ids[0];
|
||||||
int cur_length = 1;
|
int cur_length = 1;
|
||||||
split->num_types_ = -1;
|
split->num_types = -1;
|
||||||
for (int i = 1; i < block_ids.size(); ++i) {
|
for (int i = 1; i < block_ids.size(); ++i) {
|
||||||
if (block_ids[i] != cur_id) {
|
if (block_ids[i] != cur_id) {
|
||||||
split->types_.push_back(cur_id);
|
split->types.push_back(cur_id);
|
||||||
split->lengths_.push_back(cur_length);
|
split->lengths.push_back(cur_length);
|
||||||
split->num_types_ = std::max(split->num_types_, cur_id);
|
split->num_types = std::max(split->num_types, cur_id);
|
||||||
cur_id = block_ids[i];
|
cur_id = block_ids[i];
|
||||||
cur_length = 0;
|
cur_length = 0;
|
||||||
}
|
}
|
||||||
++cur_length;
|
++cur_length;
|
||||||
}
|
}
|
||||||
split->types_.push_back(cur_id);
|
split->types.push_back(cur_id);
|
||||||
split->lengths_.push_back(cur_length);
|
split->lengths.push_back(cur_length);
|
||||||
split->num_types_ = std::max(split->num_types_, cur_id);
|
split->num_types = std::max(split->num_types, cur_id);
|
||||||
++split->num_types_;
|
++split->num_types;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename HistogramType, typename DataType>
|
template<typename HistogramType, typename DataType>
|
||||||
@ -309,12 +309,12 @@ void SplitByteVector(const std::vector<DataType>& data,
|
|||||||
const double block_switch_cost,
|
const double block_switch_cost,
|
||||||
BlockSplit* split) {
|
BlockSplit* split) {
|
||||||
if (data.empty()) {
|
if (data.empty()) {
|
||||||
split->num_types_ = 1;
|
split->num_types = 1;
|
||||||
return;
|
return;
|
||||||
} else if (data.size() < kMinLengthForBlockSplitting) {
|
} else if (data.size() < kMinLengthForBlockSplitting) {
|
||||||
split->num_types_ = 1;
|
split->num_types = 1;
|
||||||
split->types_.push_back(0);
|
split->types.push_back(0);
|
||||||
split->lengths_.push_back(data.size());
|
split->lengths.push_back(data.size());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
std::vector<HistogramType> histograms;
|
std::vector<HistogramType> histograms;
|
||||||
@ -356,7 +356,6 @@ void SplitBlock(const std::vector<Command>& cmds,
|
|||||||
&insert_and_copy_codes,
|
&insert_and_copy_codes,
|
||||||
&distance_prefixes);
|
&distance_prefixes);
|
||||||
|
|
||||||
|
|
||||||
SplitByteVector<HistogramLiteral>(
|
SplitByteVector<HistogramLiteral>(
|
||||||
literals,
|
literals,
|
||||||
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
|
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
|
||||||
|
@ -24,28 +24,23 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include "./command.h"
|
#include "./command.h"
|
||||||
|
#include "./metablock.h"
|
||||||
|
|
||||||
namespace brotli {
|
namespace brotli {
|
||||||
|
|
||||||
// Describes how a symbol stream is cut into consecutive blocks.
// (This is the removed, left-hand side of the diff; the commit pulls the
// definition in through "./metablock.h" with un-suffixed member names.)
struct BlockSplit {
|
|
||||||
// Largest block type id plus one (see BuildBlockSplit).
int num_types_;
|
|
||||||
// Block type id of each block, in stream order.
std::vector<int> types_;
|
|
||||||
// Number of symbols in each block; parallel to types_.
std::vector<int> lengths_;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct BlockSplitIterator {
|
struct BlockSplitIterator {
|
||||||
explicit BlockSplitIterator(const BlockSplit& split)
|
explicit BlockSplitIterator(const BlockSplit& split)
|
||||||
: split_(split), idx_(0), type_(0), length_(0) {
|
: split_(split), idx_(0), type_(0), length_(0) {
|
||||||
if (!split.lengths_.empty()) {
|
if (!split.lengths.empty()) {
|
||||||
length_ = split.lengths_[0];
|
length_ = split.lengths[0];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Next() {
|
void Next() {
|
||||||
if (length_ == 0) {
|
if (length_ == 0) {
|
||||||
++idx_;
|
++idx_;
|
||||||
type_ = split_.types_[idx_];
|
type_ = split_.types[idx_];
|
||||||
length_ = split_.lengths_[idx_];
|
length_ = split_.lengths[idx_];
|
||||||
}
|
}
|
||||||
--length_;
|
--length_;
|
||||||
}
|
}
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "./bit_cost.h"
|
#include "./bit_cost.h"
|
||||||
|
#include "./context.h"
|
||||||
#include "./entropy_encode.h"
|
#include "./entropy_encode.h"
|
||||||
#include "./fast_log.h"
|
#include "./fast_log.h"
|
||||||
#include "./prefix.h"
|
#include "./prefix.h"
|
||||||
@ -572,4 +573,253 @@ void StoreTrivialContextMap(int num_types,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Manages the encoding of one block category (literal, command or distance).
|
||||||
|
class BlockEncoder {
|
||||||
|
public:
|
||||||
|
BlockEncoder(int alphabet_size,
|
||||||
|
int num_block_types,
|
||||||
|
const std::vector<int>& block_types,
|
||||||
|
const std::vector<int>& block_lengths)
|
||||||
|
: alphabet_size_(alphabet_size),
|
||||||
|
num_block_types_(num_block_types),
|
||||||
|
block_types_(block_types),
|
||||||
|
block_lengths_(block_lengths),
|
||||||
|
block_ix_(0),
|
||||||
|
block_len_(block_lengths.empty() ? 0 : block_lengths[0]),
|
||||||
|
entropy_ix_(0) {}
|
||||||
|
|
||||||
|
// Creates entropy codes of block lengths and block types and stores them
|
||||||
|
// to the bit stream.
|
||||||
|
void BuildAndStoreBlockSwitchEntropyCodes(int quality,
|
||||||
|
int* storage_ix, uint8_t* storage) {
|
||||||
|
BuildAndStoreBlockSplitCode(
|
||||||
|
block_types_, block_lengths_, num_block_types_,
|
||||||
|
quality, &block_split_code_, storage_ix, storage);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Creates entropy codes for all block types and stores them to the bit
|
||||||
|
// stream.
|
||||||
|
template<int kSize>
|
||||||
|
void BuildAndStoreEntropyCodes(
|
||||||
|
const std::vector<Histogram<kSize> >& histograms,
|
||||||
|
int quality,
|
||||||
|
int* storage_ix, uint8_t* storage) {
|
||||||
|
depths_.resize(histograms.size() * alphabet_size_);
|
||||||
|
bits_.resize(histograms.size() * alphabet_size_);
|
||||||
|
for (int i = 0; i < histograms.size(); ++i) {
|
||||||
|
int ix = i * alphabet_size_;
|
||||||
|
BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size_,
|
||||||
|
quality,
|
||||||
|
&depths_[ix], &bits_[ix],
|
||||||
|
storage_ix, storage);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stores the next symbol with the entropy code of the current block type.
|
||||||
|
// Updates the block type and block length at block boundaries.
|
||||||
|
void StoreSymbol(int symbol, int* storage_ix, uint8_t* storage) {
|
||||||
|
if (block_len_ == 0) {
|
||||||
|
++block_ix_;
|
||||||
|
block_len_ = block_lengths_[block_ix_];
|
||||||
|
entropy_ix_ = block_types_[block_ix_] * alphabet_size_;
|
||||||
|
StoreBlockSwitch(block_split_code_, block_ix_, storage_ix, storage);
|
||||||
|
}
|
||||||
|
--block_len_;
|
||||||
|
int ix = entropy_ix_ + symbol;
|
||||||
|
WriteBits(depths_[ix], bits_[ix], storage_ix, storage);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stores the next symbol with the entropy code of the current block type and
|
||||||
|
// context value.
|
||||||
|
// Updates the block type and block length at block boundaries.
|
||||||
|
template<int kContextBits>
|
||||||
|
void StoreSymbolWithContext(int symbol, int context,
|
||||||
|
const std::vector<int>& context_map,
|
||||||
|
int* storage_ix, uint8_t* storage) {
|
||||||
|
if (block_len_ == 0) {
|
||||||
|
++block_ix_;
|
||||||
|
block_len_ = block_lengths_[block_ix_];
|
||||||
|
entropy_ix_ = block_types_[block_ix_] << kContextBits;
|
||||||
|
StoreBlockSwitch(block_split_code_, block_ix_, storage_ix, storage);
|
||||||
|
}
|
||||||
|
--block_len_;
|
||||||
|
int histo_ix = context_map[entropy_ix_ + context];
|
||||||
|
int ix = histo_ix * alphabet_size_ + symbol;
|
||||||
|
WriteBits(depths_[ix], bits_[ix], storage_ix, storage);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const int alphabet_size_;
|
||||||
|
const int num_block_types_;
|
||||||
|
const std::vector<int>& block_types_;
|
||||||
|
const std::vector<int>& block_lengths_;
|
||||||
|
BlockSplitCode block_split_code_;
|
||||||
|
int block_ix_;
|
||||||
|
int block_len_;
|
||||||
|
int entropy_ix_;
|
||||||
|
std::vector<uint8_t> depths_;
|
||||||
|
std::vector<uint16_t> bits_;
|
||||||
|
};
|
||||||
|
|
||||||
|
bool StoreMetaBlock(const uint8_t* input,
|
||||||
|
size_t start_pos,
|
||||||
|
size_t length,
|
||||||
|
size_t mask,
|
||||||
|
bool is_last,
|
||||||
|
int quality,
|
||||||
|
int num_direct_distance_codes,
|
||||||
|
int distance_postfix_bits,
|
||||||
|
int literal_context_mode,
|
||||||
|
const brotli::Command *commands,
|
||||||
|
size_t n_commands,
|
||||||
|
const MetaBlockSplit& mb,
|
||||||
|
int *storage_ix,
|
||||||
|
uint8_t *storage) {
|
||||||
|
if (!StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (length == 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
int num_distance_codes =
|
||||||
|
kNumDistanceShortCodes + num_direct_distance_codes +
|
||||||
|
(48 << distance_postfix_bits);
|
||||||
|
|
||||||
|
BlockEncoder literal_enc(256,
|
||||||
|
mb.literal_split.num_types,
|
||||||
|
mb.literal_split.types,
|
||||||
|
mb.literal_split.lengths);
|
||||||
|
BlockEncoder command_enc(kNumCommandPrefixes,
|
||||||
|
mb.command_split.num_types,
|
||||||
|
mb.command_split.types,
|
||||||
|
mb.command_split.lengths);
|
||||||
|
BlockEncoder distance_enc(num_distance_codes,
|
||||||
|
mb.distance_split.num_types,
|
||||||
|
mb.distance_split.types,
|
||||||
|
mb.distance_split.lengths);
|
||||||
|
|
||||||
|
literal_enc.BuildAndStoreBlockSwitchEntropyCodes(
|
||||||
|
quality, storage_ix, storage);
|
||||||
|
command_enc.BuildAndStoreBlockSwitchEntropyCodes(
|
||||||
|
quality, storage_ix, storage);
|
||||||
|
distance_enc.BuildAndStoreBlockSwitchEntropyCodes(
|
||||||
|
quality, storage_ix, storage);
|
||||||
|
|
||||||
|
WriteBits(2, distance_postfix_bits, storage_ix, storage);
|
||||||
|
WriteBits(4, num_direct_distance_codes >> distance_postfix_bits,
|
||||||
|
storage_ix, storage);
|
||||||
|
for (int i = 0; i < mb.literal_split.num_types; ++i) {
|
||||||
|
WriteBits(2, literal_context_mode, storage_ix, storage);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mb.literal_context_map.empty()) {
|
||||||
|
StoreTrivialContextMap(mb.literal_histograms.size(), kLiteralContextBits,
|
||||||
|
storage_ix, storage);
|
||||||
|
} else {
|
||||||
|
EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(),
|
||||||
|
storage_ix, storage);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mb.distance_context_map.empty()) {
|
||||||
|
StoreTrivialContextMap(mb.distance_histograms.size(), kDistanceContextBits,
|
||||||
|
storage_ix, storage);
|
||||||
|
} else {
|
||||||
|
EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(),
|
||||||
|
storage_ix, storage);
|
||||||
|
}
|
||||||
|
|
||||||
|
literal_enc.BuildAndStoreEntropyCodes(mb.literal_histograms, quality,
|
||||||
|
storage_ix, storage);
|
||||||
|
command_enc.BuildAndStoreEntropyCodes(mb.command_histograms, quality,
|
||||||
|
storage_ix, storage);
|
||||||
|
distance_enc.BuildAndStoreEntropyCodes(mb.distance_histograms, quality,
|
||||||
|
storage_ix, storage);
|
||||||
|
|
||||||
|
size_t pos = start_pos;
|
||||||
|
for (int i = 0; i < n_commands; ++i) {
|
||||||
|
const Command cmd = commands[i];
|
||||||
|
int cmd_code = cmd.cmd_prefix_;
|
||||||
|
int lennumextra = cmd.cmd_extra_ >> 48;
|
||||||
|
uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffULL;
|
||||||
|
command_enc.StoreSymbol(cmd_code, storage_ix, storage);
|
||||||
|
WriteBits(lennumextra, lenextra, storage_ix, storage);
|
||||||
|
if (mb.literal_context_map.empty()) {
|
||||||
|
for (int j = 0; j < cmd.insert_len_; j++) {
|
||||||
|
literal_enc.StoreSymbol(input[pos & mask], storage_ix, storage);
|
||||||
|
++pos;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int j = 0; j < cmd.insert_len_; ++j) {
|
||||||
|
uint8_t prev_byte = pos > 0 ? input[(pos - 1) & mask] : 0;
|
||||||
|
uint8_t prev_byte2 = pos > 1 ? input[(pos - 2) & mask] : 0;
|
||||||
|
int context = Context(prev_byte, prev_byte2,
|
||||||
|
literal_context_mode);
|
||||||
|
int literal = input[pos & mask];
|
||||||
|
literal_enc.StoreSymbolWithContext<kLiteralContextBits>(
|
||||||
|
literal, context, mb.literal_context_map, storage_ix, storage);
|
||||||
|
++pos;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
|
||||||
|
int dist_code = cmd.dist_prefix_;
|
||||||
|
int distnumextra = cmd.dist_extra_ >> 24;
|
||||||
|
int distextra = cmd.dist_extra_ & 0xffffff;
|
||||||
|
if (mb.distance_context_map.empty()) {
|
||||||
|
distance_enc.StoreSymbol(dist_code, storage_ix, storage);
|
||||||
|
} else {
|
||||||
|
int context = cmd.DistanceContext();
|
||||||
|
distance_enc.StoreSymbolWithContext<kDistanceContextBits>(
|
||||||
|
dist_code, context, mb.distance_context_map, storage_ix, storage);
|
||||||
|
}
|
||||||
|
brotli::WriteBits(distnumextra, distextra, storage_ix, storage);
|
||||||
|
}
|
||||||
|
pos += cmd.copy_len_;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is for storing uncompressed blocks (simple raw storage of
|
||||||
|
// bytes-as-bytes).
|
||||||
|
bool StoreUncompressedMetaBlock(bool final_block,
|
||||||
|
const uint8_t * __restrict input,
|
||||||
|
size_t position, size_t mask,
|
||||||
|
size_t len,
|
||||||
|
int * __restrict storage_ix,
|
||||||
|
uint8_t * __restrict storage) {
|
||||||
|
if (!brotli::StoreUncompressedMetaBlockHeader(len, storage_ix, storage)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*storage_ix = ((*storage_ix + 7) / 8) * 8; // Go to next byte
|
||||||
|
|
||||||
|
size_t masked_pos = position & mask;
|
||||||
|
if (masked_pos + len > mask + 1) {
|
||||||
|
size_t len1 = mask + 1 - masked_pos;
|
||||||
|
memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len1);
|
||||||
|
*storage_ix += len1 << 3;
|
||||||
|
len -= len1;
|
||||||
|
masked_pos = 0;
|
||||||
|
}
|
||||||
|
memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len);
|
||||||
|
*storage_ix += len << 3;
|
||||||
|
|
||||||
|
// We need to clear the next 4 bytes to continue to be
|
||||||
|
// compatible with WriteBits.
|
||||||
|
brotli::WriteBitsPrepareStorage(*storage_ix, storage);
|
||||||
|
|
||||||
|
// Since the uncomressed block itself may not be the final block, add an empty
|
||||||
|
// one after this.
|
||||||
|
if (final_block) {
|
||||||
|
brotli::WriteBits(1, 1, storage_ix, storage); // islast
|
||||||
|
brotli::WriteBits(1, 1, storage_ix, storage); // isempty
|
||||||
|
*storage_ix = ((*storage_ix + 7) / 8) * 8; // Go to next byte
|
||||||
|
|
||||||
|
// We need to clear the next 4 bytes to continue to be
|
||||||
|
// compatible with WriteBits.
|
||||||
|
brotli::WriteBitsPrepareStorage(*storage_ix, storage);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace brotli
|
} // namespace brotli
|
||||||
|
@ -28,6 +28,8 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "./metablock.h"
|
||||||
|
|
||||||
namespace brotli {
|
namespace brotli {
|
||||||
|
|
||||||
// All Store functions here will use a storage_ix, which is always the bit
|
// All Store functions here will use a storage_ix, which is always the bit
|
||||||
@ -104,6 +106,30 @@ void StoreBlockSwitch(const BlockSplitCode& code,
|
|||||||
int* storage_ix,
|
int* storage_ix,
|
||||||
uint8_t* storage);
|
uint8_t* storage);
|
||||||
|
|
||||||
|
bool StoreMetaBlock(const uint8_t* input,
|
||||||
|
size_t start_pos,
|
||||||
|
size_t length,
|
||||||
|
size_t mask,
|
||||||
|
bool final_block,
|
||||||
|
int quality,
|
||||||
|
int num_direct_distance_codes,
|
||||||
|
int distance_postfix_bits,
|
||||||
|
int literal_context_mode,
|
||||||
|
const brotli::Command *commands,
|
||||||
|
size_t n_commands,
|
||||||
|
const MetaBlockSplit& mb,
|
||||||
|
int *storage_ix,
|
||||||
|
uint8_t *storage);
|
||||||
|
|
||||||
|
// This is for storing uncompressed blocks (simple raw storage of
|
||||||
|
// bytes-as-bytes).
|
||||||
|
bool StoreUncompressedMetaBlock(bool final_block,
|
||||||
|
const uint8_t* input,
|
||||||
|
size_t position, size_t mask,
|
||||||
|
size_t len,
|
||||||
|
int* storage_ix,
|
||||||
|
uint8_t* storage);
|
||||||
|
|
||||||
} // namespace brotli
|
} // namespace brotli
|
||||||
|
|
||||||
#endif // BROTLI_ENC_BROTLI_BIT_STREAM_H_
|
#endif // BROTLI_ENC_BROTLI_BIT_STREAM_H_
|
||||||
|
297
enc/encode.cc
297
enc/encode.cc
@ -25,6 +25,7 @@
|
|||||||
#include "./brotli_bit_stream.h"
|
#include "./brotli_bit_stream.h"
|
||||||
#include "./cluster.h"
|
#include "./cluster.h"
|
||||||
#include "./context.h"
|
#include "./context.h"
|
||||||
|
#include "./metablock.h"
|
||||||
#include "./transform.h"
|
#include "./transform.h"
|
||||||
#include "./entropy_encode.h"
|
#include "./entropy_encode.h"
|
||||||
#include "./fast_log.h"
|
#include "./fast_log.h"
|
||||||
@ -48,24 +49,6 @@ static const int kMetaBlockSizeBits = 21;
|
|||||||
static const int kRingBufferBits = 23;
|
static const int kRingBufferBits = 23;
|
||||||
static const int kRingBufferMask = (1 << kRingBufferBits) - 1;
|
static const int kRingBufferMask = (1 << kRingBufferBits) - 1;
|
||||||
|
|
||||||
template<int kSize>
|
|
||||||
double Entropy(const std::vector<Histogram<kSize> >& histograms) {
|
|
||||||
double retval = 0;
|
|
||||||
for (int i = 0; i < histograms.size(); ++i) {
|
|
||||||
retval += histograms[i].EntropyBitCost();
|
|
||||||
}
|
|
||||||
return retval;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<int kSize>
|
|
||||||
double TotalBitCost(const std::vector<Histogram<kSize> >& histograms) {
|
|
||||||
double retval = 0;
|
|
||||||
for (int i = 0; i < histograms.size(); ++i) {
|
|
||||||
retval += PopulationCost(histograms[i]);
|
|
||||||
}
|
|
||||||
return retval;
|
|
||||||
}
|
|
||||||
|
|
||||||
int ParseAsUTF8(int* symbol, const uint8_t* input, int size) {
|
int ParseAsUTF8(int* symbol, const uint8_t* input, int size) {
|
||||||
// ASCII
|
// ASCII
|
||||||
if ((input[0] & 0x80) == 0) {
|
if ((input[0] & 0x80) == 0) {
|
||||||
@ -128,55 +111,6 @@ bool IsMostlyUTF8(const uint8_t* data, size_t length, double min_fraction) {
|
|||||||
return size_utf8 > min_fraction * length;
|
return size_utf8 > min_fraction * length;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<int kSize>
|
|
||||||
void BuildAndStoreEntropyCode(const Histogram<kSize>& histogram,
|
|
||||||
const int tree_limit,
|
|
||||||
const int alphabet_size,
|
|
||||||
EntropyCode<kSize>* code,
|
|
||||||
int* storage_ix, uint8_t* storage) {
|
|
||||||
memset(code->depth_, 0, sizeof(code->depth_));
|
|
||||||
memset(code->bits_, 0, sizeof(code->bits_));
|
|
||||||
BuildAndStoreHuffmanTree(histogram.data_, alphabet_size, 9,
|
|
||||||
code->depth_, code->bits_, storage_ix, storage);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<int kSize>
|
|
||||||
void BuildAndStoreEntropyCodes(
|
|
||||||
const std::vector<Histogram<kSize> >& histograms,
|
|
||||||
int alphabet_size,
|
|
||||||
std::vector<EntropyCode<kSize> >* entropy_codes,
|
|
||||||
int* storage_ix, uint8_t* storage) {
|
|
||||||
entropy_codes->resize(histograms.size());
|
|
||||||
for (int i = 0; i < histograms.size(); ++i) {
|
|
||||||
BuildAndStoreEntropyCode(histograms[i], 15, alphabet_size,
|
|
||||||
&(*entropy_codes)[i],
|
|
||||||
storage_ix, storage);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void EncodeCommand(const Command& cmd,
|
|
||||||
const EntropyCodeCommand& entropy,
|
|
||||||
int* storage_ix, uint8_t* storage) {
|
|
||||||
int code = cmd.cmd_prefix_;
|
|
||||||
WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
|
|
||||||
int nextra = cmd.cmd_extra_ >> 48;
|
|
||||||
uint64_t extra = cmd.cmd_extra_ & 0xffffffffffffULL;
|
|
||||||
if (nextra > 0) {
|
|
||||||
WriteBits(nextra, extra, storage_ix, storage);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
|
|
||||||
int* storage_ix, uint8_t* storage) {
|
|
||||||
int code = cmd.dist_prefix_;
|
|
||||||
int extra_bits = cmd.dist_extra_ >> 24;
|
|
||||||
uint64_t extra_bits_val = cmd.dist_extra_ & 0xffffff;
|
|
||||||
WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
|
|
||||||
if (extra_bits > 0) {
|
|
||||||
WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void RecomputeDistancePrefixes(std::vector<Command>* cmds,
|
void RecomputeDistancePrefixes(std::vector<Command>* cmds,
|
||||||
int num_direct_distance_codes,
|
int num_direct_distance_codes,
|
||||||
int distance_postfix_bits) {
|
int distance_postfix_bits) {
|
||||||
@ -196,101 +130,6 @@ void RecomputeDistancePrefixes(std::vector<Command>* cmds,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MoveAndEncode(const BlockSplitCode& code,
|
|
||||||
BlockSplitIterator* it,
|
|
||||||
int* storage_ix, uint8_t* storage) {
|
|
||||||
if (it->length_ == 0) {
|
|
||||||
++it->idx_;
|
|
||||||
it->type_ = it->split_.types_[it->idx_];
|
|
||||||
it->length_ = it->split_.lengths_[it->idx_];
|
|
||||||
StoreBlockSwitch(code, it->idx_, storage_ix, storage);
|
|
||||||
}
|
|
||||||
--it->length_;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Per-meta-block encoding parameters (removed, left-hand side of the diff;
// the commit passes these three values as separate arguments instead).
struct EncodingParams {
|
|
||||||
// Stored in the stream as 4 bits after shifting right by
// distance_postfix_bits; set to 12 in font mode and 0 otherwise.
int num_direct_distance_codes;
|
|
||||||
// Stored in the stream as 2 bits; set to 1 in font mode and 0 otherwise.
int distance_postfix_bits;
|
|
||||||
// Context mode assigned to every literal block type.
int literal_context_mode;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Everything needed to serialize one meta-block; filled in by
// BuildMetaBlock(). (Removed, left-hand side of the diff; the commit uses
// MetaBlockSplit from "./metablock.h" instead.)
struct MetaBlock {
|
|
||||||
// Copy of the input commands with recomputed distance prefixes.
std::vector<Command> cmds;
|
|
||||||
// Copy of the parameters the meta-block was built with.
EncodingParams params;
|
|
||||||
// Block splits of the three symbol categories, produced by SplitBlock().
BlockSplit literal_split;
|
|
||||||
BlockSplit command_split;
|
|
||||||
BlockSplit distance_split;
|
|
||||||
// One context mode per literal block type.
std::vector<int> literal_context_modes;
|
|
||||||
// Context maps produced by ClusterHistograms(); they map a
// (block type, context) index to a histogram index.
std::vector<int> literal_context_map;
|
|
||||||
std::vector<int> distance_context_map;
|
|
||||||
// Literal and distance histograms are the clustered ones; command
// histograms are one per command block type.
std::vector<HistogramLiteral> literal_histograms;
|
|
||||||
std::vector<HistogramCommand> command_histograms;
|
|
||||||
std::vector<HistogramDistance> distance_histograms;
|
|
||||||
};
|
|
||||||
|
|
||||||
void BuildMetaBlock(const EncodingParams& params,
|
|
||||||
const std::vector<Command>& cmds,
|
|
||||||
const uint8_t* ringbuffer,
|
|
||||||
const size_t pos,
|
|
||||||
const size_t mask,
|
|
||||||
MetaBlock* mb) {
|
|
||||||
mb->cmds = cmds;
|
|
||||||
mb->params = params;
|
|
||||||
if (cmds.empty()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
RecomputeDistancePrefixes(&mb->cmds,
|
|
||||||
mb->params.num_direct_distance_codes,
|
|
||||||
mb->params.distance_postfix_bits);
|
|
||||||
SplitBlock(mb->cmds,
|
|
||||||
&ringbuffer[pos & mask],
|
|
||||||
&mb->literal_split,
|
|
||||||
&mb->command_split,
|
|
||||||
&mb->distance_split);
|
|
||||||
|
|
||||||
mb->literal_context_modes.resize(mb->literal_split.num_types_,
|
|
||||||
mb->params.literal_context_mode);
|
|
||||||
|
|
||||||
|
|
||||||
int num_literal_contexts =
|
|
||||||
mb->literal_split.num_types_ << kLiteralContextBits;
|
|
||||||
int num_distance_contexts =
|
|
||||||
mb->distance_split.num_types_ << kDistanceContextBits;
|
|
||||||
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
|
|
||||||
mb->command_histograms.resize(mb->command_split.num_types_);
|
|
||||||
std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
|
|
||||||
BuildHistograms(mb->cmds,
|
|
||||||
mb->literal_split,
|
|
||||||
mb->command_split,
|
|
||||||
mb->distance_split,
|
|
||||||
ringbuffer,
|
|
||||||
pos,
|
|
||||||
mask,
|
|
||||||
mb->literal_context_modes,
|
|
||||||
&literal_histograms,
|
|
||||||
&mb->command_histograms,
|
|
||||||
&distance_histograms);
|
|
||||||
|
|
||||||
// Histogram ids need to fit in one byte.
|
|
||||||
static const int kMaxNumberOfHistograms = 256;
|
|
||||||
|
|
||||||
mb->literal_histograms = literal_histograms;
|
|
||||||
ClusterHistograms(literal_histograms,
|
|
||||||
1 << kLiteralContextBits,
|
|
||||||
mb->literal_split.num_types_,
|
|
||||||
kMaxNumberOfHistograms,
|
|
||||||
&mb->literal_histograms,
|
|
||||||
&mb->literal_context_map);
|
|
||||||
|
|
||||||
mb->distance_histograms = distance_histograms;
|
|
||||||
ClusterHistograms(distance_histograms,
|
|
||||||
1 << kDistanceContextBits,
|
|
||||||
mb->distance_split.num_types_,
|
|
||||||
kMaxNumberOfHistograms,
|
|
||||||
&mb->distance_histograms,
|
|
||||||
&mb->distance_context_map);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t MetaBlockLength(const std::vector<Command>& cmds) {
|
size_t MetaBlockLength(const std::vector<Command>& cmds) {
|
||||||
size_t length = 0;
|
size_t length = 0;
|
||||||
for (int i = 0; i < cmds.size(); ++i) {
|
for (int i = 0; i < cmds.size(); ++i) {
|
||||||
@ -300,100 +139,6 @@ size_t MetaBlockLength(const std::vector<Command>& cmds) {
|
|||||||
return length;
|
return length;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool StoreMetaBlock(const MetaBlock& mb,
|
|
||||||
const bool is_last,
|
|
||||||
const uint8_t* ringbuffer,
|
|
||||||
const size_t mask,
|
|
||||||
size_t* pos,
|
|
||||||
int* storage_ix, uint8_t* storage) {
|
|
||||||
size_t length = MetaBlockLength(mb.cmds);
|
|
||||||
const size_t end_pos = *pos + length;
|
|
||||||
if (!StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (length == 0) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
BlockSplitCode literal_split_code;
|
|
||||||
BlockSplitCode command_split_code;
|
|
||||||
BlockSplitCode distance_split_code;
|
|
||||||
BuildAndStoreBlockSplitCode(mb.literal_split.types_,
|
|
||||||
mb.literal_split.lengths_,
|
|
||||||
mb.literal_split.num_types_,
|
|
||||||
9, // quality
|
|
||||||
&literal_split_code,
|
|
||||||
storage_ix, storage);
|
|
||||||
BuildAndStoreBlockSplitCode(mb.command_split.types_,
|
|
||||||
mb.command_split.lengths_,
|
|
||||||
mb.command_split.num_types_,
|
|
||||||
9, // quality
|
|
||||||
&command_split_code,
|
|
||||||
storage_ix, storage);
|
|
||||||
BuildAndStoreBlockSplitCode(mb.distance_split.types_,
|
|
||||||
mb.distance_split.lengths_,
|
|
||||||
mb.distance_split.num_types_,
|
|
||||||
9, // quality
|
|
||||||
&distance_split_code,
|
|
||||||
storage_ix, storage);
|
|
||||||
WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
|
|
||||||
WriteBits(4,
|
|
||||||
mb.params.num_direct_distance_codes >>
|
|
||||||
mb.params.distance_postfix_bits,
|
|
||||||
storage_ix, storage);
|
|
||||||
int num_distance_codes =
|
|
||||||
kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
|
|
||||||
(48 << mb.params.distance_postfix_bits);
|
|
||||||
for (int i = 0; i < mb.literal_split.num_types_; ++i) {
|
|
||||||
WriteBits(2, mb.literal_context_modes[i], storage_ix, storage);
|
|
||||||
}
|
|
||||||
EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(),
|
|
||||||
storage_ix, storage);
|
|
||||||
EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(),
|
|
||||||
storage_ix, storage);
|
|
||||||
std::vector<EntropyCodeLiteral> literal_codes;
|
|
||||||
std::vector<EntropyCodeCommand> command_codes;
|
|
||||||
std::vector<EntropyCodeDistance> distance_codes;
|
|
||||||
BuildAndStoreEntropyCodes(mb.literal_histograms, 256, &literal_codes,
|
|
||||||
storage_ix, storage);
|
|
||||||
BuildAndStoreEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
|
|
||||||
&command_codes, storage_ix, storage);
|
|
||||||
BuildAndStoreEntropyCodes(mb.distance_histograms, num_distance_codes,
|
|
||||||
&distance_codes, storage_ix, storage);
|
|
||||||
BlockSplitIterator literal_it(mb.literal_split);
|
|
||||||
BlockSplitIterator command_it(mb.command_split);
|
|
||||||
BlockSplitIterator distance_it(mb.distance_split);
|
|
||||||
for (int i = 0; i < mb.cmds.size(); ++i) {
|
|
||||||
const Command& cmd = mb.cmds[i];
|
|
||||||
MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
|
|
||||||
EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
|
|
||||||
for (int j = 0; j < cmd.insert_len_; ++j) {
|
|
||||||
MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
|
|
||||||
int histogram_idx = literal_it.type_;
|
|
||||||
uint8_t prev_byte = *pos > 0 ? ringbuffer[(*pos - 1) & mask] : 0;
|
|
||||||
uint8_t prev_byte2 = *pos > 1 ? ringbuffer[(*pos - 2) & mask] : 0;
|
|
||||||
int context = ((literal_it.type_ << kLiteralContextBits) +
|
|
||||||
Context(prev_byte, prev_byte2,
|
|
||||||
mb.literal_context_modes[literal_it.type_]));
|
|
||||||
histogram_idx = mb.literal_context_map[context];
|
|
||||||
int literal = ringbuffer[*pos & mask];
|
|
||||||
WriteBits(literal_codes[histogram_idx].depth_[literal],
|
|
||||||
literal_codes[histogram_idx].bits_[literal],
|
|
||||||
storage_ix, storage);
|
|
||||||
++(*pos);
|
|
||||||
}
|
|
||||||
if (*pos < end_pos && cmd.cmd_prefix_ >= 128) {
|
|
||||||
MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
|
|
||||||
int context = (distance_it.type_ << 2) + cmd.DistanceContext();
|
|
||||||
int histogram_index = mb.distance_context_map[context];
|
|
||||||
EncodeCopyDistance(cmd, distance_codes[histogram_index],
|
|
||||||
storage_ix, storage);
|
|
||||||
}
|
|
||||||
*pos += cmd.copy_len_;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
BrotliCompressor::BrotliCompressor(BrotliParams params)
|
BrotliCompressor::BrotliCompressor(BrotliParams params)
|
||||||
: params_(params),
|
: params_(params),
|
||||||
window_bits_(kWindowBits),
|
window_bits_(kWindowBits),
|
||||||
@ -499,20 +244,40 @@ bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
|
|||||||
commands.push_back(Command(last_insert_len));
|
commands.push_back(Command(last_insert_len));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EncodingParams params;
|
int num_direct_distance_codes =
|
||||||
params.num_direct_distance_codes =
|
|
||||||
params_.mode == BrotliParams::MODE_FONT ? 12 : 0;
|
params_.mode == BrotliParams::MODE_FONT ? 12 : 0;
|
||||||
params.distance_postfix_bits =
|
int distance_postfix_bits = params_.mode == BrotliParams::MODE_FONT ? 1 : 0;
|
||||||
params_.mode == BrotliParams::MODE_FONT ? 1 : 0;
|
int literal_context_mode = CONTEXT_SIGNED;
|
||||||
params.literal_context_mode = CONTEXT_SIGNED;
|
|
||||||
const int storage_ix0 = storage_ix_;
|
const int storage_ix0 = storage_ix_;
|
||||||
MetaBlock mb;
|
MetaBlockSplit mb;
|
||||||
BuildMetaBlock(params, commands, ringbuffer_.start(), input_pos_,
|
size_t len = MetaBlockLength(commands);
|
||||||
kRingBufferMask, &mb);
|
if (!commands.empty()) {
|
||||||
if (!StoreMetaBlock(mb, is_last, ringbuffer_.start(), kRingBufferMask,
|
if (params_.greedy_block_split) {
|
||||||
&input_pos_, &storage_ix_, storage_)) {
|
BuildMetaBlockGreedy(ringbuffer_.start(), input_pos_, kRingBufferMask,
|
||||||
|
commands.data(), commands.size(), 9, &mb);
|
||||||
|
} else {
|
||||||
|
RecomputeDistancePrefixes(&commands,
|
||||||
|
num_direct_distance_codes,
|
||||||
|
distance_postfix_bits);
|
||||||
|
BuildMetaBlock(ringbuffer_.start(), input_pos_, kRingBufferMask,
|
||||||
|
commands,
|
||||||
|
num_direct_distance_codes,
|
||||||
|
distance_postfix_bits,
|
||||||
|
literal_context_mode,
|
||||||
|
&mb);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!StoreMetaBlock(ringbuffer_.start(), input_pos_, len, kRingBufferMask,
|
||||||
|
is_last, 9,
|
||||||
|
num_direct_distance_codes,
|
||||||
|
distance_postfix_bits,
|
||||||
|
literal_context_mode,
|
||||||
|
commands.data(), commands.size(),
|
||||||
|
mb,
|
||||||
|
&storage_ix_, storage_)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
input_pos_ += len;
|
||||||
size_t output_size = is_last ? ((storage_ix_ + 7) >> 3) : (storage_ix_ >> 3);
|
size_t output_size = is_last ? ((storage_ix_ + 7) >> 3) : (storage_ix_ >> 3);
|
||||||
output_size -= (storage_ix0 >> 3);
|
output_size -= (storage_ix0 >> 3);
|
||||||
if (input_size + 4 < output_size) {
|
if (input_size + 4 < output_size) {
|
||||||
|
@ -28,6 +28,11 @@
|
|||||||
namespace brotli {
|
namespace brotli {
|
||||||
|
|
||||||
struct BrotliParams {
|
struct BrotliParams {
|
||||||
|
BrotliParams()
|
||||||
|
: mode(MODE_TEXT),
|
||||||
|
enable_transforms(false),
|
||||||
|
greedy_block_split(false) {}
|
||||||
|
|
||||||
enum Mode {
|
enum Mode {
|
||||||
MODE_TEXT = 0,
|
MODE_TEXT = 0,
|
||||||
MODE_FONT = 1,
|
MODE_FONT = 1,
|
||||||
@ -35,8 +40,7 @@ struct BrotliParams {
|
|||||||
Mode mode;
|
Mode mode;
|
||||||
|
|
||||||
bool enable_transforms;
|
bool enable_transforms;
|
||||||
|
bool greedy_block_split;
|
||||||
BrotliParams() : mode(MODE_TEXT), enable_transforms(false) {}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class BrotliCompressor {
|
class BrotliCompressor {
|
||||||
|
283
enc/metablock.cc
Normal file
283
enc/metablock.cc
Normal file
@ -0,0 +1,283 @@
|
|||||||
|
// Copyright 2015 Google Inc. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Algorithms for distributing the literals and commands of a metablock between
|
||||||
|
// block types and contexts.
|
||||||
|
|
||||||
|
#include "./metablock.h"
|
||||||
|
|
||||||
|
#include "./block_splitter.h"
|
||||||
|
#include "./cluster.h"
|
||||||
|
#include "./histogram.h"
|
||||||
|
|
||||||
|
namespace brotli {
|
||||||
|
|
||||||
|
void BuildMetaBlock(const uint8_t* ringbuffer,
|
||||||
|
const size_t pos,
|
||||||
|
const size_t mask,
|
||||||
|
const std::vector<Command>& cmds,
|
||||||
|
int num_direct_distance_codes,
|
||||||
|
int distance_postfix_bits,
|
||||||
|
int literal_context_mode,
|
||||||
|
MetaBlockSplit* mb) {
|
||||||
|
SplitBlock(cmds,
|
||||||
|
&ringbuffer[pos & mask],
|
||||||
|
&mb->literal_split,
|
||||||
|
&mb->command_split,
|
||||||
|
&mb->distance_split);
|
||||||
|
|
||||||
|
std::vector<int> literal_context_modes(mb->literal_split.num_types,
|
||||||
|
literal_context_mode);
|
||||||
|
|
||||||
|
int num_literal_contexts =
|
||||||
|
mb->literal_split.num_types << kLiteralContextBits;
|
||||||
|
int num_distance_contexts =
|
||||||
|
mb->distance_split.num_types << kDistanceContextBits;
|
||||||
|
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
|
||||||
|
mb->command_histograms.resize(mb->command_split.num_types);
|
||||||
|
std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
|
||||||
|
BuildHistograms(cmds,
|
||||||
|
mb->literal_split,
|
||||||
|
mb->command_split,
|
||||||
|
mb->distance_split,
|
||||||
|
ringbuffer,
|
||||||
|
pos,
|
||||||
|
mask,
|
||||||
|
literal_context_modes,
|
||||||
|
&literal_histograms,
|
||||||
|
&mb->command_histograms,
|
||||||
|
&distance_histograms);
|
||||||
|
|
||||||
|
// Histogram ids need to fit in one byte.
|
||||||
|
static const int kMaxNumberOfHistograms = 256;
|
||||||
|
|
||||||
|
mb->literal_histograms = literal_histograms;
|
||||||
|
ClusterHistograms(literal_histograms,
|
||||||
|
1 << kLiteralContextBits,
|
||||||
|
mb->literal_split.num_types,
|
||||||
|
kMaxNumberOfHistograms,
|
||||||
|
&mb->literal_histograms,
|
||||||
|
&mb->literal_context_map);
|
||||||
|
|
||||||
|
mb->distance_histograms = distance_histograms;
|
||||||
|
ClusterHistograms(distance_histograms,
|
||||||
|
1 << kDistanceContextBits,
|
||||||
|
mb->distance_split.num_types,
|
||||||
|
kMaxNumberOfHistograms,
|
||||||
|
&mb->distance_histograms,
|
||||||
|
&mb->distance_context_map);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Greedy block splitter for one block category (literal, command or distance).
|
||||||
|
template<typename HistogramType>
|
||||||
|
class BlockSplitter {
|
||||||
|
public:
|
||||||
|
BlockSplitter(int alphabet_size,
|
||||||
|
int min_block_size,
|
||||||
|
double split_threshold,
|
||||||
|
int num_symbols,
|
||||||
|
int quality,
|
||||||
|
BlockSplit* split,
|
||||||
|
std::vector<HistogramType>* histograms)
|
||||||
|
: alphabet_size_(alphabet_size),
|
||||||
|
min_block_size_(min_block_size),
|
||||||
|
split_threshold_(split_threshold),
|
||||||
|
quality_(quality),
|
||||||
|
num_blocks_(0),
|
||||||
|
split_(split),
|
||||||
|
histograms_(histograms),
|
||||||
|
target_block_size_(min_block_size),
|
||||||
|
block_size_(0),
|
||||||
|
curr_histogram_ix_(0),
|
||||||
|
merge_last_count_(0) {
|
||||||
|
int max_num_blocks = num_symbols / min_block_size + 1;
|
||||||
|
// We have to allocate one more histogram than the maximum number of block
|
||||||
|
// types for the current histogram when the meta-block is too big.
|
||||||
|
int max_num_types = std::min(max_num_blocks, kMaxBlockTypes + 1);
|
||||||
|
split_->lengths.resize(max_num_blocks);
|
||||||
|
split_->types.resize(max_num_blocks);
|
||||||
|
histograms_->resize(max_num_types);
|
||||||
|
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adds the next symbol to the current histogram. When the current histogram
|
||||||
|
// reaches the target size, decides on merging the block.
|
||||||
|
void AddSymbol(int symbol) {
|
||||||
|
(*histograms_)[curr_histogram_ix_].Add(symbol);
|
||||||
|
++block_size_;
|
||||||
|
if (block_size_ == target_block_size_) {
|
||||||
|
FinishBlock(/* is_final = */ false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Does either of three things:
|
||||||
|
// (1) emits the current block with a new block type;
|
||||||
|
// (2) emits the current block with the type of the second last block;
|
||||||
|
// (3) merges the current block with the last block.
|
||||||
|
void FinishBlock(bool is_final) {
|
||||||
|
if (block_size_ < min_block_size_) {
|
||||||
|
block_size_ = min_block_size_;
|
||||||
|
}
|
||||||
|
if (num_blocks_ == 0) {
|
||||||
|
// Create first block.
|
||||||
|
split_->lengths[0] = block_size_;
|
||||||
|
split_->types[0] = 0;
|
||||||
|
last_entropy_[0] =
|
||||||
|
BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_);
|
||||||
|
last_entropy_[1] = last_entropy_[0];
|
||||||
|
++num_blocks_;
|
||||||
|
++split_->num_types;
|
||||||
|
++curr_histogram_ix_;
|
||||||
|
block_size_ = 0;
|
||||||
|
} else if (block_size_ > 0) {
|
||||||
|
double entropy = BitsEntropy(&(*histograms_)[curr_histogram_ix_].data_[0],
|
||||||
|
alphabet_size_);
|
||||||
|
HistogramType combined_histo[2];
|
||||||
|
double combined_entropy[2];
|
||||||
|
double diff[2];
|
||||||
|
for (int j = 0; j < 2; ++j) {
|
||||||
|
int last_histogram_ix = last_histogram_ix_[j];
|
||||||
|
combined_histo[j] = (*histograms_)[curr_histogram_ix_];
|
||||||
|
combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]);
|
||||||
|
combined_entropy[j] = BitsEntropy(
|
||||||
|
&combined_histo[j].data_[0], alphabet_size_);
|
||||||
|
diff[j] = combined_entropy[j] - entropy - last_entropy_[j];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (split_->num_types < kMaxBlockTypes &&
|
||||||
|
diff[0] > split_threshold_ &&
|
||||||
|
diff[1] > split_threshold_) {
|
||||||
|
// Create new block.
|
||||||
|
split_->lengths[num_blocks_] = block_size_;
|
||||||
|
split_->types[num_blocks_] = split_->num_types;
|
||||||
|
last_histogram_ix_[1] = last_histogram_ix_[0];
|
||||||
|
last_histogram_ix_[0] = split_->num_types;
|
||||||
|
last_entropy_[1] = last_entropy_[0];
|
||||||
|
last_entropy_[0] = entropy;
|
||||||
|
++num_blocks_;
|
||||||
|
++split_->num_types;
|
||||||
|
++curr_histogram_ix_;
|
||||||
|
block_size_ = 0;
|
||||||
|
merge_last_count_ = 0;
|
||||||
|
target_block_size_ = min_block_size_;
|
||||||
|
} else if (diff[1] < diff[0] - 20.0) {
|
||||||
|
// Combine this block with second last block.
|
||||||
|
split_->lengths[num_blocks_] = block_size_;
|
||||||
|
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
|
||||||
|
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
|
||||||
|
(*histograms_)[last_histogram_ix_[0]] = combined_histo[1];
|
||||||
|
last_entropy_[1] = last_entropy_[0];
|
||||||
|
last_entropy_[0] = combined_entropy[1];
|
||||||
|
++num_blocks_;
|
||||||
|
block_size_ = 0;
|
||||||
|
(*histograms_)[curr_histogram_ix_].Clear();
|
||||||
|
merge_last_count_ = 0;
|
||||||
|
target_block_size_ = min_block_size_;
|
||||||
|
} else {
|
||||||
|
// Combine this block with last block.
|
||||||
|
split_->lengths[num_blocks_ - 1] += block_size_;
|
||||||
|
(*histograms_)[last_histogram_ix_[0]] = combined_histo[0];
|
||||||
|
last_entropy_[0] = combined_entropy[0];
|
||||||
|
if (split_->num_types == 1) {
|
||||||
|
last_entropy_[1] = last_entropy_[0];
|
||||||
|
}
|
||||||
|
block_size_ = 0;
|
||||||
|
(*histograms_)[curr_histogram_ix_].Clear();
|
||||||
|
if (++merge_last_count_ > 1) {
|
||||||
|
target_block_size_ += min_block_size_;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (is_final) {
|
||||||
|
(*histograms_).resize(split_->num_types);
|
||||||
|
split_->types.resize(num_blocks_);
|
||||||
|
split_->lengths.resize(num_blocks_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static const int kMaxBlockTypes = 256;
|
||||||
|
|
||||||
|
// Alphabet size of particular block category.
|
||||||
|
const int alphabet_size_;
|
||||||
|
// We collect at least this many symbols for each block.
|
||||||
|
const int min_block_size_;
|
||||||
|
// We merge histograms A and B if
|
||||||
|
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
|
||||||
|
// where A is the current histogram and B is the histogram of the last or the
|
||||||
|
// second last block type.
|
||||||
|
const double split_threshold_;
|
||||||
|
// Quality setting used for speed vs. compression ratio decisions.
|
||||||
|
const int quality_;
|
||||||
|
|
||||||
|
int num_blocks_;
|
||||||
|
BlockSplit* split_; // not owned
|
||||||
|
std::vector<HistogramType>* histograms_; // not owned
|
||||||
|
|
||||||
|
// The number of symbols that we want to collect before deciding on whether
|
||||||
|
// or not to merge the block with a previous one or emit a new block.
|
||||||
|
int target_block_size_;
|
||||||
|
// The number of symbols in the current histogram.
|
||||||
|
int block_size_;
|
||||||
|
// Offset of the current histogram.
|
||||||
|
int curr_histogram_ix_;
|
||||||
|
// Offset of the histograms of the previous two block types.
|
||||||
|
int last_histogram_ix_[2];
|
||||||
|
// Entropy of the previous two block types.
|
||||||
|
double last_entropy_[2];
|
||||||
|
// The number of times we merged the current block with the last one.
|
||||||
|
int merge_last_count_;
|
||||||
|
};
|
||||||
|
|
||||||
|
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
|
||||||
|
size_t pos,
|
||||||
|
size_t mask,
|
||||||
|
const Command *commands,
|
||||||
|
size_t n_commands,
|
||||||
|
int quality,
|
||||||
|
MetaBlockSplit* mb) {
|
||||||
|
int num_literals = 0;
|
||||||
|
for (int i = 0; i < n_commands; ++i) {
|
||||||
|
num_literals += commands[i].insert_len_;
|
||||||
|
}
|
||||||
|
|
||||||
|
BlockSplitter<HistogramLiteral> lit_blocks(
|
||||||
|
256, 512, 400.0, num_literals, quality,
|
||||||
|
&mb->literal_split, &mb->literal_histograms);
|
||||||
|
BlockSplitter<HistogramCommand> cmd_blocks(
|
||||||
|
kNumCommandPrefixes, 1024, 500.0, n_commands, quality,
|
||||||
|
&mb->command_split, &mb->command_histograms);
|
||||||
|
BlockSplitter<HistogramDistance> dist_blocks(
|
||||||
|
64, 512, 100.0, n_commands, quality,
|
||||||
|
&mb->distance_split, &mb->distance_histograms);
|
||||||
|
|
||||||
|
for (int i = 0; i < n_commands; ++i) {
|
||||||
|
const Command cmd = commands[i];
|
||||||
|
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
|
||||||
|
for (int j = 0; j < cmd.insert_len_; ++j) {
|
||||||
|
lit_blocks.AddSymbol(ringbuffer[pos & mask]);
|
||||||
|
++pos;
|
||||||
|
}
|
||||||
|
pos += cmd.copy_len_;
|
||||||
|
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
|
||||||
|
dist_blocks.AddSymbol(cmd.dist_prefix_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lit_blocks.FinishBlock(/* is_final = */ true);
|
||||||
|
cmd_blocks.FinishBlock(/* is_final = */ true);
|
||||||
|
dist_blocks.FinishBlock(/* is_final = */ true);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace brotli
|
66
enc/metablock.h
Normal file
66
enc/metablock.h
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
// Copyright 2015 Google Inc. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Algorithms for distributing the literals and commands of a metablock between
|
||||||
|
// block types and contexts.
|
||||||
|
|
||||||
|
#ifndef BROTLI_ENC_METABLOCK_H_
|
||||||
|
#define BROTLI_ENC_METABLOCK_H_
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "./command.h"
|
||||||
|
#include "./histogram.h"
|
||||||
|
|
||||||
|
namespace brotli {
|
||||||
|
|
||||||
|
// Describes how the symbols of one category are divided into blocks.
// The field notes below follow how BlockSplitter in metablock.cc fills this
// struct.
struct BlockSplit {
|
||||||
|
// Starts with zero block types; both vectors start empty.
BlockSplit() : num_types(0) {}
|
||||||
|
|
||||||
|
// Number of distinct block types used by the split.
int num_types;
|
||||||
|
// types[i] is the block type id of the i-th block.
std::vector<int> types;
|
||||||
|
// lengths[i] is the number of symbols in the i-th block.
std::vector<int> lengths;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Result of BuildMetaBlock() / BuildMetaBlockGreedy(): the block splits of the
// three symbol categories of a meta-block plus the histograms and context
// maps that belong to them.
struct MetaBlockSplit {
|
||||||
|
// Block split of the literals.
BlockSplit literal_split;
|
||||||
|
// Block split of the commands.
BlockSplit command_split;
|
||||||
|
// Block split of the distances.
BlockSplit distance_split;
|
||||||
|
// Written by ClusterHistograms() in BuildMetaBlock(); BuildMetaBlockGreedy()
// does not write it.
// NOTE(review): presumably maps (block type << kLiteralContextBits) + context
// to an index into literal_histograms -- confirm against the consumer.
std::vector<int> literal_context_map;
|
||||||
|
// Written by ClusterHistograms() in BuildMetaBlock(); BuildMetaBlockGreedy()
// does not write it.
// NOTE(review): presumably maps (block type << kDistanceContextBits) + context
// to an index into distance_histograms -- confirm against the consumer.
std::vector<int> distance_context_map;
|
||||||
|
// Literal histograms (clustered in BuildMetaBlock(), one per literal block
// type in BuildMetaBlockGreedy()).
std::vector<HistogramLiteral> literal_histograms;
|
||||||
|
// Command histograms, one per command block type.
std::vector<HistogramCommand> command_histograms;
|
||||||
|
// Distance histograms (clustered in BuildMetaBlock(), one per distance block
// type in BuildMetaBlockGreedy()).
std::vector<HistogramDistance> distance_histograms;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Computes the block splits, histograms and context maps of the meta-block
// that starts at ringbuffer[pos & mask] and consists of the commands in
// |cmds|, and stores them in |mb|. |literal_context_mode| is applied to every
// literal block type. The implementation in metablock.cc does not read
// |num_direct_distance_codes| or |distance_postfix_bits|.
void BuildMetaBlock(const uint8_t* ringbuffer,
|
||||||
|
const size_t pos,
|
||||||
|
const size_t mask,
|
||||||
|
const std::vector<Command>& cmds,
|
||||||
|
int num_direct_distance_codes,
|
||||||
|
int distance_postfix_bits,
|
||||||
|
int literal_context_mode,
|
||||||
|
MetaBlockSplit* mb);
|
||||||
|
|
||||||
|
// Greedy variant of BuildMetaBlock(): makes a single pass over the
// |n_commands| commands in |commands| and fills the three block splits and the
// three histogram vectors of |mb|. The context maps of |mb| are not written.
// Literals are read from ringbuffer[p & mask], starting at p == pos.
// |quality| is forwarded to the internal block splitters.
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
|
||||||
|
size_t pos,
|
||||||
|
size_t mask,
|
||||||
|
const Command *commands,
|
||||||
|
size_t n_commands,
|
||||||
|
int quality,
|
||||||
|
MetaBlockSplit* mb);
|
||||||
|
|
||||||
|
} // namespace brotli
|
||||||
|
|
||||||
|
#endif // BROTLI_ENC_METABLOCK_H_
|
Loading…
Reference in New Issue
Block a user