mirror of
https://github.com/google/brotli.git
synced 2024-11-29 14:41:05 +00:00
89a6fb85fb
Disable all slow features for quality <= 9 (literal cost modeling, dictionary, context modeling, advanced block splitting). Change vector<Command> arguments of internal functions to Command* and size_t.
304 lines
11 KiB
C++
304 lines
11 KiB
C++
// Copyright 2015 Google Inc. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
// Algorithms for distributing the literals and commands of a metablock between
|
|
// block types and contexts.
|
|
|
|
#include "./metablock.h"
|
|
|
|
#include "./block_splitter.h"
|
|
#include "./cluster.h"
|
|
#include "./histogram.h"
|
|
|
|
namespace brotli {
|
|
|
|
void BuildMetaBlock(const uint8_t* ringbuffer,
|
|
const size_t pos,
|
|
const size_t mask,
|
|
const Command* cmds,
|
|
size_t num_commands,
|
|
int num_direct_distance_codes,
|
|
int distance_postfix_bits,
|
|
int literal_context_mode,
|
|
bool enable_context_modeling,
|
|
MetaBlockSplit* mb) {
|
|
SplitBlock(cmds, num_commands,
|
|
&ringbuffer[pos & mask],
|
|
&mb->literal_split,
|
|
&mb->command_split,
|
|
&mb->distance_split);
|
|
|
|
std::vector<int> literal_context_modes(mb->literal_split.num_types,
|
|
literal_context_mode);
|
|
|
|
int num_literal_contexts =
|
|
mb->literal_split.num_types << kLiteralContextBits;
|
|
int num_distance_contexts =
|
|
mb->distance_split.num_types << kDistanceContextBits;
|
|
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
|
|
mb->command_histograms.resize(mb->command_split.num_types);
|
|
std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
|
|
BuildHistograms(cmds, num_commands,
|
|
mb->literal_split,
|
|
mb->command_split,
|
|
mb->distance_split,
|
|
ringbuffer,
|
|
pos,
|
|
mask,
|
|
literal_context_modes,
|
|
&literal_histograms,
|
|
&mb->command_histograms,
|
|
&distance_histograms);
|
|
|
|
// Histogram ids need to fit in one byte.
|
|
static const int kMaxNumberOfHistograms = 256;
|
|
|
|
mb->literal_histograms = literal_histograms;
|
|
if (enable_context_modeling) {
|
|
ClusterHistograms(literal_histograms,
|
|
1 << kLiteralContextBits,
|
|
mb->literal_split.num_types,
|
|
kMaxNumberOfHistograms,
|
|
&mb->literal_histograms,
|
|
&mb->literal_context_map);
|
|
} else {
|
|
ClusterHistogramsTrivial(literal_histograms,
|
|
1 << kLiteralContextBits,
|
|
mb->literal_split.num_types,
|
|
kMaxNumberOfHistograms,
|
|
&mb->literal_histograms,
|
|
&mb->literal_context_map);
|
|
}
|
|
|
|
mb->distance_histograms = distance_histograms;
|
|
if (enable_context_modeling) {
|
|
ClusterHistograms(distance_histograms,
|
|
1 << kDistanceContextBits,
|
|
mb->distance_split.num_types,
|
|
kMaxNumberOfHistograms,
|
|
&mb->distance_histograms,
|
|
&mb->distance_context_map);
|
|
} else {
|
|
ClusterHistogramsTrivial(distance_histograms,
|
|
1 << kDistanceContextBits,
|
|
mb->distance_split.num_types,
|
|
kMaxNumberOfHistograms,
|
|
&mb->distance_histograms,
|
|
&mb->distance_context_map);
|
|
}
|
|
}
|
|
|
|
// Greedy block splitter for one block category (literal, command or distance).
|
|
template<typename HistogramType>
|
|
class BlockSplitter {
|
|
public:
|
|
BlockSplitter(int alphabet_size,
|
|
int min_block_size,
|
|
double split_threshold,
|
|
int num_symbols,
|
|
int quality,
|
|
BlockSplit* split,
|
|
std::vector<HistogramType>* histograms)
|
|
: alphabet_size_(alphabet_size),
|
|
min_block_size_(min_block_size),
|
|
split_threshold_(split_threshold),
|
|
quality_(quality),
|
|
num_blocks_(0),
|
|
split_(split),
|
|
histograms_(histograms),
|
|
target_block_size_(min_block_size),
|
|
block_size_(0),
|
|
curr_histogram_ix_(0),
|
|
merge_last_count_(0) {
|
|
int max_num_blocks = num_symbols / min_block_size + 1;
|
|
// We have to allocate one more histogram than the maximum number of block
|
|
// types for the current histogram when the meta-block is too big.
|
|
int max_num_types = std::min(max_num_blocks, kMaxBlockTypes + 1);
|
|
split_->lengths.resize(max_num_blocks);
|
|
split_->types.resize(max_num_blocks);
|
|
histograms_->resize(max_num_types);
|
|
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
|
|
}
|
|
|
|
// Adds the next symbol to the current histogram. When the current histogram
|
|
// reaches the target size, decides on merging the block.
|
|
void AddSymbol(int symbol) {
|
|
(*histograms_)[curr_histogram_ix_].Add(symbol);
|
|
++block_size_;
|
|
if (block_size_ == target_block_size_) {
|
|
FinishBlock(/* is_final = */ false);
|
|
}
|
|
}
|
|
|
|
// Does either of three things:
|
|
// (1) emits the current block with a new block type;
|
|
// (2) emits the current block with the type of the second last block;
|
|
// (3) merges the current block with the last block.
|
|
void FinishBlock(bool is_final) {
|
|
if (block_size_ < min_block_size_) {
|
|
block_size_ = min_block_size_;
|
|
}
|
|
if (num_blocks_ == 0) {
|
|
// Create first block.
|
|
split_->lengths[0] = block_size_;
|
|
split_->types[0] = 0;
|
|
last_entropy_[0] =
|
|
BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_);
|
|
last_entropy_[1] = last_entropy_[0];
|
|
++num_blocks_;
|
|
++split_->num_types;
|
|
++curr_histogram_ix_;
|
|
block_size_ = 0;
|
|
} else if (block_size_ > 0) {
|
|
double entropy = BitsEntropy(&(*histograms_)[curr_histogram_ix_].data_[0],
|
|
alphabet_size_);
|
|
HistogramType combined_histo[2];
|
|
double combined_entropy[2];
|
|
double diff[2];
|
|
for (int j = 0; j < 2; ++j) {
|
|
int last_histogram_ix = last_histogram_ix_[j];
|
|
combined_histo[j] = (*histograms_)[curr_histogram_ix_];
|
|
combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]);
|
|
combined_entropy[j] = BitsEntropy(
|
|
&combined_histo[j].data_[0], alphabet_size_);
|
|
diff[j] = combined_entropy[j] - entropy - last_entropy_[j];
|
|
}
|
|
|
|
if (split_->num_types < kMaxBlockTypes &&
|
|
diff[0] > split_threshold_ &&
|
|
diff[1] > split_threshold_) {
|
|
// Create new block.
|
|
split_->lengths[num_blocks_] = block_size_;
|
|
split_->types[num_blocks_] = split_->num_types;
|
|
last_histogram_ix_[1] = last_histogram_ix_[0];
|
|
last_histogram_ix_[0] = split_->num_types;
|
|
last_entropy_[1] = last_entropy_[0];
|
|
last_entropy_[0] = entropy;
|
|
++num_blocks_;
|
|
++split_->num_types;
|
|
++curr_histogram_ix_;
|
|
block_size_ = 0;
|
|
merge_last_count_ = 0;
|
|
target_block_size_ = min_block_size_;
|
|
} else if (diff[1] < diff[0] - 20.0) {
|
|
// Combine this block with second last block.
|
|
split_->lengths[num_blocks_] = block_size_;
|
|
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
|
|
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
|
|
(*histograms_)[last_histogram_ix_[0]] = combined_histo[1];
|
|
last_entropy_[1] = last_entropy_[0];
|
|
last_entropy_[0] = combined_entropy[1];
|
|
++num_blocks_;
|
|
block_size_ = 0;
|
|
(*histograms_)[curr_histogram_ix_].Clear();
|
|
merge_last_count_ = 0;
|
|
target_block_size_ = min_block_size_;
|
|
} else {
|
|
// Combine this block with last block.
|
|
split_->lengths[num_blocks_ - 1] += block_size_;
|
|
(*histograms_)[last_histogram_ix_[0]] = combined_histo[0];
|
|
last_entropy_[0] = combined_entropy[0];
|
|
if (split_->num_types == 1) {
|
|
last_entropy_[1] = last_entropy_[0];
|
|
}
|
|
block_size_ = 0;
|
|
(*histograms_)[curr_histogram_ix_].Clear();
|
|
if (++merge_last_count_ > 1) {
|
|
target_block_size_ += min_block_size_;
|
|
}
|
|
}
|
|
}
|
|
if (is_final) {
|
|
(*histograms_).resize(split_->num_types);
|
|
split_->types.resize(num_blocks_);
|
|
split_->lengths.resize(num_blocks_);
|
|
}
|
|
}
|
|
|
|
private:
|
|
static const int kMaxBlockTypes = 256;
|
|
|
|
// Alphabet size of particular block category.
|
|
const int alphabet_size_;
|
|
// We collect at least this many symbols for each block.
|
|
const int min_block_size_;
|
|
// We merge histograms A and B if
|
|
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
|
|
// where A is the current histogram and B is the histogram of the last or the
|
|
// second last block type.
|
|
const double split_threshold_;
|
|
// Quality setting used for speed vs. compression ratio decisions.
|
|
const int quality_;
|
|
|
|
int num_blocks_;
|
|
BlockSplit* split_; // not owned
|
|
std::vector<HistogramType>* histograms_; // not owned
|
|
|
|
// The number of symbols that we want to collect before deciding on whether
|
|
// or not to merge the block with a previous one or emit a new block.
|
|
int target_block_size_;
|
|
// The number of symbols in the current histogram.
|
|
int block_size_;
|
|
// Offset of the current histogram.
|
|
int curr_histogram_ix_;
|
|
// Offset of the histograms of the previous two block types.
|
|
int last_histogram_ix_[2];
|
|
// Entropy of the previous two block types.
|
|
double last_entropy_[2];
|
|
// The number of times we merged the current block with the last one.
|
|
int merge_last_count_;
|
|
};
|
|
|
|
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
|
|
size_t pos,
|
|
size_t mask,
|
|
const Command *commands,
|
|
size_t n_commands,
|
|
int quality,
|
|
MetaBlockSplit* mb) {
|
|
int num_literals = 0;
|
|
for (int i = 0; i < n_commands; ++i) {
|
|
num_literals += commands[i].insert_len_;
|
|
}
|
|
|
|
BlockSplitter<HistogramLiteral> lit_blocks(
|
|
256, 512, 400.0, num_literals, quality,
|
|
&mb->literal_split, &mb->literal_histograms);
|
|
BlockSplitter<HistogramCommand> cmd_blocks(
|
|
kNumCommandPrefixes, 1024, 500.0, n_commands, quality,
|
|
&mb->command_split, &mb->command_histograms);
|
|
BlockSplitter<HistogramDistance> dist_blocks(
|
|
64, 512, 100.0, n_commands, quality,
|
|
&mb->distance_split, &mb->distance_histograms);
|
|
|
|
for (int i = 0; i < n_commands; ++i) {
|
|
const Command cmd = commands[i];
|
|
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
|
|
for (int j = 0; j < cmd.insert_len_; ++j) {
|
|
lit_blocks.AddSymbol(ringbuffer[pos & mask]);
|
|
++pos;
|
|
}
|
|
pos += cmd.copy_len_;
|
|
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
|
|
dist_blocks.AddSymbol(cmd.dist_prefix_);
|
|
}
|
|
}
|
|
|
|
lit_blocks.FinishBlock(/* is_final = */ true);
|
|
cmd_blocks.FinishBlock(/* is_final = */ true);
|
|
dist_blocks.FinishBlock(/* is_final = */ true);
|
|
}
|
|
|
|
} // namespace brotli
|