Add params to disable static dictionary and context modeling.

Disable all slow features for quality <= 9 (literal cost modeling,
dictionary, context modeling, advanced block splitting).

Change vector<Command> arguments of internal functions
to Command* and size_t.
This commit is contained in:
Zoltan Szabadka 2015-04-23 13:15:42 +02:00
parent 0c81a1360b
commit 89a6fb85fb
11 changed files with 140 additions and 76 deletions

View File

@ -65,7 +65,9 @@ void CreateBackwardReferences(size_t num_bytes,
for (int k = position; k < position + num_bytes; ++k) {
average_cost += literal_cost[k & literal_cost_mask];
}
average_cost /= num_bytes;
if (num_bytes > 0) {
average_cost /= num_bytes;
}
}
// M1 match is for considering for two repeated copies, if moving

View File

@ -45,12 +45,13 @@ static const int kMinLengthForBlockSplitting = 128;
static const int kIterMulForRefining = 2;
static const int kMinItersForRefining = 100;
void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
void CopyLiteralsToByteArray(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
std::vector<uint8_t>* literals) {
// Count how many we have.
size_t total_length = 0;
for (int i = 0; i < cmds.size(); ++i) {
for (int i = 0; i < num_commands; ++i) {
total_length += cmds[i].insert_len_;
}
if (total_length == 0) {
@ -63,17 +64,18 @@ void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
// Loop again, and copy this time.
size_t pos = 0;
size_t from_pos = 0;
for (int i = 0; i < cmds.size() && pos < total_length; ++i) {
for (int i = 0; i < num_commands && pos < total_length; ++i) {
memcpy(&(*literals)[pos], data + from_pos, cmds[i].insert_len_);
pos += cmds[i].insert_len_;
from_pos += cmds[i].insert_len_ + cmds[i].copy_len_;
}
}
void CopyCommandsToByteArray(const std::vector<Command>& cmds,
void CopyCommandsToByteArray(const Command* cmds,
const size_t num_commands,
std::vector<uint16_t>* insert_and_copy_codes,
std::vector<uint8_t>* distance_prefixes) {
for (int i = 0; i < cmds.size(); ++i) {
for (int i = 0; i < num_commands; ++i) {
const Command& cmd = cmds[i];
insert_and_copy_codes->push_back(cmd.cmd_prefix_);
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
@ -340,19 +342,20 @@ void SplitByteVector(const std::vector<DataType>& data,
BuildBlockSplit(block_ids, split);
}
void SplitBlock(const std::vector<Command>& cmds,
void SplitBlock(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split) {
// Create a continuous array of literals.
std::vector<uint8_t> literals;
CopyLiteralsToByteArray(cmds, data, &literals);
CopyLiteralsToByteArray(cmds, num_commands, data, &literals);
// Compute prefix codes for commands.
std::vector<uint16_t> insert_and_copy_codes;
std::vector<uint8_t> distance_prefixes;
CopyCommandsToByteArray(cmds,
CopyCommandsToByteArray(cmds, num_commands,
&insert_and_copy_codes,
&distance_prefixes);
@ -373,7 +376,8 @@ void SplitBlock(const std::vector<Command>& cmds,
dist_split);
}
void SplitBlockByTotalLength(const std::vector<Command>& all_commands,
void SplitBlockByTotalLength(const Command* all_commands,
const size_t num_commands,
int input_size,
int target_length,
std::vector<std::vector<Command> >* blocks) {
@ -381,7 +385,7 @@ void SplitBlockByTotalLength(const std::vector<Command>& all_commands,
int length_limit = input_size / num_blocks + 1;
int total_length = 0;
std::vector<Command> cur_block;
for (int i = 0; i < all_commands.size(); ++i) {
for (int i = 0; i < num_commands; ++i) {
const Command& cmd = all_commands[i];
int cmd_length = cmd.insert_len_ + cmd.copy_len_;
if (total_length > length_limit) {

View File

@ -51,17 +51,20 @@ struct BlockSplitIterator {
int length_;
};
void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
void CopyLiteralsToByteArray(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
std::vector<uint8_t>* literals);
void SplitBlock(const std::vector<Command>& cmds,
void SplitBlock(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split);
void SplitBlockByTotalLength(const std::vector<Command>& all_commands,
void SplitBlockByTotalLength(const Command* all_commands,
const size_t num_commands,
int input_size,
int target_length,
std::vector<std::vector<Command> >* blocks);

View File

@ -244,6 +244,22 @@ void HistogramReindex(std::vector<HistogramType>* out,
}
}
// Non-clustering counterpart of ClusterHistograms with the same signature.
//
// The 'in' vector is read as num_blocks consecutive groups of num_contexts
// histograms. All histograms of group i are summed into (*out)[i], and
// (*histogram_symbols)[i * num_contexts + j] is set to i, so that every index
// of 'in' maps to the single output histogram of its block.
//
// Both outputs are fully overwritten: 'out' is resized to num_blocks and
// 'histogram_symbols' to num_blocks * num_contexts. Writing the map by index
// (instead of appending) keeps it aligned with 'in' even when the caller
// passes a vector that already holds entries.
//
// 'max_histograms' is accepted only for signature parity with
// ClusterHistograms; it is not used here.
template<typename HistogramType>
void ClusterHistogramsTrivial(const std::vector<HistogramType>& in,
                              int num_contexts, int num_blocks,
                              int max_histograms,
                              std::vector<HistogramType>* out,
                              std::vector<int>* histogram_symbols) {
  (void)max_histograms;
  out->resize(num_blocks);
  // A non-positive context count yields an empty map, matching the fact that
  // the inner loop below does not run in that case.
  const size_t map_size = (num_blocks > 0 && num_contexts > 0)
      ? static_cast<size_t>(num_blocks) * static_cast<size_t>(num_contexts)
      : 0;
  histogram_symbols->resize(map_size);
  for (int i = 0; i < num_blocks; ++i) {
    HistogramType& merged = (*out)[i];
    merged.Clear();
    const int base = i * num_contexts;
    for (int j = 0; j < num_contexts; ++j) {
      merged.AddHistogram(in[base + j]);
      (*histogram_symbols)[base + j] = i;
    }
  }
}
// Clusters similar histograms in 'in' together, the selected histograms are
// placed in 'out', and for each index in 'in', *histogram_symbols will
// indicate which of the 'out' histograms is the best approximation.

View File

@ -99,15 +99,16 @@ bool IsMostlyUTF8(const uint8_t* data, size_t length, double min_fraction) {
return size_utf8 > min_fraction * length;
}
void RecomputeDistancePrefixes(std::vector<Command>* cmds,
void RecomputeDistancePrefixes(Command* cmds,
size_t num_commands,
int num_direct_distance_codes,
int distance_postfix_bits) {
if (num_direct_distance_codes == 0 &&
distance_postfix_bits == 0) {
return;
}
for (int i = 0; i < cmds->size(); ++i) {
Command* cmd = &(*cmds)[i];
for (int i = 0; i < num_commands; ++i) {
Command* cmd = &cmds[i];
if (cmd->copy_len_ > 0 && cmd->cmd_prefix_ >= 128) {
PrefixEncodeCopyDistance(cmd->DistanceCode(),
num_direct_distance_codes,
@ -118,15 +119,6 @@ void RecomputeDistancePrefixes(std::vector<Command>* cmds,
}
}
size_t MetaBlockLength(const std::vector<Command>& cmds) {
size_t length = 0;
for (int i = 0; i < cmds.size(); ++i) {
const Command& cmd = cmds[i];
length += cmd.insert_len_ + cmd.copy_len_;
}
return length;
}
uint8_t* BrotliCompressor::GetBrotliStorage(size_t size) {
if (storage_size_ < size) {
storage_.reset(new uint8_t[size]);
@ -154,6 +146,12 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
params_.lgblock = std::min(kMaxInputBlockBits,
std::max(kMinInputBlockBits, params_.lgblock));
}
if (params_.quality <= 9) {
params_.enable_dictionary = false;
params_.enable_transforms = false;
params_.greedy_block_split = true;
params_.enable_context_modeling = false;
}
// Set maximum distance, see section 9.1. of the spec.
max_backward_distance_ = (1 << params_.lgwin) - 16;
@ -165,7 +163,10 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
// smaller than ringbuffer size.
int ringbuffer_bits = std::max(params_.lgwin + 1, params_.lgblock + 1);
ringbuffer_.reset(new RingBuffer(ringbuffer_bits, params_.lgblock));
literal_cost_.resize(1 << ringbuffer_bits);
if (params_.quality > 9) {
literal_cost_mask_ = (1 << params_.lgblock) - 1;
literal_cost_.reset(new float[literal_cost_mask_ + 1]);
}
// Initialize storage.
storage_size_ = 1 << 16;
@ -185,16 +186,16 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
dist_cache_[3] = 16;
// Initialize hashers.
switch (params.mode) {
switch (params_.mode) {
case BrotliParams::MODE_TEXT: hash_type_ = 8; break;
case BrotliParams::MODE_FONT: hash_type_ = 9; break;
default: break;
}
hashers_->Init(hash_type_);
if (params.mode == BrotliParams::MODE_TEXT) {
StoreDictionaryWordHashes(params.enable_transforms);
if (params_.mode == BrotliParams::MODE_TEXT &&
params_.enable_dictionary) {
StoreDictionaryWordHashes(params_.enable_transforms);
}
}
BrotliCompressor::~BrotliCompressor() {
@ -227,25 +228,30 @@ bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
memcpy(saved_dist_cache, dist_cache_, sizeof(dist_cache_));
if (input_size > 0) {
ringbuffer_->Write(input_buffer, input_size);
utf8_mode = IsMostlyUTF8(
&ringbuffer_->start()[input_pos_ & ringbuffer_->mask()],
input_size, kMinUTF8Ratio);
if (utf8_mode) {
EstimateBitCostsForLiteralsUTF8(input_pos_, input_size,
ringbuffer_->mask(), ringbuffer_->mask(),
ringbuffer_->start(), &literal_cost_[0]);
} else {
EstimateBitCostsForLiterals(input_pos_, input_size,
ringbuffer_->mask(), ringbuffer_->mask(),
ringbuffer_->start(), &literal_cost_[0]);
utf8_mode =
params_.enable_context_modeling &&
IsMostlyUTF8(&ringbuffer_->start()[input_pos_ & ringbuffer_->mask()],
input_size, kMinUTF8Ratio);
if (literal_cost_.get()) {
if (utf8_mode) {
EstimateBitCostsForLiteralsUTF8(input_pos_, input_size,
ringbuffer_->mask(),
literal_cost_mask_,
ringbuffer_->start(),
literal_cost_.get());
} else {
EstimateBitCostsForLiterals(input_pos_, input_size,
ringbuffer_->mask(), literal_cost_mask_,
ringbuffer_->start(), literal_cost_.get());
}
}
int last_insert_len = 0;
int num_commands = 0;
double base_min_score = 8.115;
double base_min_score = params_.enable_context_modeling ? 8.115 : 4.0;
CreateBackwardReferences(
input_size, input_pos_,
ringbuffer_->start(), ringbuffer_->mask(),
&literal_cost_[0], ringbuffer_->mask(),
literal_cost_.get(), literal_cost_mask_,
max_backward_distance_,
base_min_score,
params_.quality,
@ -263,14 +269,13 @@ bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
int num_direct_distance_codes =
params_.mode == BrotliParams::MODE_FONT ? 12 : 0;
int distance_postfix_bits = params_.mode == BrotliParams::MODE_FONT ? 1 : 0;
int literal_context_mode = CONTEXT_SIGNED;
int literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
const size_t max_out_size = 2 * input_size + 500;
uint8_t* storage = GetBrotliStorage(max_out_size);
storage[0] = last_byte_;
int storage_ix = last_byte_bits_;
MetaBlockSplit mb;
size_t len = MetaBlockLength(commands);
if (!commands.empty()) {
if (params_.greedy_block_split) {
BuildMetaBlockGreedy(ringbuffer_->start(), input_pos_,
@ -278,18 +283,19 @@ bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
commands.data(), commands.size(), params_.quality,
&mb);
} else {
RecomputeDistancePrefixes(&commands,
RecomputeDistancePrefixes(&commands[0], commands.size(),
num_direct_distance_codes,
distance_postfix_bits);
BuildMetaBlock(ringbuffer_->start(), input_pos_, ringbuffer_->mask(),
commands,
commands.data(), commands.size(),
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
params_.enable_context_modeling,
&mb);
}
}
if (!StoreMetaBlock(ringbuffer_->start(), input_pos_, len,
if (!StoreMetaBlock(ringbuffer_->start(), input_pos_, input_size,
ringbuffer_->mask(),
is_last, params_.quality,
num_direct_distance_codes,
@ -308,7 +314,7 @@ bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
storage_ix = last_byte_bits_;
if (!StoreUncompressedMetaBlock(is_last,
ringbuffer_->start(), input_pos_,
ringbuffer_->mask(), len,
ringbuffer_->mask(), input_size,
&storage_ix, storage)) {
return false;
}
@ -321,7 +327,7 @@ bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
*encoded_size = output_size;
last_byte_ = storage[output_size];
last_byte_bits_ = storage_ix & 7;
input_pos_ += len;
input_pos_ += input_size;
return true;
}

View File

@ -38,8 +38,10 @@ struct BrotliParams {
quality(11),
lgwin(22),
lgblock(0),
enable_dictionary(true),
enable_transforms(false),
greedy_block_split(false) {}
greedy_block_split(false),
enable_context_modeling(true) {}
enum Mode {
MODE_TEXT = 0,
@ -56,8 +58,11 @@ struct BrotliParams {
// If set to 0, the value will be set based on the quality.
int lgblock;
// These settings will be respected only if quality > 9.
bool enable_dictionary;
bool enable_transforms;
bool greedy_block_split;
bool enable_context_modeling;
};
class BrotliCompressor {
@ -100,7 +105,8 @@ class BrotliCompressor {
int hash_type_;
size_t input_pos_;
std::unique_ptr<RingBuffer> ringbuffer_;
std::vector<float> literal_cost_;
std::unique_ptr<float[]> literal_cost_;
size_t literal_cost_mask_;
int dist_cache_[4];
uint8_t last_byte_;
uint8_t last_byte_bits_;

View File

@ -209,10 +209,11 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
num_direct_distance_codes,
distance_postfix_bits);
BuildMetaBlock(&input[0], input_pos, mask,
commands,
commands.data(), commands.size(),
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
true,
&mb);
}

View File

@ -27,7 +27,8 @@
namespace brotli {
void BuildHistograms(
const std::vector<Command>& cmds,
const Command* cmds,
const size_t num_commands,
const BlockSplit& literal_split,
const BlockSplit& insert_and_copy_split,
const BlockSplit& dist_split,
@ -41,7 +42,7 @@ void BuildHistograms(
BlockSplitIterator literal_it(literal_split);
BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
BlockSplitIterator dist_it(dist_split);
for (int i = 0; i < cmds.size(); ++i) {
for (int i = 0; i < num_commands; ++i) {
const Command &cmd = cmds[i];
insert_and_copy_it.Next();
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
@ -66,7 +67,8 @@ void BuildHistograms(
}
void BuildLiteralHistogramsForBlockType(
const std::vector<Command>& cmds,
const Command* cmds,
const size_t num_commands,
const BlockSplit& literal_split,
const uint8_t* ringbuffer,
size_t pos,
@ -75,7 +77,7 @@ void BuildLiteralHistogramsForBlockType(
int context_mode,
std::vector<HistogramLiteral>* histograms) {
BlockSplitIterator literal_it(literal_split);
for (int i = 0; i < cmds.size(); ++i) {
for (int i = 0; i < num_commands; ++i) {
const Command &cmd = cmds[i];
for (int j = 0; j < cmd.insert_len_; ++j) {
literal_it.Next();

View File

@ -87,7 +87,8 @@ static const int kLiteralContextBits = 6;
static const int kDistanceContextBits = 2;
void BuildHistograms(
const std::vector<Command>& cmds,
const Command* cmds,
const size_t num_commands,
const BlockSplit& literal_split,
const BlockSplit& insert_and_copy_split,
const BlockSplit& dist_split,
@ -100,7 +101,8 @@ void BuildHistograms(
std::vector<HistogramDistance>* copy_dist_histograms);
void BuildLiteralHistogramsForBlockType(
const std::vector<Command>& cmds,
const Command* cmds,
const size_t num_commands,
const BlockSplit& literal_split,
const uint8_t* ringbuffer,
size_t pos,

View File

@ -26,12 +26,14 @@ namespace brotli {
void BuildMetaBlock(const uint8_t* ringbuffer,
const size_t pos,
const size_t mask,
const std::vector<Command>& cmds,
const Command* cmds,
size_t num_commands,
int num_direct_distance_codes,
int distance_postfix_bits,
int literal_context_mode,
bool enable_context_modeling,
MetaBlockSplit* mb) {
SplitBlock(cmds,
SplitBlock(cmds, num_commands,
&ringbuffer[pos & mask],
&mb->literal_split,
&mb->command_split,
@ -47,7 +49,7 @@ void BuildMetaBlock(const uint8_t* ringbuffer,
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
mb->command_histograms.resize(mb->command_split.num_types);
std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
BuildHistograms(cmds,
BuildHistograms(cmds, num_commands,
mb->literal_split,
mb->command_split,
mb->distance_split,
@ -63,20 +65,38 @@ void BuildMetaBlock(const uint8_t* ringbuffer,
static const int kMaxNumberOfHistograms = 256;
mb->literal_histograms = literal_histograms;
ClusterHistograms(literal_histograms,
1 << kLiteralContextBits,
mb->literal_split.num_types,
kMaxNumberOfHistograms,
&mb->literal_histograms,
&mb->literal_context_map);
if (enable_context_modeling) {
ClusterHistograms(literal_histograms,
1 << kLiteralContextBits,
mb->literal_split.num_types,
kMaxNumberOfHistograms,
&mb->literal_histograms,
&mb->literal_context_map);
} else {
ClusterHistogramsTrivial(literal_histograms,
1 << kLiteralContextBits,
mb->literal_split.num_types,
kMaxNumberOfHistograms,
&mb->literal_histograms,
&mb->literal_context_map);
}
mb->distance_histograms = distance_histograms;
ClusterHistograms(distance_histograms,
1 << kDistanceContextBits,
mb->distance_split.num_types,
kMaxNumberOfHistograms,
&mb->distance_histograms,
&mb->distance_context_map);
if (enable_context_modeling) {
ClusterHistograms(distance_histograms,
1 << kDistanceContextBits,
mb->distance_split.num_types,
kMaxNumberOfHistograms,
&mb->distance_histograms,
&mb->distance_context_map);
} else {
ClusterHistogramsTrivial(distance_histograms,
1 << kDistanceContextBits,
mb->distance_split.num_types,
kMaxNumberOfHistograms,
&mb->distance_histograms,
&mb->distance_context_map);
}
}
// Greedy block splitter for one block category (literal, command or distance).

View File

@ -47,10 +47,12 @@ struct MetaBlockSplit {
void BuildMetaBlock(const uint8_t* ringbuffer,
const size_t pos,
const size_t mask,
const std::vector<Command>& cmds,
const Command* cmds,
size_t num_commands,
int num_direct_distance_codes,
int distance_postfix_bits,
int literal_context_mode,
bool enable_context_modeling,
MetaBlockSplit* mb);
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,