Speedups to brotli quality 11.

  * Cluster at most 64 histograms at a time in the first
    round of clustering (sketched below).

  * Use a faster histogram cost estimation function.

  * Don't compute log2(total) multiple times in the
    block splitter.
Zoltan Szabadka 2015-06-12 15:29:06 +02:00
parent af09ee7344
commit 667f70adcb
4 changed files with 72 additions and 136 deletions
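
As a rough illustration of the first bullet (not part of the actual diff), here is a minimal standalone C++ sketch, assuming a made-up Histogram struct and a CombineWithin() stand-in for the real greedy HistogramCombine(): the first clustering round only ever looks at batches of at most 64 input histograms, so the quadratic pairwise search stays bounded per batch.

#include <algorithm>
#include <cstdio>
#include <vector>

struct Histogram {
  std::vector<int> data_;
  int total_count_ = 0;
};

// Stand-in for the real greedy combiner; here it only reports the batch it
// would merge down to at most max_histograms clusters.
static void CombineWithin(std::vector<Histogram>* histograms,
                          size_t begin, size_t count, size_t max_histograms) {
  (void)histograms;
  std::printf("combine histograms [%zu, %zu) down to <= %zu clusters\n",
              begin, begin + count, max_histograms);
}

int main() {
  std::vector<Histogram> in(200);          // e.g. one histogram per block
  const size_t max_input_histograms = 64;  // batch size from this commit
  const size_t max_histograms = 16;        // target clusters per batch
  for (size_t i = 0; i < in.size(); i += max_input_histograms) {
    const size_t num_to_combine =
        std::min(in.size() - i, max_input_histograms);
    CombineWithin(&in, i, num_to_combine, max_histograms);
  }
  // A second round (not shown) then collapses the surviving representatives
  // across batches, as in the existing "Collapse similar histograms" step.
  return 0;
}

With n input histograms this keeps the first round at roughly n * 64 pairwise comparisons instead of n * n.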

View File

@@ -49,94 +49,13 @@ static inline double BitsEntropy(const int *population, int size) {
return retval;
}
static const int kHuffmanExtraBits[kCodeLengthCodes] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3,
};
static inline int HuffmanTreeBitCost(const int* counts, const uint8_t* depth) {
int nbits = 0;
for (int i = 0; i < kCodeLengthCodes; ++i) {
nbits += counts[i] * (depth[i] + kHuffmanExtraBits[i]);
}
return nbits;
}
static inline int HuffmanTreeBitCost(
const Histogram<kCodeLengthCodes>& histogram,
const EntropyCode<kCodeLengthCodes>& entropy) {
return HuffmanTreeBitCost(&histogram.data_[0], &entropy.depth_[0]);
}
static inline int HuffmanBitCost(const uint8_t* depth, int length) {
int max_depth = 1;
int histogram[kCodeLengthCodes] = { 0 };
int tail_start = 0;
int prev_value = 8;
// compute histogram of compacted huffman tree
for (int i = 0; i < length;) {
const int value = depth[i];
if (value > max_depth) {
max_depth = value;
}
int reps = 1;
for (int k = i + 1; k < length && depth[k] == value; ++k) {
++reps;
}
i += reps;
if (i == length && value == 0)
break;
if (value == 0) {
if (reps < 3) {
histogram[0] += reps;
} else {
reps -= 2;
while (reps > 0) {
++histogram[17];
reps >>= 3;
}
}
} else {
tail_start = i;
if (value != prev_value) {
++histogram[value];
--reps;
}
prev_value = value;
if (reps < 3) {
histogram[value] += reps;
} else {
reps -= 2;
while (reps > 0) {
++histogram[16];
reps >>= 2;
}
}
}
}
// create huffman tree of huffman tree
uint8_t cost[kCodeLengthCodes] = { 0 };
CreateHuffmanTree(histogram, kCodeLengthCodes, 7, cost);
// account for rle extra bits
cost[16] += 2;
cost[17] += 3;
int tree_size = 0;
int bits = 18 + 2 * max_depth; // huffman tree of huffman tree cost
for (int i = 0; i < kCodeLengthCodes; ++i) {
bits += histogram[i] * cost[i]; // huffman tree bit cost
tree_size += histogram[i];
}
return bits;
}
template<int kSize>
double PopulationCost(const Histogram<kSize>& histogram) {
if (histogram.total_count_ == 0) {
return 12;
}
int count = 0;
for (int i = 0; i < kSize && count < 5; ++i) {
for (int i = 0; i < kSize; ++i) {
if (histogram.data_[i] > 0) {
++count;
}
@@ -147,19 +66,66 @@ double PopulationCost(const Histogram<kSize>& histogram) {
if (count == 2) {
return 20 + histogram.total_count_;
}
double bits = 0;
uint8_t depth[kSize] = { 0 };
CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth);
int bits = 0;
for (int i = 0; i < kSize; ++i) {
bits += histogram.data_[i] * depth[i];
if (count <= 4) {
// For very low symbol count we build the Huffman tree.
CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth);
for (int i = 0; i < kSize; ++i) {
bits += histogram.data_[i] * depth[i];
}
return count == 3 ? bits + 28 : bits + 37;
}
if (count == 3) {
bits += 28;
} else if (count == 4) {
bits += 37;
} else {
bits += HuffmanBitCost(depth, kSize);
// In this loop we compute the entropy of the histogram and simultaneously
// build a simplified histogram of the code length codes where we use the
// zero repeat code 17, but we don't use the non-zero repeat code 16.
int max_depth = 1;
int depth_histo[kCodeLengthCodes] = { 0 };
const double log2total = FastLog2(histogram.total_count_);
for (int i = 0; i < kSize;) {
if (histogram.data_[i] > 0) {
// Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
// = log2(total_count) - log2(count(symbol))
double log2p = log2total - FastLog2(histogram.data_[i]);
// Approximate the bit depth by round(-log2(P(symbol)))
int depth = static_cast<int>(log2p + 0.5);
bits += histogram.data_[i] * log2p;
if (depth > max_depth) {
max_depth = depth;
}
++depth_histo[depth];
++i;
} else {
// Compute the run length of zeros and add the appropriate number of 0 and
// 17 code length codes to the code length code histogram.
int reps = 1;
for (int k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
++reps;
}
i += reps;
if (i == kSize) {
// Don't add any cost for the last zero run, since these are encoded
// only implicitly.
break;
}
if (reps < 3) {
depth_histo[0] += reps;
} else {
reps -= 2;
while (reps > 0) {
++depth_histo[17];
// Add the 3 extra bits for the 17 code length code.
bits += 3;
reps >>= 3;
}
}
}
}
// Add the estimated encoding cost of the code length code histogram.
bits += 18 + 2 * max_depth;
// Add the entropy of the code length code histogram.
bits += BitsEntropy(depth_histo, kCodeLengthCodes);
return bits;
}
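
As a rough, self-contained illustration of the estimate above (not the exact function from the diff; EstimateHistogramBits is a made-up name and std::log2 stands in for FastLog2): the payload is costed by Shannon entropy with log2(total) computed once, and the cost of transmitting the code lengths is approximated with a crude constant instead of the depth_histo/BitsEntropy term.

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

static double EstimateHistogramBits(const std::vector<int>& histogram) {
  int total = 0;
  for (int c : histogram) total += c;
  if (total == 0) return 12.0;                // empty histogram: fixed cost
  const double log2total = std::log2(total);  // computed once, reused below
  double bits = 0.0;
  int max_depth = 1;
  for (int count : histogram) {
    if (count == 0) continue;
    // -log2(P(symbol)) = log2(total_count) - log2(count(symbol))
    const double log2p = log2total - std::log2(count);
    bits += count * log2p;                    // entropy lower bound for the data
    max_depth = std::max(max_depth, static_cast<int>(log2p + 0.5));
  }
  // Crude stand-in for the header cost of the code length codes.
  bits += 18 + 2 * max_depth;
  return bits;
}

int main() {
  const std::vector<int> histogram = {10, 20, 30, 0, 0, 0, 40};
  std::printf("estimated cost: %.1f bits\n", EstimateHistogramBits(histogram));
  return 0;
}

For example, a symbol seen 40 times out of 100 contributes 40 * (log2(100) - log2(40)), i.e. about 40 * 1.32 = 53 bits, to the payload estimate, without ever building a Huffman tree.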

View File

@@ -150,8 +150,8 @@ void RefineEntropyCodes(const DataType* data, size_t length,
}
}
inline static float BitCost(int total, int count) {
return count == 0 ? FastLog2(total) + 2 : FastLog2(total) - FastLog2(count);
inline static float BitCost(int count) {
return count == 0 ? -2 : FastLog2(count);
}
template<typename DataType, int kSize>
@@ -168,10 +168,12 @@ void FindBlocks(const DataType* data, const size_t length,
int vecsize = vec.size();
double* insert_cost = new double[kSize * vecsize];
memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * vecsize);
for (int i = 0; i < kSize; ++i) {
for (int j = 0; j < vecsize; ++j) {
insert_cost[j] = FastLog2(vec[j].total_count_);
}
for (int i = kSize - 1; i >= 0; --i) {
for (int j = 0; j < vecsize; ++j) {
insert_cost[i * vecsize + j] =
BitCost(vec[j].total_count_, vec[j].data_[i]);
insert_cost[i * vecsize + j] = insert_cost[j] - BitCost(vec[j].data_[i]);
}
}
double *cost = new double[vecsize];
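
A sketch of the log2(total) hoisting under assumed types (Histo and InsertCosts are made-up names for this illustration, and std::log2 stands in for FastLog2): since the per-symbol insert cost is log2(total) - log2(count), log2(total) only needs to be computed once per histogram instead of once per (symbol, histogram) pair. The real code above goes one step further and caches it in row 0 of insert_cost[], filling the rows in reverse so the cached value is overwritten last.

#include <cmath>
#include <cstdio>
#include <vector>

struct Histo {
  std::vector<int> data_;
  int total_count_ = 0;
};

// insert_cost[i * vec.size() + j]: bits to code symbol i with histogram j,
// i.e. log2(total_j) - log2(count_ij), with a 2-bit penalty for symbols the
// histogram has never seen (matching the -2 branch of the new BitCost()).
static std::vector<double> InsertCosts(const std::vector<Histo>& vec,
                                       size_t num_symbols) {
  std::vector<double> insert_cost(num_symbols * vec.size());
  for (size_t j = 0; j < vec.size(); ++j) {
    const double log2total = std::log2(vec[j].total_count_);  // once per histogram
    for (size_t i = 0; i < num_symbols; ++i) {
      const int count = vec[j].data_[i];
      insert_cost[i * vec.size() + j] =
          count == 0 ? log2total + 2 : log2total - std::log2(count);
    }
  }
  return insert_cost;
}

int main() {
  std::vector<Histo> vec(1);
  vec[0].data_ = {3, 0, 1};
  vec[0].total_count_ = 4;
  const std::vector<double> costs = InsertCosts(vec, 3);
  std::printf("cost of symbol 0 under histogram 0: %.2f bits\n", costs[0]);
  return 0;
}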

View File

@@ -393,37 +393,6 @@ void RunLengthCodeZeros(const std::vector<int>& v_in,
}
}
// Returns a maximum zero-run-length-prefix value such that run-length coding
// zeros in v with this maximum prefix value and then encoding the resulting
// histogram and entropy-coding v produces the least amount of bits.
int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
int min_cost = std::numeric_limits<int>::max();
int best_max_prefix = 0;
for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) {
std::vector<int> rle_symbols;
std::vector<int> extra_bits;
int max_run_length_prefix = max_prefix;
RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits);
if (max_run_length_prefix < max_prefix) break;
HistogramContextMap histogram;
for (int i = 0; i < rle_symbols.size(); ++i) {
histogram.Add(rle_symbols[i]);
}
int bit_cost = PopulationCost(histogram);
if (max_prefix > 0) {
bit_cost += 4;
}
for (int i = 1; i <= max_prefix; ++i) {
bit_cost += histogram.data_[i] * i; // extra bits
}
if (bit_cost < min_cost) {
min_cost = bit_cost;
best_max_prefix = max_prefix;
}
}
return best_max_prefix;
}
void EncodeContextMap(const std::vector<int>& context_map,
int num_clusters,
int* storage_ix, uint8_t* storage) {
@@ -436,7 +405,7 @@ void EncodeContextMap(const std::vector<int>& context_map,
std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
std::vector<int> rle_symbols;
std::vector<int> extra_bits;
int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
int max_run_length_prefix = 6;
RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
&rle_symbols, &extra_bits);
HistogramContextMap symbol_histogram;

View File

@@ -279,13 +279,12 @@ void ClusterHistograms(const std::vector<HistogramType>& in,
(*histogram_symbols)[i] = i;
}
// Collapse similar histograms within a block type.
if (num_contexts > 1) {
for (int i = 0; i < num_blocks; ++i) {
HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[i * num_contexts], num_contexts,
max_histograms);
}
const int max_input_histograms = 64;
for (int i = 0; i < in_size; i += max_input_histograms) {
int num_to_combine = std::min(in_size - i, max_input_histograms);
HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[i], num_to_combine,
max_histograms);
}
// Collapse similar histograms.