2015-11-27 10:27:11 +00:00
|
|
|
/* Copyright 2013 Google Inc. All Rights Reserved.
|
|
|
|
|
2015-12-11 10:11:51 +00:00
|
|
|
Distributed under MIT license.
|
2015-11-27 10:27:11 +00:00
|
|
|
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
|
|
|
*/
|
|
|
|
|
2016-06-03 09:19:23 +00:00
|
|
|
/* Functions to estimate the bit cost of Huffman trees. */
|
2013-10-23 11:06:13 +00:00
|
|
|
|
|
|
|
#ifndef BROTLI_ENC_BIT_COST_H_
|
|
|
|
#define BROTLI_ENC_BIT_COST_H_
|
|
|
|
|
2016-06-03 08:51:04 +00:00
|
|
|
#include "../common/types.h"
|
2013-10-23 11:06:13 +00:00
|
|
|
#include "./entropy_encode.h"
|
|
|
|
#include "./fast_log.h"
|
|
|
|
|
|
|
|
namespace brotli {
|
|
|
|
|
2016-01-07 15:27:49 +00:00
|
|
|
static inline double ShannonEntropy(const uint32_t *population, size_t size,
|
|
|
|
size_t *total) {
|
|
|
|
size_t sum = 0;
|
2015-03-27 13:20:35 +00:00
|
|
|
double retval = 0;
|
2016-01-07 15:27:49 +00:00
|
|
|
const uint32_t *population_end = population + size;
|
|
|
|
size_t p;
|
2015-03-27 13:20:35 +00:00
|
|
|
if (size & 1) {
|
|
|
|
goto odd_number_of_elements_left;
|
|
|
|
}
|
|
|
|
while (population < population_end) {
|
|
|
|
p = *population++;
|
|
|
|
sum += p;
|
2016-01-07 15:27:49 +00:00
|
|
|
retval -= static_cast<double>(p) * FastLog2(p);
|
2015-03-27 13:20:35 +00:00
|
|
|
odd_number_of_elements_left:
|
|
|
|
p = *population++;
|
|
|
|
sum += p;
|
2016-01-07 15:27:49 +00:00
|
|
|
retval -= static_cast<double>(p) * FastLog2(p);
|
2015-03-27 13:20:35 +00:00
|
|
|
}
|
2016-01-07 15:27:49 +00:00
|
|
|
if (sum) retval += static_cast<double>(sum) * FastLog2(sum);
|
2015-08-28 14:09:23 +00:00
|
|
|
*total = sum;
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
2016-01-07 15:27:49 +00:00
|
|
|
static inline double BitsEntropy(const uint32_t *population, size_t size) {
|
|
|
|
size_t sum;
|
2015-08-28 14:09:23 +00:00
|
|
|
double retval = ShannonEntropy(population, size, &sum);
|
2015-03-27 13:20:35 +00:00
|
|
|
if (retval < sum) {
|
2016-06-03 09:19:23 +00:00
|
|
|
/* At least one bit per literal is needed. */
|
2016-01-07 15:27:49 +00:00
|
|
|
retval = static_cast<double>(sum);
|
2015-03-27 13:20:35 +00:00
|
|
|
}
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
2013-10-23 11:06:13 +00:00
|
|
|
template<int kSize>
|
|
|
|
double PopulationCost(const Histogram<kSize>& histogram) {
|
2016-03-15 09:50:16 +00:00
|
|
|
static const double kOneSymbolHistogramCost = 12;
|
|
|
|
static const double kTwoSymbolHistogramCost = 20;
|
|
|
|
static const double kThreeSymbolHistogramCost = 28;
|
|
|
|
static const double kFourSymbolHistogramCost = 37;
|
2013-10-23 11:06:13 +00:00
|
|
|
if (histogram.total_count_ == 0) {
|
2016-03-15 09:50:16 +00:00
|
|
|
return kOneSymbolHistogramCost;
|
2013-10-23 11:06:13 +00:00
|
|
|
}
|
|
|
|
int count = 0;
|
2016-03-15 09:50:16 +00:00
|
|
|
int s[5];
|
2015-06-12 13:29:06 +00:00
|
|
|
for (int i = 0; i < kSize; ++i) {
|
2013-10-23 11:06:13 +00:00
|
|
|
if (histogram.data_[i] > 0) {
|
2016-03-15 09:50:16 +00:00
|
|
|
s[count] = i;
|
2013-10-23 11:06:13 +00:00
|
|
|
++count;
|
2016-03-15 09:50:16 +00:00
|
|
|
if (count > 4) break;
|
2013-10-23 11:06:13 +00:00
|
|
|
}
|
|
|
|
}
|
2013-11-15 18:02:17 +00:00
|
|
|
if (count == 1) {
|
2016-03-15 09:50:16 +00:00
|
|
|
return kOneSymbolHistogramCost;
|
2013-11-15 18:02:17 +00:00
|
|
|
}
|
|
|
|
if (count == 2) {
|
2016-03-15 09:50:16 +00:00
|
|
|
return (kTwoSymbolHistogramCost +
|
|
|
|
static_cast<double>(histogram.total_count_));
|
2013-10-23 11:06:13 +00:00
|
|
|
}
|
2016-03-15 09:50:16 +00:00
|
|
|
if (count == 3) {
|
|
|
|
const uint32_t histo0 = histogram.data_[s[0]];
|
|
|
|
const uint32_t histo1 = histogram.data_[s[1]];
|
|
|
|
const uint32_t histo2 = histogram.data_[s[2]];
|
|
|
|
const uint32_t histomax = std::max(histo0, std::max(histo1, histo2));
|
|
|
|
return (kThreeSymbolHistogramCost +
|
|
|
|
2 * (histo0 + histo1 + histo2) - histomax);
|
|
|
|
}
|
|
|
|
if (count == 4) {
|
|
|
|
uint32_t histo[4];
|
|
|
|
for (int i = 0; i < 4; ++i) {
|
|
|
|
histo[i] = histogram.data_[s[i]];
|
2015-06-12 13:29:06 +00:00
|
|
|
}
|
2016-03-15 09:50:16 +00:00
|
|
|
// Sort
|
|
|
|
for (int i = 0; i < 4; ++i) {
|
|
|
|
for (int j = i + 1; j < 4; ++j) {
|
|
|
|
if (histo[j] > histo[i]) {
|
|
|
|
std::swap(histo[j], histo[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
const uint32_t h23 = histo[2] + histo[3];
|
|
|
|
const uint32_t histomax = std::max(h23, histo[0]);
|
|
|
|
return (kFourSymbolHistogramCost +
|
|
|
|
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
|
2013-10-23 11:06:13 +00:00
|
|
|
}
|
2015-06-12 13:29:06 +00:00
|
|
|
|
|
|
|
// In this loop we compute the entropy of the histogram and simultaneously
|
|
|
|
// build a simplified histogram of the code length codes where we use the
|
|
|
|
// zero repeat code 17, but we don't use the non-zero repeat code 16.
|
2016-03-15 09:50:16 +00:00
|
|
|
double bits = 0;
|
2016-01-07 15:27:49 +00:00
|
|
|
size_t max_depth = 1;
|
|
|
|
uint32_t depth_histo[kCodeLengthCodes] = { 0 };
|
2015-06-12 13:29:06 +00:00
|
|
|
const double log2total = FastLog2(histogram.total_count_);
|
2016-01-07 15:27:49 +00:00
|
|
|
for (size_t i = 0; i < kSize;) {
|
2015-06-12 13:29:06 +00:00
|
|
|
if (histogram.data_[i] > 0) {
|
|
|
|
// Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
|
|
|
|
// = log2(total_count) - log2(count(symbol))
|
|
|
|
double log2p = log2total - FastLog2(histogram.data_[i]);
|
|
|
|
// Approximate the bit depth by round(-log2(P(symbol)))
|
2016-01-07 15:27:49 +00:00
|
|
|
size_t depth = static_cast<size_t>(log2p + 0.5);
|
2015-06-12 13:29:06 +00:00
|
|
|
bits += histogram.data_[i] * log2p;
|
2015-06-12 14:11:50 +00:00
|
|
|
if (depth > 15) {
|
|
|
|
depth = 15;
|
|
|
|
}
|
2015-06-12 13:29:06 +00:00
|
|
|
if (depth > max_depth) {
|
|
|
|
max_depth = depth;
|
|
|
|
}
|
|
|
|
++depth_histo[depth];
|
|
|
|
++i;
|
|
|
|
} else {
|
2015-09-21 19:04:07 +00:00
|
|
|
// Compute the run length of zeros and add the appropriate number of 0 and
|
2015-06-12 13:29:06 +00:00
|
|
|
// 17 code length codes to the code length code histogram.
|
2016-01-07 15:27:49 +00:00
|
|
|
uint32_t reps = 1;
|
|
|
|
for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
|
2015-06-12 13:29:06 +00:00
|
|
|
++reps;
|
|
|
|
}
|
|
|
|
i += reps;
|
|
|
|
if (i == kSize) {
|
|
|
|
// Don't add any cost for the last zero run, since these are encoded
|
|
|
|
// only implicitly.
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (reps < 3) {
|
|
|
|
depth_histo[0] += reps;
|
|
|
|
} else {
|
|
|
|
reps -= 2;
|
|
|
|
while (reps > 0) {
|
|
|
|
++depth_histo[17];
|
|
|
|
// Add the 3 extra bits for the 17 code length code.
|
|
|
|
bits += 3;
|
|
|
|
reps >>= 3;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-11-15 18:02:17 +00:00
|
|
|
}
|
2015-06-12 13:29:06 +00:00
|
|
|
// Add the estimated encoding cost of the code length code histogram.
|
2016-01-07 15:27:49 +00:00
|
|
|
bits += static_cast<double>(18 + 2 * max_depth);
|
2015-06-12 13:29:06 +00:00
|
|
|
// Add the entropy of the code length code histogram.
|
|
|
|
bits += BitsEntropy(depth_histo, kCodeLengthCodes);
|
2013-10-23 11:06:13 +00:00
|
|
|
return bits;
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace brotli
|
|
|
|
|
2016-06-03 09:19:23 +00:00
|
|
|
#endif /* BROTLI_ENC_BIT_COST_H_ */
|