Reduce memory usage of brotli encoder at quality 10 and 11.

This commit is contained in:
Zoltan Szabadka 2016-03-15 10:50:16 +01:00
parent cfba2db7b3
commit b820c39bd9
23 changed files with 1658 additions and 967 deletions

View File

@ -21,8 +21,6 @@ namespace brotli {
// The maximum length for which the zopflification uses distinct distances. // The maximum length for which the zopflification uses distinct distances.
static const uint16_t kMaxZopfliLen = 325; static const uint16_t kMaxZopfliLen = 325;
static const double kInfinity = std::numeric_limits<double>::infinity();
// Histogram based cost model for zopflification. // Histogram based cost model for zopflification.
class ZopfliCostModel { class ZopfliCostModel {
public: public:
@ -42,7 +40,7 @@ class ZopfliCostModel {
size_t pos = position - last_insert_len; size_t pos = position - last_insert_len;
for (size_t i = 0; i < num_commands; i++) { for (size_t i = 0; i < num_commands; i++) {
size_t inslength = commands[i].insert_len_; size_t inslength = commands[i].insert_len_;
size_t copylength = commands[i].copy_len_; size_t copylength = commands[i].copy_len();
size_t distcode = commands[i].dist_prefix_; size_t distcode = commands[i].dist_prefix_;
size_t cmdcode = commands[i].cmd_prefix_; size_t cmdcode = commands[i].cmd_prefix_;
@ -56,7 +54,7 @@ class ZopfliCostModel {
pos += inslength + copylength; pos += inslength + copylength;
} }
std::vector<double> cost_literal; std::vector<float> cost_literal;
Set(histogram_literal, &cost_literal); Set(histogram_literal, &cost_literal);
Set(histogram_cmd, &cost_cmd_); Set(histogram_cmd, &cost_cmd_);
Set(histogram_dist, &cost_dist_); Set(histogram_dist, &cost_dist_);
@ -77,26 +75,25 @@ class ZopfliCostModel {
size_t position, size_t position,
const uint8_t* ringbuffer, const uint8_t* ringbuffer,
size_t ringbuffer_mask) { size_t ringbuffer_mask) {
std::vector<float> literal_cost(num_bytes + 1); literal_costs_.resize(num_bytes + 2);
EstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask, EstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
ringbuffer, &literal_cost[0]); ringbuffer, &literal_costs_[1]);
literal_costs_.resize(num_bytes + 1);
literal_costs_[0] = 0.0; literal_costs_[0] = 0.0;
for (size_t i = 0; i < num_bytes; ++i) { for (size_t i = 0; i < num_bytes; ++i) {
literal_costs_[i + 1] = literal_costs_[i] + literal_cost[i]; literal_costs_[i + 1] += literal_costs_[i];
} }
cost_cmd_.resize(kNumCommandPrefixes); cost_cmd_.resize(kNumCommandPrefixes);
cost_dist_.resize(kNumDistancePrefixes); cost_dist_.resize(kNumDistancePrefixes);
for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) { for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
cost_cmd_[i] = FastLog2(11 + i); cost_cmd_[i] = static_cast<float>(FastLog2(11 + i));
} }
for (uint32_t i = 0; i < kNumDistancePrefixes; ++i) { for (uint32_t i = 0; i < kNumDistancePrefixes; ++i) {
cost_dist_[i] = FastLog2(20 + i); cost_dist_[i] = static_cast<float>(FastLog2(20 + i));
} }
min_cost_cmd_ = FastLog2(11); min_cost_cmd_ = static_cast<float>(FastLog2(11));
} }
double GetCommandCost( float GetCommandCost(
size_t dist_code, size_t length_code, size_t insert_length) const { size_t dist_code, size_t length_code, size_t insert_length) const {
uint16_t inscode = GetInsertLengthCode(insert_length); uint16_t inscode = GetInsertLengthCode(insert_length);
uint16_t copycode = GetCopyLengthCode(length_code); uint16_t copycode = GetCopyLengthCode(length_code);
@ -106,29 +103,29 @@ class ZopfliCostModel {
PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra); PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
uint32_t distnumextra = distextra >> 24; uint32_t distnumextra = distextra >> 24;
double result = static_cast<double>( float result = static_cast<float>(
kInsExtra[inscode] + kCopyExtra[copycode] + distnumextra); GetInsertExtra(inscode) + GetCopyExtra(copycode) + distnumextra);
result += cost_cmd_[cmdcode]; result += cost_cmd_[cmdcode];
if (cmdcode >= 128) result += cost_dist_[dist_symbol]; if (cmdcode >= 128) result += cost_dist_[dist_symbol];
return result; return result;
} }
double GetLiteralCosts(size_t from, size_t to) const { float GetLiteralCosts(size_t from, size_t to) const {
return literal_costs_[to] - literal_costs_[from]; return literal_costs_[to] - literal_costs_[from];
} }
double GetMinCostCmd(void) const { float GetMinCostCmd(void) const {
return min_cost_cmd_; return min_cost_cmd_;
} }
private: private:
void Set(const std::vector<uint32_t>& histogram, std::vector<double>* cost) { void Set(const std::vector<uint32_t>& histogram, std::vector<float>* cost) {
cost->resize(histogram.size()); cost->resize(histogram.size());
size_t sum = 0; size_t sum = 0;
for (size_t i = 0; i < histogram.size(); i++) { for (size_t i = 0; i < histogram.size(); i++) {
sum += histogram[i]; sum += histogram[i];
} }
double log2sum = FastLog2(sum); float log2sum = static_cast<float>(FastLog2(sum));
for (size_t i = 0; i < histogram.size(); i++) { for (size_t i = 0; i < histogram.size(); i++) {
if (histogram[i] == 0) { if (histogram[i] == 0) {
(*cost)[i] = log2sum + 2; (*cost)[i] = log2sum + 2;
@ -136,33 +133,20 @@ class ZopfliCostModel {
} }
// Shannon bits for this symbol. // Shannon bits for this symbol.
(*cost)[i] = log2sum - FastLog2(histogram[i]); (*cost)[i] = log2sum - static_cast<float>(FastLog2(histogram[i]));
// Cannot be coded with less than 1 bit // Cannot be coded with less than 1 bit
if ((*cost)[i] < 1) (*cost)[i] = 1; if ((*cost)[i] < 1) (*cost)[i] = 1;
} }
} }
std::vector<double> cost_cmd_; // The insert and copy length symbols. std::vector<float> cost_cmd_; // The insert and copy length symbols.
std::vector<double> cost_dist_; std::vector<float> cost_dist_;
// Cumulative costs of literals per position in the stream. // Cumulative costs of literals per position in the stream.
std::vector<double> literal_costs_; std::vector<float> literal_costs_;
double min_cost_cmd_; float min_cost_cmd_;
}; };
inline void SetDistanceCache(size_t distance,
size_t distance_code,
size_t max_distance,
const int* dist_cache,
int* result_dist_cache) {
if (distance <= max_distance && distance_code > 0) {
result_dist_cache[0] = static_cast<int>(distance);
memcpy(&result_dist_cache[1], dist_cache, 3 * sizeof(dist_cache[0]));
} else {
memcpy(result_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
}
}
inline size_t ComputeDistanceCode(size_t distance, inline size_t ComputeDistanceCode(size_t distance,
size_t max_distance, size_t max_distance,
int quality, int quality,
@ -194,47 +178,28 @@ inline size_t ComputeDistanceCode(size_t distance,
return distance + 15; return distance + 15;
} }
struct ZopfliNode { // REQUIRES: len >= 2, start_pos <= pos
ZopfliNode() : length(1), // REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity
distance(0), // Maintains the "ZopfliNode array invariant".
distance_code(0),
length_code(0),
insert_length(0),
cost(kInfinity) {}
// best length to get up to this byte (not including this byte itself)
uint32_t length;
// distance associated with the length
uint32_t distance;
uint32_t distance_code;
int distance_cache[4];
// length code associated with the length - usually the same as length,
// except in case of length-changing dictionary transformation.
uint32_t length_code;
// number of literal inserts before this copy
uint32_t insert_length;
// smallest cost to get to this byte from the beginning, as found so far
double cost;
};
inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos, inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
size_t len, size_t len_code, size_t dist, size_t len, size_t len_code, size_t dist,
size_t dist_code, size_t max_dist, size_t short_code, float cost) {
const int* dist_cache, double cost) {
ZopfliNode& next = nodes[pos + len]; ZopfliNode& next = nodes[pos + len];
next.length = static_cast<uint32_t>(len); next.length = static_cast<uint32_t>(len | ((len + 9u - len_code) << 24));
next.length_code = static_cast<uint32_t>(len_code); next.distance = static_cast<uint32_t>(dist | (short_code << 25));
next.distance = static_cast<uint32_t>(dist);
next.distance_code = static_cast<uint32_t>(dist_code);
next.insert_length = static_cast<uint32_t>(pos - start_pos); next.insert_length = static_cast<uint32_t>(pos - start_pos);
next.cost = cost; next.cost = cost;
SetDistanceCache(dist, dist_code, max_dist, dist_cache,
&next.distance_cache[0]);
} }
// Maintains the smallest 2^k cost difference together with their positions // Maintains the smallest 2^k cost difference together with their positions
class StartPosQueue { class StartPosQueue {
public: public:
struct PosData {
size_t pos;
int distance_cache[4];
float costdiff;
};
explicit StartPosQueue(int bits) explicit StartPosQueue(int bits)
: mask_((1u << bits) - 1), q_(1 << bits), idx_(0) {} : mask_((1u << bits) - 1), q_(1 << bits), idx_(0) {}
@ -242,21 +207,15 @@ class StartPosQueue {
idx_ = 0; idx_ = 0;
} }
void Push(size_t pos, double costdiff) { void Push(const StartPosQueue::PosData& posdata) {
if (costdiff == kInfinity) { size_t offset = ~idx_ & mask_;
// We can't start a command from an unreachable start position.
// E.g. position 1 in a stream is always unreachable, because all commands
// have a copy of at least length 2.
return;
}
size_t offset = -idx_ & mask_;
++idx_; ++idx_;
size_t len = size(); size_t len = size();
q_[offset] = std::make_pair(pos, costdiff); q_[offset] = posdata;
/* Restore the sorted order. In the list of |len| items at most |len - 1| /* Restore the sorted order. In the list of |len| items at most |len - 1|
adjacent element comparisons / swaps are required. */ adjacent element comparisons / swaps are required. */
for (size_t i = 1; i < len; ++i) { for (size_t i = 1; i < len; ++i) {
if (q_[offset & mask_].second > q_[(offset + 1) & mask_].second) { if (q_[offset & mask_].costdiff > q_[(offset + 1) & mask_].costdiff) {
std::swap(q_[offset & mask_], q_[(offset + 1) & mask_]); std::swap(q_[offset & mask_], q_[(offset + 1) & mask_]);
} }
++offset; ++offset;
@ -265,32 +224,32 @@ class StartPosQueue {
size_t size(void) const { return std::min(idx_, mask_ + 1); } size_t size(void) const { return std::min(idx_, mask_ + 1); }
size_t GetStartPos(size_t k) const { const StartPosQueue::PosData& GetStartPosData(size_t k) const {
return q_[(k + 1 - idx_) & mask_].first; return q_[(k - idx_) & mask_];
} }
private: private:
const size_t mask_; const size_t mask_;
std::vector<std::pair<size_t, double> > q_; std::vector<PosData> q_;
size_t idx_; size_t idx_;
}; };
// Returns the minimum possible copy length that can improve the cost of any // Returns the minimum possible copy length that can improve the cost of any
// future position. // future position.
size_t ComputeMinimumCopyLength(const StartPosQueue& queue, size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
const std::vector<ZopfliNode>& nodes, const ZopfliNode* nodes,
const ZopfliCostModel& model, const ZopfliCostModel& model,
size_t pos, const size_t num_bytes,
double min_cost_cmd) { const size_t pos) {
// Compute the minimum possible cost of reaching any future position. // Compute the minimum possible cost of reaching any future position.
const size_t start0 = queue.GetStartPos(0); const size_t start0 = queue.GetStartPosData(0).pos;
double min_cost = (nodes[start0].cost + float min_cost = (nodes[start0].cost +
model.GetLiteralCosts(start0, pos) + model.GetLiteralCosts(start0, pos) +
min_cost_cmd); model.GetMinCostCmd());
size_t len = 2; size_t len = 2;
size_t next_len_bucket = 4; size_t next_len_bucket = 4;
size_t next_len_offset = 10; size_t next_len_offset = 10;
while (pos + len < nodes.size() && nodes[pos + len].cost <= min_cost) { while (pos + len <= num_bytes && nodes[pos + len].cost <= min_cost) {
// We already reached (pos + len) with no more cost than the minimum // We already reached (pos + len) with no more cost than the minimum
// possible cost of reaching anything from this pos, so there is no point in // possible cost of reaching anything from this pos, so there is no point in
// looking for lengths <= len. // looking for lengths <= len.
@ -298,7 +257,7 @@ size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
if (len == next_len_offset) { if (len == next_len_offset) {
// We reached the next copy length code bucket, so we add one more // We reached the next copy length code bucket, so we add one more
// extra bit to the minimum cost. // extra bit to the minimum cost.
min_cost += 1.0; min_cost += static_cast<float>(1.0);
next_len_offset += next_len_bucket; next_len_offset += next_len_bucket;
next_len_bucket *= 2; next_len_bucket *= 2;
} }
@ -306,164 +265,194 @@ size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
return len; return len;
} }
void ZopfliIterate(size_t num_bytes, // Fills in dist_cache[0..3] with the last four distances (as defined by
size_t position, // Section 4. of the Spec) that would be used at (block_start + pos) if we
const uint8_t* ringbuffer, // used the shortest path of commands from block_start, computed from
size_t ringbuffer_mask, // nodes[0..pos]. The last four distances at block_start are in
const size_t max_backward_limit, // starting_dist_cache[0..3].
const ZopfliCostModel& model, // REQUIRES: nodes[pos].cost < kInfinity
const std::vector<uint32_t>& num_matches, // REQUIRES: nodes[0..pos] satisfy the "ZopfliNode array invariant".
const std::vector<BackwardMatch>& matches, void ComputeDistanceCache(const size_t block_start,
int* dist_cache, const size_t pos,
size_t* last_insert_len, const size_t max_backward,
Command* commands, const int* starting_dist_cache,
size_t* num_commands, const ZopfliNode* nodes,
size_t* num_literals) { int* dist_cache) {
const Command * const orig_commands = commands; int idx = 0;
size_t p = pos;
std::vector<ZopfliNode> nodes(num_bytes + 1); // Because of prerequisite, does at most (pos + 1) / 2 iterations.
nodes[0].length = 0; while (idx < 4 && p > 0) {
nodes[0].cost = 0; const size_t clen = nodes[p].copy_length();
memcpy(nodes[0].distance_cache, dist_cache, 4 * sizeof(dist_cache[0])); const size_t ilen = nodes[p].insert_length;
const size_t dist = nodes[p].copy_distance();
StartPosQueue queue(3); // Since block_start + p is the end position of the command, the copy part
const double min_cost_cmd = model.GetMinCostCmd(); // starts from block_start + p - clen. Distances that are greater than this
// or greater than max_backward are static dictionary references, and do
size_t cur_match_pos = 0; // not update the last distances. Also distance code 0 (last distance)
for (size_t i = 0; i + 3 < num_bytes; i++) { // does not update the last distances.
size_t cur_ix = position + i; if (dist + clen <= block_start + p && dist <= max_backward &&
size_t cur_ix_masked = cur_ix & ringbuffer_mask; nodes[p].distance_code() > 0) {
size_t max_distance = std::min(cur_ix, max_backward_limit); dist_cache[idx++] = static_cast<int>(dist);
size_t max_length = num_bytes - i;
queue.Push(i, nodes[i].cost - model.GetLiteralCosts(0, i));
const size_t min_len = ComputeMinimumCopyLength(queue, nodes, model,
i, min_cost_cmd);
// Go over the command starting positions in order of increasing cost
// difference.
for (size_t k = 0; k < 5 && k < queue.size(); ++k) {
const size_t start = queue.GetStartPos(k);
const double start_costdiff =
nodes[start].cost - model.GetLiteralCosts(0, start);
const int* dist_cache2 = &nodes[start].distance_cache[0];
// Look for last distance matches using the distance cache from this
// starting position.
size_t best_len = min_len - 1;
for (size_t j = 0; j < kNumDistanceShortCodes; ++j) {
const size_t idx = kDistanceCacheIndex[j];
const size_t backward =
static_cast<size_t>(dist_cache2[idx] + kDistanceCacheOffset[j]);
size_t prev_ix = cur_ix - backward;
if (prev_ix >= cur_ix) {
continue;
}
if (PREDICT_FALSE(backward > max_distance)) {
continue;
}
prev_ix &= ringbuffer_mask;
if (cur_ix_masked + best_len > ringbuffer_mask ||
prev_ix + best_len > ringbuffer_mask ||
ringbuffer[cur_ix_masked + best_len] !=
ringbuffer[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&ringbuffer[prev_ix],
&ringbuffer[cur_ix_masked],
max_length);
for (size_t l = best_len + 1; l <= len; ++l) {
const size_t inslen = i - start;
double cmd_cost = model.GetCommandCost(j, l, inslen);
double cost = start_costdiff + cmd_cost + model.GetLiteralCosts(0, i);
if (cost < nodes[i + l].cost) {
UpdateZopfliNode(&nodes[0], i, start, l, l, backward, j,
max_distance, dist_cache2, cost);
}
best_len = l;
}
}
// At higher iterations look only for new last distance matches, since
// looking only for new command start positions with the same distances
// does not help much.
if (k >= 2) continue;
// Loop through all possible copy lengths at this position.
size_t len = min_len;
for (size_t j = 0; j < num_matches[i]; ++j) {
BackwardMatch match = matches[cur_match_pos + j];
size_t dist = match.distance;
bool is_dictionary_match = dist > max_distance;
// We already tried all possible last distance matches, so we can use
// normal distance code here.
size_t dist_code = dist + 15;
// Try all copy lengths up until the maximum copy length corresponding
// to this distance. If the distance refers to the static dictionary, or
// the maximum length is long enough, try only one maximum length.
size_t max_len = match.length();
if (len < max_len && (is_dictionary_match || max_len > kMaxZopfliLen)) {
len = max_len;
}
for (; len <= max_len; ++len) {
size_t len_code = is_dictionary_match ? match.length_code() : len;
const size_t inslen = i - start;
double cmd_cost = model.GetCommandCost(dist_code, len_code, inslen);
double cost = start_costdiff + cmd_cost + model.GetLiteralCosts(0, i);
if (cost < nodes[i + len].cost) {
UpdateZopfliNode(&nodes[0], i, start, len, len_code, dist,
dist_code, max_distance, dist_cache2, cost);
}
}
}
} }
// Because of prerequisite, p >= clen + ilen >= 2.
p -= clen + ilen;
}
for (; idx < 4; ++idx) {
dist_cache[idx] = *starting_dist_cache++;
}
}
cur_match_pos += num_matches[i]; void UpdateNodes(const size_t num_bytes,
const size_t block_start,
const size_t pos,
const uint8_t* ringbuffer,
const size_t ringbuffer_mask,
const size_t max_backward_limit,
const int* starting_dist_cache,
const size_t num_matches,
const BackwardMatch* matches,
const ZopfliCostModel* model,
StartPosQueue* queue,
ZopfliNode* nodes) {
size_t cur_ix = block_start + pos;
size_t cur_ix_masked = cur_ix & ringbuffer_mask;
size_t max_distance = std::min(cur_ix, max_backward_limit);
// The zopflification can be too slow in case of very long lengths, so in if (nodes[pos].cost <= model->GetLiteralCosts(0, pos)) {
// such case skip it all, it does not cost a lot of compression ratio. StartPosQueue::PosData posdata;
if (num_matches[i] == 1 && posdata.pos = pos;
matches[cur_match_pos - 1].length() > kMaxZopfliLen) { posdata.costdiff = nodes[pos].cost - model->GetLiteralCosts(0, pos);
i += matches[cur_match_pos - 1].length() - 1; ComputeDistanceCache(block_start, pos, max_backward_limit,
queue.Clear(); starting_dist_cache, nodes, posdata.distance_cache);
} queue->Push(posdata);
} }
std::vector<uint32_t> backwards; const size_t min_len = ComputeMinimumCopyLength(
*queue, nodes, *model, num_bytes, pos);
// Go over the command starting positions in order of increasing cost
// difference.
for (size_t k = 0; k < 5 && k < queue->size(); ++k) {
const StartPosQueue::PosData& posdata = queue->GetStartPosData(k);
const size_t start = posdata.pos;
const float start_costdiff = posdata.costdiff;
// Look for last distance matches using the distance cache from this
// starting position.
size_t best_len = min_len - 1;
for (size_t j = 0; j < kNumDistanceShortCodes; ++j) {
const size_t idx = kDistanceCacheIndex[j];
const size_t backward = static_cast<size_t>(posdata.distance_cache[idx] +
kDistanceCacheOffset[j]);
size_t prev_ix = cur_ix - backward;
if (prev_ix >= cur_ix) {
continue;
}
if (PREDICT_FALSE(backward > max_distance)) {
continue;
}
prev_ix &= ringbuffer_mask;
if (cur_ix_masked + best_len > ringbuffer_mask ||
prev_ix + best_len > ringbuffer_mask ||
ringbuffer[cur_ix_masked + best_len] !=
ringbuffer[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&ringbuffer[prev_ix],
&ringbuffer[cur_ix_masked],
num_bytes - pos);
for (size_t l = best_len + 1; l <= len; ++l) {
const size_t inslen = pos - start;
float cmd_cost = model->GetCommandCost(j, l, inslen);
float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos);
if (cost < nodes[pos + l].cost) {
UpdateZopfliNode(&nodes[0], pos, start, l, l, backward, j + 1, cost);
}
best_len = l;
}
}
// At higher iterations look only for new last distance matches, since
// looking only for new command start positions with the same distances
// does not help much.
if (k >= 2) continue;
// Loop through all possible copy lengths at this position.
size_t len = min_len;
for (size_t j = 0; j < num_matches; ++j) {
BackwardMatch match = matches[j];
size_t dist = match.distance;
bool is_dictionary_match = dist > max_distance;
// We already tried all possible last distance matches, so we can use
// normal distance code here.
size_t dist_code = dist + 15;
// Try all copy lengths up until the maximum copy length corresponding
// to this distance. If the distance refers to the static dictionary, or
// the maximum length is long enough, try only one maximum length.
size_t max_len = match.length();
if (len < max_len && (is_dictionary_match || max_len > kMaxZopfliLen)) {
len = max_len;
}
for (; len <= max_len; ++len) {
size_t len_code = is_dictionary_match ? match.length_code() : len;
const size_t inslen = pos - start;
float cmd_cost = model->GetCommandCost(dist_code, len_code, inslen);
float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos);
if (cost < nodes[pos + len].cost) {
UpdateZopfliNode(&nodes[0], pos, start, len, len_code, dist, 0, cost);
}
}
}
}
}
void ComputeShortestPathFromNodes(size_t num_bytes,
const ZopfliNode* nodes,
std::vector<uint32_t>* path) {
std::vector<uint32_t> backwards(num_bytes / 2 + 1);
size_t index = num_bytes; size_t index = num_bytes;
while (nodes[index].cost == kInfinity) --index; while (nodes[index].cost == kInfinity) --index;
size_t num_commands = 0;
while (index != 0) { while (index != 0) {
size_t len = nodes[index].length + nodes[index].insert_length; size_t len = nodes[index].command_length();
backwards.push_back(static_cast<uint32_t>(len)); backwards[num_commands++] = static_cast<uint32_t>(len);
index -= len; index -= len;
} }
path->resize(num_commands);
std::vector<uint32_t> path; for (size_t i = num_commands, j = 0; i > 0; --i, ++j) {
for (size_t i = backwards.size(); i > 0; i--) { (*path)[j] = backwards[i - 1];
path.push_back(backwards[i - 1]);
} }
}
void ZopfliCreateCommands(const size_t num_bytes,
const size_t block_start,
const size_t max_backward_limit,
const std::vector<uint32_t>& path,
const ZopfliNode* nodes,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_literals) {
size_t pos = 0; size_t pos = 0;
for (size_t i = 0; i < path.size(); i++) { for (size_t i = 0; i < path.size(); i++) {
const ZopfliNode& next = nodes[pos + path[i]]; const ZopfliNode& next = nodes[pos + path[i]];
size_t copy_length = next.length; size_t copy_length = next.copy_length();
size_t insert_length = next.insert_length; size_t insert_length = next.insert_length;
pos += insert_length; pos += insert_length;
if (i == 0) { if (i == 0) {
insert_length += *last_insert_len; insert_length += *last_insert_len;
*last_insert_len = 0; *last_insert_len = 0;
} }
size_t distance = next.distance; size_t distance = next.copy_distance();
size_t len_code = next.length_code; size_t len_code = next.length_code();
size_t max_distance = std::min(position + pos, max_backward_limit); size_t max_distance = std::min(block_start + pos, max_backward_limit);
bool is_dictionary = (distance > max_distance); bool is_dictionary = (distance > max_distance);
size_t dist_code = next.distance_code; size_t dist_code = next.distance_code();
Command cmd(insert_length, copy_length, len_code, dist_code); Command cmd(insert_length, copy_length, len_code, dist_code);
*commands++ = cmd; commands[i] = cmd;
if (!is_dictionary && dist_code > 0) { if (!is_dictionary && dist_code > 0) {
dist_cache[3] = dist_cache[2]; dist_cache[3] = dist_cache[2];
@ -473,11 +462,85 @@ void ZopfliIterate(size_t num_bytes,
} }
*num_literals += insert_length; *num_literals += insert_length;
insert_length = 0;
pos += copy_length; pos += copy_length;
} }
*last_insert_len += num_bytes - pos; *last_insert_len += num_bytes - pos;
*num_commands += static_cast<size_t>(commands - orig_commands); }
void ZopfliIterate(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
const int* dist_cache,
const ZopfliCostModel& model,
const std::vector<uint32_t>& num_matches,
const std::vector<BackwardMatch>& matches,
ZopfliNode* nodes,
std::vector<uint32_t>* path) {
nodes[0].length = 0;
nodes[0].cost = 0;
StartPosQueue queue(3);
size_t cur_match_pos = 0;
for (size_t i = 0; i + 3 < num_bytes; i++) {
UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
max_backward_limit, dist_cache, num_matches[i],
&matches[cur_match_pos], &model, &queue, &nodes[0]);
cur_match_pos += num_matches[i];
// The zopflification can be too slow in case of very long lengths, so in
// such case skip it all, it does not cost a lot of compression ratio.
if (num_matches[i] == 1 &&
matches[cur_match_pos - 1].length() > kMaxZopfliLen) {
i += matches[cur_match_pos - 1].length() - 1;
queue.Clear();
}
}
ComputeShortestPathFromNodes(num_bytes, &nodes[0], path);
}
void ZopfliComputeShortestPath(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
const int* dist_cache,
Hashers::H10* hasher,
ZopfliNode* nodes,
std::vector<uint32_t>* path) {
nodes[0].length = 0;
nodes[0].cost = 0;
ZopfliCostModel* model = new ZopfliCostModel;
model->SetFromLiteralCosts(num_bytes, position,
ringbuffer, ringbuffer_mask);
StartPosQueue queue(3);
BackwardMatch matches[Hashers::H10::kMaxNumMatches];
for (size_t i = 0; i + 3 < num_bytes; i++) {
const size_t max_distance = std::min(position + i, max_backward_limit);
size_t num_matches = hasher->FindAllMatches(
ringbuffer, ringbuffer_mask, position + i, num_bytes - i, max_distance,
matches);
if (num_matches > 0 &&
matches[num_matches - 1].length() > kMaxZopfliLen) {
matches[0] = matches[num_matches - 1];
num_matches = 1;
}
UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
max_backward_limit, dist_cache, num_matches, matches,
model, &queue, nodes);
if (num_matches == 1 && matches[0].length() > kMaxZopfliLen) {
for (size_t j = 1; j < matches[0].length() && i + 4 < num_bytes; ++j) {
++i;
if (matches[0].length() - j < 64 &&
num_bytes - i >= kMaxTreeCompLength) {
hasher->Store(ringbuffer, ringbuffer_mask, position + i);
}
}
queue.Clear();
}
}
delete model;
ComputeShortestPathFromNodes(num_bytes, nodes, path);
} }
template<typename Hasher> template<typename Hasher>
@ -527,7 +590,7 @@ void CreateBackwardReferences(size_t num_bytes,
size_t apply_random_heuristics = i + random_heuristics_window_size; size_t apply_random_heuristics = i + random_heuristics_window_size;
// Minimum score to accept a backward reference. // Minimum score to accept a backward reference.
const int kMinScore = 4.0; const double kMinScore = 4.0;
while (i + Hasher::kHashTypeLength - 1 < i_end) { while (i + Hasher::kHashTypeLength - 1 < i_end) {
size_t max_length = i_end - i; size_t max_length = i_end - i;
@ -649,16 +712,23 @@ void CreateBackwardReferences(size_t num_bytes,
if (zopflify) { if (zopflify) {
Hashers::H10* hasher = hashers->hash_h10; Hashers::H10* hasher = hashers->hash_h10;
hasher->Init(lgwin, position, num_bytes, is_last); hasher->Init(lgwin, position, num_bytes, is_last);
if (num_bytes >= 3 && position >= kMaxTreeCompLength) { hasher->StitchToPreviousBlock(num_bytes, position,
// Store the last `kMaxTreeCompLength - 1` positions in the hasher. ringbuffer, ringbuffer_mask);
// These could not be calculated before, since they require knowledge
// of both the previous and the current block.
for (size_t i = position - kMaxTreeCompLength + 1; i < position; ++i) {
hasher->Store(ringbuffer, ringbuffer_mask, i, num_bytes + position - i);
}
}
// Set maximum distance, see section 9.1. of the spec. // Set maximum distance, see section 9.1. of the spec.
const size_t max_backward_limit = (1 << lgwin) - 16; const size_t max_backward_limit = (1 << lgwin) - 16;
if (quality == 10) {
std::vector<ZopfliNode> nodes(num_bytes + 1);
std::vector<uint32_t> path;
ZopfliComputeShortestPath(num_bytes, position,
ringbuffer, ringbuffer_mask,
max_backward_limit, dist_cache, hasher,
&nodes[0], &path);
ZopfliCreateCommands(num_bytes, position, max_backward_limit, path,
&nodes[0], dist_cache, last_insert_len, commands,
num_literals);
*num_commands += path.size();
return;
}
std::vector<uint32_t> num_matches(num_bytes); std::vector<uint32_t> num_matches(num_bytes);
std::vector<BackwardMatch> matches(4 * num_bytes); std::vector<BackwardMatch> matches(4 * num_bytes);
size_t cur_match_pos = 0; size_t cur_match_pos = 0;
@ -686,9 +756,8 @@ void CreateBackwardReferences(size_t num_bytes,
num_matches[i] = 1; num_matches[i] = 1;
for (size_t j = 1; j < match_len; ++j) { for (size_t j = 1; j < match_len; ++j) {
++i; ++i;
if (match_len - j < 64) { if (match_len - j < 64 && num_bytes - i >= kMaxTreeCompLength) {
hasher->Store(ringbuffer, ringbuffer_mask, position + i, hasher->Store(ringbuffer, ringbuffer_mask, position + i);
num_bytes - i);
} }
num_matches[i] = 0; num_matches[i] = 0;
} }
@ -719,9 +788,15 @@ void CreateBackwardReferences(size_t num_bytes,
*num_literals = orig_num_literals; *num_literals = orig_num_literals;
*last_insert_len = orig_last_insert_len; *last_insert_len = orig_last_insert_len;
memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0])); memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
std::vector<ZopfliNode> nodes(num_bytes + 1);
std::vector<uint32_t> path;
ZopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask, ZopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask,
max_backward_limit, model, num_matches, matches, dist_cache, max_backward_limit, dist_cache, model, num_matches, matches,
last_insert_len, commands, num_commands, num_literals); &nodes[0], &path);
ZopfliCreateCommands(num_bytes, position, max_backward_limit, path,
&nodes[0], dist_cache, last_insert_len, commands,
num_literals);
*num_commands += path.size();
} }
return; return;
} }

View File

@ -9,6 +9,8 @@
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_ #ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_H_ #define BROTLI_ENC_BACKWARD_REFERENCES_H_
#include <vector>
#include "./hash.h" #include "./hash.h"
#include "./command.h" #include "./command.h"
#include "./types.h" #include "./types.h"
@ -34,6 +36,81 @@ void CreateBackwardReferences(size_t num_bytes,
size_t* num_commands, size_t* num_commands,
size_t* num_literals); size_t* num_literals);
static const float kInfinity = std::numeric_limits<float>::infinity();
struct ZopfliNode {
ZopfliNode(void) : length(1),
distance(0),
insert_length(0),
cost(kInfinity) {}
inline uint32_t copy_length() const {
return length & 0xffffff;
}
inline uint32_t length_code() const {
const uint32_t modifier = length >> 24;
return copy_length() + 9u - modifier;
}
inline uint32_t copy_distance() const {
return distance & 0x1ffffff;
}
inline uint32_t distance_code() const {
const uint32_t short_code = distance >> 25;
return short_code == 0 ? copy_distance() + 15 : short_code - 1;
}
inline uint32_t command_length() const {
return copy_length() + insert_length;
}
// best length to get up to this byte (not including this byte itself)
// highest 8 bits are used to reconstruct the length code
uint32_t length;
// distance associated with the length
// highest 7 bit contains distance short code + 1 (or zero if no short code)
uint32_t distance;
// number of literal inserts before this copy
uint32_t insert_length;
// smallest cost to get to this byte from the beginning, as found so far
float cost;
};
// Computes the shortest path of commands from position to at most
// position + num_bytes.
//
// |nodes| is indexed over [0..num_bytes] (see the invariant below), so the
// caller must supply an array of at least num_bytes + 1 ZopfliNode entries.
// NOTE(review): |ringbuffer| / |ringbuffer_mask| presumably describe the input
// ring buffer that |position| indexes into, and |dist_cache| the most recently
// used distances; neither is visible from this declaration -- confirm against
// the definition.
//
// On return, path->size() is the number of commands found and path[i] is the
// length of the ith command (copy length plus insert length).
// Note that the sum of the lengths of all commands can be less than num_bytes.
//
// On return, the nodes[0..num_bytes] array will have the following
// "ZopfliNode array invariant":
// For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
// (1) nodes[i].copy_length() >= 2
// (2) nodes[i].command_length() <= i and
// (3) nodes[i - nodes[i].command_length()].cost < kInfinity
void ZopfliComputeShortestPath(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
const int* dist_cache,
Hashers::H10* hasher,
ZopfliNode* nodes,
std::vector<uint32_t>* path);
// Turns a command path over a ZopfliNode array into Command entries.
//
// NOTE(review): the definition is not visible here; the notes below are
// inferred from the parameter list and from the call site, so confirm them
// against the implementation:
//  - |path| and |nodes| are read-only inputs (const reference / pointer to
//    const); presumably they are the results of the shortest-path search for
//    the same |num_bytes| and |block_start|.
//  - |commands| presumably receives path.size() entries: the visible caller
//    advances its command count by path.size() right after this call.
//  - |dist_cache|, |last_insert_len| and |num_literals| are non-const pointers
//    and are presumably updated in place.
void ZopfliCreateCommands(const size_t num_bytes,
const size_t block_start,
const size_t max_backward_limit,
const std::vector<uint32_t>& path,
const ZopfliNode* nodes,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_literals);
} // namespace brotli } // namespace brotli
#endif // BROTLI_ENC_BACKWARD_REFERENCES_H_ #endif // BROTLI_ENC_BACKWARD_REFERENCES_H_

View File

@ -48,38 +48,62 @@ static inline double BitsEntropy(const uint32_t *population, size_t size) {
return retval; return retval;
} }
template<int kSize> template<int kSize>
double PopulationCost(const Histogram<kSize>& histogram) { double PopulationCost(const Histogram<kSize>& histogram) {
static const double kOneSymbolHistogramCost = 12;
static const double kTwoSymbolHistogramCost = 20;
static const double kThreeSymbolHistogramCost = 28;
static const double kFourSymbolHistogramCost = 37;
if (histogram.total_count_ == 0) { if (histogram.total_count_ == 0) {
return 12; return kOneSymbolHistogramCost;
} }
int count = 0; int count = 0;
int s[5];
for (int i = 0; i < kSize; ++i) { for (int i = 0; i < kSize; ++i) {
if (histogram.data_[i] > 0) { if (histogram.data_[i] > 0) {
s[count] = i;
++count; ++count;
if (count > 4) break;
} }
} }
if (count == 1) { if (count == 1) {
return 12; return kOneSymbolHistogramCost;
} }
if (count == 2) { if (count == 2) {
return static_cast<double>(20 + histogram.total_count_); return (kTwoSymbolHistogramCost +
static_cast<double>(histogram.total_count_));
} }
double bits = 0; if (count == 3) {
uint8_t depth_array[kSize] = { 0 }; const uint32_t histo0 = histogram.data_[s[0]];
if (count <= 4) { const uint32_t histo1 = histogram.data_[s[1]];
// For very low symbol count we build the Huffman tree. const uint32_t histo2 = histogram.data_[s[2]];
CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth_array); const uint32_t histomax = std::max(histo0, std::max(histo1, histo2));
for (int i = 0; i < kSize; ++i) { return (kThreeSymbolHistogramCost +
bits += histogram.data_[i] * depth_array[i]; 2 * (histo0 + histo1 + histo2) - histomax);
}
if (count == 4) {
uint32_t histo[4];
for (int i = 0; i < 4; ++i) {
histo[i] = histogram.data_[s[i]];
} }
return count == 3 ? bits + 28 : bits + 37; // Sort
for (int i = 0; i < 4; ++i) {
for (int j = i + 1; j < 4; ++j) {
if (histo[j] > histo[i]) {
std::swap(histo[j], histo[i]);
}
}
}
const uint32_t h23 = histo[2] + histo[3];
const uint32_t histomax = std::max(h23, histo[0]);
return (kFourSymbolHistogramCost +
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
} }
// In this loop we compute the entropy of the histogram and simultaneously // In this loop we compute the entropy of the histogram and simultaneously
// build a simplified histogram of the code length codes where we use the // build a simplified histogram of the code length codes where we use the
// zero repeat code 17, but we don't use the non-zero repeat code 16. // zero repeat code 17, but we don't use the non-zero repeat code 16.
double bits = 0;
size_t max_depth = 1; size_t max_depth = 1;
uint32_t depth_histo[kCodeLengthCodes] = { 0 }; uint32_t depth_histo[kCodeLengthCodes] = { 0 };
const double log2total = FastLog2(histogram.total_count_); const double log2total = FastLog2(histogram.total_count_);

View File

@ -13,7 +13,7 @@
#include <algorithm> #include <algorithm>
#include <cstring> #include <cstring>
#include <map> #include <vector>
#include "./cluster.h" #include "./cluster.h"
#include "./command.h" #include "./command.h"
@ -70,20 +70,7 @@ void CopyLiteralsToByteArray(const Command* cmds,
memcpy(&(*literals)[pos], data + from_pos, insert_len); memcpy(&(*literals)[pos], data + from_pos, insert_len);
pos += insert_len; pos += insert_len;
} }
from_pos = (from_pos + insert_len + cmds[i].copy_len_) & mask; from_pos = (from_pos + insert_len + cmds[i].copy_len()) & mask;
}
}
void CopyCommandsToByteArray(const Command* cmds,
const size_t num_commands,
std::vector<uint16_t>* insert_and_copy_codes,
std::vector<uint16_t>* distance_prefixes) {
for (size_t i = 0; i < num_commands; ++i) {
const Command& cmd = cmds[i];
insert_and_copy_codes->push_back(cmd.cmd_prefix_);
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
distance_prefixes->push_back(cmd.dist_prefix_);
}
} }
} }
@ -97,27 +84,23 @@ inline static unsigned int MyRand(unsigned int* seed) {
template<typename HistogramType, typename DataType> template<typename HistogramType, typename DataType>
void InitialEntropyCodes(const DataType* data, size_t length, void InitialEntropyCodes(const DataType* data, size_t length,
size_t literals_per_histogram,
size_t max_histograms,
size_t stride, size_t stride,
std::vector<HistogramType>* vec) { size_t num_histograms,
size_t total_histograms = length / literals_per_histogram + 1; HistogramType* histograms) {
if (total_histograms > max_histograms) { for (size_t i = 0; i < num_histograms; ++i) {
total_histograms = max_histograms; histograms[i].Clear();
} }
unsigned int seed = 7; unsigned int seed = 7;
size_t block_length = length / total_histograms; size_t block_length = length / num_histograms;
for (size_t i = 0; i < total_histograms; ++i) { for (size_t i = 0; i < num_histograms; ++i) {
size_t pos = length * i / total_histograms; size_t pos = length * i / num_histograms;
if (i != 0) { if (i != 0) {
pos += MyRand(&seed) % block_length; pos += MyRand(&seed) % block_length;
} }
if (pos + stride >= length) { if (pos + stride >= length) {
pos = length - stride - 1; pos = length - stride - 1;
} }
HistogramType histo; histograms[i].Add(data + pos, stride);
histo.Add(data + pos, stride);
vec->push_back(histo);
} }
} }
@ -140,16 +123,17 @@ void RandomSample(unsigned int* seed,
template<typename HistogramType, typename DataType> template<typename HistogramType, typename DataType>
void RefineEntropyCodes(const DataType* data, size_t length, void RefineEntropyCodes(const DataType* data, size_t length,
size_t stride, size_t stride,
std::vector<HistogramType>* vec) { size_t num_histograms,
HistogramType* histograms) {
size_t iters = size_t iters =
kIterMulForRefining * length / stride + kMinItersForRefining; kIterMulForRefining * length / stride + kMinItersForRefining;
unsigned int seed = 7; unsigned int seed = 7;
iters = ((iters + vec->size() - 1) / vec->size()) * vec->size(); iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
for (size_t iter = 0; iter < iters; ++iter) { for (size_t iter = 0; iter < iters; ++iter) {
HistogramType sample; HistogramType sample;
RandomSample(&seed, data, length, stride, &sample); RandomSample(&seed, data, length, stride, &sample);
size_t ix = iter % vec->size(); size_t ix = iter % num_histograms;
(*vec)[ix].AddHistogram(sample); histograms[ix].AddHistogram(sample);
} }
} }
@ -157,34 +141,40 @@ inline static double BitCost(size_t count) {
return count == 0 ? -2.0 : FastLog2(count); return count == 0 ? -2.0 : FastLog2(count);
} }
// Assigns a block id from the range [0, num_histograms) to each data element
// in data[0..length) and fills in block_id[0..length) with the assigned values.
// Returns the number of blocks, i.e. one plus the number of block switches.
template<typename DataType, int kSize> template<typename DataType, int kSize>
void FindBlocks(const DataType* data, const size_t length, size_t FindBlocks(const DataType* data, const size_t length,
const double block_switch_bitcost, const double block_switch_bitcost,
const std::vector<Histogram<kSize> > &vec, const size_t num_histograms,
uint8_t *block_id) { const Histogram<kSize>* histograms,
if (vec.size() <= 1) { double* insert_cost,
double* cost,
uint8_t* switch_signal,
uint8_t *block_id) {
if (num_histograms <= 1) {
for (size_t i = 0; i < length; ++i) { for (size_t i = 0; i < length; ++i) {
block_id[i] = 0; block_id[i] = 0;
} }
return; return 1;
} }
size_t vecsize = vec.size(); const size_t bitmaplen = (num_histograms + 7) >> 3;
assert(vecsize <= 256); assert(num_histograms <= 256);
double* insert_cost = new double[kSize * vecsize]; memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * num_histograms);
memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * vecsize); for (size_t j = 0; j < num_histograms; ++j) {
for (size_t j = 0; j < vecsize; ++j) { insert_cost[j] = FastLog2(static_cast<uint32_t>(
insert_cost[j] = FastLog2(static_cast<uint32_t>(vec[j].total_count_)); histograms[j].total_count_));
} }
for (size_t i = kSize; i != 0;) { for (size_t i = kSize; i != 0;) {
--i; --i;
for (size_t j = 0; j < vecsize; ++j) { for (size_t j = 0; j < num_histograms; ++j) {
insert_cost[i * vecsize + j] = insert_cost[j] - BitCost(vec[j].data_[i]); insert_cost[i * num_histograms + j] =
insert_cost[j] - BitCost(histograms[j].data_[i]);
} }
} }
double *cost = new double[vecsize]; memset(cost, 0, sizeof(cost[0]) * num_histograms);
memset(cost, 0, sizeof(cost[0]) * vecsize); memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
bool* switch_signal = new bool[length * vecsize];
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * vecsize);
// After each iteration of this loop, cost[k] will contain the difference // After each iteration of this loop, cost[k] will contain the difference
// between the minimum cost of arriving at the current byte position using // between the minimum cost of arriving at the current byte position using
// entropy code k, and the minimum cost of arriving at the current byte // entropy code k, and the minimum cost of arriving at the current byte
@ -192,10 +182,10 @@ void FindBlocks(const DataType* data, const size_t length,
// reaches block switch cost, it means that when we trace back from the last // reaches block switch cost, it means that when we trace back from the last
// position, we need to switch here. // position, we need to switch here.
for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) { for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
size_t ix = byte_ix * vecsize; size_t ix = byte_ix * bitmaplen;
size_t insert_cost_ix = data[byte_ix] * vecsize; size_t insert_cost_ix = data[byte_ix] * num_histograms;
double min_cost = 1e99; double min_cost = 1e99;
for (size_t k = 0; k < vecsize; ++k) { for (size_t k = 0; k < num_histograms; ++k) {
// We are coding the symbol in data[byte_ix] with entropy code k. // We are coding the symbol in data[byte_ix] with entropy code k.
cost[k] += insert_cost[insert_cost_ix + k]; cost[k] += insert_cost[insert_cost_ix + k];
if (cost[k] < min_cost) { if (cost[k] < min_cost) {
@ -208,110 +198,200 @@ void FindBlocks(const DataType* data, const size_t length,
if (byte_ix < 2000) { if (byte_ix < 2000) {
block_switch_cost *= 0.77 + 0.07 * static_cast<double>(byte_ix) / 2000; block_switch_cost *= 0.77 + 0.07 * static_cast<double>(byte_ix) / 2000;
} }
for (size_t k = 0; k < vecsize; ++k) { for (size_t k = 0; k < num_histograms; ++k) {
cost[k] -= min_cost; cost[k] -= min_cost;
if (cost[k] >= block_switch_cost) { if (cost[k] >= block_switch_cost) {
cost[k] = block_switch_cost; cost[k] = block_switch_cost;
switch_signal[ix + k] = true; const uint8_t mask = static_cast<uint8_t>(1u << (k & 7));
assert((k >> 3) < bitmaplen);
switch_signal[ix + (k >> 3)] |= mask;
} }
} }
} }
// Now trace back from the last position and switch at the marked places. // Now trace back from the last position and switch at the marked places.
size_t byte_ix = length - 1; size_t byte_ix = length - 1;
size_t ix = byte_ix * vecsize; size_t ix = byte_ix * bitmaplen;
uint8_t cur_id = block_id[byte_ix]; uint8_t cur_id = block_id[byte_ix];
size_t num_blocks = 1;
while (byte_ix > 0) { while (byte_ix > 0) {
--byte_ix; --byte_ix;
ix -= vecsize; ix -= bitmaplen;
if (switch_signal[ix + cur_id]) { const uint8_t mask = static_cast<uint8_t>(1u << (cur_id & 7));
cur_id = block_id[byte_ix]; assert((static_cast<size_t>(cur_id) >> 3) < bitmaplen);
if (switch_signal[ix + (cur_id >> 3)] & mask) {
if (cur_id != block_id[byte_ix]) {
cur_id = block_id[byte_ix];
++num_blocks;
}
} }
block_id[byte_ix] = cur_id; block_id[byte_ix] = cur_id;
} }
delete[] insert_cost; return num_blocks;
delete[] cost;
delete[] switch_signal;
} }
size_t RemapBlockIds(uint8_t* block_ids, const size_t length) { size_t RemapBlockIds(uint8_t* block_ids, const size_t length,
std::map<uint8_t, uint8_t> new_id; uint16_t* new_id, const size_t num_histograms) {
size_t next_id = 0; static const uint16_t kInvalidId = 256;
for (size_t i = 0; i < num_histograms; ++i) {
new_id[i] = kInvalidId;
}
uint16_t next_id = 0;
for (size_t i = 0; i < length; ++i) { for (size_t i = 0; i < length; ++i) {
if (new_id.find(block_ids[i]) == new_id.end()) { assert(block_ids[i] < num_histograms);
new_id[block_ids[i]] = static_cast<uint8_t>(next_id); if (new_id[block_ids[i]] == kInvalidId) {
++next_id; new_id[block_ids[i]] = next_id++;
} }
} }
for (size_t i = 0; i < length; ++i) { for (size_t i = 0; i < length; ++i) {
block_ids[i] = new_id[block_ids[i]]; block_ids[i] = static_cast<uint8_t>(new_id[block_ids[i]]);
assert(block_ids[i] < num_histograms);
} }
assert(next_id <= num_histograms);
return next_id; return next_id;
} }
template<typename HistogramType, typename DataType> template<typename HistogramType, typename DataType>
void BuildBlockHistograms(const DataType* data, const size_t length, void BuildBlockHistograms(const DataType* data, const size_t length,
uint8_t* block_ids, const uint8_t* block_ids,
std::vector<HistogramType>* histograms) { const size_t num_histograms,
size_t num_types = RemapBlockIds(block_ids, length); HistogramType* histograms) {
assert(num_types <= 256); for (size_t i = 0; i < num_histograms; ++i) {
histograms->clear(); histograms[i].Clear();
histograms->resize(num_types); }
for (size_t i = 0; i < length; ++i) { for (size_t i = 0; i < length; ++i) {
(*histograms)[block_ids[i]].Add(data[i]); histograms[block_ids[i]].Add(data[i]);
} }
} }
template<typename HistogramType, typename DataType> template<typename HistogramType, typename DataType>
void ClusterBlocks(const DataType* data, const size_t length, void ClusterBlocks(const DataType* data, const size_t length,
uint8_t* block_ids) { const size_t num_blocks,
std::vector<HistogramType> histograms; uint8_t* block_ids,
std::vector<uint32_t> block_index(length); BlockSplit* split) {
uint32_t cur_idx = 0;
HistogramType cur_histogram;
for (size_t i = 0; i < length; ++i) {
bool block_boundary = (i + 1 == length || block_ids[i] != block_ids[i + 1]);
block_index[i] = cur_idx;
cur_histogram.Add(data[i]);
if (block_boundary) {
histograms.push_back(cur_histogram);
cur_histogram.Clear();
++cur_idx;
}
}
std::vector<HistogramType> clustered_histograms;
std::vector<uint32_t> histogram_symbols;
// Block ids need to fit in one byte.
static const size_t kMaxNumberOfBlockTypes = 256; static const size_t kMaxNumberOfBlockTypes = 256;
ClusterHistograms(histograms, 1, histograms.size(), static const size_t kHistogramsPerBatch = 64;
kMaxNumberOfBlockTypes, static const size_t kClustersPerBatch = 16;
&clustered_histograms, std::vector<uint32_t> histogram_symbols(num_blocks);
&histogram_symbols); std::vector<uint32_t> block_lengths(num_blocks);
for (size_t i = 0; i < length; ++i) {
block_ids[i] = static_cast<uint8_t>(histogram_symbols[block_index[i]]);
}
}
void BuildBlockSplit(const std::vector<uint8_t>& block_ids, BlockSplit* split) { size_t block_idx = 0;
uint8_t cur_id = block_ids[0]; for (size_t i = 0; i < length; ++i) {
uint8_t max_type = cur_id; assert(block_idx < num_blocks);
uint32_t cur_length = 1; ++block_lengths[block_idx];
for (size_t i = 1; i < block_ids.size(); ++i) { if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
uint8_t next_id = block_ids[i]; ++block_idx;
if (next_id != cur_id) {
split->types.push_back(cur_id);
split->lengths.push_back(cur_length);
max_type = std::max(max_type, next_id);
cur_id = next_id;
cur_length = 0;
} }
++cur_length;
} }
split->types.push_back(cur_id); assert(block_idx == num_blocks);
split->lengths.push_back(cur_length);
const size_t expected_num_clusters =
kClustersPerBatch *
(num_blocks + kHistogramsPerBatch - 1) / kHistogramsPerBatch;
std::vector<HistogramType> all_histograms;
std::vector<uint32_t> cluster_size;
all_histograms.reserve(expected_num_clusters);
cluster_size.reserve(expected_num_clusters);
size_t num_clusters = 0;
std::vector<HistogramType> histograms(
std::min(num_blocks, kHistogramsPerBatch));
size_t max_num_pairs = kHistogramsPerBatch * kHistogramsPerBatch / 2;
std::vector<HistogramPair> pairs(max_num_pairs + 1);
size_t pos = 0;
for (size_t i = 0; i < num_blocks; i += kHistogramsPerBatch) {
const size_t num_to_combine = std::min(num_blocks - i, kHistogramsPerBatch);
uint32_t sizes[kHistogramsPerBatch];
uint32_t clusters[kHistogramsPerBatch];
uint32_t symbols[kHistogramsPerBatch];
uint32_t remap[kHistogramsPerBatch];
for (size_t j = 0; j < num_to_combine; ++j) {
histograms[j].Clear();
for (size_t k = 0; k < block_lengths[i + j]; ++k) {
histograms[j].Add(data[pos++]);
}
histograms[j].bit_cost_ = PopulationCost(histograms[j]);
symbols[j] = clusters[j] = static_cast<uint32_t>(j);
sizes[j] = 1;
}
size_t num_new_clusters = HistogramCombine(
&histograms[0], sizes, symbols, clusters, &pairs[0], num_to_combine,
num_to_combine, kHistogramsPerBatch, max_num_pairs);
for (size_t j = 0; j < num_new_clusters; ++j) {
all_histograms.push_back(histograms[clusters[j]]);
cluster_size.push_back(sizes[clusters[j]]);
remap[clusters[j]] = static_cast<uint32_t>(j);
}
for (size_t j = 0; j < num_to_combine; ++j) {
histogram_symbols[i + j] =
static_cast<uint32_t>(num_clusters) + remap[symbols[j]];
}
num_clusters += num_new_clusters;
assert(num_clusters == cluster_size.size());
assert(num_clusters == all_histograms.size());
}
max_num_pairs =
std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
pairs.resize(max_num_pairs + 1);
std::vector<uint32_t> clusters(num_clusters);
for (size_t i = 0; i < num_clusters; ++i) {
clusters[i] = static_cast<uint32_t>(i);
}
size_t num_final_clusters =
HistogramCombine(&all_histograms[0], &cluster_size[0],
&histogram_symbols[0],
&clusters[0], &pairs[0], num_clusters,
num_blocks, kMaxNumberOfBlockTypes, max_num_pairs);
static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
std::vector<uint32_t> new_index(num_clusters, kInvalidIndex);
uint32_t next_index = 0;
pos = 0;
for (size_t i = 0; i < num_blocks; ++i) {
HistogramType histo;
for (size_t j = 0; j < block_lengths[i]; ++j) {
histo.Add(data[pos++]);
}
uint32_t best_out =
i == 0 ? histogram_symbols[0] : histogram_symbols[i - 1];
double best_bits = HistogramBitCostDistance(
histo, all_histograms[best_out]);
for (size_t j = 0; j < num_final_clusters; ++j) {
const double cur_bits = HistogramBitCostDistance(
histo, all_histograms[clusters[j]]);
if (cur_bits < best_bits) {
best_bits = cur_bits;
best_out = clusters[j];
}
}
histogram_symbols[i] = best_out;
if (new_index[best_out] == kInvalidIndex) {
new_index[best_out] = next_index++;
}
}
uint8_t max_type = 0;
uint32_t cur_length = 0;
block_idx = 0;
split->types.resize(num_blocks);
split->lengths.resize(num_blocks);
for (size_t i = 0; i < num_blocks; ++i) {
cur_length += block_lengths[i];
if (i + 1 == num_blocks ||
histogram_symbols[i] != histogram_symbols[i + 1]) {
const uint8_t id = static_cast<uint8_t>(new_index[histogram_symbols[i]]);
split->types[block_idx] = id;
split->lengths[block_idx] = cur_length;
max_type = std::max(max_type, id);
cur_length = 0;
++block_idx;
}
}
split->types.resize(block_idx);
split->lengths.resize(block_idx);
split->num_types = static_cast<size_t>(max_type) + 1; split->num_types = static_cast<size_t>(max_type) + 1;
} }
template<typename HistogramType, typename DataType> template<int kSize, typename DataType>
void SplitByteVector(const std::vector<DataType>& data, void SplitByteVector(const std::vector<DataType>& data,
const size_t literals_per_histogram, const size_t literals_per_histogram,
const size_t max_histograms, const size_t max_histograms,
@ -327,27 +407,44 @@ void SplitByteVector(const std::vector<DataType>& data,
split->lengths.push_back(static_cast<uint32_t>(data.size())); split->lengths.push_back(static_cast<uint32_t>(data.size()));
return; return;
} }
std::vector<HistogramType> histograms; size_t num_histograms = data.size() / literals_per_histogram + 1;
if (num_histograms > max_histograms) {
num_histograms = max_histograms;
}
Histogram<kSize>* histograms = new Histogram<kSize>[num_histograms];
// Find good entropy codes. // Find good entropy codes.
InitialEntropyCodes(&data[0], data.size(), InitialEntropyCodes(&data[0], data.size(),
literals_per_histogram,
max_histograms,
sampling_stride_length, sampling_stride_length,
&histograms); num_histograms, histograms);
RefineEntropyCodes(&data[0], data.size(), RefineEntropyCodes(&data[0], data.size(),
sampling_stride_length, sampling_stride_length,
&histograms); num_histograms, histograms);
// Find a good path through literals with the good entropy codes. // Find a good path through literals with the good entropy codes.
std::vector<uint8_t> block_ids(data.size()); std::vector<uint8_t> block_ids(data.size());
size_t num_blocks;
const size_t bitmaplen = (num_histograms + 7) >> 3;
double* insert_cost = new double[kSize * num_histograms];
double *cost = new double[num_histograms];
uint8_t* switch_signal = new uint8_t[data.size() * bitmaplen];
uint16_t* new_id = new uint16_t[num_histograms];
for (size_t i = 0; i < 10; ++i) { for (size_t i = 0; i < 10; ++i) {
FindBlocks(&data[0], data.size(), num_blocks = FindBlocks(&data[0], data.size(),
block_switch_cost, block_switch_cost,
histograms, num_histograms, histograms,
&block_ids[0]); insert_cost, cost, switch_signal,
BuildBlockHistograms(&data[0], data.size(), &block_ids[0], &histograms); &block_ids[0]);
num_histograms = RemapBlockIds(&block_ids[0], data.size(),
new_id, num_histograms);
BuildBlockHistograms(&data[0], data.size(), &block_ids[0],
num_histograms, histograms);
} }
ClusterBlocks<HistogramType>(&data[0], data.size(), &block_ids[0]); delete[] insert_cost;
BuildBlockSplit(block_ids, split); delete[] cost;
delete[] switch_signal;
delete[] new_id;
delete[] histograms;
ClusterBlocks<Histogram<kSize> >(&data[0], data.size(), num_blocks,
&block_ids[0], split);
} }
void SplitBlock(const Command* cmds, void SplitBlock(const Command* cmds,
@ -358,32 +455,51 @@ void SplitBlock(const Command* cmds,
BlockSplit* literal_split, BlockSplit* literal_split,
BlockSplit* insert_and_copy_split, BlockSplit* insert_and_copy_split,
BlockSplit* dist_split) { BlockSplit* dist_split) {
// Create a continuous array of literals. {
std::vector<uint8_t> literals; // Create a continuous array of literals.
CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, &literals); std::vector<uint8_t> literals;
CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, &literals);
// Create the block split on the array of literals.
// Literal histograms have alphabet size 256.
SplitByteVector<256>(
literals,
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
kLiteralStrideLength, kLiteralBlockSwitchCost,
literal_split);
}
// Compute prefix codes for commands. {
std::vector<uint16_t> insert_and_copy_codes; // Compute prefix codes for commands.
std::vector<uint16_t> distance_prefixes; std::vector<uint16_t> insert_and_copy_codes(num_commands);
CopyCommandsToByteArray(cmds, num_commands, for (size_t i = 0; i < num_commands; ++i) {
&insert_and_copy_codes, insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
&distance_prefixes); }
// Create the block split on the array of command prefixes.
SplitByteVector<kNumCommandPrefixes>(
insert_and_copy_codes,
kSymbolsPerCommandHistogram, kMaxCommandHistograms,
kCommandStrideLength, kCommandBlockSwitchCost,
insert_and_copy_split);
}
SplitByteVector<HistogramLiteral>( {
literals, // Create a continuous array of distance prefixes.
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms, std::vector<uint16_t> distance_prefixes(num_commands);
kLiteralStrideLength, kLiteralBlockSwitchCost, size_t pos = 0;
literal_split); for (size_t i = 0; i < num_commands; ++i) {
SplitByteVector<HistogramCommand>( const Command& cmd = cmds[i];
insert_and_copy_codes, if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
kSymbolsPerCommandHistogram, kMaxCommandHistograms, distance_prefixes[pos++] = cmd.dist_prefix_;
kCommandStrideLength, kCommandBlockSwitchCost, }
insert_and_copy_split); }
SplitByteVector<HistogramDistance>( distance_prefixes.resize(pos);
distance_prefixes, // Create the block split on the array of distance prefixes.
kSymbolsPerDistanceHistogram, kMaxCommandHistograms, SplitByteVector<kNumDistancePrefixes>(
kCommandStrideLength, kDistanceBlockSwitchCost, distance_prefixes,
dist_split); kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
kCommandStrideLength, kDistanceBlockSwitchCost,
dist_split);
}
} }
} // namespace brotli } // namespace brotli

View File

@ -28,6 +28,12 @@ namespace brotli {
namespace { namespace {
// Size bound derived from the command prefix alphabet.
// NOTE(review): presumably the capacity needed for HuffmanTree scratch arrays
// when building a tree over the largest alphabet (2 * n + 1 entries for
// n = kNumCommandPrefixes symbols) -- confirm against CreateHuffmanTree.
static const size_t kMaxHuffmanTreeSize = 2 * kNumCommandPrefixes + 1;
// Context map alphabet has 256 context id symbols plus max 16 rle symbols.
static const size_t kContextMapAlphabetSize = 256 + 16;
// Block type alphabet has 256 block id symbols plus 2 special symbols.
static const size_t kBlockTypeAlphabetSize = 256 + 2;
// nibblesbits represents the 2 bits to encode MNIBBLES (0-3) // nibblesbits represents the 2 bits to encode MNIBBLES (0-3)
// REQUIRES: length > 0 // REQUIRES: length > 0
// REQUIRES: length <= (1 << 24) // REQUIRES: length <= (1 << 24)
@ -45,6 +51,18 @@ void EncodeMlen(size_t length, uint64_t* bits,
*bits = length; *bits = length;
} }
// Writes the extra bits of |cmd|'s insert length and copy length with a
// single WriteBits call.
//
// The insert extra value sits in the low bits of the written word and the
// copy extra value is shifted above it by the insert extra-bit count. The
// total width passed to WriteBits is the insert extra-bit count plus
// GetCopyExtra() of the copy length code.
static inline void StoreCommandExtra(
    const Command& cmd, size_t* storage_ix, uint8_t* storage) {
  // Copy side: the value is measured from the base of the copy length code.
  const uint32_t copy_len_code = cmd.copy_len_code();
  const uint16_t copy_code = GetCopyLengthCode(copy_len_code);
  const uint64_t copy_extra_value = copy_len_code - GetCopyBase(copy_code);

  // Insert side: the value is measured from the base of the insert code.
  const uint16_t insert_code = GetInsertLengthCode(cmd.insert_len_);
  const uint32_t insert_extra_bits = GetInsertExtra(insert_code);
  const uint64_t insert_extra_value =
      cmd.insert_len_ - GetInsertBase(insert_code);

  const uint64_t packed =
      (copy_extra_value << insert_extra_bits) | insert_extra_value;
  WriteBits(insert_extra_bits + GetCopyExtra(copy_code), packed,
            storage_ix, storage);
}
} // namespace } // namespace
void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) { void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) {
@ -148,13 +166,14 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
} }
void StoreHuffmanTreeToBitMask( void StoreHuffmanTreeToBitMask(
const std::vector<uint8_t> &huffman_tree, const size_t huffman_tree_size,
const std::vector<uint8_t> &huffman_tree_extra_bits, const uint8_t* huffman_tree,
const uint8_t *code_length_bitdepth, const uint8_t* huffman_tree_extra_bits,
const std::vector<uint16_t> &code_length_bitdepth_symbols, const uint8_t* code_length_bitdepth,
const uint16_t* code_length_bitdepth_symbols,
size_t * __restrict storage_ix, size_t * __restrict storage_ix,
uint8_t * __restrict storage) { uint8_t * __restrict storage) {
for (size_t i = 0; i < huffman_tree.size(); ++i) { for (size_t i = 0; i < huffman_tree_size; ++i) {
size_t ix = huffman_tree[i]; size_t ix = huffman_tree[i];
WriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix], WriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix],
storage_ix, storage); storage_ix, storage);
@ -208,18 +227,21 @@ void StoreSimpleHuffmanTree(const uint8_t* depths,
// num = alphabet size // num = alphabet size
// depths = symbol depths // depths = symbol depths
void StoreHuffmanTree(const uint8_t* depths, size_t num, void StoreHuffmanTree(const uint8_t* depths, size_t num,
HuffmanTree* tree,
size_t *storage_ix, uint8_t *storage) { size_t *storage_ix, uint8_t *storage) {
// Write the Huffman tree into the brotli-representation. // Write the Huffman tree into the brotli-representation.
std::vector<uint8_t> huffman_tree; // The command alphabet is the largest, so this allocation will fit all
std::vector<uint8_t> huffman_tree_extra_bits; // alphabets.
// TODO: Consider allocating these from stack. assert(num <= kNumCommandPrefixes);
huffman_tree.reserve(256); uint8_t huffman_tree[kNumCommandPrefixes];
huffman_tree_extra_bits.reserve(256); uint8_t huffman_tree_extra_bits[kNumCommandPrefixes];
WriteHuffmanTree(depths, num, &huffman_tree, &huffman_tree_extra_bits); size_t huffman_tree_size = 0;
WriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree,
huffman_tree_extra_bits);
// Calculate the statistics of the Huffman tree in brotli-representation. // Calculate the statistics of the Huffman tree in brotli-representation.
uint32_t huffman_tree_histogram[kCodeLengthCodes] = { 0 }; uint32_t huffman_tree_histogram[kCodeLengthCodes] = { 0 };
for (size_t i = 0; i < huffman_tree.size(); ++i) { for (size_t i = 0; i < huffman_tree_size; ++i) {
++huffman_tree_histogram[huffman_tree[i]]; ++huffman_tree_histogram[huffman_tree[i]];
} }
@ -239,11 +261,10 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
// Calculate another Huffman tree to use for compressing both the // Calculate another Huffman tree to use for compressing both the
// earlier Huffman tree with. // earlier Huffman tree with.
// TODO: Consider allocating these from stack.
uint8_t code_length_bitdepth[kCodeLengthCodes] = { 0 }; uint8_t code_length_bitdepth[kCodeLengthCodes] = { 0 };
std::vector<uint16_t> code_length_bitdepth_symbols(kCodeLengthCodes); uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = { 0 };
CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes, CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes,
5, &code_length_bitdepth[0]); 5, tree, &code_length_bitdepth[0]);
ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes, ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes,
&code_length_bitdepth_symbols[0]); &code_length_bitdepth_symbols[0]);
@ -256,16 +277,17 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
} }
// Store the real huffman tree now. // Store the real huffman tree now.
StoreHuffmanTreeToBitMask(huffman_tree, StoreHuffmanTreeToBitMask(huffman_tree_size,
huffman_tree,
huffman_tree_extra_bits, huffman_tree_extra_bits,
&code_length_bitdepth[0], &code_length_bitdepth[0],
code_length_bitdepth_symbols, code_length_bitdepth_symbols,
storage_ix, storage); storage_ix, storage);
} }
void BuildAndStoreHuffmanTree(const uint32_t *histogram, void BuildAndStoreHuffmanTree(const uint32_t *histogram,
const size_t length, const size_t length,
HuffmanTree* tree,
uint8_t* depth, uint8_t* depth,
uint16_t* bits, uint16_t* bits,
size_t* storage_ix, size_t* storage_ix,
@ -296,16 +318,21 @@ void BuildAndStoreHuffmanTree(const uint32_t *histogram,
return; return;
} }
CreateHuffmanTree(histogram, length, 15, depth); CreateHuffmanTree(histogram, length, 15, tree, depth);
ConvertBitDepthsToSymbols(depth, length, bits); ConvertBitDepthsToSymbols(depth, length, bits);
if (count <= 4) { if (count <= 4) {
StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage); StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage);
} else { } else {
StoreHuffmanTree(depth, length, storage_ix, storage); StoreHuffmanTree(depth, length, tree, storage_ix, storage);
} }
} }
static inline bool SortHuffmanTree(const HuffmanTree& v0,
const HuffmanTree& v1) {
return v0.total_count_ < v1.total_count_;
}
void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram, void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
const size_t histogram_total, const size_t histogram_total,
const size_t max_bits, const size_t max_bits,
@ -467,52 +494,58 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
} }
} }
size_t IndexOf(const std::vector<uint32_t>& v, uint32_t value) { size_t IndexOf(const uint8_t* v, size_t v_size, uint8_t value) {
size_t i = 0; size_t i = 0;
for (; i < v.size(); ++i) { for (; i < v_size; ++i) {
if (v[i] == value) return i; if (v[i] == value) return i;
} }
return i; return i;
} }
void MoveToFront(std::vector<uint32_t>* v, size_t index) { void MoveToFront(uint8_t* v, size_t index) {
uint32_t value = (*v)[index]; uint8_t value = v[index];
for (size_t i = index; i != 0; --i) { for (size_t i = index; i != 0; --i) {
(*v)[i] = (*v)[i - 1]; v[i] = v[i - 1];
} }
(*v)[0] = value; v[0] = value;
} }
std::vector<uint32_t> MoveToFrontTransform(const std::vector<uint32_t>& v) { void MoveToFrontTransform(const uint32_t* __restrict v_in,
if (v.empty()) return v; const size_t v_size,
uint32_t max_value = *std::max_element(v.begin(), v.end()); uint32_t* v_out) {
std::vector<uint32_t> mtf(max_value + 1); if (v_size == 0) {
for (uint32_t i = 0; i <= max_value; ++i) mtf[i] = i; return;
std::vector<uint32_t> result(v.size()); }
for (size_t i = 0; i < v.size(); ++i) { uint32_t max_value = *std::max_element(v_in, v_in + v_size);
size_t index = IndexOf(mtf, v[i]); assert(max_value < 256u);
assert(index < mtf.size()); uint8_t mtf[256];
result[i] = static_cast<uint32_t>(index); size_t mtf_size = max_value + 1;
MoveToFront(&mtf, index); for (uint32_t i = 0; i <= max_value; ++i) {
mtf[i] = static_cast<uint8_t>(i);
}
for (size_t i = 0; i < v_size; ++i) {
size_t index = IndexOf(mtf, mtf_size, static_cast<uint8_t>(v_in[i]));
assert(index < mtf_size);
v_out[i] = static_cast<uint32_t>(index);
MoveToFront(mtf, index);
} }
return result;
} }
// Finds runs of zeros in v_in and replaces them with a prefix code of the run // Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of
// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are // the run length plus extra bits (lower 9 bits is the prefix code and the rest
// shifted by *max_length_prefix. Will not create prefix codes bigger than the // are the extra bits). Non-zero values in v[] are shifted by
// initial value of *max_run_length_prefix. The prefix code of run length L is // *max_length_prefix. Will not create prefix codes bigger than the initial
// simply Log2Floor(L) and the number of extra bits is the same as the prefix // value of *max_run_length_prefix. The prefix code of run length L is simply
// code. // Log2Floor(L) and the number of extra bits is the same as the prefix code.
void RunLengthCodeZeros(const std::vector<uint32_t>& v_in, void RunLengthCodeZeros(const size_t in_size,
uint32_t* max_run_length_prefix, uint32_t* __restrict v,
std::vector<uint32_t>* v_out, size_t* __restrict out_size,
std::vector<uint32_t>* extra_bits) { uint32_t* __restrict max_run_length_prefix) {
uint32_t max_reps = 0; uint32_t max_reps = 0;
for (size_t i = 0; i < v_in.size();) { for (size_t i = 0; i < in_size;) {
for (; i < v_in.size() && v_in[i] != 0; ++i) ; for (; i < in_size && v[i] != 0; ++i) ;
uint32_t reps = 0; uint32_t reps = 0;
for (; i < v_in.size() && v_in[i] == 0; ++i) { for (; i < in_size && v[i] == 0; ++i) {
++reps; ++reps;
} }
max_reps = std::max(reps, max_reps); max_reps = std::max(reps, max_reps);
@ -520,27 +553,31 @@ void RunLengthCodeZeros(const std::vector<uint32_t>& v_in,
uint32_t max_prefix = max_reps > 0 ? Log2FloorNonZero(max_reps) : 0; uint32_t max_prefix = max_reps > 0 ? Log2FloorNonZero(max_reps) : 0;
max_prefix = std::min(max_prefix, *max_run_length_prefix); max_prefix = std::min(max_prefix, *max_run_length_prefix);
*max_run_length_prefix = max_prefix; *max_run_length_prefix = max_prefix;
for (size_t i = 0; i < v_in.size();) { *out_size = 0;
if (v_in[i] != 0) { for (size_t i = 0; i < in_size;) {
v_out->push_back(v_in[i] + *max_run_length_prefix); assert(*out_size <= i);
extra_bits->push_back(0); if (v[i] != 0) {
v[*out_size] = v[i] + *max_run_length_prefix;
++i; ++i;
++(*out_size);
} else { } else {
uint32_t reps = 1; uint32_t reps = 1;
for (size_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) { for (size_t k = i + 1; k < in_size && v[k] == 0; ++k) {
++reps; ++reps;
} }
i += reps; i += reps;
while (reps != 0) { while (reps != 0) {
if (reps < (2u << max_prefix)) { if (reps < (2u << max_prefix)) {
uint32_t run_length_prefix = Log2FloorNonZero(reps); uint32_t run_length_prefix = Log2FloorNonZero(reps);
v_out->push_back(run_length_prefix); const uint32_t extra_bits = reps - (1u << run_length_prefix);
extra_bits->push_back(reps - (1u << run_length_prefix)); v[*out_size] = run_length_prefix + (extra_bits << 9);
++(*out_size);
break; break;
} else { } else {
v_out->push_back(max_prefix); const uint32_t extra_bits = (1u << max_prefix) - 1u;
extra_bits->push_back((1u << max_prefix) - 1u); v[*out_size] = max_prefix + (extra_bits << 9);
reps -= (2u << max_prefix) - 1u; reps -= (2u << max_prefix) - 1u;
++(*out_size);
} }
} }
} }
@ -549,6 +586,7 @@ void RunLengthCodeZeros(const std::vector<uint32_t>& v_in,
void EncodeContextMap(const std::vector<uint32_t>& context_map, void EncodeContextMap(const std::vector<uint32_t>& context_map,
size_t num_clusters, size_t num_clusters,
HuffmanTree* tree,
size_t* storage_ix, uint8_t* storage) { size_t* storage_ix, uint8_t* storage) {
StoreVarLenUint8(num_clusters - 1, storage_ix, storage); StoreVarLenUint8(num_clusters - 1, storage_ix, storage);
@ -556,37 +594,40 @@ void EncodeContextMap(const std::vector<uint32_t>& context_map,
return; return;
} }
std::vector<uint32_t> transformed_symbols = MoveToFrontTransform(context_map); uint32_t* rle_symbols = new uint32_t[context_map.size()];
std::vector<uint32_t> rle_symbols; MoveToFrontTransform(&context_map[0], context_map.size(), rle_symbols);
std::vector<uint32_t> extra_bits;
uint32_t max_run_length_prefix = 6; uint32_t max_run_length_prefix = 6;
RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix, size_t num_rle_symbols = 0;
&rle_symbols, &extra_bits); RunLengthCodeZeros(context_map.size(), rle_symbols,
HistogramContextMap symbol_histogram; &num_rle_symbols, &max_run_length_prefix);
for (size_t i = 0; i < rle_symbols.size(); ++i) { uint32_t histogram[kContextMapAlphabetSize];
symbol_histogram.Add(rle_symbols[i]); memset(histogram, 0, sizeof(histogram));
static const int kSymbolBits = 9;
static const uint32_t kSymbolMask = (1u << kSymbolBits) - 1u;
for (size_t i = 0; i < num_rle_symbols; ++i) {
++histogram[rle_symbols[i] & kSymbolMask];
} }
bool use_rle = max_run_length_prefix > 0; bool use_rle = max_run_length_prefix > 0;
WriteBits(1, use_rle, storage_ix, storage); WriteBits(1, use_rle, storage_ix, storage);
if (use_rle) { if (use_rle) {
WriteBits(4, max_run_length_prefix - 1, storage_ix, storage); WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
} }
EntropyCodeContextMap symbol_code; uint8_t depths[kContextMapAlphabetSize];
memset(symbol_code.depth_, 0, sizeof(symbol_code.depth_)); uint16_t bits[kContextMapAlphabetSize];
memset(symbol_code.bits_, 0, sizeof(symbol_code.bits_)); memset(depths, 0, sizeof(depths));
BuildAndStoreHuffmanTree(symbol_histogram.data_, memset(bits, 0, sizeof(bits));
num_clusters + max_run_length_prefix, BuildAndStoreHuffmanTree(histogram, num_clusters + max_run_length_prefix,
symbol_code.depth_, symbol_code.bits_, tree, depths, bits, storage_ix, storage);
storage_ix, storage); for (size_t i = 0; i < num_rle_symbols; ++i) {
for (size_t i = 0; i < rle_symbols.size(); ++i) { const uint32_t rle_symbol = rle_symbols[i] & kSymbolMask;
WriteBits(symbol_code.depth_[rle_symbols[i]], const uint32_t extra_bits_val = rle_symbols[i] >> kSymbolBits;
symbol_code.bits_[rle_symbols[i]], WriteBits(depths[rle_symbol], bits[rle_symbol], storage_ix, storage);
storage_ix, storage); if (rle_symbol > 0 && rle_symbol <= max_run_length_prefix) {
if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) { WriteBits(rle_symbol, extra_bits_val, storage_ix, storage);
WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
} }
} }
WriteBits(1, 1, storage_ix, storage); // use move-to-front WriteBits(1, 1, storage_ix, storage); // use move-to-front
delete[] rle_symbols;
} }
void StoreBlockSwitch(const BlockSplitCode& code, void StoreBlockSwitch(const BlockSplitCode& code,
@ -608,12 +649,15 @@ void StoreBlockSwitch(const BlockSplitCode& code,
void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types, void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
const std::vector<uint32_t>& lengths, const std::vector<uint32_t>& lengths,
const size_t num_types, const size_t num_types,
HuffmanTree* tree,
BlockSplitCode* code, BlockSplitCode* code,
size_t* storage_ix, size_t* storage_ix,
uint8_t* storage) { uint8_t* storage) {
const size_t num_blocks = types.size(); const size_t num_blocks = types.size();
std::vector<uint32_t> type_histo(num_types + 2); uint32_t type_histo[kBlockTypeAlphabetSize];
std::vector<uint32_t> length_histo(26); uint32_t length_histo[kNumBlockLenPrefixes];
memset(type_histo, 0, (num_types + 2) * sizeof(type_histo[0]));
memset(length_histo, 0, sizeof(length_histo));
size_t last_type = 1; size_t last_type = 1;
size_t second_last_type = 0; size_t second_last_type = 0;
code->type_code.resize(num_blocks); code->type_code.resize(num_blocks);
@ -622,8 +666,8 @@ void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
code->length_extra.resize(num_blocks); code->length_extra.resize(num_blocks);
code->type_depths.resize(num_types + 2); code->type_depths.resize(num_types + 2);
code->type_bits.resize(num_types + 2); code->type_bits.resize(num_types + 2);
code->length_depths.resize(26); memset(code->length_depths, 0, sizeof(code->length_depths));
code->length_bits.resize(26); memset(code->length_bits, 0, sizeof(code->length_bits));
for (size_t i = 0; i < num_blocks; ++i) { for (size_t i = 0; i < num_blocks; ++i) {
size_t type = types[i]; size_t type = types[i];
size_t type_code = (type == last_type + 1 ? 1 : size_t type_code = (type == last_type + 1 ? 1 :
@ -641,10 +685,10 @@ void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
} }
StoreVarLenUint8(num_types - 1, storage_ix, storage); StoreVarLenUint8(num_types - 1, storage_ix, storage);
if (num_types > 1) { if (num_types > 1) {
BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, tree,
&code->type_depths[0], &code->type_bits[0], &code->type_depths[0], &code->type_bits[0],
storage_ix, storage); storage_ix, storage);
BuildAndStoreHuffmanTree(&length_histo[0], 26, BuildAndStoreHuffmanTree(&length_histo[0], kNumBlockLenPrefixes, tree,
&code->length_depths[0], &code->length_bits[0], &code->length_depths[0], &code->length_bits[0],
storage_ix, storage); storage_ix, storage);
StoreBlockSwitch(*code, 0, storage_ix, storage); StoreBlockSwitch(*code, 0, storage_ix, storage);
@ -653,6 +697,7 @@ void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
void StoreTrivialContextMap(size_t num_types, void StoreTrivialContextMap(size_t num_types,
size_t context_bits, size_t context_bits,
HuffmanTree* tree,
size_t* storage_ix, size_t* storage_ix,
uint8_t* storage) { uint8_t* storage) {
StoreVarLenUint8(num_types - 1, storage_ix, storage); StoreVarLenUint8(num_types - 1, storage_ix, storage);
@ -660,9 +705,12 @@ void StoreTrivialContextMap(size_t num_types,
size_t repeat_code = context_bits - 1u; size_t repeat_code = context_bits - 1u;
size_t repeat_bits = (1u << repeat_code) - 1u; size_t repeat_bits = (1u << repeat_code) - 1u;
size_t alphabet_size = num_types + repeat_code; size_t alphabet_size = num_types + repeat_code;
std::vector<uint32_t> histogram(alphabet_size); uint32_t histogram[kContextMapAlphabetSize];
std::vector<uint8_t> depths(alphabet_size); uint8_t depths[kContextMapAlphabetSize];
std::vector<uint16_t> bits(alphabet_size); uint16_t bits[kContextMapAlphabetSize];
memset(histogram, 0, alphabet_size * sizeof(histogram[0]));
memset(depths, 0, alphabet_size * sizeof(depths[0]));
memset(bits, 0, alphabet_size * sizeof(bits[0]));
// Write RLEMAX. // Write RLEMAX.
WriteBits(1, 1, storage_ix, storage); WriteBits(1, 1, storage_ix, storage);
WriteBits(4, repeat_code - 1, storage_ix, storage); WriteBits(4, repeat_code - 1, storage_ix, storage);
@ -671,7 +719,7 @@ void StoreTrivialContextMap(size_t num_types,
for (size_t i = context_bits; i < alphabet_size; ++i) { for (size_t i = context_bits; i < alphabet_size; ++i) {
histogram[i] = 1; histogram[i] = 1;
} }
BuildAndStoreHuffmanTree(&histogram[0], alphabet_size, BuildAndStoreHuffmanTree(&histogram[0], alphabet_size, tree,
&depths[0], &bits[0], &depths[0], &bits[0],
storage_ix, storage); storage_ix, storage);
for (size_t i = 0; i < num_types; ++i) { for (size_t i = 0; i < num_types; ++i) {
@ -702,11 +750,12 @@ class BlockEncoder {
// Creates entropy codes of block lengths and block types and stores them // Creates entropy codes of block lengths and block types and stores them
// to the bit stream. // to the bit stream.
void BuildAndStoreBlockSwitchEntropyCodes(size_t* storage_ix, void BuildAndStoreBlockSwitchEntropyCodes(HuffmanTree* tree,
size_t* storage_ix,
uint8_t* storage) { uint8_t* storage) {
BuildAndStoreBlockSplitCode( BuildAndStoreBlockSplitCode(
block_types_, block_lengths_, num_block_types_, block_types_, block_lengths_, num_block_types_,
&block_split_code_, storage_ix, storage); tree, &block_split_code_, storage_ix, storage);
} }
// Creates entropy codes for all block types and stores them to the bit // Creates entropy codes for all block types and stores them to the bit
@ -714,12 +763,14 @@ class BlockEncoder {
template<int kSize> template<int kSize>
void BuildAndStoreEntropyCodes( void BuildAndStoreEntropyCodes(
const std::vector<Histogram<kSize> >& histograms, const std::vector<Histogram<kSize> >& histograms,
HuffmanTree* tree,
size_t* storage_ix, uint8_t* storage) { size_t* storage_ix, uint8_t* storage) {
depths_.resize(histograms.size() * alphabet_size_); depths_.resize(histograms.size() * alphabet_size_);
bits_.resize(histograms.size() * alphabet_size_); bits_.resize(histograms.size() * alphabet_size_);
for (size_t i = 0; i < histograms.size(); ++i) { for (size_t i = 0; i < histograms.size(); ++i) {
size_t ix = i * alphabet_size_; size_t ix = i * alphabet_size_;
BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size_, BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size_,
tree,
&depths_[ix], &bits_[ix], &depths_[ix], &bits_[ix],
storage_ix, storage); storage_ix, storage);
} }
@ -798,6 +849,8 @@ void StoreMetaBlock(const uint8_t* input,
kNumDistanceShortCodes + num_direct_distance_codes + kNumDistanceShortCodes + num_direct_distance_codes +
(48u << distance_postfix_bits); (48u << distance_postfix_bits);
HuffmanTree* tree = static_cast<HuffmanTree*>(
malloc(kMaxHuffmanTreeSize * sizeof(HuffmanTree)));
BlockEncoder literal_enc(256, BlockEncoder literal_enc(256,
mb.literal_split.num_types, mb.literal_split.num_types,
mb.literal_split.types, mb.literal_split.types,
@ -811,9 +864,9 @@ void StoreMetaBlock(const uint8_t* input,
mb.distance_split.types, mb.distance_split.types,
mb.distance_split.lengths); mb.distance_split.lengths);
literal_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage); literal_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
command_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage); command_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
distance_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage); distance_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
WriteBits(2, distance_postfix_bits, storage_ix, storage); WriteBits(2, distance_postfix_bits, storage_ix, storage);
WriteBits(4, num_direct_distance_codes >> distance_postfix_bits, WriteBits(4, num_direct_distance_codes >> distance_postfix_bits,
@ -824,37 +877,36 @@ void StoreMetaBlock(const uint8_t* input,
size_t num_literal_histograms = mb.literal_histograms.size(); size_t num_literal_histograms = mb.literal_histograms.size();
if (mb.literal_context_map.empty()) { if (mb.literal_context_map.empty()) {
StoreTrivialContextMap(num_literal_histograms, kLiteralContextBits, StoreTrivialContextMap(num_literal_histograms, kLiteralContextBits, tree,
storage_ix, storage); storage_ix, storage);
} else { } else {
EncodeContextMap(mb.literal_context_map, num_literal_histograms, EncodeContextMap(mb.literal_context_map, num_literal_histograms, tree,
storage_ix, storage); storage_ix, storage);
} }
size_t num_dist_histograms = mb.distance_histograms.size(); size_t num_dist_histograms = mb.distance_histograms.size();
if (mb.distance_context_map.empty()) { if (mb.distance_context_map.empty()) {
StoreTrivialContextMap(num_dist_histograms, kDistanceContextBits, StoreTrivialContextMap(num_dist_histograms, kDistanceContextBits, tree,
storage_ix, storage); storage_ix, storage);
} else { } else {
EncodeContextMap(mb.distance_context_map, num_dist_histograms, EncodeContextMap(mb.distance_context_map, num_dist_histograms, tree,
storage_ix, storage); storage_ix, storage);
} }
literal_enc.BuildAndStoreEntropyCodes(mb.literal_histograms, literal_enc.BuildAndStoreEntropyCodes(mb.literal_histograms, tree,
storage_ix, storage); storage_ix, storage);
command_enc.BuildAndStoreEntropyCodes(mb.command_histograms, command_enc.BuildAndStoreEntropyCodes(mb.command_histograms, tree,
storage_ix, storage); storage_ix, storage);
distance_enc.BuildAndStoreEntropyCodes(mb.distance_histograms, distance_enc.BuildAndStoreEntropyCodes(mb.distance_histograms, tree,
storage_ix, storage); storage_ix, storage);
free(tree);
size_t pos = start_pos; size_t pos = start_pos;
for (size_t i = 0; i < n_commands; ++i) { for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i]; const Command cmd = commands[i];
size_t cmd_code = cmd.cmd_prefix_; size_t cmd_code = cmd.cmd_prefix_;
uint32_t lennumextra = static_cast<uint32_t>(cmd.cmd_extra_ >> 48);
uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffUL;
command_enc.StoreSymbol(cmd_code, storage_ix, storage); command_enc.StoreSymbol(cmd_code, storage_ix, storage);
WriteBits(lennumextra, lenextra, storage_ix, storage); StoreCommandExtra(cmd, storage_ix, storage);
if (mb.literal_context_map.empty()) { if (mb.literal_context_map.empty()) {
for (size_t j = cmd.insert_len_; j != 0; --j) { for (size_t j = cmd.insert_len_; j != 0; --j) {
literal_enc.StoreSymbol(input[pos & mask], storage_ix, storage); literal_enc.StoreSymbol(input[pos & mask], storage_ix, storage);
@ -871,8 +923,8 @@ void StoreMetaBlock(const uint8_t* input,
++pos; ++pos;
} }
} }
pos += cmd.copy_len_; pos += cmd.copy_len();
if (cmd.copy_len_ > 0) { if (cmd.copy_len()) {
prev_byte2 = input[(pos - 2) & mask]; prev_byte2 = input[(pos - 2) & mask];
prev_byte = input[(pos - 1) & mask]; prev_byte = input[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) { if (cmd.cmd_prefix_ >= 128) {
@ -911,8 +963,8 @@ void BuildHistograms(const uint8_t* input,
lit_histo->Add(input[pos & mask]); lit_histo->Add(input[pos & mask]);
++pos; ++pos;
} }
pos += cmd.copy_len_; pos += cmd.copy_len();
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) { if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
dist_histo->Add(cmd.dist_prefix_); dist_histo->Add(cmd.dist_prefix_);
} }
} }
@ -935,17 +987,15 @@ void StoreDataWithHuffmanCodes(const uint8_t* input,
for (size_t i = 0; i < n_commands; ++i) { for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i]; const Command cmd = commands[i];
const size_t cmd_code = cmd.cmd_prefix_; const size_t cmd_code = cmd.cmd_prefix_;
const uint32_t lennumextra = static_cast<uint32_t>(cmd.cmd_extra_ >> 48);
const uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffUL;
WriteBits(cmd_depth[cmd_code], cmd_bits[cmd_code], storage_ix, storage); WriteBits(cmd_depth[cmd_code], cmd_bits[cmd_code], storage_ix, storage);
WriteBits(lennumextra, lenextra, storage_ix, storage); StoreCommandExtra(cmd, storage_ix, storage);
for (size_t j = cmd.insert_len_; j != 0; --j) { for (size_t j = cmd.insert_len_; j != 0; --j) {
const uint8_t literal = input[pos & mask]; const uint8_t literal = input[pos & mask];
WriteBits(lit_depth[literal], lit_bits[literal], storage_ix, storage); WriteBits(lit_depth[literal], lit_bits[literal], storage_ix, storage);
++pos; ++pos;
} }
pos += cmd.copy_len_; pos += cmd.copy_len();
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) { if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
const size_t dist_code = cmd.dist_prefix_; const size_t dist_code = cmd.dist_prefix_;
const uint32_t distnumextra = cmd.dist_extra_ >> 24; const uint32_t distnumextra = cmd.dist_extra_ >> 24;
const uint32_t distextra = cmd.dist_extra_ & 0xffffff; const uint32_t distextra = cmd.dist_extra_ & 0xffffff;
@ -983,15 +1033,18 @@ void StoreMetaBlockTrivial(const uint8_t* input,
std::vector<uint8_t> dist_depth(64); std::vector<uint8_t> dist_depth(64);
std::vector<uint16_t> dist_bits(64); std::vector<uint16_t> dist_bits(64);
BuildAndStoreHuffmanTree(&lit_histo.data_[0], 256, HuffmanTree* tree = static_cast<HuffmanTree*>(
malloc(kMaxHuffmanTreeSize * sizeof(HuffmanTree)));
BuildAndStoreHuffmanTree(&lit_histo.data_[0], 256, tree,
&lit_depth[0], &lit_bits[0], &lit_depth[0], &lit_bits[0],
storage_ix, storage); storage_ix, storage);
BuildAndStoreHuffmanTree(&cmd_histo.data_[0], kNumCommandPrefixes, BuildAndStoreHuffmanTree(&cmd_histo.data_[0], kNumCommandPrefixes, tree,
&cmd_depth[0], &cmd_bits[0], &cmd_depth[0], &cmd_bits[0],
storage_ix, storage); storage_ix, storage);
BuildAndStoreHuffmanTree(&dist_histo.data_[0], 64, BuildAndStoreHuffmanTree(&dist_histo.data_[0], 64, tree,
&dist_depth[0], &dist_bits[0], &dist_depth[0], &dist_bits[0],
storage_ix, storage); storage_ix, storage);
free(tree);
StoreDataWithHuffmanCodes(input, start_pos, mask, commands, StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
n_commands, &lit_depth[0], &lit_bits[0], n_commands, &lit_depth[0], &lit_bits[0],
&cmd_depth[0], &cmd_bits[0], &cmd_depth[0], &cmd_bits[0],
@ -1026,7 +1079,7 @@ void StoreMetaBlockFast(const uint8_t* input,
++pos; ++pos;
} }
num_literals += cmd.insert_len_; num_literals += cmd.insert_len_;
pos += cmd.copy_len_; pos += cmd.copy_len();
} }
uint8_t lit_depth[256] = { 0 }; uint8_t lit_depth[256] = { 0 };
uint16_t lit_bits[256] = { 0 }; uint16_t lit_bits[256] = { 0 };

View File

@ -48,6 +48,7 @@ void StoreUncompressedMetaBlockHeader(size_t length,
// Stores a context map where the histogram type is always the block type. // Stores a context map where the histogram type is always the block type.
void StoreTrivialContextMap(size_t num_types, void StoreTrivialContextMap(size_t num_types,
size_t context_bits, size_t context_bits,
HuffmanTree* tree,
size_t* storage_ix, size_t* storage_ix,
uint8_t* storage); uint8_t* storage);
@ -57,13 +58,14 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
size_t *storage_ix, size_t *storage_ix,
uint8_t *storage); uint8_t *storage);
void StoreHuffmanTree(const uint8_t* depths, size_t num, void StoreHuffmanTree(const uint8_t* depths, size_t num, HuffmanTree* tree,
size_t *storage_ix, uint8_t *storage); size_t *storage_ix, uint8_t *storage);
// Builds a Huffman tree from histogram[0:length] into depth[0:length] and // Builds a Huffman tree from histogram[0:length] into depth[0:length] and
// bits[0:length] and stores the encoded tree to the bit stream. // bits[0:length] and stores the encoded tree to the bit stream.
void BuildAndStoreHuffmanTree(const uint32_t *histogram, void BuildAndStoreHuffmanTree(const uint32_t *histogram,
const size_t length, const size_t length,
HuffmanTree* tree,
uint8_t* depth, uint8_t* depth,
uint16_t* bits, uint16_t* bits,
size_t* storage_ix, size_t* storage_ix,
@ -81,6 +83,7 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
// histogram ids is given by num_clusters. // histogram ids is given by num_clusters.
void EncodeContextMap(const std::vector<uint32_t>& context_map, void EncodeContextMap(const std::vector<uint32_t>& context_map,
size_t num_clusters, size_t num_clusters,
HuffmanTree* tree,
size_t* storage_ix, uint8_t* storage); size_t* storage_ix, uint8_t* storage);
// Data structure that stores everything that is needed to encode each block // Data structure that stores everything that is needed to encode each block
@ -92,8 +95,8 @@ struct BlockSplitCode {
std::vector<uint32_t> length_extra; std::vector<uint32_t> length_extra;
std::vector<uint8_t> type_depths; std::vector<uint8_t> type_depths;
std::vector<uint16_t> type_bits; std::vector<uint16_t> type_bits;
std::vector<uint8_t> length_depths; uint8_t length_depths[kNumBlockLenPrefixes];
std::vector<uint16_t> length_bits; uint16_t length_bits[kNumBlockLenPrefixes];
}; };
// Builds a BlockSplitCode data structure from the block split given by the // Builds a BlockSplitCode data structure from the block split given by the

View File

@ -11,7 +11,6 @@
#include <math.h> #include <math.h>
#include <algorithm> #include <algorithm>
#include <map>
#include <utility> #include <utility>
#include <vector> #include <vector>
@ -52,7 +51,9 @@ template<typename HistogramType>
void CompareAndPushToQueue(const HistogramType* out, void CompareAndPushToQueue(const HistogramType* out,
const uint32_t* cluster_size, const uint32_t* cluster_size,
uint32_t idx1, uint32_t idx2, uint32_t idx1, uint32_t idx2,
std::vector<HistogramPair>* pairs) { size_t max_num_pairs,
HistogramPair* pairs,
size_t* num_pairs) {
if (idx1 == idx2) { if (idx1 == idx2) {
return; return;
} }
@ -76,8 +77,8 @@ void CompareAndPushToQueue(const HistogramType* out,
p.cost_combo = out[idx1].bit_cost_; p.cost_combo = out[idx1].bit_cost_;
store_pair = true; store_pair = true;
} else { } else {
double threshold = pairs->empty() ? 1e99 : double threshold = *num_pairs == 0 ? 1e99 :
std::max(0.0, (*pairs)[0].cost_diff); std::max(0.0, pairs[0].cost_diff);
HistogramType combo = out[idx1]; HistogramType combo = out[idx1];
combo.AddHistogram(out[idx2]); combo.AddHistogram(out[idx2]);
double cost_combo = PopulationCost(combo); double cost_combo = PopulationCost(combo);
@ -88,42 +89,44 @@ void CompareAndPushToQueue(const HistogramType* out,
} }
if (store_pair) { if (store_pair) {
p.cost_diff += p.cost_combo; p.cost_diff += p.cost_combo;
if (!pairs->empty() && (pairs->front() < p)) { if (*num_pairs > 0 && pairs[0] < p) {
// Replace the top of the queue if needed. // Replace the top of the queue if needed.
pairs->push_back(pairs->front()); if (*num_pairs < max_num_pairs) {
pairs->front() = p; pairs[*num_pairs] = pairs[0];
} else { ++(*num_pairs);
pairs->push_back(p); }
pairs[0] = p;
} else if (*num_pairs < max_num_pairs) {
pairs[*num_pairs] = p;
++(*num_pairs);
} }
} }
} }
template<typename HistogramType> template<typename HistogramType>
void HistogramCombine(HistogramType* out, size_t HistogramCombine(HistogramType* out,
uint32_t* cluster_size, uint32_t* cluster_size,
uint32_t* symbols, uint32_t* symbols,
size_t symbols_size, uint32_t* clusters,
size_t max_clusters) { HistogramPair* pairs,
size_t num_clusters,
size_t symbols_size,
size_t max_clusters,
size_t max_num_pairs) {
double cost_diff_threshold = 0.0; double cost_diff_threshold = 0.0;
size_t min_cluster_size = 1; size_t min_cluster_size = 1;
// Uniquify the list of symbols. // We maintain a vector of histogram pairs, with the property that the pair
std::vector<uint32_t> clusters(symbols, symbols + symbols_size); // with the maximum bit cost reduction is the first.
std::sort(clusters.begin(), clusters.end()); size_t num_pairs = 0;
std::vector<uint32_t>::iterator last = for (size_t idx1 = 0; idx1 < num_clusters; ++idx1) {
std::unique(clusters.begin(), clusters.end()); for (size_t idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
clusters.resize(static_cast<size_t>(last - clusters.begin()));
// We maintain a heap of histogram pairs, ordered by the bit cost reduction.
std::vector<HistogramPair> pairs;
for (size_t idx1 = 0; idx1 < clusters.size(); ++idx1) {
for (size_t idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) {
CompareAndPushToQueue(out, cluster_size, clusters[idx1], clusters[idx2], CompareAndPushToQueue(out, cluster_size, clusters[idx1], clusters[idx2],
&pairs); max_num_pairs, &pairs[0], &num_pairs);
} }
} }
while (clusters.size() > min_cluster_size) { while (num_clusters > min_cluster_size) {
if (pairs[0].cost_diff >= cost_diff_threshold) { if (pairs[0].cost_diff >= cost_diff_threshold) {
cost_diff_threshold = 1e99; cost_diff_threshold = 1e99;
min_cluster_size = max_clusters; min_cluster_size = max_clusters;
@ -140,40 +143,42 @@ void HistogramCombine(HistogramType* out,
symbols[i] = best_idx1; symbols[i] = best_idx1;
} }
} }
for (std::vector<uint32_t>::iterator cluster = clusters.begin(); for (size_t i = 0; i < num_clusters; ++i) {
cluster != clusters.end(); ++cluster) { if (clusters[i] == best_idx2) {
if (*cluster >= best_idx2) { memmove(&clusters[i], &clusters[i + 1],
clusters.erase(cluster); (num_clusters - i - 1) * sizeof(clusters[0]));
break; break;
} }
} }
--num_clusters;
// Remove pairs intersecting the just combined best pair. // Remove pairs intersecting the just combined best pair.
size_t copy_to_idx = 0; size_t copy_to_idx = 0;
for (size_t i = 0; i < pairs.size(); ++i) { for (size_t i = 0; i < num_pairs; ++i) {
HistogramPair& p = pairs[i]; HistogramPair& p = pairs[i];
if (p.idx1 == best_idx1 || p.idx2 == best_idx1 || if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
p.idx1 == best_idx2 || p.idx2 == best_idx2) { p.idx1 == best_idx2 || p.idx2 == best_idx2) {
// Remove invalid pair from the queue. // Remove invalid pair from the queue.
continue; continue;
} }
if (pairs.front() < p) { if (pairs[0] < p) {
// Replace the top of the queue if needed. // Replace the top of the queue if needed.
HistogramPair front = pairs.front(); HistogramPair front = pairs[0];
pairs.front() = p; pairs[0] = p;
pairs[copy_to_idx] = front; pairs[copy_to_idx] = front;
} else { } else {
pairs[copy_to_idx] = p; pairs[copy_to_idx] = p;
} }
++copy_to_idx; ++copy_to_idx;
} }
pairs.resize(copy_to_idx); num_pairs = copy_to_idx;
// Push new pairs formed with the combined histogram to the heap. // Push new pairs formed with the combined histogram to the heap.
for (size_t i = 0; i < clusters.size(); ++i) { for (size_t i = 0; i < num_clusters; ++i) {
CompareAndPushToQueue(out, cluster_size, best_idx1, clusters[i], &pairs); CompareAndPushToQueue(out, cluster_size, best_idx1, clusters[i],
max_num_pairs, &pairs[0], &num_pairs);
} }
} }
return num_clusters;
} }
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
@ -192,61 +197,69 @@ double HistogramBitCostDistance(const HistogramType& histogram,
} }
// Find the best 'out' histogram for each of the 'in' histograms. // Find the best 'out' histogram for each of the 'in' histograms.
// When called, clusters[0..num_clusters) contains the unique values from
// symbols[0..in_size), but this property is not preserved in this function.
// Note: we assume that out[]->bit_cost_ is already up-to-date. // Note: we assume that out[]->bit_cost_ is already up-to-date.
template<typename HistogramType> template<typename HistogramType>
void HistogramRemap(const HistogramType* in, size_t in_size, void HistogramRemap(const HistogramType* in, size_t in_size,
const uint32_t* clusters, size_t num_clusters,
HistogramType* out, uint32_t* symbols) { HistogramType* out, uint32_t* symbols) {
// Uniquify the list of symbols.
std::vector<uint32_t> all_symbols(symbols, symbols + in_size);
std::sort(all_symbols.begin(), all_symbols.end());
std::vector<uint32_t>::iterator last =
std::unique(all_symbols.begin(), all_symbols.end());
all_symbols.resize(static_cast<size_t>(last - all_symbols.begin()));
for (size_t i = 0; i < in_size; ++i) { for (size_t i = 0; i < in_size; ++i) {
uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1]; uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
double best_bits = HistogramBitCostDistance(in[i], out[best_out]); double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
for (std::vector<uint32_t>::const_iterator k = all_symbols.begin(); for (size_t j = 0; j < num_clusters; ++j) {
k != all_symbols.end(); ++k) { const double cur_bits = HistogramBitCostDistance(in[i], out[clusters[j]]);
const double cur_bits = HistogramBitCostDistance(in[i], out[*k]);
if (cur_bits < best_bits) { if (cur_bits < best_bits) {
best_bits = cur_bits; best_bits = cur_bits;
best_out = *k; best_out = clusters[j];
} }
} }
symbols[i] = best_out; symbols[i] = best_out;
} }
// Recompute each out based on raw and symbols. // Recompute each out based on raw and symbols.
for (std::vector<uint32_t>::const_iterator k = all_symbols.begin(); for (size_t j = 0; j < num_clusters; ++j) {
k != all_symbols.end(); ++k) { out[clusters[j]].Clear();
out[*k].Clear();
} }
for (size_t i = 0; i < in_size; ++i) { for (size_t i = 0; i < in_size; ++i) {
out[symbols[i]].AddHistogram(in[i]); out[symbols[i]].AddHistogram(in[i]);
} }
} }
// Reorder histograms in *out so that the new symbols in *symbols come in // Reorders elements of the out[0..length) array and changes values in
// increasing order. // symbols[0..length) array in the following way:
// * when called, symbols[] contains indexes into out[], and has N unique
// values (possibly N < length)
// * on return, symbols'[i] = f(symbols[i]) and
// out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
// where f is a bijection between the range of symbols[] and [0..N), and
// the first occurrences of values in symbols'[i] come in consecutive
// increasing order.
// Returns N, the number of unique values in symbols[].
template<typename HistogramType> template<typename HistogramType>
void HistogramReindex(std::vector<HistogramType>* out, size_t HistogramReindex(HistogramType* out, uint32_t* symbols, size_t length) {
std::vector<uint32_t>* symbols) { static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
std::vector<HistogramType> tmp(*out); std::vector<uint32_t> new_index(length, kInvalidIndex);
std::map<uint32_t, uint32_t> new_index;
uint32_t next_index = 0; uint32_t next_index = 0;
for (size_t i = 0; i < symbols->size(); ++i) { for (size_t i = 0; i < length; ++i) {
if (new_index.find((*symbols)[i]) == new_index.end()) { if (new_index[symbols[i]] == kInvalidIndex) {
new_index[(*symbols)[i]] = next_index; new_index[symbols[i]] = next_index;
(*out)[next_index] = tmp[(*symbols)[i]];
++next_index; ++next_index;
} }
} }
out->resize(next_index); std::vector<HistogramType> tmp(next_index);
for (size_t i = 0; i < symbols->size(); ++i) { next_index = 0;
(*symbols)[i] = new_index[(*symbols)[i]]; for (size_t i = 0; i < length; ++i) {
if (new_index[symbols[i]] == next_index) {
tmp[next_index] = out[symbols[i]];
++next_index;
}
symbols[i] = new_index[symbols[i]];
} }
for (size_t i = 0; i < next_index; ++i) {
out[i] = tmp[i];
}
return next_index;
} }
// Clusters similar histograms in 'in' together, the selected histograms are // Clusters similar histograms in 'in' together, the selected histograms are
@ -261,6 +274,8 @@ void ClusterHistograms(const std::vector<HistogramType>& in,
const size_t in_size = num_contexts * num_blocks; const size_t in_size = num_contexts * num_blocks;
assert(in_size == in.size()); assert(in_size == in.size());
std::vector<uint32_t> cluster_size(in_size, 1); std::vector<uint32_t> cluster_size(in_size, 1);
std::vector<uint32_t> clusters(in_size);
size_t num_clusters = 0;
out->resize(in_size); out->resize(in_size);
histogram_symbols->resize(in_size); histogram_symbols->resize(in_size);
for (size_t i = 0; i < in_size; ++i) { for (size_t i = 0; i < in_size; ++i) {
@ -269,29 +284,47 @@ void ClusterHistograms(const std::vector<HistogramType>& in,
(*histogram_symbols)[i] = static_cast<uint32_t>(i); (*histogram_symbols)[i] = static_cast<uint32_t>(i);
} }
const size_t max_input_histograms = 64; const size_t max_input_histograms = 64;
// For the first pass of clustering, we allow all pairs.
size_t max_num_pairs = max_input_histograms * max_input_histograms / 2;
std::vector<HistogramPair> pairs(max_num_pairs + 1);
for (size_t i = 0; i < in_size; i += max_input_histograms) { for (size_t i = 0; i < in_size; i += max_input_histograms) {
size_t num_to_combine = std::min(in_size - i, max_input_histograms); size_t num_to_combine = std::min(in_size - i, max_input_histograms);
HistogramCombine(&(*out)[0], &cluster_size[0], for (size_t j = 0; j < num_to_combine; ++j) {
&(*histogram_symbols)[i], num_to_combine, clusters[num_clusters + j] = static_cast<uint32_t>(i + j);
max_histograms); }
size_t num_new_clusters =
HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[i],
&clusters[num_clusters], &pairs[0],
num_to_combine, num_to_combine,
max_histograms, max_num_pairs);
num_clusters += num_new_clusters;
} }
// For the second pass, we limit the total number of histogram pairs.
// After this limit is reached, we only keep searching for the best pair.
max_num_pairs =
std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
pairs.resize(max_num_pairs + 1);
// Collapse similar histograms. // Collapse similar histograms.
HistogramCombine(&(*out)[0], &cluster_size[0], num_clusters = HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[0], in_size, &(*histogram_symbols)[0], &clusters[0],
max_histograms); &pairs[0], num_clusters, in_size,
max_histograms, max_num_pairs);
// Find the optimal map from original histograms to the final ones. // Find the optimal map from original histograms to the final ones.
HistogramRemap(&in[0], in_size, &(*out)[0], &(*histogram_symbols)[0]); HistogramRemap(&in[0], in_size, &clusters[0], num_clusters,
&(*out)[0], &(*histogram_symbols)[0]);
// Convert the context map to a canonical form. // Convert the context map to a canonical form.
HistogramReindex(out, histogram_symbols); size_t num_histograms =
HistogramReindex(&(*out)[0], &(*histogram_symbols)[0], in_size);
out->resize(num_histograms);
} }
} // namespace brotli } // namespace brotli
#endif // BROTLI_ENC_CLUSTER_H_ #endif // BROTLI_ENC_CLUSTER_H_

View File

@ -73,35 +73,47 @@ static inline uint16_t CombineLengthCodes(
static inline void GetLengthCode(size_t insertlen, size_t copylen, static inline void GetLengthCode(size_t insertlen, size_t copylen,
bool use_last_distance, bool use_last_distance,
uint16_t* code, uint64_t* extra) { uint16_t* code) {
uint16_t inscode = GetInsertLengthCode(insertlen); uint16_t inscode = GetInsertLengthCode(insertlen);
uint16_t copycode = GetCopyLengthCode(copylen); uint16_t copycode = GetCopyLengthCode(copylen);
uint64_t insnumextra = kInsExtra[inscode];
uint64_t numextra = insnumextra + kCopyExtra[copycode];
uint64_t insextraval = insertlen - kInsBase[inscode];
uint64_t copyextraval = copylen - kCopyBase[copycode];
*code = CombineLengthCodes(inscode, copycode, use_last_distance); *code = CombineLengthCodes(inscode, copycode, use_last_distance);
*extra = (numextra << 48) | (copyextraval << insnumextra) | insextraval; }
// Returns the kInsBase table entry for insert length code `inscode`, i.e. the
// base value that is subtracted from an insert length to obtain the value
// stored in that code's extra bits.
static inline uint32_t GetInsertBase(uint16_t inscode) {
return kInsBase[inscode];
}
// Returns the kInsExtra table entry for insert length code `inscode`, i.e. the
// number of extra bits that accompany that code.
static inline uint32_t GetInsertExtra(uint16_t inscode) {
return kInsExtra[inscode];
}
// Returns the kCopyBase table entry for copy length code `copycode`, i.e. the
// base value that is subtracted from a copy length to obtain the value stored
// in that code's extra bits.
static inline uint32_t GetCopyBase(uint16_t copycode) {
return kCopyBase[copycode];
}
static inline uint32_t GetCopyExtra(uint16_t copycode) {
return kCopyExtra[copycode];
} }
struct Command { struct Command {
// distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1. // distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1.
Command(size_t insertlen, size_t copylen, size_t copylen_code, Command(size_t insertlen, size_t copylen, size_t copylen_code,
size_t distance_code) size_t distance_code)
: insert_len_(static_cast<uint32_t>(insertlen)) : insert_len_(static_cast<uint32_t>(insertlen)) {
, copy_len_(static_cast<uint32_t>(copylen)) { copy_len_ = static_cast<uint32_t>(
copylen | ((copylen_code ^ copylen) << 24));
// The distance prefix and extra bits are stored in this Command as if // The distance prefix and extra bits are stored in this Command as if
// npostfix and ndirect were 0, they are only recomputed later after the // npostfix and ndirect were 0, they are only recomputed later after the
// clustering if needed. // clustering if needed.
PrefixEncodeCopyDistance(distance_code, 0, 0, &dist_prefix_, &dist_extra_); PrefixEncodeCopyDistance(distance_code, 0, 0, &dist_prefix_, &dist_extra_);
GetLengthCode(insertlen, copylen_code, dist_prefix_ == 0, GetLengthCode(insertlen, copylen_code, dist_prefix_ == 0,
&cmd_prefix_, &cmd_extra_); &cmd_prefix_);
} }
explicit Command(size_t insertlen) explicit Command(size_t insertlen)
: insert_len_(static_cast<uint32_t>(insertlen)) : insert_len_(static_cast<uint32_t>(insertlen))
, copy_len_(0), dist_extra_(0), dist_prefix_(16) { , copy_len_(4 << 24), dist_extra_(0), dist_prefix_(16) {
GetLengthCode(insertlen, 4, dist_prefix_ == 0, &cmd_prefix_, &cmd_extra_); GetLengthCode(insertlen, 4, dist_prefix_ == 0, &cmd_prefix_);
} }
uint32_t DistanceCode(void) const { uint32_t DistanceCode(void) const {
@ -123,9 +135,17 @@ struct Command {
return 3; return 3;
} }
inline uint32_t copy_len(void) const {
return copy_len_ & 0xFFFFFF;
}
inline uint32_t copy_len_code(void) const {
return (copy_len_ & 0xFFFFFF) ^ (copy_len_ >> 24);
}
uint32_t insert_len_; uint32_t insert_len_;
/* Stores copy_len in low 24 bits and copy_len XOR copy_code in high 8 bit. */
uint32_t copy_len_; uint32_t copy_len_;
uint64_t cmd_extra_;
uint32_t dist_extra_; uint32_t dist_extra_;
uint16_t cmd_prefix_; uint16_t cmd_prefix_;
uint16_t dist_prefix_; uint16_t dist_prefix_;

View File

@ -105,8 +105,11 @@ void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128], void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
uint8_t depth[128], uint16_t bits[128], uint8_t depth[128], uint16_t bits[128],
size_t* storage_ix, uint8_t* storage) { size_t* storage_ix, uint8_t* storage) {
CreateHuffmanTree(histogram, 64, 15, depth); // Tree size for building a tree over 64 symbols is 2 * 64 + 1.
CreateHuffmanTree(&histogram[64], 64, 14, &depth[64]); static const size_t kTreeSize = 129;
HuffmanTree tree[kTreeSize];
CreateHuffmanTree(histogram, 64, 15, tree, depth);
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
// We have to jump through a few hoopes here in order to compute // We have to jump through a few hoopes here in order to compute
// the command bits because the symbols are in a different order than in // the command bits because the symbols are in a different order than in
// the full alphabet. This looks complicated, but having the symbols // the full alphabet. This looks complicated, but having the symbols
@ -141,9 +144,9 @@ void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
cmd_depth[256 + 8 * i] = depth[48 + i]; cmd_depth[256 + 8 * i] = depth[48 + i];
cmd_depth[448 + 8 * i] = depth[56 + i]; cmd_depth[448 + 8 * i] = depth[56 + i];
} }
StoreHuffmanTree(cmd_depth, 704, storage_ix, storage); StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
} }
StoreHuffmanTree(&depth[64], 64, storage_ix, storage); StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
} }
// REQUIRES: insertlen < 6210 // REQUIRES: insertlen < 6210
@ -452,7 +455,8 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
assert(table_size <= (1u << 31)); assert(table_size <= (1u << 31));
assert((table_size & (table_size - 1)) == 0); // table must be power of two assert((table_size & (table_size - 1)) == 0); // table must be power of two
const size_t shift = 64u - Log2FloorNonZero(table_size); const size_t shift = 64u - Log2FloorNonZero(table_size);
assert(static_cast<size_t>(0xffffffffffffffffU >> shift) == table_size - 1); assert(table_size - 1 == static_cast<size_t>(
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
const uint8_t* ip_end = input + block_size; const uint8_t* ip_end = input + block_size;
int last_distance = -1; int last_distance = -1;

View File

@ -57,8 +57,11 @@ static void BuildAndStoreCommandPrefixCode(
const uint32_t histogram[128], const uint32_t histogram[128],
uint8_t depth[128], uint16_t bits[128], uint8_t depth[128], uint16_t bits[128],
size_t* storage_ix, uint8_t* storage) { size_t* storage_ix, uint8_t* storage) {
CreateHuffmanTree(histogram, 64, 15, depth); // Tree size for building a tree over 64 symbols is 2 * 64 + 1.
CreateHuffmanTree(&histogram[64], 64, 14, &depth[64]); static const size_t kTreeSize = 129;
HuffmanTree tree[kTreeSize];
CreateHuffmanTree(histogram, 64, 15, tree, depth);
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
// We have to jump through a few hoopes here in order to compute // We have to jump through a few hoopes here in order to compute
// the command bits because the symbols are in a different order than in // the command bits because the symbols are in a different order than in
// the full alphabet. This looks complicated, but having the symbols // the full alphabet. This looks complicated, but having the symbols
@ -93,9 +96,9 @@ static void BuildAndStoreCommandPrefixCode(
cmd_depth[256 + 8 * i] = depth[8 + i]; cmd_depth[256 + 8 * i] = depth[8 + i];
cmd_depth[448 + 8 * i] = depth[16 + i]; cmd_depth[448 + 8 * i] = depth[16 + i];
} }
StoreHuffmanTree(cmd_depth, 704, storage_ix, storage); StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
} }
StoreHuffmanTree(&depth[64], 64, storage_ix, storage); StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
} }
inline void EmitInsertLen(uint32_t insertlen, uint32_t** commands) { inline void EmitInsertLen(uint32_t insertlen, uint32_t** commands) {
@ -227,7 +230,8 @@ void CreateCommands(const uint8_t* input, size_t block_size, size_t input_size,
assert(table_size <= (1u << 31)); assert(table_size <= (1u << 31));
assert((table_size & (table_size - 1)) == 0); // table must be power of two assert((table_size & (table_size - 1)) == 0); // table must be power of two
const size_t shift = 64u - Log2FloorNonZero(table_size); const size_t shift = 64u - Log2FloorNonZero(table_size);
assert(static_cast<size_t>(0xffffffffffffffffU >> shift) == table_size - 1); assert(table_size - 1 == static_cast<size_t>(
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
const uint8_t* ip_end = input + block_size; const uint8_t* ip_end = input + block_size;
// "next_emit" is a pointer to the first byte that is not covered by a // "next_emit" is a pointer to the first byte that is not covered by a
// previous copy. Bytes between "next_emit" and the start of the next copy or // previous copy. Bytes between "next_emit" and the start of the next copy or

View File

@ -38,7 +38,7 @@ static const int kMinQualityForContextModeling = 5;
static const int kMinQualityForOptimizeHistograms = 4; static const int kMinQualityForOptimizeHistograms = 4;
// For quality 2 there is no block splitting, so we buffer at most this much // For quality 2 there is no block splitting, so we buffer at most this much
// literals and commands. // literals and commands.
static const int kMaxNumDelayedSymbols = 0x2fff; static const size_t kMaxNumDelayedSymbols = 0x2fff;
#define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src)); #define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));
@ -51,7 +51,7 @@ void RecomputeDistancePrefixes(Command* cmds,
} }
for (size_t i = 0; i < num_commands; ++i) { for (size_t i = 0; i < num_commands; ++i) {
Command* cmd = &cmds[i]; Command* cmd = &cmds[i];
if (cmd->copy_len_ > 0 && cmd->cmd_prefix_ >= 128) { if (cmd->copy_len() && cmd->cmd_prefix_ >= 128) {
PrefixEncodeCopyDistance(cmd->DistanceCode(), PrefixEncodeCopyDistance(cmd->DistanceCode(),
num_direct_distance_codes, num_direct_distance_codes,
distance_postfix_bits, distance_postfix_bits,
@ -180,6 +180,250 @@ void InitCommandPrefixCodes(uint8_t cmd_depths[128],
*cmd_code_numbits = kDefaultCommandCodeNumBits; *cmd_code_numbits = kDefaultCommandCodeNumBits;
} }
// Chooses the static literal context map from a 3x3 bigram histogram of
// UTF-8 byte classes, where bigram_histo[3 * p + c] counts bytes of class c
// that followed a byte of class p. Three models are compared by their
// entropy per counted symbol:
//   * one context   - the class of the previous byte is ignored;
//   * two contexts  - previous classes 0 and 2 share one context;
//   * three contexts - one context per previous class.
// Shannon entropy is used here instead of 'BitsEntropy' because the prefix
// is encoded together with the remaining 6 bits of the following byte, while
// BitsEntropy would assume the symbol is stored alone using Huffman coding.
//
// On return *num_literal_contexts is 1, 2 or 3. *literal_context_map is only
// written when 2 or 3 contexts are chosen; it is left untouched otherwise.
// REQUIRES: at least one entry of bigram_histo[0..9) is non-zero.
void ChooseContextMap(int quality,
                      uint32_t* bigram_histo,
                      size_t* num_literal_contexts,
                      const uint32_t** literal_context_map) {
  static const uint32_t kStaticContextMapContinuation[64] = {
    1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  static const uint32_t kStaticContextMapSimpleUTF8[64] = {
    0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };

  // Fold the 9 bigram counts into the one-context and two-context histograms.
  uint32_t monogram_histo[3] = { 0 };
  uint32_t two_prefix_histo[6] = { 0 };
  size_t total = 0;
  for (size_t i = 0; i < 9; ++i) {
    const uint32_t count = bigram_histo[i];
    total += count;
    monogram_histo[i % 3] += count;
    // Rows 0 and 2 of the bigram table are merged into slots [0..3).
    two_prefix_histo[i < 6 ? i : i - 6] += count;
  }

  size_t dummy;
  double entropy1 = ShannonEntropy(monogram_histo, 3, &dummy);
  double entropy2 = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
                     ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
  double entropy3 = 0;
  for (const uint32_t* row = bigram_histo; row != bigram_histo + 9; row += 3) {
    entropy3 += ShannonEntropy(row, 3, &dummy);
  }

  // Normalize to entropy per counted symbol.
  assert(total != 0);
  const double scale = 1.0 / static_cast<double>(total);
  entropy1 *= scale;
  entropy2 *= scale;
  entropy3 *= scale;

  if (quality < 7) {
    // 3 context models is a bit slower, don't use it at lower qualities.
    entropy3 = entropy1 * 10;
  }

  // If expected savings by symbol are less than 0.2 bits, skip the
  // context modeling -- in exchange for faster decoding speed.
  const bool two_contexts_gain_little = entropy1 - entropy2 < 0.2;
  const bool three_contexts_gain_little = entropy1 - entropy3 < 0.2;
  if (two_contexts_gain_little && three_contexts_gain_little) {
    *num_literal_contexts = 1;
    return;
  }
  if (entropy2 - entropy3 < 0.02) {
    *num_literal_contexts = 2;
    *literal_context_map = kStaticContextMapSimpleUTF8;
    return;
  }
  *num_literal_contexts = 3;
  *literal_context_map = kStaticContextMapContinuation;
}
// Samples the input and decides whether, and with which static context map,
// literals should be coded with context modeling.
//
// Returns without touching any output when quality is below
// kMinQualityForContextModeling or when length < 64. Otherwise sets
// *literal_context_mode to CONTEXT_UTF8 and lets ChooseContextMap fill in
// *num_literal_contexts and (possibly) *literal_context_map.
//
// input is indexed through `mask`, so positions wrap around the buffer.
void DecideOverLiteralContextModeling(const uint8_t* input,
                                      size_t start_pos,
                                      size_t length,
                                      size_t mask,
                                      int quality,
                                      ContextType* literal_context_mode,
                                      size_t* num_literal_contexts,
                                      const uint32_t** literal_context_map) {
  if (quality < kMinQualityForContextModeling || length < 64) {
    return;
  }
  // Maps the top two bits of a byte to its class: 0 for 0x00..0x7F,
  // 1 for 0x80..0xBF and 2 for 0xC0..0xFF.
  static const int lut[4] = { 0, 0, 1, 2 };

  // Gather bigram data of the UTF8 byte prefixes. To make the analysis of
  // UTF8 data faster we only examine 64 byte long strides at every 4kB
  // intervals.
  const size_t end_pos = start_pos + length;
  uint32_t bigram_prefix_histo[9] = { 0 };
  size_t stride_start = start_pos;
  while (stride_start + 64 <= end_pos) {
    const size_t stride_end_pos = stride_start + 64;
    // `prev` is the row offset (3 * class) of the preceding byte.
    int prev = lut[input[stride_start & mask] >> 6] * 3;
    for (size_t pos = stride_start + 1; pos < stride_end_pos; ++pos) {
      const int byte_class = lut[input[pos & mask] >> 6];
      ++bigram_prefix_histo[prev + byte_class];
      prev = byte_class * 3;
    }
    stride_start += 4096;
  }

  *literal_context_mode = CONTEXT_UTF8;
  ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
                   literal_context_map);
}
// Heuristic deciding whether a meta-block is worth entropy coding.
//
// Returns false only when all of the following hold:
//   * there are few commands: num_commands < bytes / 256 + 2;
//   * the block is almost all literals: num_literals > 99% of bytes;
//   * a histogram of every 13th byte, starting at last_flush_pos and indexed
//     through `mask`, has a BitsEntropy above bytes * 7.92 / 13.
// In every other case returns true.
bool ShouldCompress(const uint8_t* data,
                    const size_t mask,
                    const uint64_t last_flush_pos,
                    const size_t bytes,
                    const size_t num_literals,
                    const size_t num_commands) {
  static const uint32_t kSampleRate = 13;
  static const double kMinEntropy = 7.92;

  if (num_commands >= (bytes >> 8) + 2) {
    return true;
  }
  if (!(num_literals > 0.99 * static_cast<double>(bytes))) {
    return true;
  }

  // Build a sparse literal histogram from one byte out of every kSampleRate.
  uint32_t literal_histo[256] = { 0 };
  const size_t num_samples = (bytes + kSampleRate - 1) / kSampleRate;
  uint32_t sample_pos = static_cast<uint32_t>(last_flush_pos);
  for (size_t s = 0; s < num_samples; ++s, sample_pos += kSampleRate) {
    ++literal_histo[data[sample_pos & mask]];
  }

  const double bit_cost_threshold =
      static_cast<double>(bytes) * kMinEntropy / kSampleRate;
  return !(BitsEntropy(literal_histo, 256) > bit_cost_threshold);
}
// Serializes one meta-block covering `bytes` input bytes that start at
// last_flush_pos in the buffer `data` (indexed through `mask`), appending the
// bits to `storage` at bit position *storage_ix.
//
// Depending on the input and on `quality` the block is written as an empty
// last block, as an uncompressed meta-block, or as a compressed meta-block
// built from `commands`. When the uncompressed form is chosen, dist_cache is
// reset to saved_dist_cache (4 entries).
//
// NOTE(review): the final fallback restores only storage[0] and the low 8 bits
// of *storage_ix, so it presumably relies on the caller entering with all
// pending bits held in storage[0] -- confirm against the call site.
void WriteMetaBlockInternal(const uint8_t* data,
const size_t mask,
const uint64_t last_flush_pos,
const size_t bytes,
const bool is_last,
const int quality,
const bool font_mode,
const uint8_t prev_byte,
const uint8_t prev_byte2,
const size_t num_literals,
const size_t num_commands,
Command* commands,
const int* saved_dist_cache,
int* dist_cache,
size_t* storage_ix,
uint8_t* storage) {
if (bytes == 0) {
// Write the ISLAST and ISEMPTY bits.
WriteBits(2, 3, storage_ix, storage);
// Round the bit position up to the next byte boundary.
*storage_ix = (*storage_ix + 7u) & ~7u;
return;
}
if (!ShouldCompress(data, mask, last_flush_pos, bytes,
num_literals, num_commands)) {
// Restore the distance cache, as its last update by
// CreateBackwardReferences is now unused.
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
StoreUncompressedMetaBlock(is_last, data,
WrapPosition(last_flush_pos), mask, bytes,
storage_ix, storage);
return;
}
// Remember the output state on entry so that the compressed attempt can be
// rolled back at the end of this function.
const uint8_t last_byte = storage[0];
const uint8_t last_byte_bits = static_cast<uint8_t>(*storage_ix & 0xff);
uint32_t num_direct_distance_codes = 0;
uint32_t distance_postfix_bits = 0;
// At quality above 9 in font mode, use 12 direct distance codes and 1 postfix
// bit, and re-encode the distance prefixes of the commands accordingly.
if (quality > 9 && font_mode) {
num_direct_distance_codes = 12;
distance_postfix_bits = 1;
RecomputeDistancePrefixes(commands,
num_commands,
num_direct_distance_codes,
distance_postfix_bits);
}
// Pick the meta-block writer by quality level.
if (quality == 2) {
StoreMetaBlockFast(data, WrapPosition(last_flush_pos),
bytes, mask, is_last,
commands, num_commands,
storage_ix, storage);
} else if (quality < kMinQualityForBlockSplit) {
StoreMetaBlockTrivial(data, WrapPosition(last_flush_pos),
bytes, mask, is_last,
commands, num_commands,
storage_ix, storage);
} else {
MetaBlockSplit mb;
ContextType literal_context_mode = CONTEXT_UTF8;
if (quality <= 9) {
// Up to quality 9: sample the input to pick a static literal context map
// (if any), then build the meta-block with the greedy builders.
size_t num_literal_contexts = 1;
const uint32_t* literal_context_map = NULL;
DecideOverLiteralContextModeling(data, WrapPosition(last_flush_pos),
bytes, mask,
quality,
&literal_context_mode,
&num_literal_contexts,
&literal_context_map);
// A NULL map means no static context map was selected.
if (literal_context_map == NULL) {
BuildMetaBlockGreedy(data, WrapPosition(last_flush_pos), mask,
commands, num_commands, &mb);
} else {
BuildMetaBlockGreedyWithContexts(data, WrapPosition(last_flush_pos),
mask,
prev_byte, prev_byte2,
literal_context_mode,
num_literal_contexts,
literal_context_map,
commands, num_commands,
&mb);
}
} else {
// Above quality 9: switch to the signed context mode when the data is not
// mostly UTF-8, then run the full meta-block builder.
if (!IsMostlyUTF8(data, WrapPosition(last_flush_pos), mask, bytes,
kMinUTF8Ratio)) {
literal_context_mode = CONTEXT_SIGNED;
}
BuildMetaBlock(data, WrapPosition(last_flush_pos), mask,
prev_byte, prev_byte2,
commands, num_commands,
literal_context_mode,
&mb);
}
if (quality >= kMinQualityForOptimizeHistograms) {
OptimizeHistograms(num_direct_distance_codes,
distance_postfix_bits,
&mb);
}
StoreMetaBlock(data, WrapPosition(last_flush_pos), bytes, mask,
prev_byte, prev_byte2,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands, num_commands,
mb,
storage_ix, storage);
}
// If the output position in bytes now exceeds bytes + 4, discard the
// compressed attempt and store the block uncompressed instead.
if (bytes + 4 < (*storage_ix >> 3)) {
// Restore the distance cache and last byte.
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
storage[0] = last_byte;
*storage_ix = last_byte_bits;
StoreUncompressedMetaBlock(is_last, data,
WrapPosition(last_flush_pos), mask,
bytes, storage_ix, storage);
}
}
BrotliCompressor::BrotliCompressor(BrotliParams params) BrotliCompressor::BrotliCompressor(BrotliParams params)
: params_(params), : params_(params),
hashers_(new Hashers()), hashers_(new Hashers()),
@ -211,7 +455,7 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
} else if (params_.lgblock == 0) { } else if (params_.lgblock == 0) {
params_.lgblock = 16; params_.lgblock = 16;
if (params_.quality >= 9 && params_.lgwin > params_.lgblock) { if (params_.quality >= 9 && params_.lgwin > params_.lgblock) {
params_.lgblock = std::min(21, params_.lgwin); params_.lgblock = std::min(18, params_.lgwin);
} }
} else { } else {
params_.lgblock = std::min(kMaxInputBlockBits, params_.lgblock = std::min(kMaxInputBlockBits,
@ -403,9 +647,13 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
&num_literals_); &num_literals_);
size_t max_length = std::min<size_t>(mask + 1, 1u << kMaxInputBlockBits); size_t max_length = std::min<size_t>(mask + 1, 1u << kMaxInputBlockBits);
const size_t max_literals = max_length / 8;
const size_t max_commands = max_length / 8;
if (!is_last && !force_flush && if (!is_last && !force_flush &&
(params_.quality >= kMinQualityForBlockSplit || (params_.quality >= kMinQualityForBlockSplit ||
(num_literals_ + num_commands_ < kMaxNumDelayedSymbols)) && (num_literals_ + num_commands_ < kMaxNumDelayedSymbols)) &&
num_literals_ < max_literals &&
num_commands_ < max_commands &&
input_pos_ + input_block_size() <= last_flush_pos_ + max_length) { input_pos_ + input_block_size() <= last_flush_pos_ + max_length) {
// Merge with next input block. Everything will happen later. // Merge with next input block. Everything will happen later.
last_processed_pos_ = input_pos_; last_processed_pos_ = input_pos_;
@ -421,253 +669,36 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
last_insert_len_ = 0; last_insert_len_ = 0;
} }
WriteMetaBlockInternal(is_last, out_size, output);
return true;
}
// Chooses the static literal context map from a 3x3 bigram histogram of
// UTF-8 byte classes, where bigram_histo[3 * p + c] counts bytes of class c
// that followed a byte of class p. Three models are compared by their
// entropy per counted symbol:
//   * one context   - the class of the previous byte is ignored;
//   * two contexts  - previous classes 0 and 2 share one context;
//   * three contexts - one context per previous class.
// Shannon entropy is used here instead of 'BitsEntropy' because the prefix
// is encoded together with the remaining 6 bits of the following byte, while
// BitsEntropy would assume the symbol is stored alone using Huffman coding.
//
// On return *num_literal_contexts is 1, 2 or 3. *literal_context_map is only
// written when 2 or 3 contexts are chosen; it is left untouched otherwise.
// REQUIRES: at least one entry of bigram_histo[0..9) is non-zero.
void ChooseContextMap(int quality,
                      uint32_t* bigram_histo,
                      size_t* num_literal_contexts,
                      const uint32_t** literal_context_map) {
  static const uint32_t kStaticContextMapContinuation[64] = {
    1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  static const uint32_t kStaticContextMapSimpleUTF8[64] = {
    0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };

  // Fold the 9 bigram counts into the one-context and two-context histograms.
  uint32_t monogram_histo[3] = { 0 };
  uint32_t two_prefix_histo[6] = { 0 };
  size_t total = 0;
  for (size_t i = 0; i < 9; ++i) {
    const uint32_t count = bigram_histo[i];
    total += count;
    monogram_histo[i % 3] += count;
    // Rows 0 and 2 of the bigram table are merged into slots [0..3).
    two_prefix_histo[i < 6 ? i : i - 6] += count;
  }

  size_t dummy;
  double entropy1 = ShannonEntropy(monogram_histo, 3, &dummy);
  double entropy2 = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
                     ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
  double entropy3 = 0;
  for (const uint32_t* row = bigram_histo; row != bigram_histo + 9; row += 3) {
    entropy3 += ShannonEntropy(row, 3, &dummy);
  }

  // Normalize to entropy per counted symbol.
  assert(total != 0);
  const double scale = 1.0 / static_cast<double>(total);
  entropy1 *= scale;
  entropy2 *= scale;
  entropy3 *= scale;

  if (quality < 7) {
    // 3 context models is a bit slower, don't use it at lower qualities.
    entropy3 = entropy1 * 10;
  }

  // If expected savings by symbol are less than 0.2 bits, skip the
  // context modeling -- in exchange for faster decoding speed.
  const bool two_contexts_gain_little = entropy1 - entropy2 < 0.2;
  const bool three_contexts_gain_little = entropy1 - entropy3 < 0.2;
  if (two_contexts_gain_little && three_contexts_gain_little) {
    *num_literal_contexts = 1;
    return;
  }
  if (entropy2 - entropy3 < 0.02) {
    *num_literal_contexts = 2;
    *literal_context_map = kStaticContextMapSimpleUTF8;
    return;
  }
  *num_literal_contexts = 3;
  *literal_context_map = kStaticContextMapContinuation;
}
// Samples the input and decides whether, and with which static context map,
// literals should be coded with context modeling.
//
// Returns without touching any output when quality is below
// kMinQualityForContextModeling or when length < 64. Otherwise sets
// *literal_context_mode to CONTEXT_UTF8 and lets ChooseContextMap fill in
// *num_literal_contexts and (possibly) *literal_context_map.
//
// input is indexed through `mask`, so positions wrap around the buffer.
void DecideOverLiteralContextModeling(const uint8_t* input,
                                      size_t start_pos,
                                      size_t length,
                                      size_t mask,
                                      int quality,
                                      ContextType* literal_context_mode,
                                      size_t* num_literal_contexts,
                                      const uint32_t** literal_context_map) {
  if (quality < kMinQualityForContextModeling || length < 64) {
    return;
  }
  // Maps the top two bits of a byte to its class: 0 for 0x00..0x7F,
  // 1 for 0x80..0xBF and 2 for 0xC0..0xFF.
  static const int lut[4] = { 0, 0, 1, 2 };

  // Gather bigram data of the UTF8 byte prefixes. To make the analysis of
  // UTF8 data faster we only examine 64 byte long strides at every 4kB
  // intervals.
  const size_t end_pos = start_pos + length;
  uint32_t bigram_prefix_histo[9] = { 0 };
  size_t stride_start = start_pos;
  while (stride_start + 64 <= end_pos) {
    const size_t stride_end_pos = stride_start + 64;
    // `prev` is the row offset (3 * class) of the preceding byte.
    int prev = lut[input[stride_start & mask] >> 6] * 3;
    for (size_t pos = stride_start + 1; pos < stride_end_pos; ++pos) {
      const int byte_class = lut[input[pos & mask] >> 6];
      ++bigram_prefix_histo[prev + byte_class];
      prev = byte_class * 3;
    }
    stride_start += 4096;
  }

  *literal_context_mode = CONTEXT_UTF8;
  ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
                   literal_context_map);
}
void BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
size_t* out_size,
uint8_t** output) {
if (!is_last && input_pos_ == last_flush_pos_) { if (!is_last && input_pos_ == last_flush_pos_) {
// We have no new input data and we don't have to finish the stream, so // We have no new input data and we don't have to finish the stream, so
// nothing to do. // nothing to do.
*out_size = 0; *out_size = 0;
return; return true;
} }
assert(input_pos_ >= last_flush_pos_); assert(input_pos_ >= last_flush_pos_);
assert(input_pos_ > last_flush_pos_ || is_last); assert(input_pos_ > last_flush_pos_ || is_last);
assert(input_pos_ - last_flush_pos_ <= 1u << 24); assert(input_pos_ - last_flush_pos_ <= 1u << 24);
const uint32_t bytes = static_cast<uint32_t>(input_pos_ - last_flush_pos_); const uint32_t metablock_size =
const uint8_t* data = ringbuffer_->start(); static_cast<uint32_t>(input_pos_ - last_flush_pos_);
const uint32_t mask = ringbuffer_->mask(); const size_t max_out_size = 2 * metablock_size + 500;
const size_t max_out_size = 2 * bytes + 500;
uint8_t* storage = GetBrotliStorage(max_out_size); uint8_t* storage = GetBrotliStorage(max_out_size);
storage[0] = last_byte_; storage[0] = last_byte_;
size_t storage_ix = last_byte_bits_; size_t storage_ix = last_byte_bits_;
bool font_mode = params_.mode == BrotliParams::MODE_FONT;
bool uncompressed = false; WriteMetaBlockInternal(
if (num_commands_ < (bytes >> 8) + 2) { data, mask, last_flush_pos_, metablock_size, is_last, params_.quality,
if (num_literals_ > 0.99 * static_cast<double>(bytes)) { font_mode, prev_byte_, prev_byte2_, num_literals_, num_commands_,
uint32_t literal_histo[256] = { 0 }; commands_, saved_dist_cache_, dist_cache_, &storage_ix, storage);
static const uint32_t kSampleRate = 13;
static const double kMinEntropy = 7.92;
const double bit_cost_threshold =
static_cast<double>(bytes) * kMinEntropy / kSampleRate;
size_t t = (bytes + kSampleRate - 1) / kSampleRate;
uint32_t pos = static_cast<uint32_t>(last_flush_pos_);
for (size_t i = 0; i < t; i++) {
++literal_histo[data[pos & mask]];
pos += kSampleRate;
}
if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
uncompressed = true;
}
}
}
if (bytes == 0) {
// Write the ISLAST and ISEMPTY bits.
WriteBits(2, 3, &storage_ix, &storage[0]);
storage_ix = (storage_ix + 7u) & ~7u;
} else if (uncompressed) {
// Restore the distance cache, as its last update by
// CreateBackwardReferences is now unused.
memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
StoreUncompressedMetaBlock(is_last, data,
WrapPosition(last_flush_pos_), mask, bytes,
&storage_ix,
&storage[0]);
} else {
uint32_t num_direct_distance_codes = 0;
uint32_t distance_postfix_bits = 0;
if (params_.quality > 9 && params_.mode == BrotliParams::MODE_FONT) {
num_direct_distance_codes = 12;
distance_postfix_bits = 1;
RecomputeDistancePrefixes(commands_,
num_commands_,
num_direct_distance_codes,
distance_postfix_bits);
}
if (params_.quality == 2) {
StoreMetaBlockFast(data, WrapPosition(last_flush_pos_),
bytes, mask, is_last,
commands_, num_commands_,
&storage_ix,
&storage[0]);
} else if (params_.quality < kMinQualityForBlockSplit) {
StoreMetaBlockTrivial(data, WrapPosition(last_flush_pos_),
bytes, mask, is_last,
commands_, num_commands_,
&storage_ix,
&storage[0]);
} else {
MetaBlockSplit mb;
ContextType literal_context_mode = CONTEXT_UTF8;
if (params_.quality <= 9) {
size_t num_literal_contexts = 1;
const uint32_t* literal_context_map = NULL;
DecideOverLiteralContextModeling(data, WrapPosition(last_flush_pos_),
bytes, mask,
params_.quality,
&literal_context_mode,
&num_literal_contexts,
&literal_context_map);
if (literal_context_map == NULL) {
BuildMetaBlockGreedy(data, WrapPosition(last_flush_pos_), mask,
commands_, num_commands_,
&mb);
} else {
BuildMetaBlockGreedyWithContexts(data, WrapPosition(last_flush_pos_),
mask,
prev_byte_, prev_byte2_,
literal_context_mode,
num_literal_contexts,
literal_context_map,
commands_, num_commands_,
&mb);
}
} else {
if (!IsMostlyUTF8(
data, WrapPosition(last_flush_pos_), mask, bytes, kMinUTF8Ratio)) {
literal_context_mode = CONTEXT_SIGNED;
}
BuildMetaBlock(data, WrapPosition(last_flush_pos_), mask,
prev_byte_, prev_byte2_,
commands_, num_commands_,
literal_context_mode,
&mb);
}
if (params_.quality >= kMinQualityForOptimizeHistograms) {
OptimizeHistograms(num_direct_distance_codes,
distance_postfix_bits,
&mb);
}
StoreMetaBlock(data, WrapPosition(last_flush_pos_), bytes, mask,
prev_byte_, prev_byte2_,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands_, num_commands_,
mb,
&storage_ix,
&storage[0]);
}
if (bytes + 4 < (storage_ix >> 3)) {
// Restore the distance cache and last byte.
memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
storage[0] = last_byte_;
storage_ix = last_byte_bits_;
StoreUncompressedMetaBlock(is_last, data,
WrapPosition(last_flush_pos_), mask,
bytes, &storage_ix, &storage[0]);
}
}
last_byte_ = storage[storage_ix >> 3]; last_byte_ = storage[storage_ix >> 3];
last_byte_bits_ = storage_ix & 7u; last_byte_bits_ = storage_ix & 7u;
last_flush_pos_ = input_pos_; last_flush_pos_ = input_pos_;
last_processed_pos_ = input_pos_; last_processed_pos_ = input_pos_;
prev_byte_ = data[(static_cast<uint32_t>(last_flush_pos_) - 1) & mask]; if (last_flush_pos_ > 0) {
prev_byte2_ = data[(static_cast<uint32_t>(last_flush_pos_) - 2) & mask]; prev_byte_ = data[(static_cast<uint32_t>(last_flush_pos_) - 1) & mask];
}
if (last_flush_pos_ > 1) {
prev_byte2_ = data[(static_cast<uint32_t>(last_flush_pos_) - 2) & mask];
}
num_commands_ = 0; num_commands_ = 0;
num_literals_ = 0; num_literals_ = 0;
// Save the state of the distance cache in case we need to restore it for // Save the state of the distance cache in case we need to restore it for
@ -675,6 +706,7 @@ void BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_)); memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
*output = &storage[0]; *output = &storage[0];
*out_size = storage_ix >> 3; *out_size = storage_ix >> 3;
return true;
} }
bool BrotliCompressor::WriteMetaBlock(const size_t input_size, bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
@ -739,6 +771,177 @@ bool BrotliCompressor::FinishStream(
return WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer); return WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer);
} }
int BrotliCompressBufferQuality10(int lgwin,
size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer) {
const size_t mask = std::numeric_limits<size_t>::max() >> 1;
assert(input_size <= mask + 1);
const size_t max_backward_limit = (1 << lgwin) - 16;
int dist_cache[4] = { 4, 11, 15, 16 };
int saved_dist_cache[4] = { 4, 11, 15, 16 };
int ok = 1;
const size_t max_out_size = *encoded_size;
size_t total_out_size = 0;
uint8_t last_byte;
uint8_t last_byte_bits;
EncodeWindowBits(lgwin, &last_byte, &last_byte_bits);
Hashers::H10* hasher = new Hashers::H10;
const size_t hasher_eff_size = std::min(input_size, max_backward_limit + 16);
hasher->Init(lgwin, 0, hasher_eff_size, true);
const int lgblock = std::min(18, lgwin);
const int lgmetablock = std::min(24, lgwin + 1);
const size_t max_block_size = static_cast<size_t>(1) << lgblock;
const size_t max_metablock_size = static_cast<size_t>(1) << lgmetablock;
const size_t max_literals_per_metablock = max_metablock_size / 8;
const size_t max_commands_per_metablock = max_metablock_size / 8;
size_t metablock_start = 0;
uint8_t prev_byte = 0;
uint8_t prev_byte2 = 0;
while (ok && metablock_start < input_size) {
const size_t metablock_end =
std::min(input_size, metablock_start + max_metablock_size);
const size_t expected_num_commands =
(metablock_end - metablock_start) / 12 + 16;
Command* commands = 0;
size_t num_commands = 0;
size_t last_insert_len = 0;
size_t num_literals = 0;
size_t metablock_size = 0;
size_t cmd_alloc_size = 0;
for (size_t block_start = metablock_start; block_start < metablock_end; ) {
size_t block_size = std::min(metablock_end - block_start, max_block_size);
ZopfliNode* nodes = new ZopfliNode[block_size + 1];
std::vector<uint32_t> path;
hasher->StitchToPreviousBlock(block_size, block_start,
input_buffer, mask);
ZopfliComputeShortestPath(block_size, block_start, input_buffer, mask,
max_backward_limit, dist_cache,
hasher, nodes, &path);
// We allocate a command buffer in the first iteration of this loop that
// will be likely big enough for the whole metablock, so that for most
// inputs we will not have to reallocate in later iterations. We do the
// allocation here and not before the loop, because if the input is small,
// this will be allocated after the zopfli cost model is freed, so this
// will not increase peak memory usage.
// TODO: If the first allocation is too small, increase command
// buffer size exponentially.
size_t new_cmd_alloc_size = std::max(expected_num_commands,
num_commands + path.size() + 1);
if (cmd_alloc_size != new_cmd_alloc_size) {
cmd_alloc_size = new_cmd_alloc_size;
commands = static_cast<Command*>(
realloc(commands, cmd_alloc_size * sizeof(Command)));
}
ZopfliCreateCommands(block_size, block_start, max_backward_limit, path,
&nodes[0], dist_cache, &last_insert_len,
&commands[num_commands], &num_literals);
num_commands += path.size();
block_start += block_size;
metablock_size += block_size;
delete[] nodes;
if (num_literals > max_literals_per_metablock ||
num_commands > max_commands_per_metablock) {
break;
}
}
if (last_insert_len > 0) {
Command cmd(last_insert_len);
commands[num_commands++] = cmd;
num_literals += last_insert_len;
}
const bool is_last = (metablock_start + metablock_size == input_size);
uint8_t* storage = NULL;
size_t storage_ix = last_byte_bits;
if (metablock_size == 0) {
// Write the ISLAST and ISEMPTY bits.
storage = new uint8_t[16];
storage[0] = last_byte;
WriteBits(2, 3, &storage_ix, storage);
storage_ix = (storage_ix + 7u) & ~7u;
} else if (!ShouldCompress(input_buffer, mask, metablock_start,
metablock_size, num_literals, num_commands)) {
// Restore the distance cache, as its last update by
// CreateBackwardReferences is now unused.
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
storage = new uint8_t[metablock_size + 16];
storage[0] = last_byte;
StoreUncompressedMetaBlock(is_last, input_buffer,
metablock_start, mask, metablock_size,
&storage_ix, storage);
} else {
uint32_t num_direct_distance_codes = 0;
uint32_t distance_postfix_bits = 0;
MetaBlockSplit mb;
ContextType literal_context_mode = CONTEXT_UTF8;
if (!IsMostlyUTF8(
input_buffer, metablock_start, mask, metablock_size,
kMinUTF8Ratio)) {
literal_context_mode = CONTEXT_SIGNED;
}
BuildMetaBlock(input_buffer, metablock_start, mask,
prev_byte, prev_byte2,
commands, num_commands,
literal_context_mode,
&mb);
OptimizeHistograms(num_direct_distance_codes,
distance_postfix_bits,
&mb);
const size_t max_out_metablock_size = 2 * metablock_size + 500;
storage = new uint8_t[max_out_metablock_size];
storage[0] = last_byte;
StoreMetaBlock(input_buffer, metablock_start, metablock_size, mask,
prev_byte, prev_byte2,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands, num_commands,
mb,
&storage_ix, storage);
if (metablock_size + 4 < (storage_ix >> 3)) {
// Restore the distance cache and last byte.
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
storage[0] = last_byte;
storage_ix = last_byte_bits;
StoreUncompressedMetaBlock(is_last, input_buffer,
metablock_start, mask,
metablock_size, &storage_ix, storage);
}
}
last_byte = storage[storage_ix >> 3];
last_byte_bits = storage_ix & 7u;
metablock_start += metablock_size;
prev_byte = input_buffer[metablock_start - 1];
prev_byte2 = input_buffer[metablock_start - 2];
// Save the state of the distance cache in case we need to restore it for
// emitting an uncompressed block.
memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
const size_t out_size = storage_ix >> 3;
total_out_size += out_size;
if (total_out_size <= max_out_size) {
memcpy(encoded_buffer, storage, out_size);
encoded_buffer += out_size;
} else {
ok = 0;
}
delete[] storage;
free(commands);
}
*encoded_size = total_out_size;
delete hasher;
return ok;
}
int BrotliCompressBuffer(BrotliParams params, int BrotliCompressBuffer(BrotliParams params,
size_t input_size, size_t input_size,
const uint8_t* input_buffer, const uint8_t* input_buffer,
@ -748,6 +951,18 @@ int BrotliCompressBuffer(BrotliParams params,
// Output buffer needs at least one byte. // Output buffer needs at least one byte.
return 0; return 0;
} }
if (input_size == 0) {
// Handle the special case of empty input.
*encoded_size = 1;
*encoded_buffer = 6;
return 1;
}
if (params.quality == 10) {
// TODO(user) Implement this direct path for all quality levels.
const int lgwin = std::min(24, std::max(16, params.lgwin));
return BrotliCompressBufferQuality10(lgwin, input_size, input_buffer,
encoded_size, encoded_buffer);
}
BrotliMemIn in(input_buffer, input_size); BrotliMemIn in(input_buffer, input_size);
BrotliMemOut out(encoded_buffer, *encoded_size); BrotliMemOut out(encoded_buffer, *encoded_size);
if (!BrotliCompress(params, &in, &out)) { if (!BrotliCompress(params, &in, &out)) {

View File

@ -140,10 +140,6 @@ class BrotliCompressor {
int* GetHashTable(int quality, int* GetHashTable(int quality,
size_t input_size, size_t* table_size); size_t input_size, size_t* table_size);
void WriteMetaBlockInternal(const bool is_last,
size_t* out_size,
uint8_t** output);
BrotliParams params_; BrotliParams params_;
Hashers* hashers_; Hashers* hashers_;
int hash_type_; int hash_type_;

View File

@ -40,7 +40,7 @@ void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
} }
for (size_t i = 0; i < num_commands; ++i) { for (size_t i = 0; i < num_commands; ++i) {
Command* cmd = &cmds[i]; Command* cmd = &cmds[i];
if (cmd->copy_len_ > 0 && cmd->cmd_prefix_ >= 128) { if (cmd->copy_len() && cmd->cmd_prefix_ >= 128) {
PrefixEncodeCopyDistance(cmd->DistanceCode(), PrefixEncodeCopyDistance(cmd->DistanceCode(),
num_direct_distance_codes, num_direct_distance_codes,
distance_postfix_bits, distance_postfix_bits,

View File

@ -10,7 +10,6 @@
#include <algorithm> #include <algorithm>
#include <limits> #include <limits>
#include <vector>
#include <cstdlib> #include <cstdlib>
#include "./histogram.h" #include "./histogram.h"
@ -32,6 +31,15 @@ void SetDepth(const HuffmanTree &p,
} }
} }
// Ordering predicate for the Huffman tree nodes: a node with a smaller
// total_count_ sorts first; among nodes with equal counts, the one with the
// larger index_right_or_value_ sorts first.
static inline bool SortHuffmanTree(const HuffmanTree& v0,
                                   const HuffmanTree& v1) {
  const bool same_count = (v0.total_count_ == v1.total_count_);
  return same_count
      ? (v0.index_right_or_value_ > v1.index_right_or_value_)
      : (v0.total_count_ < v1.total_count_);
}
// This function will create a Huffman tree. // This function will create a Huffman tree.
// //
// The catch here is that the tree cannot be arbitrarily deep. // The catch here is that the tree cannot be arbitrarily deep.
@ -50,30 +58,28 @@ void SetDepth(const HuffmanTree &p,
void CreateHuffmanTree(const uint32_t *data, void CreateHuffmanTree(const uint32_t *data,
const size_t length, const size_t length,
const int tree_limit, const int tree_limit,
HuffmanTree* tree,
uint8_t *depth) { uint8_t *depth) {
// For block sizes below 64 kB, we never need to do a second iteration // For block sizes below 64 kB, we never need to do a second iteration
// of this loop. Probably all of our block sizes will be smaller than // of this loop. Probably all of our block sizes will be smaller than
// that, so this loop is mostly of academic interest. If we actually // that, so this loop is mostly of academic interest. If we actually
// would need this, we would be better off with the Katajainen algorithm. // would need this, we would be better off with the Katajainen algorithm.
for (uint32_t count_limit = 1; ; count_limit *= 2) { for (uint32_t count_limit = 1; ; count_limit *= 2) {
std::vector<HuffmanTree> tree; size_t n = 0;
tree.reserve(2 * length + 1);
for (size_t i = length; i != 0;) { for (size_t i = length; i != 0;) {
--i; --i;
if (data[i]) { if (data[i]) {
const uint32_t count = std::max(data[i], count_limit); const uint32_t count = std::max(data[i], count_limit);
tree.push_back(HuffmanTree(count, -1, static_cast<int16_t>(i))); tree[n++] = HuffmanTree(count, -1, static_cast<int16_t>(i));
} }
} }
const size_t n = tree.size();
if (n == 1) { if (n == 1) {
depth[tree[0].index_right_or_value_] = 1; // Only one element. depth[tree[0].index_right_or_value_] = 1; // Only one element.
break; break;
} }
std::stable_sort(tree.begin(), tree.end(), SortHuffmanTree); std::sort(tree, tree + n, SortHuffmanTree);
// The nodes are: // The nodes are:
// [0, n): the sorted leaf nodes that we start with. // [0, n): the sorted leaf nodes that we start with.
@ -83,8 +89,8 @@ void CreateHuffmanTree(const uint32_t *data,
// [2n]: we add a sentinel at the end as well. // [2n]: we add a sentinel at the end as well.
// There will be (2n+1) elements at the end. // There will be (2n+1) elements at the end.
const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1); const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
tree.push_back(sentinel); tree[n] = sentinel;
tree.push_back(sentinel); tree[n + 1] = sentinel;
size_t i = 0; // Points to the next leaf node. size_t i = 0; // Points to the next leaf node.
size_t j = n + 1; // Points to the next non-leaf node. size_t j = n + 1; // Points to the next non-leaf node.
@ -106,16 +112,15 @@ void CreateHuffmanTree(const uint32_t *data,
} }
// The sentinel node becomes the parent node. // The sentinel node becomes the parent node.
size_t j_end = tree.size() - 1; size_t j_end = 2 * n - k;
tree[j_end].total_count_ = tree[j_end].total_count_ =
tree[left].total_count_ + tree[right].total_count_; tree[left].total_count_ + tree[right].total_count_;
tree[j_end].index_left_ = static_cast<int16_t>(left); tree[j_end].index_left_ = static_cast<int16_t>(left);
tree[j_end].index_right_or_value_ = static_cast<int16_t>(right); tree[j_end].index_right_or_value_ = static_cast<int16_t>(right);
// Add back the last sentinel node. // Add back the last sentinel node.
tree.push_back(sentinel); tree[j_end + 1] = sentinel;
} }
assert(tree.size() == 2 * n + 1);
SetDepth(tree[2 * n - 1], &tree[0], depth, 0); SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
// We need to pack the Huffman tree in tree_limit bits. // We need to pack the Huffman tree in tree_limit bits.
@ -127,12 +132,12 @@ void CreateHuffmanTree(const uint32_t *data,
} }
} }
void Reverse(std::vector<uint8_t>* v, size_t start, size_t end) { void Reverse(uint8_t* v, size_t start, size_t end) {
--end; --end;
while (start < end) { while (start < end) {
uint8_t tmp = (*v)[start]; uint8_t tmp = v[start];
(*v)[start] = (*v)[end]; v[start] = v[end];
(*v)[end] = tmp; v[end] = tmp;
++start; ++start;
--end; --end;
} }
@ -142,79 +147,88 @@ void WriteHuffmanTreeRepetitions(
const uint8_t previous_value, const uint8_t previous_value,
const uint8_t value, const uint8_t value,
size_t repetitions, size_t repetitions,
std::vector<uint8_t> *tree, size_t* tree_size,
std::vector<uint8_t> *extra_bits_data) { uint8_t* tree,
uint8_t* extra_bits_data) {
assert(repetitions > 0); assert(repetitions > 0);
if (previous_value != value) { if (previous_value != value) {
tree->push_back(value); tree[*tree_size] = value;
extra_bits_data->push_back(0); extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions; --repetitions;
} }
if (repetitions == 7) { if (repetitions == 7) {
tree->push_back(value); tree[*tree_size] = value;
extra_bits_data->push_back(0); extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions; --repetitions;
} }
if (repetitions < 3) { if (repetitions < 3) {
for (size_t i = 0; i < repetitions; ++i) { for (size_t i = 0; i < repetitions; ++i) {
tree->push_back(value); tree[*tree_size] = value;
extra_bits_data->push_back(0); extra_bits_data[*tree_size] = 0;
++(*tree_size);
} }
} else { } else {
repetitions -= 3; repetitions -= 3;
size_t start = tree->size(); size_t start = *tree_size;
while (true) { while (true) {
tree->push_back(16); tree[*tree_size] = 16;
extra_bits_data->push_back(repetitions & 0x3); extra_bits_data[*tree_size] = repetitions & 0x3;
++(*tree_size);
repetitions >>= 2; repetitions >>= 2;
if (repetitions == 0) { if (repetitions == 0) {
break; break;
} }
--repetitions; --repetitions;
} }
Reverse(tree, start, tree->size()); Reverse(tree, start, *tree_size);
Reverse(extra_bits_data, start, tree->size()); Reverse(extra_bits_data, start, *tree_size);
} }
} }
void WriteHuffmanTreeRepetitionsZeros( void WriteHuffmanTreeRepetitionsZeros(
size_t repetitions, size_t repetitions,
std::vector<uint8_t> *tree, size_t* tree_size,
std::vector<uint8_t> *extra_bits_data) { uint8_t* tree,
uint8_t* extra_bits_data) {
if (repetitions == 11) { if (repetitions == 11) {
tree->push_back(0); tree[*tree_size] = 0;
extra_bits_data->push_back(0); extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions; --repetitions;
} }
if (repetitions < 3) { if (repetitions < 3) {
for (size_t i = 0; i < repetitions; ++i) { for (size_t i = 0; i < repetitions; ++i) {
tree->push_back(0); tree[*tree_size] = 0;
extra_bits_data->push_back(0); extra_bits_data[*tree_size] = 0;
++(*tree_size);
} }
} else { } else {
repetitions -= 3; repetitions -= 3;
size_t start = tree->size(); size_t start = *tree_size;
while (true) { while (true) {
tree->push_back(17); tree[*tree_size] = 17;
extra_bits_data->push_back(repetitions & 0x7); extra_bits_data[*tree_size] = repetitions & 0x7;
++(*tree_size);
repetitions >>= 3; repetitions >>= 3;
if (repetitions == 0) { if (repetitions == 0) {
break; break;
} }
--repetitions; --repetitions;
} }
Reverse(tree, start, tree->size()); Reverse(tree, start, *tree_size);
Reverse(extra_bits_data, start, tree->size()); Reverse(extra_bits_data, start, *tree_size);
} }
} }
bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) { void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
uint8_t* good_for_rle) {
size_t nonzero_count = 0; size_t nonzero_count = 0;
size_t stride; size_t stride;
size_t limit; size_t limit;
size_t sum; size_t sum;
const size_t streak_limit = 1240; const size_t streak_limit = 1240;
uint8_t* good_for_rle;
// Let's make the Huffman code more compatible with rle encoding. // Let's make the Huffman code more compatible with rle encoding.
size_t i; size_t i;
for (i = 0; i < length; i++) { for (i = 0; i < length; i++) {
@ -223,13 +237,13 @@ bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) {
} }
} }
if (nonzero_count < 16) { if (nonzero_count < 16) {
return 1; return;
} }
while (length != 0 && counts[length - 1] == 0) { while (length != 0 && counts[length - 1] == 0) {
--length; --length;
} }
if (length == 0) { if (length == 0) {
return 1; // All zeros. return; // All zeros.
} }
// Now counts[0..length - 1] does not have trailing zeros. // Now counts[0..length - 1] does not have trailing zeros.
{ {
@ -245,7 +259,7 @@ bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) {
} }
if (nonzeros < 5) { if (nonzeros < 5) {
// Small histogram will model it well. // Small histogram will model it well.
return 1; return;
} }
size_t zeros = length - nonzeros; size_t zeros = length - nonzeros;
if (smallest_nonzero < 4) { if (smallest_nonzero < 4) {
@ -258,15 +272,12 @@ bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) {
} }
} }
if (nonzeros < 28) { if (nonzeros < 28) {
return 1; return;
} }
} }
// 2) Let's mark all population counts that already can be encoded // 2) Let's mark all population counts that already can be encoded
// with an rle code. // with an rle code.
good_for_rle = (uint8_t*)calloc(length, 1); memset(good_for_rle, 0, length);
if (good_for_rle == NULL) {
return 0;
}
{ {
// Let's not spoil any of the existing good rle codes. // Let's not spoil any of the existing good rle codes.
// Mark any seq of 0's that is longer as 5 as a good_for_rle. // Mark any seq of 0's that is longer as 5 as a good_for_rle.
@ -340,8 +351,6 @@ bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) {
} }
} }
} }
free(good_for_rle);
return 1;
} }
static void DecideOverRleUse(const uint8_t* depth, const size_t length, static void DecideOverRleUse(const uint8_t* depth, const size_t length,
@ -373,8 +382,9 @@ static void DecideOverRleUse(const uint8_t* depth, const size_t length,
void WriteHuffmanTree(const uint8_t* depth, void WriteHuffmanTree(const uint8_t* depth,
size_t length, size_t length,
std::vector<uint8_t> *tree, size_t* tree_size,
std::vector<uint8_t> *extra_bits_data) { uint8_t* tree,
uint8_t* extra_bits_data) {
uint8_t previous_value = 8; uint8_t previous_value = 8;
// Throw away trailing zeros. // Throw away trailing zeros.
@ -408,10 +418,11 @@ void WriteHuffmanTree(const uint8_t* depth,
} }
} }
if (value == 0) { if (value == 0) {
WriteHuffmanTreeRepetitionsZeros(reps, tree, extra_bits_data); WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
} else { } else {
WriteHuffmanTreeRepetitions(previous_value, WriteHuffmanTreeRepetitions(previous_value,
value, reps, tree, extra_bits_data); value, reps, tree_size,
tree, extra_bits_data);
previous_value = value; previous_value = value;
} }
i += reps; i += reps;

View File

@ -10,7 +10,6 @@
#define BROTLI_ENC_ENTROPY_ENCODE_H_ #define BROTLI_ENC_ENTROPY_ENCODE_H_
#include <string.h> #include <string.h>
#include <vector>
#include "./histogram.h" #include "./histogram.h"
#include "./prefix.h" #include "./prefix.h"
#include "./types.h" #include "./types.h"
@ -19,6 +18,7 @@ namespace brotli {
// A node of a Huffman tree. // A node of a Huffman tree.
struct HuffmanTree { struct HuffmanTree {
HuffmanTree() {}
HuffmanTree(uint32_t count, int16_t left, int16_t right) HuffmanTree(uint32_t count, int16_t left, int16_t right)
: total_count_(count), : total_count_(count),
index_left_(left), index_left_(left),
@ -29,11 +29,6 @@ struct HuffmanTree {
int16_t index_right_or_value_; int16_t index_right_or_value_;
}; };
// Sort the root nodes, least popular first.
inline bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) {
return v0.total_count_ < v1.total_count_;
}
void SetDepth(const HuffmanTree &p, HuffmanTree *pool, void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
uint8_t *depth, uint8_t level); uint8_t *depth, uint8_t level);
@ -45,10 +40,14 @@ void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
// The depth contains the tree, i.e., how many bits are used for // The depth contains the tree, i.e., how many bits are used for
// the symbol. // the symbol.
// //
// The actual Huffman tree is constructed in the tree[] array, which has to
// be at least 2 * length + 1 long.
//
// See http://en.wikipedia.org/wiki/Huffman_coding // See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const uint32_t *data, void CreateHuffmanTree(const uint32_t *data,
const size_t length, const size_t length,
const int tree_limit, const int tree_limit,
HuffmanTree* tree,
uint8_t *depth); uint8_t *depth);
// Change the population counts in a way that the consequent // Change the population counts in a way that the consequent
@ -57,15 +56,18 @@ void CreateHuffmanTree(const uint32_t *data,
// //
// length contains the size of the histogram. // length contains the size of the histogram.
// counts contains the population counts. // counts contains the population counts.
bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts); // good_for_rle is a buffer of at least length size
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
uint8_t* good_for_rle);
// Write a Huffman tree from bit depths into the bitstream representation // Write a Huffman tree from bit depths into the bitstream representation
// of a Huffman tree. The generated Huffman tree is to be compressed once // of a Huffman tree. The generated Huffman tree is to be compressed once
// more using a Huffman tree // more using a Huffman tree
void WriteHuffmanTree(const uint8_t* depth, void WriteHuffmanTree(const uint8_t* depth,
size_t num, size_t num,
std::vector<uint8_t> *tree, size_t* tree_size,
std::vector<uint8_t> *extra_bits_data); uint8_t* tree,
uint8_t* extra_bits_data);
// Get the actual bit values for a tree of bit depths. // Get the actual bit values for a tree of bit depths.
void ConvertBitDepthsToSymbols(const uint8_t *depth, void ConvertBitDepthsToSymbols(const uint8_t *depth,

View File

@ -78,7 +78,7 @@ static const uint32_t kCodeLengthBits[18] = {
}; };
inline void StoreStaticCodeLengthCode(size_t* storage_ix, uint8_t* storage) { inline void StoreStaticCodeLengthCode(size_t* storage_ix, uint8_t* storage) {
WriteBits(40, 0x000000ff55555554U, storage_ix, storage); WriteBits(40, MAKE_UINT64_T(0xff, 0x55555554), storage_ix, storage);
} }
static const uint64_t kZeroRepsBits[704] = { static const uint64_t kZeroRepsBits[704] = {

View File

@ -14,7 +14,6 @@
#include <algorithm> #include <algorithm>
#include <cstring> #include <cstring>
#include <limits> #include <limits>
#include <vector>
#include "./dictionary_hash.h" #include "./dictionary_hash.h"
#include "./fast_log.h" #include "./fast_log.h"
@ -278,7 +277,7 @@ class HashLongestMatchQuickly {
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) { if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
const size_t transform_id = kCutoffTransforms[len - matchlen]; const size_t transform_id = kCutoffTransforms[len - matchlen];
const size_t word_id = const size_t word_id =
transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) + transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]) +
dist; dist;
const size_t backward = max_backward + word_id + 1; const size_t backward = max_backward + word_id + 1;
const double score = BackwardReferenceScore(matchlen, backward); const double score = BackwardReferenceScore(matchlen, backward);
@ -574,8 +573,10 @@ class HashLongestMatch {
} }
buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix); buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
++num_[key]; ++num_[key];
std::vector<uint32_t> dict_matches(kMaxDictionaryMatchLen + 1, uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
kInvalidMatch); for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
dict_matches[i] = kInvalidMatch;
}
size_t minlen = std::max<size_t>(4, best_len + 1); size_t minlen = std::max<size_t>(4, best_len + 1);
if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length, if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
&dict_matches[0])) { &dict_matches[0])) {
@ -706,8 +707,10 @@ class HashToBinaryTree {
matches = StoreAndFindMatches(data, cur_ix, ring_buffer_mask, matches = StoreAndFindMatches(data, cur_ix, ring_buffer_mask,
max_length, &best_len, matches); max_length, &best_len, matches);
} }
std::vector<uint32_t> dict_matches(kMaxDictionaryMatchLen + 1, uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
kInvalidMatch); for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
dict_matches[i] = kInvalidMatch;
}
size_t minlen = std::max<size_t>(4, best_len + 1); size_t minlen = std::max<size_t>(4, best_len + 1);
if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length, if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
&dict_matches[0])) { &dict_matches[0])) {
@ -725,15 +728,34 @@ class HashToBinaryTree {
// Stores the hash of the next 4 bytes and re-roots the binary tree at the // Stores the hash of the next 4 bytes and re-roots the binary tree at the
// current sequence, without returning any matches. // current sequence, without returning any matches.
// REQUIRES: cur_ix + kMaxTreeCompLength <= end-of-current-block
void Store(const uint8_t* data, void Store(const uint8_t* data,
const size_t ring_buffer_mask, const size_t ring_buffer_mask,
const size_t cur_ix, const size_t cur_ix) {
const size_t max_length) {
size_t best_len = 0; size_t best_len = 0;
StoreAndFindMatches(data, cur_ix, ring_buffer_mask, max_length, StoreAndFindMatches(data, cur_ix, ring_buffer_mask, kMaxTreeCompLength,
&best_len, NULL); &best_len, NULL);
} }
// Stores the last `kMaxTreeCompLength - 1` positions that precede `position`
// in the hasher. These could not be stored earlier, because doing so needs
// data from both the previous block and the current one. Does nothing when
// the current block has fewer than 3 bytes or when fewer than
// kMaxTreeCompLength bytes precede it.
void StitchToPreviousBlock(size_t num_bytes,
                           size_t position,
                           const uint8_t* ringbuffer,
                           size_t ringbuffer_mask) {
  if (num_bytes < 3 || position < kMaxTreeCompLength) {
    return;
  }
  const size_t first = position - kMaxTreeCompLength + 1;
  const size_t limit = std::min(position, first + num_bytes);
  size_t cur = first;
  while (cur < limit) {
    // Here cur + kMaxTreeCompLength <= position + num_bytes, i.e. the
    // comparison window ends no later than the end of the current block, as
    // Store() requires.
    Store(ringbuffer, ringbuffer_mask, cur);
    ++cur;
  }
}
static const size_t kMaxNumMatches = 64 + kMaxTreeSearchDepth; static const size_t kMaxNumMatches = 64 + kMaxTreeSearchDepth;
private: private:
@ -928,8 +950,7 @@ struct Hashers {
case 10: case 10:
hash_h10->Init(lgwin, 0, size, false); hash_h10->Init(lgwin, 0, size, false);
for (size_t i = 0; i + kMaxTreeCompLength - 1 < size; ++i) { for (size_t i = 0; i + kMaxTreeCompLength - 1 < size; ++i) {
hash_h10->Store(dict, std::numeric_limits<size_t>::max(), hash_h10->Store(dict, std::numeric_limits<size_t>::max(), i);
i, size - i);
} }
break; break;
default: break; default: break;

View File

@ -50,8 +50,8 @@ void BuildHistograms(
prev_byte = ringbuffer[pos & mask]; prev_byte = ringbuffer[pos & mask];
++pos; ++pos;
} }
pos += cmd.copy_len_; pos += cmd.copy_len();
if (cmd.copy_len_ > 0) { if (cmd.copy_len()) {
prev_byte2 = ringbuffer[(pos - 2) & mask]; prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask]; prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) { if (cmd.cmd_prefix_ >= 128) {

View File

@ -258,8 +258,8 @@ void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
lit_blocks.AddSymbol(ringbuffer[pos & mask]); lit_blocks.AddSymbol(ringbuffer[pos & mask]);
++pos; ++pos;
} }
pos += cmd.copy_len_; pos += cmd.copy_len();
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) { if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
dist_blocks.AddSymbol(cmd.dist_prefix_); dist_blocks.AddSymbol(cmd.dist_prefix_);
} }
} }
@ -488,8 +488,8 @@ void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
prev_byte = literal; prev_byte = literal;
++pos; ++pos;
} }
pos += cmd.copy_len_; pos += cmd.copy_len();
if (cmd.copy_len_ > 0) { if (cmd.copy_len()) {
prev_byte2 = ringbuffer[(pos - 2) & mask]; prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask]; prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) { if (cmd.cmd_prefix_ >= 128) {
@ -515,20 +515,25 @@ void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
void OptimizeHistograms(size_t num_direct_distance_codes, void OptimizeHistograms(size_t num_direct_distance_codes,
size_t distance_postfix_bits, size_t distance_postfix_bits,
MetaBlockSplit* mb) { MetaBlockSplit* mb) {
uint8_t* good_for_rle = new uint8_t[kNumCommandPrefixes];
for (size_t i = 0; i < mb->literal_histograms.size(); ++i) { for (size_t i = 0; i < mb->literal_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0]); OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0],
good_for_rle);
} }
for (size_t i = 0; i < mb->command_histograms.size(); ++i) { for (size_t i = 0; i < mb->command_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(kNumCommandPrefixes, OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
&mb->command_histograms[i].data_[0]); &mb->command_histograms[i].data_[0],
good_for_rle);
} }
size_t num_distance_codes = size_t num_distance_codes =
kNumDistanceShortCodes + num_direct_distance_codes + kNumDistanceShortCodes + num_direct_distance_codes +
(48u << distance_postfix_bits); (48u << distance_postfix_bits);
for (size_t i = 0; i < mb->distance_histograms.size(); ++i) { for (size_t i = 0; i < mb->distance_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(num_distance_codes, OptimizeHuffmanCountsForRle(num_distance_codes,
&mb->distance_histograms[i].data_[0]); &mb->distance_histograms[i].data_[0],
good_for_rle);
} }
delete[] good_for_rle;
} }
} // namespace brotli } // namespace brotli

View File

@ -60,7 +60,7 @@ inline void PrefixEncodeCopyDistance(size_t distance_code,
return; return;
} }
distance_code -= kNumDistanceShortCodes + num_direct_codes; /* >= 0 */ distance_code -= kNumDistanceShortCodes + num_direct_codes; /* >= 0 */
distance_code += (1 << (postfix_bits + 2)); /* > 0 */ distance_code += (1u << (postfix_bits + 2u)); /* > 0 */
size_t bucket = Log2FloorNonZero(distance_code) - 1; size_t bucket = Log2FloorNonZero(distance_code) - 1;
size_t postfix_mask = (1 << postfix_bits) - 1; size_t postfix_mask = (1 << postfix_bits) - 1;
size_t postfix = distance_code & postfix_mask; size_t postfix = distance_code & postfix_mask;

View File

@ -31,24 +31,52 @@ class RingBuffer {
: size_(1u << window_bits), : size_(1u << window_bits),
mask_((1u << window_bits) - 1), mask_((1u << window_bits) - 1),
tail_size_(1u << tail_bits), tail_size_(1u << tail_bits),
pos_(0) { total_size_(size_ + tail_size_),
static const size_t kSlackForEightByteHashingEverywhere = 7; cur_size_(0),
const size_t buflen = size_ + tail_size_; pos_(0),
data_ = new uint8_t[2 + buflen + kSlackForEightByteHashingEverywhere]; data_(0),
buffer_ = data_ + 2; buffer_(0) {}
for (size_t i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
buffer_[buflen + i] = 0;
}
// Initialize the last two bytes and their copy to zero.
buffer_[-2] = buffer_[size_ - 2] = 0;
buffer_[-1] = buffer_[size_ - 1] = 0;
}
// Releases the storage held in data_. InitBuffer() obtains that storage from
// realloc(), so it is handed back with free(); data_ starts out as a null
// pointer, which free() accepts, so destroying a never-written buffer is fine.
~RingBuffer(void) { ~RingBuffer(void) {
delete [] data_; free(data_);
}
// Allocates or re-allocates data_ so that it holds buflen bytes plus a slack
// region before and after, and fills both slack regions with zeros.
//
// The two bytes in front of buffer_ are the slot for the copy of the last two
// ring-buffer bytes. The trailing slack (sized, as the constant name says, for
// eight-byte hashing) lets readers run a few bytes past the end of the data.
// realloc() keeps the bytes that were already stored; bytes between the old
// and the new size are left uninitialized.
//
// If the allocation fails the process is aborted: this method has no way to
// report an error, and every caller writes through buffer_ right afterwards.
inline void InitBuffer(const uint32_t buflen) {
  static const size_t kSlackForEightByteHashingEverywhere = 7;
  // Keep the result in a temporary: on failure realloc() returns null and
  // leaves the old block allocated, so data_ must not be overwritten (and
  // then dereferenced) before the result has been checked.
  uint8_t* const new_data = static_cast<uint8_t*>(realloc(
      data_,
      2 + static_cast<size_t>(buflen) + kSlackForEightByteHashingEverywhere));
  if (new_data == 0) {
    abort();
  }
  data_ = new_data;
  cur_size_ = buflen;
  buffer_ = data_ + 2;
  buffer_[-2] = buffer_[-1] = 0;
  for (size_t i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
    buffer_[cur_size_ + i] = 0;
  }
}
// Push bytes into the ring buffer. // Push bytes into the ring buffer.
void Write(const uint8_t *bytes, size_t n) { void Write(const uint8_t *bytes, size_t n) {
if (pos_ == 0 && n < tail_size_) {
// Special case for the first write: to process the first block, we don't
// need to allocate the whole ringbuffer and we don't need the tail
// either. However, we do this memory usage optimization only if the
// first write is less than the tail size, which is also the input block
// size, otherwise it is likely that other blocks will follow and we
// will need to reallocate to the full size anyway.
pos_ = static_cast<uint32_t>(n);
InitBuffer(pos_);
memcpy(buffer_, bytes, n);
return;
}
if (cur_size_ < total_size_) {
// Lazily allocate the full buffer.
InitBuffer(total_size_);
// Initialize the last two bytes to zero, so that we don't have to worry
// later when we copy the last two bytes to the first two positions.
buffer_[size_ - 2] = 0;
buffer_[size_ - 1] = 0;
}
const size_t masked_pos = pos_ & mask_; const size_t masked_pos = pos_ & mask_;
// The length of the writes is limited so that we do not need to worry // The length of the writes is limited so that we do not need to worry
// about a write // about a write
@ -60,7 +88,7 @@ class RingBuffer {
// Split into two writes. // Split into two writes.
// Copy into the end of the buffer, including the tail buffer. // Copy into the end of the buffer, including the tail buffer.
memcpy(&buffer_[masked_pos], bytes, memcpy(&buffer_[masked_pos], bytes,
std::min(n, (size_ + tail_size_) - masked_pos)); std::min(n, total_size_ - masked_pos));
// Copy into the beginning of the buffer // Copy into the beginning of the buffer
memcpy(&buffer_[0], bytes + (size_ - masked_pos), memcpy(&buffer_[0], bytes + (size_ - masked_pos),
n - (size_ - masked_pos)); n - (size_ - masked_pos));
@ -100,7 +128,9 @@ class RingBuffer {
const uint32_t size_; const uint32_t size_;
const uint32_t mask_; const uint32_t mask_;
const uint32_t tail_size_; const uint32_t tail_size_;
const uint32_t total_size_;
uint32_t cur_size_;
// Position to write in the ring buffer. // Position to write in the ring buffer.
uint32_t pos_; uint32_t pos_;
// The actual ring buffer containing the copy of the last two bytes, the data, // The actual ring buffer containing the copy of the last two bytes, the data,

View File

@ -197,7 +197,7 @@ static size_t ToUpperCase(uint8_t *p, size_t len) {
inline std::string TransformWord( inline std::string TransformWord(
WordTransformType transform_type, const uint8_t* word, size_t len) { WordTransformType transform_type, const uint8_t* word, size_t len) {
if (transform_type <= kOmitLast9) { if (transform_type <= kOmitLast9) {
if (len <= transform_type) { if (len <= static_cast<size_t>(transform_type)) {
return std::string(); return std::string();
} }
return std::string(word, word + len - transform_type); return std::string(word, word + len - transform_type);

View File

@ -24,4 +24,6 @@ typedef __int64 int64_t;
#include <stdint.h> #include <stdint.h>
#endif /* defined(_MSC_VER) && (_MSC_VER < 1600) */ #endif /* defined(_MSC_VER) && (_MSC_VER < 1600) */
/* Builds a 64-bit value from two halves: `high` is converted to uint64_t and
   shifted into bits 63..32, and `low` is OR-ed into the result. Both
   parameters are parenthesized so that arguments containing low-precedence
   operators (e.g. `c ? a : b`) expand as a single operand. `low` is neither
   masked nor cast: pass a non-negative value that fits in 32 bits. */
#define MAKE_UINT64_T(high, low) ((((uint64_t)(high)) << 32) | (low))
#endif /* BROTLI_ENC_TYPES_H_ */ #endif /* BROTLI_ENC_TYPES_H_ */