Reduce memory usage of brotli encoder at quality 10 and 11.

This commit is contained in:
Zoltan Szabadka 2016-03-15 10:50:16 +01:00
parent cfba2db7b3
commit b820c39bd9
23 changed files with 1658 additions and 967 deletions

View File

@ -21,8 +21,6 @@ namespace brotli {
// The maximum length for which the zopflification uses distinct distances.
static const uint16_t kMaxZopfliLen = 325;
static const double kInfinity = std::numeric_limits<double>::infinity();
// Histogram based cost model for zopflification.
class ZopfliCostModel {
public:
@ -42,7 +40,7 @@ class ZopfliCostModel {
size_t pos = position - last_insert_len;
for (size_t i = 0; i < num_commands; i++) {
size_t inslength = commands[i].insert_len_;
size_t copylength = commands[i].copy_len_;
size_t copylength = commands[i].copy_len();
size_t distcode = commands[i].dist_prefix_;
size_t cmdcode = commands[i].cmd_prefix_;
@ -56,7 +54,7 @@ class ZopfliCostModel {
pos += inslength + copylength;
}
std::vector<double> cost_literal;
std::vector<float> cost_literal;
Set(histogram_literal, &cost_literal);
Set(histogram_cmd, &cost_cmd_);
Set(histogram_dist, &cost_dist_);
@ -77,26 +75,25 @@ class ZopfliCostModel {
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask) {
std::vector<float> literal_cost(num_bytes + 1);
literal_costs_.resize(num_bytes + 2);
EstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
ringbuffer, &literal_cost[0]);
literal_costs_.resize(num_bytes + 1);
ringbuffer, &literal_costs_[1]);
literal_costs_[0] = 0.0;
for (size_t i = 0; i < num_bytes; ++i) {
literal_costs_[i + 1] = literal_costs_[i] + literal_cost[i];
literal_costs_[i + 1] += literal_costs_[i];
}
cost_cmd_.resize(kNumCommandPrefixes);
cost_dist_.resize(kNumDistancePrefixes);
for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
cost_cmd_[i] = FastLog2(11 + i);
cost_cmd_[i] = static_cast<float>(FastLog2(11 + i));
}
for (uint32_t i = 0; i < kNumDistancePrefixes; ++i) {
cost_dist_[i] = FastLog2(20 + i);
cost_dist_[i] = static_cast<float>(FastLog2(20 + i));
}
min_cost_cmd_ = FastLog2(11);
min_cost_cmd_ = static_cast<float>(FastLog2(11));
}
double GetCommandCost(
float GetCommandCost(
size_t dist_code, size_t length_code, size_t insert_length) const {
uint16_t inscode = GetInsertLengthCode(insert_length);
uint16_t copycode = GetCopyLengthCode(length_code);
@ -106,29 +103,29 @@ class ZopfliCostModel {
PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
uint32_t distnumextra = distextra >> 24;
double result = static_cast<double>(
kInsExtra[inscode] + kCopyExtra[copycode] + distnumextra);
float result = static_cast<float>(
GetInsertExtra(inscode) + GetCopyExtra(copycode) + distnumextra);
result += cost_cmd_[cmdcode];
if (cmdcode >= 128) result += cost_dist_[dist_symbol];
return result;
}
double GetLiteralCosts(size_t from, size_t to) const {
float GetLiteralCosts(size_t from, size_t to) const {
return literal_costs_[to] - literal_costs_[from];
}
double GetMinCostCmd(void) const {
float GetMinCostCmd(void) const {
return min_cost_cmd_;
}
private:
void Set(const std::vector<uint32_t>& histogram, std::vector<double>* cost) {
void Set(const std::vector<uint32_t>& histogram, std::vector<float>* cost) {
cost->resize(histogram.size());
size_t sum = 0;
for (size_t i = 0; i < histogram.size(); i++) {
sum += histogram[i];
}
double log2sum = FastLog2(sum);
float log2sum = static_cast<float>(FastLog2(sum));
for (size_t i = 0; i < histogram.size(); i++) {
if (histogram[i] == 0) {
(*cost)[i] = log2sum + 2;
@ -136,33 +133,20 @@ class ZopfliCostModel {
}
// Shannon bits for this symbol.
(*cost)[i] = log2sum - FastLog2(histogram[i]);
(*cost)[i] = log2sum - static_cast<float>(FastLog2(histogram[i]));
// Cannot be coded with less than 1 bit
if ((*cost)[i] < 1) (*cost)[i] = 1;
}
}
std::vector<double> cost_cmd_; // The insert and copy length symbols.
std::vector<double> cost_dist_;
std::vector<float> cost_cmd_; // The insert and copy length symbols.
std::vector<float> cost_dist_;
// Cumulative costs of literals per position in the stream.
std::vector<double> literal_costs_;
double min_cost_cmd_;
std::vector<float> literal_costs_;
float min_cost_cmd_;
};
inline void SetDistanceCache(size_t distance,
size_t distance_code,
size_t max_distance,
const int* dist_cache,
int* result_dist_cache) {
if (distance <= max_distance && distance_code > 0) {
result_dist_cache[0] = static_cast<int>(distance);
memcpy(&result_dist_cache[1], dist_cache, 3 * sizeof(dist_cache[0]));
} else {
memcpy(result_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
}
}
inline size_t ComputeDistanceCode(size_t distance,
size_t max_distance,
int quality,
@ -194,47 +178,28 @@ inline size_t ComputeDistanceCode(size_t distance,
return distance + 15;
}
struct ZopfliNode {
ZopfliNode() : length(1),
distance(0),
distance_code(0),
length_code(0),
insert_length(0),
cost(kInfinity) {}
// best length to get up to this byte (not including this byte itself)
uint32_t length;
// distance associated with the length
uint32_t distance;
uint32_t distance_code;
int distance_cache[4];
// length code associated with the length - usually the same as length,
// except in case of length-changing dictionary transformation.
uint32_t length_code;
// number of literal inserts before this copy
uint32_t insert_length;
// smallest cost to get to this byte from the beginning, as found so far
double cost;
};
// REQUIRES: len >= 2, start_pos <= pos
// REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity
// Maintains the "ZopfliNode array invariant".
inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
size_t len, size_t len_code, size_t dist,
size_t dist_code, size_t max_dist,
const int* dist_cache, double cost) {
size_t short_code, float cost) {
ZopfliNode& next = nodes[pos + len];
next.length = static_cast<uint32_t>(len);
next.length_code = static_cast<uint32_t>(len_code);
next.distance = static_cast<uint32_t>(dist);
next.distance_code = static_cast<uint32_t>(dist_code);
next.length = static_cast<uint32_t>(len | ((len + 9u - len_code) << 24));
next.distance = static_cast<uint32_t>(dist | (short_code << 25));
next.insert_length = static_cast<uint32_t>(pos - start_pos);
next.cost = cost;
SetDistanceCache(dist, dist_code, max_dist, dist_cache,
&next.distance_cache[0]);
}
// Maintains the smallest 2^k cost difference together with their positions
class StartPosQueue {
public:
struct PosData {
size_t pos;
int distance_cache[4];
float costdiff;
};
explicit StartPosQueue(int bits)
: mask_((1u << bits) - 1), q_(1 << bits), idx_(0) {}
@ -242,21 +207,15 @@ class StartPosQueue {
idx_ = 0;
}
void Push(size_t pos, double costdiff) {
if (costdiff == kInfinity) {
// We can't start a command from an unreachable start position.
// E.g. position 1 in a stream is always unreachable, because all commands
// have a copy of at least length 2.
return;
}
size_t offset = -idx_ & mask_;
void Push(const StartPosQueue::PosData& posdata) {
size_t offset = ~idx_ & mask_;
++idx_;
size_t len = size();
q_[offset] = std::make_pair(pos, costdiff);
q_[offset] = posdata;
/* Restore the sorted order. In the list of |len| items at most |len - 1|
adjacent element comparisons / swaps are required. */
for (size_t i = 1; i < len; ++i) {
if (q_[offset & mask_].second > q_[(offset + 1) & mask_].second) {
if (q_[offset & mask_].costdiff > q_[(offset + 1) & mask_].costdiff) {
std::swap(q_[offset & mask_], q_[(offset + 1) & mask_]);
}
++offset;
@ -265,32 +224,32 @@ class StartPosQueue {
size_t size(void) const { return std::min(idx_, mask_ + 1); }
size_t GetStartPos(size_t k) const {
return q_[(k + 1 - idx_) & mask_].first;
const StartPosQueue::PosData& GetStartPosData(size_t k) const {
return q_[(k - idx_) & mask_];
}
private:
const size_t mask_;
std::vector<std::pair<size_t, double> > q_;
std::vector<PosData> q_;
size_t idx_;
};
// Returns the minimum possible copy length that can improve the cost of any
// future position.
size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
const std::vector<ZopfliNode>& nodes,
const ZopfliNode* nodes,
const ZopfliCostModel& model,
size_t pos,
double min_cost_cmd) {
const size_t num_bytes,
const size_t pos) {
// Compute the minimum possible cost of reaching any future position.
const size_t start0 = queue.GetStartPos(0);
double min_cost = (nodes[start0].cost +
const size_t start0 = queue.GetStartPosData(0).pos;
float min_cost = (nodes[start0].cost +
model.GetLiteralCosts(start0, pos) +
min_cost_cmd);
model.GetMinCostCmd());
size_t len = 2;
size_t next_len_bucket = 4;
size_t next_len_offset = 10;
while (pos + len < nodes.size() && nodes[pos + len].cost <= min_cost) {
while (pos + len <= num_bytes && nodes[pos + len].cost <= min_cost) {
// We already reached (pos + len) with no more cost than the minimum
// possible cost of reaching anything from this pos, so there is no point in
// looking for lengths <= len.
@ -298,7 +257,7 @@ size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
if (len == next_len_offset) {
// We reached the next copy length code bucket, so we add one more
// extra bit to the minimum cost.
min_cost += 1.0;
min_cost += static_cast<float>(1.0);
next_len_offset += next_len_bucket;
next_len_bucket *= 2;
}
@ -306,56 +265,85 @@ size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
return len;
}
void ZopfliIterate(size_t num_bytes,
size_t position,
// Fills in dist_cache[0..3] with the last four distances (as defined by
// Section 4. of the Spec) that would be used at (block_start + pos) if we
// used the shortest path of commands from block_start, computed from
// nodes[0..pos]. The last four distances at block_start are in
// starting_dist_cache[0..3].
// REQUIRES: nodes[pos].cost < kInfinity
// REQUIRES: nodes[0..pos] satisfies the "ZopfliNode array invariant".
void ComputeDistanceCache(const size_t block_start,
const size_t pos,
const size_t max_backward,
const int* starting_dist_cache,
const ZopfliNode* nodes,
int* dist_cache) {
int idx = 0;
size_t p = pos;
// Because of prerequisite, does at most (pos + 1) / 2 iterations.
while (idx < 4 && p > 0) {
const size_t clen = nodes[p].copy_length();
const size_t ilen = nodes[p].insert_length;
const size_t dist = nodes[p].copy_distance();
// Since block_start + p is the end position of the command, the copy part
// starts from block_start + p - clen. Distances that are greater than this
// or greater than max_backward are static dictionary references, and do
// not update the last distances. Also distance code 0 (last distance)
// does not update the last distances.
if (dist + clen <= block_start + p && dist <= max_backward &&
nodes[p].distance_code() > 0) {
dist_cache[idx++] = static_cast<int>(dist);
}
// Because of prerequisite, p >= clen + ilen >= 2.
p -= clen + ilen;
}
for (; idx < 4; ++idx) {
dist_cache[idx] = *starting_dist_cache++;
}
}
void UpdateNodes(const size_t num_bytes,
const size_t block_start,
const size_t pos,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t ringbuffer_mask,
const size_t max_backward_limit,
const ZopfliCostModel& model,
const std::vector<uint32_t>& num_matches,
const std::vector<BackwardMatch>& matches,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
size_t* num_literals) {
const Command * const orig_commands = commands;
std::vector<ZopfliNode> nodes(num_bytes + 1);
nodes[0].length = 0;
nodes[0].cost = 0;
memcpy(nodes[0].distance_cache, dist_cache, 4 * sizeof(dist_cache[0]));
StartPosQueue queue(3);
const double min_cost_cmd = model.GetMinCostCmd();
size_t cur_match_pos = 0;
for (size_t i = 0; i + 3 < num_bytes; i++) {
size_t cur_ix = position + i;
const int* starting_dist_cache,
const size_t num_matches,
const BackwardMatch* matches,
const ZopfliCostModel* model,
StartPosQueue* queue,
ZopfliNode* nodes) {
size_t cur_ix = block_start + pos;
size_t cur_ix_masked = cur_ix & ringbuffer_mask;
size_t max_distance = std::min(cur_ix, max_backward_limit);
size_t max_length = num_bytes - i;
queue.Push(i, nodes[i].cost - model.GetLiteralCosts(0, i));
if (nodes[pos].cost <= model->GetLiteralCosts(0, pos)) {
StartPosQueue::PosData posdata;
posdata.pos = pos;
posdata.costdiff = nodes[pos].cost - model->GetLiteralCosts(0, pos);
ComputeDistanceCache(block_start, pos, max_backward_limit,
starting_dist_cache, nodes, posdata.distance_cache);
queue->Push(posdata);
}
const size_t min_len = ComputeMinimumCopyLength(queue, nodes, model,
i, min_cost_cmd);
const size_t min_len = ComputeMinimumCopyLength(
*queue, nodes, *model, num_bytes, pos);
// Go over the command starting positions in order of increasing cost
// difference.
for (size_t k = 0; k < 5 && k < queue.size(); ++k) {
const size_t start = queue.GetStartPos(k);
const double start_costdiff =
nodes[start].cost - model.GetLiteralCosts(0, start);
const int* dist_cache2 = &nodes[start].distance_cache[0];
for (size_t k = 0; k < 5 && k < queue->size(); ++k) {
const StartPosQueue::PosData& posdata = queue->GetStartPosData(k);
const size_t start = posdata.pos;
const float start_costdiff = posdata.costdiff;
// Look for last distance matches using the distance cache from this
// starting position.
size_t best_len = min_len - 1;
for (size_t j = 0; j < kNumDistanceShortCodes; ++j) {
const size_t idx = kDistanceCacheIndex[j];
const size_t backward =
static_cast<size_t>(dist_cache2[idx] + kDistanceCacheOffset[j]);
const size_t backward = static_cast<size_t>(posdata.distance_cache[idx] +
kDistanceCacheOffset[j]);
size_t prev_ix = cur_ix - backward;
if (prev_ix >= cur_ix) {
continue;
@ -374,14 +362,13 @@ void ZopfliIterate(size_t num_bytes,
const size_t len =
FindMatchLengthWithLimit(&ringbuffer[prev_ix],
&ringbuffer[cur_ix_masked],
max_length);
num_bytes - pos);
for (size_t l = best_len + 1; l <= len; ++l) {
const size_t inslen = i - start;
double cmd_cost = model.GetCommandCost(j, l, inslen);
double cost = start_costdiff + cmd_cost + model.GetLiteralCosts(0, i);
if (cost < nodes[i + l].cost) {
UpdateZopfliNode(&nodes[0], i, start, l, l, backward, j,
max_distance, dist_cache2, cost);
const size_t inslen = pos - start;
float cmd_cost = model->GetCommandCost(j, l, inslen);
float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos);
if (cost < nodes[pos + l].cost) {
UpdateZopfliNode(&nodes[0], pos, start, l, l, backward, j + 1, cost);
}
best_len = l;
}
@ -394,8 +381,8 @@ void ZopfliIterate(size_t num_bytes,
// Loop through all possible copy lengths at this position.
size_t len = min_len;
for (size_t j = 0; j < num_matches[i]; ++j) {
BackwardMatch match = matches[cur_match_pos + j];
for (size_t j = 0; j < num_matches; ++j) {
BackwardMatch match = matches[j];
size_t dist = match.distance;
bool is_dictionary_match = dist > max_distance;
// We already tried all possible last distance matches, so we can use
@ -410,60 +397,62 @@ void ZopfliIterate(size_t num_bytes,
}
for (; len <= max_len; ++len) {
size_t len_code = is_dictionary_match ? match.length_code() : len;
const size_t inslen = i - start;
double cmd_cost = model.GetCommandCost(dist_code, len_code, inslen);
double cost = start_costdiff + cmd_cost + model.GetLiteralCosts(0, i);
if (cost < nodes[i + len].cost) {
UpdateZopfliNode(&nodes[0], i, start, len, len_code, dist,
dist_code, max_distance, dist_cache2, cost);
const size_t inslen = pos - start;
float cmd_cost = model->GetCommandCost(dist_code, len_code, inslen);
float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos);
if (cost < nodes[pos + len].cost) {
UpdateZopfliNode(&nodes[0], pos, start, len, len_code, dist, 0, cost);
}
}
}
}
}
cur_match_pos += num_matches[i];
// The zopflification can be too slow in case of very long lengths, so in
// such case skip it all, it does not cost a lot of compression ratio.
if (num_matches[i] == 1 &&
matches[cur_match_pos - 1].length() > kMaxZopfliLen) {
i += matches[cur_match_pos - 1].length() - 1;
queue.Clear();
}
}
std::vector<uint32_t> backwards;
void ComputeShortestPathFromNodes(size_t num_bytes,
const ZopfliNode* nodes,
std::vector<uint32_t>* path) {
std::vector<uint32_t> backwards(num_bytes / 2 + 1);
size_t index = num_bytes;
while (nodes[index].cost == kInfinity) --index;
size_t num_commands = 0;
while (index != 0) {
size_t len = nodes[index].length + nodes[index].insert_length;
backwards.push_back(static_cast<uint32_t>(len));
size_t len = nodes[index].command_length();
backwards[num_commands++] = static_cast<uint32_t>(len);
index -= len;
}
std::vector<uint32_t> path;
for (size_t i = backwards.size(); i > 0; i--) {
path.push_back(backwards[i - 1]);
path->resize(num_commands);
for (size_t i = num_commands, j = 0; i > 0; --i, ++j) {
(*path)[j] = backwards[i - 1];
}
}
void ZopfliCreateCommands(const size_t num_bytes,
const size_t block_start,
const size_t max_backward_limit,
const std::vector<uint32_t>& path,
const ZopfliNode* nodes,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_literals) {
size_t pos = 0;
for (size_t i = 0; i < path.size(); i++) {
const ZopfliNode& next = nodes[pos + path[i]];
size_t copy_length = next.length;
size_t copy_length = next.copy_length();
size_t insert_length = next.insert_length;
pos += insert_length;
if (i == 0) {
insert_length += *last_insert_len;
*last_insert_len = 0;
}
size_t distance = next.distance;
size_t len_code = next.length_code;
size_t max_distance = std::min(position + pos, max_backward_limit);
size_t distance = next.copy_distance();
size_t len_code = next.length_code();
size_t max_distance = std::min(block_start + pos, max_backward_limit);
bool is_dictionary = (distance > max_distance);
size_t dist_code = next.distance_code;
size_t dist_code = next.distance_code();
Command cmd(insert_length, copy_length, len_code, dist_code);
*commands++ = cmd;
commands[i] = cmd;
if (!is_dictionary && dist_code > 0) {
dist_cache[3] = dist_cache[2];
@ -473,11 +462,85 @@ void ZopfliIterate(size_t num_bytes,
}
*num_literals += insert_length;
insert_length = 0;
pos += copy_length;
}
*last_insert_len += num_bytes - pos;
*num_commands += static_cast<size_t>(commands - orig_commands);
}
void ZopfliIterate(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
const int* dist_cache,
const ZopfliCostModel& model,
const std::vector<uint32_t>& num_matches,
const std::vector<BackwardMatch>& matches,
ZopfliNode* nodes,
std::vector<uint32_t>* path) {
nodes[0].length = 0;
nodes[0].cost = 0;
StartPosQueue queue(3);
size_t cur_match_pos = 0;
for (size_t i = 0; i + 3 < num_bytes; i++) {
UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
max_backward_limit, dist_cache, num_matches[i],
&matches[cur_match_pos], &model, &queue, &nodes[0]);
cur_match_pos += num_matches[i];
// The zopflification can be too slow in case of very long lengths, so in
// such case skip it all, it does not cost a lot of compression ratio.
if (num_matches[i] == 1 &&
matches[cur_match_pos - 1].length() > kMaxZopfliLen) {
i += matches[cur_match_pos - 1].length() - 1;
queue.Clear();
}
}
ComputeShortestPathFromNodes(num_bytes, &nodes[0], path);
}
// Computes the shortest path of commands for the block of |num_bytes| bytes
// starting at |position|, looking matches up in |hasher| on the fly and
// pricing them with a cost model built from literal cost estimates.
//
// On return, nodes[0..num_bytes] has been filled in by UpdateNodes and |path|
// holds the command lengths produced by ComputeShortestPathFromNodes.
void ZopfliComputeShortestPath(size_t num_bytes,
                               size_t position,
                               const uint8_t* ringbuffer,
                               size_t ringbuffer_mask,
                               const size_t max_backward_limit,
                               const int* dist_cache,
                               Hashers::H10* hasher,
                               ZopfliNode* nodes,
                               std::vector<uint32_t>* path) {
  // The block start is reachable at zero cost with no preceding command.
  nodes[0].length = 0;
  nodes[0].cost = 0;
  {
    // The cost model lives in its own scope: it is destroyed (and its buffers
    // released) before the path is extracted below, and it cannot leak if
    // anything inside the scope throws.
    ZopfliCostModel model;
    model.SetFromLiteralCosts(num_bytes, position,
                              ringbuffer, ringbuffer_mask);
    StartPosQueue queue(3);
    BackwardMatch matches[Hashers::H10::kMaxNumMatches];
    for (size_t i = 0; i + 3 < num_bytes; i++) {
      const size_t max_distance = std::min(position + i, max_backward_limit);
      size_t num_matches = hasher->FindAllMatches(
          ringbuffer, ringbuffer_mask, position + i, num_bytes - i,
          max_distance, matches);
      // If the last match found is very long, keep only that one.
      if (num_matches > 0 &&
          matches[num_matches - 1].length() > kMaxZopfliLen) {
        matches[0] = matches[num_matches - 1];
        num_matches = 1;
      }
      UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
                  max_backward_limit, dist_cache, num_matches, matches,
                  &model, &queue, nodes);
      if (num_matches == 1 && matches[0].length() > kMaxZopfliLen) {
        // Skip the positions covered by the long match. Only positions with
        // fewer than 64 bytes of the match left, and with at least
        // kMaxTreeCompLength bytes remaining in the block, are stored in the
        // hasher.
        for (size_t j = 1; j < matches[0].length() && i + 4 < num_bytes; ++j) {
          ++i;
          if (matches[0].length() - j < 64 &&
              num_bytes - i >= kMaxTreeCompLength) {
            hasher->Store(ringbuffer, ringbuffer_mask, position + i);
          }
        }
        queue.Clear();
      }
    }
  }
  ComputeShortestPathFromNodes(num_bytes, nodes, path);
}
template<typename Hasher>
@ -527,7 +590,7 @@ void CreateBackwardReferences(size_t num_bytes,
size_t apply_random_heuristics = i + random_heuristics_window_size;
// Minimum score to accept a backward reference.
const int kMinScore = 4.0;
const double kMinScore = 4.0;
while (i + Hasher::kHashTypeLength - 1 < i_end) {
size_t max_length = i_end - i;
@ -649,16 +712,23 @@ void CreateBackwardReferences(size_t num_bytes,
if (zopflify) {
Hashers::H10* hasher = hashers->hash_h10;
hasher->Init(lgwin, position, num_bytes, is_last);
if (num_bytes >= 3 && position >= kMaxTreeCompLength) {
// Store the last `kMaxTreeCompLength - 1` positions in the hasher.
// These could not be calculated before, since they require knowledge
// of both the previous and the current block.
for (size_t i = position - kMaxTreeCompLength + 1; i < position; ++i) {
hasher->Store(ringbuffer, ringbuffer_mask, i, num_bytes + position - i);
}
}
hasher->StitchToPreviousBlock(num_bytes, position,
ringbuffer, ringbuffer_mask);
// Set maximum distance, see section 9.1. of the spec.
const size_t max_backward_limit = (1 << lgwin) - 16;
if (quality == 10) {
std::vector<ZopfliNode> nodes(num_bytes + 1);
std::vector<uint32_t> path;
ZopfliComputeShortestPath(num_bytes, position,
ringbuffer, ringbuffer_mask,
max_backward_limit, dist_cache, hasher,
&nodes[0], &path);
ZopfliCreateCommands(num_bytes, position, max_backward_limit, path,
&nodes[0], dist_cache, last_insert_len, commands,
num_literals);
*num_commands += path.size();
return;
}
std::vector<uint32_t> num_matches(num_bytes);
std::vector<BackwardMatch> matches(4 * num_bytes);
size_t cur_match_pos = 0;
@ -686,9 +756,8 @@ void CreateBackwardReferences(size_t num_bytes,
num_matches[i] = 1;
for (size_t j = 1; j < match_len; ++j) {
++i;
if (match_len - j < 64) {
hasher->Store(ringbuffer, ringbuffer_mask, position + i,
num_bytes - i);
if (match_len - j < 64 && num_bytes - i >= kMaxTreeCompLength) {
hasher->Store(ringbuffer, ringbuffer_mask, position + i);
}
num_matches[i] = 0;
}
@ -719,9 +788,15 @@ void CreateBackwardReferences(size_t num_bytes,
*num_literals = orig_num_literals;
*last_insert_len = orig_last_insert_len;
memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
std::vector<ZopfliNode> nodes(num_bytes + 1);
std::vector<uint32_t> path;
ZopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask,
max_backward_limit, model, num_matches, matches, dist_cache,
last_insert_len, commands, num_commands, num_literals);
max_backward_limit, dist_cache, model, num_matches, matches,
&nodes[0], &path);
ZopfliCreateCommands(num_bytes, position, max_backward_limit, path,
&nodes[0], dist_cache, last_insert_len, commands,
num_literals);
*num_commands += path.size();
}
return;
}

View File

@ -9,6 +9,8 @@
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
#include <vector>
#include "./hash.h"
#include "./command.h"
#include "./types.h"
@ -34,6 +36,81 @@ void CreateBackwardReferences(size_t num_bytes,
size_t* num_commands,
size_t* num_literals);
// Cost assigned to positions that have not been reached by any command.
static const float kInfinity = std::numeric_limits<float>::infinity();

// One entry of the shortest-path table: describes the command that ends at
// this byte position, with the copy length / length code and the distance /
// distance short code packed into two 32-bit fields.
struct ZopfliNode {
  // A fresh node has copy length 1, no distance, no inserts and infinite cost.
  ZopfliNode() : length(1), distance(0), insert_length(0), cost(kInfinity) {}

  // Copy length: the low 24 bits of |length|.
  uint32_t copy_length() const { return length & 0xffffff; }

  // Length code, rebuilt from the copy length and the modifier stored in the
  // high 8 bits of |length|.
  uint32_t length_code() const {
    return copy_length() + 9u - (length >> 24);
  }

  // Copy distance: the low 25 bits of |distance|.
  uint32_t copy_distance() const { return distance & 0x1ffffff; }

  // Distance code: if a short code was recorded (stored as short code + 1 in
  // the high 7 bits of |distance|) it is returned, otherwise the code is the
  // copy distance plus 15.
  uint32_t distance_code() const {
    const uint32_t stored_short_code = distance >> 25;
    if (stored_short_code != 0) {
      return stored_short_code - 1;
    }
    return copy_distance() + 15;
  }

  // Total number of bytes covered by the command: inserts plus copy.
  uint32_t command_length() const { return insert_length + copy_length(); }

  // Best length to get up to this byte (not including this byte itself).
  // The highest 8 bits are used to reconstruct the length code.
  uint32_t length;
  // Distance associated with the length.
  // The highest 7 bits contain the distance short code + 1 (or zero if there
  // is no short code).
  uint32_t distance;
  // Number of literal inserts before this copy.
  uint32_t insert_length;
  // Smallest cost to get to this byte from the beginning, as found so far.
  float cost;
};
// Computes the shortest path of commands from position to at most
// position + num_bytes.
//
// On return, path->size() is the number of commands found and path[i] is the
// length of the ith command (copy length plus insert length).
// Note that the sum of the lengths of all commands can be less than num_bytes.
//
// On return, the nodes[0..num_bytes] array will have the following
// "ZopfliNode array invariant":
// For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
// (1) nodes[i].copy_length() >= 2
// (2) nodes[i].command_length() <= i and
// (3) nodes[i - nodes[i].command_length()].cost < kInfinity
void ZopfliComputeShortestPath(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
const int* dist_cache,
Hashers::H10* hasher,
ZopfliNode* nodes,
std::vector<uint32_t>* path);
void ZopfliCreateCommands(const size_t num_bytes,
const size_t block_start,
const size_t max_backward_limit,
const std::vector<uint32_t>& path,
const ZopfliNode* nodes,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_literals);
} // namespace brotli
#endif // BROTLI_ENC_BACKWARD_REFERENCES_H_

View File

@ -48,38 +48,62 @@ static inline double BitsEntropy(const uint32_t *population, size_t size) {
return retval;
}
template<int kSize>
double PopulationCost(const Histogram<kSize>& histogram) {
static const double kOneSymbolHistogramCost = 12;
static const double kTwoSymbolHistogramCost = 20;
static const double kThreeSymbolHistogramCost = 28;
static const double kFourSymbolHistogramCost = 37;
if (histogram.total_count_ == 0) {
return 12;
return kOneSymbolHistogramCost;
}
int count = 0;
int s[5];
for (int i = 0; i < kSize; ++i) {
if (histogram.data_[i] > 0) {
s[count] = i;
++count;
if (count > 4) break;
}
}
if (count == 1) {
return 12;
return kOneSymbolHistogramCost;
}
if (count == 2) {
return static_cast<double>(20 + histogram.total_count_);
return (kTwoSymbolHistogramCost +
static_cast<double>(histogram.total_count_));
}
double bits = 0;
uint8_t depth_array[kSize] = { 0 };
if (count <= 4) {
// For very low symbol count we build the Huffman tree.
CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth_array);
for (int i = 0; i < kSize; ++i) {
bits += histogram.data_[i] * depth_array[i];
if (count == 3) {
const uint32_t histo0 = histogram.data_[s[0]];
const uint32_t histo1 = histogram.data_[s[1]];
const uint32_t histo2 = histogram.data_[s[2]];
const uint32_t histomax = std::max(histo0, std::max(histo1, histo2));
return (kThreeSymbolHistogramCost +
2 * (histo0 + histo1 + histo2) - histomax);
}
return count == 3 ? bits + 28 : bits + 37;
if (count == 4) {
uint32_t histo[4];
for (int i = 0; i < 4; ++i) {
histo[i] = histogram.data_[s[i]];
}
// Sort
for (int i = 0; i < 4; ++i) {
for (int j = i + 1; j < 4; ++j) {
if (histo[j] > histo[i]) {
std::swap(histo[j], histo[i]);
}
}
}
const uint32_t h23 = histo[2] + histo[3];
const uint32_t histomax = std::max(h23, histo[0]);
return (kFourSymbolHistogramCost +
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
}
// In this loop we compute the entropy of the histogram and simultaneously
// build a simplified histogram of the code length codes where we use the
// zero repeat code 17, but we don't use the non-zero repeat code 16.
double bits = 0;
size_t max_depth = 1;
uint32_t depth_histo[kCodeLengthCodes] = { 0 };
const double log2total = FastLog2(histogram.total_count_);

View File

@ -13,7 +13,7 @@
#include <algorithm>
#include <cstring>
#include <map>
#include <vector>
#include "./cluster.h"
#include "./command.h"
@ -70,20 +70,7 @@ void CopyLiteralsToByteArray(const Command* cmds,
memcpy(&(*literals)[pos], data + from_pos, insert_len);
pos += insert_len;
}
from_pos = (from_pos + insert_len + cmds[i].copy_len_) & mask;
}
}
void CopyCommandsToByteArray(const Command* cmds,
const size_t num_commands,
std::vector<uint16_t>* insert_and_copy_codes,
std::vector<uint16_t>* distance_prefixes) {
for (size_t i = 0; i < num_commands; ++i) {
const Command& cmd = cmds[i];
insert_and_copy_codes->push_back(cmd.cmd_prefix_);
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
distance_prefixes->push_back(cmd.dist_prefix_);
}
from_pos = (from_pos + insert_len + cmds[i].copy_len()) & mask;
}
}
@ -97,27 +84,23 @@ inline static unsigned int MyRand(unsigned int* seed) {
template<typename HistogramType, typename DataType>
void InitialEntropyCodes(const DataType* data, size_t length,
size_t literals_per_histogram,
size_t max_histograms,
size_t stride,
std::vector<HistogramType>* vec) {
size_t total_histograms = length / literals_per_histogram + 1;
if (total_histograms > max_histograms) {
total_histograms = max_histograms;
size_t num_histograms,
HistogramType* histograms) {
for (size_t i = 0; i < num_histograms; ++i) {
histograms[i].Clear();
}
unsigned int seed = 7;
size_t block_length = length / total_histograms;
for (size_t i = 0; i < total_histograms; ++i) {
size_t pos = length * i / total_histograms;
size_t block_length = length / num_histograms;
for (size_t i = 0; i < num_histograms; ++i) {
size_t pos = length * i / num_histograms;
if (i != 0) {
pos += MyRand(&seed) % block_length;
}
if (pos + stride >= length) {
pos = length - stride - 1;
}
HistogramType histo;
histo.Add(data + pos, stride);
vec->push_back(histo);
histograms[i].Add(data + pos, stride);
}
}
@ -140,16 +123,17 @@ void RandomSample(unsigned int* seed,
template<typename HistogramType, typename DataType>
void RefineEntropyCodes(const DataType* data, size_t length,
size_t stride,
std::vector<HistogramType>* vec) {
size_t num_histograms,
HistogramType* histograms) {
size_t iters =
kIterMulForRefining * length / stride + kMinItersForRefining;
unsigned int seed = 7;
iters = ((iters + vec->size() - 1) / vec->size()) * vec->size();
iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
for (size_t iter = 0; iter < iters; ++iter) {
HistogramType sample;
RandomSample(&seed, data, length, stride, &sample);
size_t ix = iter % vec->size();
(*vec)[ix].AddHistogram(sample);
size_t ix = iter % num_histograms;
histograms[ix].AddHistogram(sample);
}
}
@ -157,34 +141,40 @@ inline static double BitCost(size_t count) {
return count == 0 ? -2.0 : FastLog2(count);
}
// Assigns a block id from the range [0, num_histograms) to each data element
// in data[0..length) and fills in block_id[0..length) with the assigned values.
// Returns the number of blocks, i.e. one plus the number of block switches.
template<typename DataType, int kSize>
void FindBlocks(const DataType* data, const size_t length,
size_t FindBlocks(const DataType* data, const size_t length,
const double block_switch_bitcost,
const std::vector<Histogram<kSize> > &vec,
const size_t num_histograms,
const Histogram<kSize>* histograms,
double* insert_cost,
double* cost,
uint8_t* switch_signal,
uint8_t *block_id) {
if (vec.size() <= 1) {
if (num_histograms <= 1) {
for (size_t i = 0; i < length; ++i) {
block_id[i] = 0;
}
return;
return 1;
}
size_t vecsize = vec.size();
assert(vecsize <= 256);
double* insert_cost = new double[kSize * vecsize];
memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * vecsize);
for (size_t j = 0; j < vecsize; ++j) {
insert_cost[j] = FastLog2(static_cast<uint32_t>(vec[j].total_count_));
const size_t bitmaplen = (num_histograms + 7) >> 3;
assert(num_histograms <= 256);
memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * num_histograms);
for (size_t j = 0; j < num_histograms; ++j) {
insert_cost[j] = FastLog2(static_cast<uint32_t>(
histograms[j].total_count_));
}
for (size_t i = kSize; i != 0;) {
--i;
for (size_t j = 0; j < vecsize; ++j) {
insert_cost[i * vecsize + j] = insert_cost[j] - BitCost(vec[j].data_[i]);
for (size_t j = 0; j < num_histograms; ++j) {
insert_cost[i * num_histograms + j] =
insert_cost[j] - BitCost(histograms[j].data_[i]);
}
}
double *cost = new double[vecsize];
memset(cost, 0, sizeof(cost[0]) * vecsize);
bool* switch_signal = new bool[length * vecsize];
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * vecsize);
memset(cost, 0, sizeof(cost[0]) * num_histograms);
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
// After each iteration of this loop, cost[k] will contain the difference
// between the minimum cost of arriving at the current byte position using
// entropy code k, and the minimum cost of arriving at the current byte
@ -192,10 +182,10 @@ void FindBlocks(const DataType* data, const size_t length,
// reaches block switch cost, it means that when we trace back from the last
// position, we need to switch here.
for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
size_t ix = byte_ix * vecsize;
size_t insert_cost_ix = data[byte_ix] * vecsize;
size_t ix = byte_ix * bitmaplen;
size_t insert_cost_ix = data[byte_ix] * num_histograms;
double min_cost = 1e99;
for (size_t k = 0; k < vecsize; ++k) {
for (size_t k = 0; k < num_histograms; ++k) {
// We are coding the symbol in data[byte_ix] with entropy code k.
cost[k] += insert_cost[insert_cost_ix + k];
if (cost[k] < min_cost) {
@ -208,110 +198,200 @@ void FindBlocks(const DataType* data, const size_t length,
if (byte_ix < 2000) {
block_switch_cost *= 0.77 + 0.07 * static_cast<double>(byte_ix) / 2000;
}
for (size_t k = 0; k < vecsize; ++k) {
for (size_t k = 0; k < num_histograms; ++k) {
cost[k] -= min_cost;
if (cost[k] >= block_switch_cost) {
cost[k] = block_switch_cost;
switch_signal[ix + k] = true;
const uint8_t mask = static_cast<uint8_t>(1u << (k & 7));
assert((k >> 3) < bitmaplen);
switch_signal[ix + (k >> 3)] |= mask;
}
}
}
// Now trace back from the last position and switch at the marked places.
size_t byte_ix = length - 1;
size_t ix = byte_ix * vecsize;
size_t ix = byte_ix * bitmaplen;
uint8_t cur_id = block_id[byte_ix];
size_t num_blocks = 1;
while (byte_ix > 0) {
--byte_ix;
ix -= vecsize;
if (switch_signal[ix + cur_id]) {
ix -= bitmaplen;
const uint8_t mask = static_cast<uint8_t>(1u << (cur_id & 7));
assert((static_cast<size_t>(cur_id) >> 3) < bitmaplen);
if (switch_signal[ix + (cur_id >> 3)] & mask) {
if (cur_id != block_id[byte_ix]) {
cur_id = block_id[byte_ix];
++num_blocks;
}
}
block_id[byte_ix] = cur_id;
}
delete[] insert_cost;
delete[] cost;
delete[] switch_signal;
return num_blocks;
}
// Renumbers the ids in block_ids[0..length) in order of first appearance, so
// that the ids in use become 0, 1, 2, ... without gaps. The array is rewritten
// in place and the number of distinct ids is returned.
//
// In the array-based variant, new_id is caller-provided scratch space with
// room for num_histograms entries, and every incoming id is asserted to be
// below num_histograms.
//
// NOTE(review): this listing appears to interleave the pre-change lines
// (std::map based) with the post-change lines (uint16_t lookup table).
size_t RemapBlockIds(uint8_t* block_ids, const size_t length) {
std::map<uint8_t, uint8_t> new_id;
size_t next_id = 0;
size_t RemapBlockIds(uint8_t* block_ids, const size_t length,
uint16_t* new_id, const size_t num_histograms) {
// 256 cannot be a real id because ids are stored as uint8_t, so it can mark
// table slots that have not been assigned yet.
static const uint16_t kInvalidId = 256;
for (size_t i = 0; i < num_histograms; ++i) {
new_id[i] = kInvalidId;
}
uint16_t next_id = 0;
// First pass: hand out new ids in order of first occurrence.
for (size_t i = 0; i < length; ++i) {
if (new_id.find(block_ids[i]) == new_id.end()) {
new_id[block_ids[i]] = static_cast<uint8_t>(next_id);
++next_id;
assert(block_ids[i] < num_histograms);
if (new_id[block_ids[i]] == kInvalidId) {
new_id[block_ids[i]] = next_id++;
}
}
// Second pass: rewrite every element with its new id.
for (size_t i = 0; i < length; ++i) {
block_ids[i] = new_id[block_ids[i]];
block_ids[i] = static_cast<uint8_t>(new_id[block_ids[i]]);
assert(block_ids[i] < num_histograms);
}
assert(next_id <= num_histograms);
return next_id;
}
// Rebuilds one histogram per block id: every histogram is emptied and then
// data[i] is added to the histogram selected by block_ids[i], for all
// i in [0, length).
//
// NOTE(review): this listing appears to interleave the pre-change lines
// (which call RemapBlockIds and resize a std::vector here) with the
// post-change lines (which take a caller-sized array of num_histograms
// histograms and a const block_ids).
template<typename HistogramType, typename DataType>
void BuildBlockHistograms(const DataType* data, const size_t length,
uint8_t* block_ids,
std::vector<HistogramType>* histograms) {
size_t num_types = RemapBlockIds(block_ids, length);
assert(num_types <= 256);
histograms->clear();
histograms->resize(num_types);
const uint8_t* block_ids,
const size_t num_histograms,
HistogramType* histograms) {
// Reset all histograms before accumulating.
for (size_t i = 0; i < num_histograms; ++i) {
histograms[i].Clear();
}
for (size_t i = 0; i < length; ++i) {
(*histograms)[block_ids[i]].Add(data[i]);
histograms[block_ids[i]].Add(data[i]);
}
}
// Groups the blocks described by block_ids[0..length) into at most
// kMaxNumberOfBlockTypes block types and (in the variant taking `split`)
// writes the resulting block types and lengths into *split.
//
// A block is a maximal run of equal values in block_ids; num_blocks is the
// number of such runs and is asserted to match what the scan below finds.
//
// NOTE(review): this listing appears to interleave three things: the
// pre-change ClusterBlocks body (one histogram per block, a single
// ClusterHistograms call), the post-change body (batched HistogramCombine),
// and the pre-change BuildBlockSplit function whose work the post-change
// ClusterBlocks seems to take over.
template<typename HistogramType, typename DataType>
void ClusterBlocks(const DataType* data, const size_t length,
uint8_t* block_ids) {
std::vector<HistogramType> histograms;
std::vector<uint32_t> block_index(length);
uint32_t cur_idx = 0;
HistogramType cur_histogram;
for (size_t i = 0; i < length; ++i) {
bool block_boundary = (i + 1 == length || block_ids[i] != block_ids[i + 1]);
block_index[i] = cur_idx;
cur_histogram.Add(data[i]);
if (block_boundary) {
histograms.push_back(cur_histogram);
cur_histogram.Clear();
++cur_idx;
}
}
std::vector<HistogramType> clustered_histograms;
std::vector<uint32_t> histogram_symbols;
// Block ids need to fit in one byte.
const size_t num_blocks,
uint8_t* block_ids,
BlockSplit* split) {
static const size_t kMaxNumberOfBlockTypes = 256;
ClusterHistograms(histograms, 1, histograms.size(),
kMaxNumberOfBlockTypes,
&clustered_histograms,
&histogram_symbols);
// Blocks are processed kHistogramsPerBatch at a time; kClustersPerBatch is
// only used below to estimate how much space to reserve.
static const size_t kHistogramsPerBatch = 64;
static const size_t kClustersPerBatch = 16;
std::vector<uint32_t> histogram_symbols(num_blocks);
std::vector<uint32_t> block_lengths(num_blocks);
size_t block_idx = 0;
// Measure the length of every block (run of equal ids).
for (size_t i = 0; i < length; ++i) {
block_ids[i] = static_cast<uint8_t>(histogram_symbols[block_index[i]]);
assert(block_idx < num_blocks);
++block_lengths[block_idx];
if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
++block_idx;
}
}
assert(block_idx == num_blocks);
// Capacity hint for the reserve() calls below; the vectors still grow via
// push_back if the estimate is exceeded.
const size_t expected_num_clusters =
kClustersPerBatch *
(num_blocks + kHistogramsPerBatch - 1) / kHistogramsPerBatch;
std::vector<HistogramType> all_histograms;
std::vector<uint32_t> cluster_size;
all_histograms.reserve(expected_num_clusters);
cluster_size.reserve(expected_num_clusters);
size_t num_clusters = 0;
std::vector<HistogramType> histograms(
std::min(num_blocks, kHistogramsPerBatch));
size_t max_num_pairs = kHistogramsPerBatch * kHistogramsPerBatch / 2;
std::vector<HistogramPair> pairs(max_num_pairs + 1);
size_t pos = 0;
// Pass 1: build the histograms of one batch of blocks at a time and combine
// them within the batch, so only one batch of per-block histograms is held
// at once.
for (size_t i = 0; i < num_blocks; i += kHistogramsPerBatch) {
const size_t num_to_combine = std::min(num_blocks - i, kHistogramsPerBatch);
uint32_t sizes[kHistogramsPerBatch];
uint32_t clusters[kHistogramsPerBatch];
uint32_t symbols[kHistogramsPerBatch];
uint32_t remap[kHistogramsPerBatch];
for (size_t j = 0; j < num_to_combine; ++j) {
histograms[j].Clear();
for (size_t k = 0; k < block_lengths[i + j]; ++k) {
histograms[j].Add(data[pos++]);
}
histograms[j].bit_cost_ = PopulationCost(histograms[j]);
symbols[j] = clusters[j] = static_cast<uint32_t>(j);
sizes[j] = 1;
}
// presumably merges similar histograms in place and returns the number of
// surviving clusters, with clusters[] listing the survivors and symbols[]
// mapping each input to its survivor -- confirm against HistogramCombine.
size_t num_new_clusters = HistogramCombine(
&histograms[0], sizes, symbols, clusters, &pairs[0], num_to_combine,
num_to_combine, kHistogramsPerBatch, max_num_pairs);
// Append the batch's clusters to the global list; remap[] translates a
// batch-local cluster index into its position within this batch's output.
for (size_t j = 0; j < num_new_clusters; ++j) {
all_histograms.push_back(histograms[clusters[j]]);
cluster_size.push_back(sizes[clusters[j]]);
remap[clusters[j]] = static_cast<uint32_t>(j);
}
// Record, for every block of the batch, its index in all_histograms.
for (size_t j = 0; j < num_to_combine; ++j) {
histogram_symbols[i + j] =
static_cast<uint32_t>(num_clusters) + remap[symbols[j]];
}
num_clusters += num_new_clusters;
assert(num_clusters == cluster_size.size());
assert(num_clusters == all_histograms.size());
}
void BuildBlockSplit(const std::vector<uint8_t>& block_ids, BlockSplit* split) {
uint8_t cur_id = block_ids[0];
uint8_t max_type = cur_id;
uint32_t cur_length = 1;
for (size_t i = 1; i < block_ids.size(); ++i) {
uint8_t next_id = block_ids[i];
if (next_id != cur_id) {
split->types.push_back(cur_id);
split->lengths.push_back(cur_length);
max_type = std::max(max_type, next_id);
cur_id = next_id;
// Pass 2: combine the clusters collected from all batches, limited to
// kMaxNumberOfBlockTypes.
max_num_pairs =
std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
pairs.resize(max_num_pairs + 1);
std::vector<uint32_t> clusters(num_clusters);
for (size_t i = 0; i < num_clusters; ++i) {
clusters[i] = static_cast<uint32_t>(i);
}
size_t num_final_clusters =
HistogramCombine(&all_histograms[0], &cluster_size[0],
&histogram_symbols[0],
&clusters[0], &pairs[0], num_clusters,
num_blocks, kMaxNumberOfBlockTypes, max_num_pairs);
// Pass 3: re-derive each block's histogram and assign the block to the final
// cluster with the smallest HistogramBitCostDistance. new_index[] gives the
// chosen clusters dense ids in order of first use.
static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
std::vector<uint32_t> new_index(num_clusters, kInvalidIndex);
uint32_t next_index = 0;
pos = 0;
for (size_t i = 0; i < num_blocks; ++i) {
HistogramType histo;
for (size_t j = 0; j < block_lengths[i]; ++j) {
histo.Add(data[pos++]);
}
// Start from the cluster chosen for the previous block (or the block's own
// current symbol for the first block); the strict '<' below means another
// cluster only wins if it is strictly cheaper.
uint32_t best_out =
i == 0 ? histogram_symbols[0] : histogram_symbols[i - 1];
double best_bits = HistogramBitCostDistance(
histo, all_histograms[best_out]);
for (size_t j = 0; j < num_final_clusters; ++j) {
const double cur_bits = HistogramBitCostDistance(
histo, all_histograms[clusters[j]]);
if (cur_bits < best_bits) {
best_bits = cur_bits;
best_out = clusters[j];
}
}
histogram_symbols[i] = best_out;
if (new_index[best_out] == kInvalidIndex) {
new_index[best_out] = next_index++;
}
}
// Emit the split: neighbouring blocks that ended up in the same cluster are
// merged into one entry whose length is the sum of their lengths.
uint8_t max_type = 0;
uint32_t cur_length = 0;
block_idx = 0;
split->types.resize(num_blocks);
split->lengths.resize(num_blocks);
for (size_t i = 0; i < num_blocks; ++i) {
cur_length += block_lengths[i];
if (i + 1 == num_blocks ||
histogram_symbols[i] != histogram_symbols[i + 1]) {
const uint8_t id = static_cast<uint8_t>(new_index[histogram_symbols[i]]);
split->types[block_idx] = id;
split->lengths[block_idx] = cur_length;
max_type = std::max(max_type, id);
cur_length = 0;
++block_idx;
}
++cur_length;
}
split->types.push_back(cur_id);
split->lengths.push_back(cur_length);
// Shrink to the number of entries actually written after merging.
split->types.resize(block_idx);
split->lengths.resize(block_idx);
split->num_types = static_cast<size_t>(max_type) + 1;
}
template<typename HistogramType, typename DataType>
template<int kSize, typename DataType>
void SplitByteVector(const std::vector<DataType>& data,
const size_t literals_per_histogram,
const size_t max_histograms,
@ -327,27 +407,44 @@ void SplitByteVector(const std::vector<DataType>& data,
split->lengths.push_back(static_cast<uint32_t>(data.size()));
return;
}
std::vector<HistogramType> histograms;
size_t num_histograms = data.size() / literals_per_histogram + 1;
if (num_histograms > max_histograms) {
num_histograms = max_histograms;
}
Histogram<kSize>* histograms = new Histogram<kSize>[num_histograms];
// Find good entropy codes.
InitialEntropyCodes(&data[0], data.size(),
literals_per_histogram,
max_histograms,
sampling_stride_length,
&histograms);
num_histograms, histograms);
RefineEntropyCodes(&data[0], data.size(),
sampling_stride_length,
&histograms);
num_histograms, histograms);
// Find a good path through literals with the good entropy codes.
std::vector<uint8_t> block_ids(data.size());
size_t num_blocks;
const size_t bitmaplen = (num_histograms + 7) >> 3;
double* insert_cost = new double[kSize * num_histograms];
double *cost = new double[num_histograms];
uint8_t* switch_signal = new uint8_t[data.size() * bitmaplen];
uint16_t* new_id = new uint16_t[num_histograms];
for (size_t i = 0; i < 10; ++i) {
FindBlocks(&data[0], data.size(),
num_blocks = FindBlocks(&data[0], data.size(),
block_switch_cost,
histograms,
num_histograms, histograms,
insert_cost, cost, switch_signal,
&block_ids[0]);
BuildBlockHistograms(&data[0], data.size(), &block_ids[0], &histograms);
num_histograms = RemapBlockIds(&block_ids[0], data.size(),
new_id, num_histograms);
BuildBlockHistograms(&data[0], data.size(), &block_ids[0],
num_histograms, histograms);
}
ClusterBlocks<HistogramType>(&data[0], data.size(), &block_ids[0]);
BuildBlockSplit(block_ids, split);
delete[] insert_cost;
delete[] cost;
delete[] switch_signal;
delete[] new_id;
delete[] histograms;
ClusterBlocks<Histogram<kSize> >(&data[0], data.size(), num_blocks,
&block_ids[0], split);
}
void SplitBlock(const Command* cmds,
@ -358,32 +455,51 @@ void SplitBlock(const Command* cmds,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split) {
{
// Create a continuous array of literals.
std::vector<uint8_t> literals;
CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, &literals);
// Compute prefix codes for commands.
std::vector<uint16_t> insert_and_copy_codes;
std::vector<uint16_t> distance_prefixes;
CopyCommandsToByteArray(cmds, num_commands,
&insert_and_copy_codes,
&distance_prefixes);
SplitByteVector<HistogramLiteral>(
// Create the block split on the array of literals.
// Literal histograms have alphabet size 256.
SplitByteVector<256>(
literals,
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
kLiteralStrideLength, kLiteralBlockSwitchCost,
literal_split);
SplitByteVector<HistogramCommand>(
}
{
// Compute prefix codes for commands.
std::vector<uint16_t> insert_and_copy_codes(num_commands);
for (size_t i = 0; i < num_commands; ++i) {
insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
}
// Create the block split on the array of command prefixes.
SplitByteVector<kNumCommandPrefixes>(
insert_and_copy_codes,
kSymbolsPerCommandHistogram, kMaxCommandHistograms,
kCommandStrideLength, kCommandBlockSwitchCost,
insert_and_copy_split);
SplitByteVector<HistogramDistance>(
}
{
// Create a continuous array of distance prefixes.
std::vector<uint16_t> distance_prefixes(num_commands);
size_t pos = 0;
for (size_t i = 0; i < num_commands; ++i) {
const Command& cmd = cmds[i];
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
distance_prefixes[pos++] = cmd.dist_prefix_;
}
}
distance_prefixes.resize(pos);
// Create the block split on the array of distance prefixes.
SplitByteVector<kNumDistancePrefixes>(
distance_prefixes,
kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
kCommandStrideLength, kDistanceBlockSwitchCost,
dist_split);
}
}
} // namespace brotli

View File

@ -28,6 +28,12 @@ namespace brotli {
namespace {
// Number of HuffmanTree entries in the scratch buffer that StoreMetaBlock
// allocates once and passes down as `tree` to the Huffman-building helpers.
// NOTE(review): the 2 * n + 1 sizing presumably covers the leaf and internal
// nodes for the largest (command) alphabet plus sentinels -- confirm against
// CreateHuffmanTree.
static const size_t kMaxHuffmanTreeSize = 2 * kNumCommandPrefixes + 1;
// Context map alphabet has 256 context id symbols plus max 16 rle symbols.
static const size_t kContextMapAlphabetSize = 256 + 16;
// Block type alphabet has 256 block id symbols plus 2 special symbols.
static const size_t kBlockTypeAlphabetSize = 256 + 2;
// nibblesbits represents the 2 bits to encode MNIBBLES (0-3)
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
@ -45,6 +51,18 @@ void EncodeMlen(size_t length, uint64_t* bits,
*bits = length;
}
// Writes the extra bits of a command's insert length and copy length to the
// bit stream as one combined value: the insert extra bits occupy the low
// positions and the copy extra bits sit directly above them.
static inline void StoreCommandExtra(
    const Command& cmd, size_t* storage_ix, uint8_t* storage) {
  // Insert-length part: prefix code, number of extra bits and their value.
  const uint16_t insert_code = GetInsertLengthCode(cmd.insert_len_);
  const uint32_t insert_extra_count = GetInsertExtra(insert_code);
  const uint64_t insert_extra_value =
      cmd.insert_len_ - GetInsertBase(insert_code);

  // Copy-length part, based on the command's copy length code.
  const uint32_t copy_len_code = cmd.copy_len_code();
  const uint16_t copy_code = GetCopyLengthCode(copy_len_code);
  const uint64_t copy_extra_value = copy_len_code - GetCopyBase(copy_code);

  // Pack the copy bits above the insert bits and emit both with one write.
  const uint64_t packed = (copy_extra_value << insert_extra_count) |
                          insert_extra_value;
  WriteBits(insert_extra_count + GetCopyExtra(copy_code), packed,
            storage_ix, storage);
}
} // namespace
void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) {
@ -148,13 +166,14 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
}
void StoreHuffmanTreeToBitMask(
const std::vector<uint8_t> &huffman_tree,
const std::vector<uint8_t> &huffman_tree_extra_bits,
const size_t huffman_tree_size,
const uint8_t* huffman_tree,
const uint8_t* huffman_tree_extra_bits,
const uint8_t* code_length_bitdepth,
const std::vector<uint16_t> &code_length_bitdepth_symbols,
const uint16_t* code_length_bitdepth_symbols,
size_t * __restrict storage_ix,
uint8_t * __restrict storage) {
for (size_t i = 0; i < huffman_tree.size(); ++i) {
for (size_t i = 0; i < huffman_tree_size; ++i) {
size_t ix = huffman_tree[i];
WriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix],
storage_ix, storage);
@ -208,18 +227,21 @@ void StoreSimpleHuffmanTree(const uint8_t* depths,
// num = alphabet size
// depths = symbol depths
void StoreHuffmanTree(const uint8_t* depths, size_t num,
HuffmanTree* tree,
size_t *storage_ix, uint8_t *storage) {
// Write the Huffman tree into the brotli-representation.
std::vector<uint8_t> huffman_tree;
std::vector<uint8_t> huffman_tree_extra_bits;
// TODO: Consider allocating these from stack.
huffman_tree.reserve(256);
huffman_tree_extra_bits.reserve(256);
WriteHuffmanTree(depths, num, &huffman_tree, &huffman_tree_extra_bits);
// The command alphabet is the largest, so this allocation will fit all
// alphabets.
assert(num <= kNumCommandPrefixes);
uint8_t huffman_tree[kNumCommandPrefixes];
uint8_t huffman_tree_extra_bits[kNumCommandPrefixes];
size_t huffman_tree_size = 0;
WriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree,
huffman_tree_extra_bits);
// Calculate the statistics of the Huffman tree in brotli-representation.
uint32_t huffman_tree_histogram[kCodeLengthCodes] = { 0 };
for (size_t i = 0; i < huffman_tree.size(); ++i) {
for (size_t i = 0; i < huffman_tree_size; ++i) {
++huffman_tree_histogram[huffman_tree[i]];
}
@ -239,11 +261,10 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
// Calculate another Huffman tree to use for compressing both the
// earlier Huffman tree with.
// TODO: Consider allocating these from stack.
uint8_t code_length_bitdepth[kCodeLengthCodes] = { 0 };
std::vector<uint16_t> code_length_bitdepth_symbols(kCodeLengthCodes);
uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = { 0 };
CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes,
5, &code_length_bitdepth[0]);
5, tree, &code_length_bitdepth[0]);
ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes,
&code_length_bitdepth_symbols[0]);
@ -256,16 +277,17 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
}
// Store the real huffman tree now.
StoreHuffmanTreeToBitMask(huffman_tree,
StoreHuffmanTreeToBitMask(huffman_tree_size,
huffman_tree,
huffman_tree_extra_bits,
&code_length_bitdepth[0],
code_length_bitdepth_symbols,
storage_ix, storage);
}
void BuildAndStoreHuffmanTree(const uint32_t *histogram,
const size_t length,
HuffmanTree* tree,
uint8_t* depth,
uint16_t* bits,
size_t* storage_ix,
@ -296,16 +318,21 @@ void BuildAndStoreHuffmanTree(const uint32_t *histogram,
return;
}
CreateHuffmanTree(histogram, length, 15, depth);
CreateHuffmanTree(histogram, length, 15, tree, depth);
ConvertBitDepthsToSymbols(depth, length, bits);
if (count <= 4) {
StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage);
} else {
StoreHuffmanTree(depth, length, storage_ix, storage);
StoreHuffmanTree(depth, length, tree, storage_ix, storage);
}
}
// Ordering predicate for HuffmanTree entries: true when v0 has a strictly
// smaller total_count_ than v1.
static inline bool SortHuffmanTree(const HuffmanTree& v0,
                                   const HuffmanTree& v1) {
  const bool first_is_lighter = v0.total_count_ < v1.total_count_;
  return first_is_lighter;
}
void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
const size_t histogram_total,
const size_t max_bits,
@ -467,52 +494,58 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
}
}
// Linear search: returns the index of the first element of v equal to value,
// or the number of elements (v.size() / v_size) when value is not present.
//
// NOTE(review): this listing appears to interleave the pre-change lines
// (std::vector<uint32_t> argument) with the post-change lines (uint8_t array
// plus explicit size).
size_t IndexOf(const std::vector<uint32_t>& v, uint32_t value) {
size_t IndexOf(const uint8_t* v, size_t v_size, uint8_t value) {
size_t i = 0;
for (; i < v.size(); ++i) {
for (; i < v_size; ++i) {
if (v[i] == value) return i;
}
// Not found: i has reached the element count.
return i;
}
// Moves the element at position `index` to position 0, shifting the elements
// that were at [0, index) one slot towards the back. Elements after `index`
// are not touched.
//
// NOTE(review): this listing appears to interleave the pre-change lines
// (std::vector<uint32_t>* argument) with the post-change lines (uint8_t
// array).
void MoveToFront(std::vector<uint32_t>* v, size_t index) {
uint32_t value = (*v)[index];
void MoveToFront(uint8_t* v, size_t index) {
uint8_t value = v[index];
// Walk backwards so each element is copied before it is overwritten.
for (size_t i = index; i != 0; --i) {
(*v)[i] = (*v)[i - 1];
v[i] = v[i - 1];
}
(*v)[0] = value;
v[0] = value;
}
// Applies a move-to-front transform to a sequence of values: each input value
// is replaced by its current position in a list that starts out as
// 0, 1, ..., max_value, and the value is then moved to the front of that list.
//
// Pre-change variant: takes a vector and returns the transformed vector
// (an empty input is returned unchanged).
// Post-change variant: reads v_in[0..v_size) and writes v_out[0..v_size);
// nothing is written when v_size is 0. All input values are asserted to be
// below 256 so the list fits in a 256-entry uint8_t stack array.
//
// NOTE(review): this listing appears to interleave the lines of both
// variants.
std::vector<uint32_t> MoveToFrontTransform(const std::vector<uint32_t>& v) {
if (v.empty()) return v;
uint32_t max_value = *std::max_element(v.begin(), v.end());
std::vector<uint32_t> mtf(max_value + 1);
for (uint32_t i = 0; i <= max_value; ++i) mtf[i] = i;
std::vector<uint32_t> result(v.size());
for (size_t i = 0; i < v.size(); ++i) {
size_t index = IndexOf(mtf, v[i]);
assert(index < mtf.size());
result[i] = static_cast<uint32_t>(index);
MoveToFront(&mtf, index);
void MoveToFrontTransform(const uint32_t* __restrict v_in,
const size_t v_size,
uint32_t* v_out) {
if (v_size == 0) {
return;
}
// The list only needs entries for the values that can occur, i.e. 0..max.
uint32_t max_value = *std::max_element(v_in, v_in + v_size);
assert(max_value < 256u);
uint8_t mtf[256];
size_t mtf_size = max_value + 1;
// Start with the identity ordering.
for (uint32_t i = 0; i <= max_value; ++i) {
mtf[i] = static_cast<uint8_t>(i);
}
for (size_t i = 0; i < v_size; ++i) {
// Every value is <= max_value and the list is a permutation of
// 0..max_value, so the lookup is expected to succeed (asserted below).
size_t index = IndexOf(mtf, mtf_size, static_cast<uint8_t>(v_in[i]));
assert(index < mtf_size);
v_out[i] = static_cast<uint32_t>(index);
MoveToFront(mtf, index);
}
return result;
}
// Finds runs of zeros in v_in and replaces them with a prefix code of the run
// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
// shifted by *max_run_length_prefix. Will not create prefix codes bigger than
// the initial value of *max_run_length_prefix. The prefix code of run length L
// is simply Log2Floor(L) and the number of extra bits is the same as the
// prefix code.
void RunLengthCodeZeros(const std::vector<uint32_t>& v_in,
uint32_t* max_run_length_prefix,
std::vector<uint32_t>* v_out,
std::vector<uint32_t>* extra_bits) {
// Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of
// the run length plus extra bits (the lower 9 bits are the prefix code and the
// rest are the extra bits). Non-zero values in v[] are shifted by
// *max_run_length_prefix. Will not create prefix codes bigger than the initial
// value of *max_run_length_prefix. The prefix code of run length L is simply
// Log2Floor(L) and the number of extra bits is the same as the prefix code.
void RunLengthCodeZeros(const size_t in_size,
uint32_t* __restrict v,
size_t* __restrict out_size,
uint32_t* __restrict max_run_length_prefix) {
uint32_t max_reps = 0;
for (size_t i = 0; i < v_in.size();) {
for (; i < v_in.size() && v_in[i] != 0; ++i) ;
for (size_t i = 0; i < in_size;) {
for (; i < in_size && v[i] != 0; ++i) ;
uint32_t reps = 0;
for (; i < v_in.size() && v_in[i] == 0; ++i) {
for (; i < in_size && v[i] == 0; ++i) {
++reps;
}
max_reps = std::max(reps, max_reps);
@ -520,27 +553,31 @@ void RunLengthCodeZeros(const std::vector<uint32_t>& v_in,
uint32_t max_prefix = max_reps > 0 ? Log2FloorNonZero(max_reps) : 0;
max_prefix = std::min(max_prefix, *max_run_length_prefix);
*max_run_length_prefix = max_prefix;
for (size_t i = 0; i < v_in.size();) {
if (v_in[i] != 0) {
v_out->push_back(v_in[i] + *max_run_length_prefix);
extra_bits->push_back(0);
*out_size = 0;
for (size_t i = 0; i < in_size;) {
assert(*out_size <= i);
if (v[i] != 0) {
v[*out_size] = v[i] + *max_run_length_prefix;
++i;
++(*out_size);
} else {
uint32_t reps = 1;
for (size_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
for (size_t k = i + 1; k < in_size && v[k] == 0; ++k) {
++reps;
}
i += reps;
while (reps != 0) {
if (reps < (2u << max_prefix)) {
uint32_t run_length_prefix = Log2FloorNonZero(reps);
v_out->push_back(run_length_prefix);
extra_bits->push_back(reps - (1u << run_length_prefix));
const uint32_t extra_bits = reps - (1u << run_length_prefix);
v[*out_size] = run_length_prefix + (extra_bits << 9);
++(*out_size);
break;
} else {
v_out->push_back(max_prefix);
extra_bits->push_back((1u << max_prefix) - 1u);
const uint32_t extra_bits = (1u << max_prefix) - 1u;
v[*out_size] = max_prefix + (extra_bits << 9);
reps -= (2u << max_prefix) - 1u;
++(*out_size);
}
}
}
@ -549,6 +586,7 @@ void RunLengthCodeZeros(const std::vector<uint32_t>& v_in,
void EncodeContextMap(const std::vector<uint32_t>& context_map,
size_t num_clusters,
HuffmanTree* tree,
size_t* storage_ix, uint8_t* storage) {
StoreVarLenUint8(num_clusters - 1, storage_ix, storage);
@ -556,37 +594,40 @@ void EncodeContextMap(const std::vector<uint32_t>& context_map,
return;
}
std::vector<uint32_t> transformed_symbols = MoveToFrontTransform(context_map);
std::vector<uint32_t> rle_symbols;
std::vector<uint32_t> extra_bits;
uint32_t* rle_symbols = new uint32_t[context_map.size()];
MoveToFrontTransform(&context_map[0], context_map.size(), rle_symbols);
uint32_t max_run_length_prefix = 6;
RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
&rle_symbols, &extra_bits);
HistogramContextMap symbol_histogram;
for (size_t i = 0; i < rle_symbols.size(); ++i) {
symbol_histogram.Add(rle_symbols[i]);
size_t num_rle_symbols = 0;
RunLengthCodeZeros(context_map.size(), rle_symbols,
&num_rle_symbols, &max_run_length_prefix);
uint32_t histogram[kContextMapAlphabetSize];
memset(histogram, 0, sizeof(histogram));
static const int kSymbolBits = 9;
static const uint32_t kSymbolMask = (1u << kSymbolBits) - 1u;
for (size_t i = 0; i < num_rle_symbols; ++i) {
++histogram[rle_symbols[i] & kSymbolMask];
}
bool use_rle = max_run_length_prefix > 0;
WriteBits(1, use_rle, storage_ix, storage);
if (use_rle) {
WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
}
EntropyCodeContextMap symbol_code;
memset(symbol_code.depth_, 0, sizeof(symbol_code.depth_));
memset(symbol_code.bits_, 0, sizeof(symbol_code.bits_));
BuildAndStoreHuffmanTree(symbol_histogram.data_,
num_clusters + max_run_length_prefix,
symbol_code.depth_, symbol_code.bits_,
storage_ix, storage);
for (size_t i = 0; i < rle_symbols.size(); ++i) {
WriteBits(symbol_code.depth_[rle_symbols[i]],
symbol_code.bits_[rle_symbols[i]],
storage_ix, storage);
if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
uint8_t depths[kContextMapAlphabetSize];
uint16_t bits[kContextMapAlphabetSize];
memset(depths, 0, sizeof(depths));
memset(bits, 0, sizeof(bits));
BuildAndStoreHuffmanTree(histogram, num_clusters + max_run_length_prefix,
tree, depths, bits, storage_ix, storage);
for (size_t i = 0; i < num_rle_symbols; ++i) {
const uint32_t rle_symbol = rle_symbols[i] & kSymbolMask;
const uint32_t extra_bits_val = rle_symbols[i] >> kSymbolBits;
WriteBits(depths[rle_symbol], bits[rle_symbol], storage_ix, storage);
if (rle_symbol > 0 && rle_symbol <= max_run_length_prefix) {
WriteBits(rle_symbol, extra_bits_val, storage_ix, storage);
}
}
WriteBits(1, 1, storage_ix, storage); // use move-to-front
delete[] rle_symbols;
}
void StoreBlockSwitch(const BlockSplitCode& code,
@ -608,12 +649,15 @@ void StoreBlockSwitch(const BlockSplitCode& code,
void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
const std::vector<uint32_t>& lengths,
const size_t num_types,
HuffmanTree* tree,
BlockSplitCode* code,
size_t* storage_ix,
uint8_t* storage) {
const size_t num_blocks = types.size();
std::vector<uint32_t> type_histo(num_types + 2);
std::vector<uint32_t> length_histo(26);
uint32_t type_histo[kBlockTypeAlphabetSize];
uint32_t length_histo[kNumBlockLenPrefixes];
memset(type_histo, 0, (num_types + 2) * sizeof(type_histo[0]));
memset(length_histo, 0, sizeof(length_histo));
size_t last_type = 1;
size_t second_last_type = 0;
code->type_code.resize(num_blocks);
@ -622,8 +666,8 @@ void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
code->length_extra.resize(num_blocks);
code->type_depths.resize(num_types + 2);
code->type_bits.resize(num_types + 2);
code->length_depths.resize(26);
code->length_bits.resize(26);
memset(code->length_depths, 0, sizeof(code->length_depths));
memset(code->length_bits, 0, sizeof(code->length_bits));
for (size_t i = 0; i < num_blocks; ++i) {
size_t type = types[i];
size_t type_code = (type == last_type + 1 ? 1 :
@ -641,10 +685,10 @@ void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
}
StoreVarLenUint8(num_types - 1, storage_ix, storage);
if (num_types > 1) {
BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2,
BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, tree,
&code->type_depths[0], &code->type_bits[0],
storage_ix, storage);
BuildAndStoreHuffmanTree(&length_histo[0], 26,
BuildAndStoreHuffmanTree(&length_histo[0], kNumBlockLenPrefixes, tree,
&code->length_depths[0], &code->length_bits[0],
storage_ix, storage);
StoreBlockSwitch(*code, 0, storage_ix, storage);
@ -653,6 +697,7 @@ void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
void StoreTrivialContextMap(size_t num_types,
size_t context_bits,
HuffmanTree* tree,
size_t* storage_ix,
uint8_t* storage) {
StoreVarLenUint8(num_types - 1, storage_ix, storage);
@ -660,9 +705,12 @@ void StoreTrivialContextMap(size_t num_types,
size_t repeat_code = context_bits - 1u;
size_t repeat_bits = (1u << repeat_code) - 1u;
size_t alphabet_size = num_types + repeat_code;
std::vector<uint32_t> histogram(alphabet_size);
std::vector<uint8_t> depths(alphabet_size);
std::vector<uint16_t> bits(alphabet_size);
uint32_t histogram[kContextMapAlphabetSize];
uint8_t depths[kContextMapAlphabetSize];
uint16_t bits[kContextMapAlphabetSize];
memset(histogram, 0, alphabet_size * sizeof(histogram[0]));
memset(depths, 0, alphabet_size * sizeof(depths[0]));
memset(bits, 0, alphabet_size * sizeof(bits[0]));
// Write RLEMAX.
WriteBits(1, 1, storage_ix, storage);
WriteBits(4, repeat_code - 1, storage_ix, storage);
@ -671,7 +719,7 @@ void StoreTrivialContextMap(size_t num_types,
for (size_t i = context_bits; i < alphabet_size; ++i) {
histogram[i] = 1;
}
BuildAndStoreHuffmanTree(&histogram[0], alphabet_size,
BuildAndStoreHuffmanTree(&histogram[0], alphabet_size, tree,
&depths[0], &bits[0],
storage_ix, storage);
for (size_t i = 0; i < num_types; ++i) {
@ -702,11 +750,12 @@ class BlockEncoder {
// Creates entropy codes of block lengths and block types and stores them
// to the bit stream.
void BuildAndStoreBlockSwitchEntropyCodes(size_t* storage_ix,
void BuildAndStoreBlockSwitchEntropyCodes(HuffmanTree* tree,
size_t* storage_ix,
uint8_t* storage) {
BuildAndStoreBlockSplitCode(
block_types_, block_lengths_, num_block_types_,
&block_split_code_, storage_ix, storage);
tree, &block_split_code_, storage_ix, storage);
}
// Creates entropy codes for all block types and stores them to the bit
@ -714,12 +763,14 @@ class BlockEncoder {
template<int kSize>
void BuildAndStoreEntropyCodes(
const std::vector<Histogram<kSize> >& histograms,
HuffmanTree* tree,
size_t* storage_ix, uint8_t* storage) {
depths_.resize(histograms.size() * alphabet_size_);
bits_.resize(histograms.size() * alphabet_size_);
for (size_t i = 0; i < histograms.size(); ++i) {
size_t ix = i * alphabet_size_;
BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size_,
tree,
&depths_[ix], &bits_[ix],
storage_ix, storage);
}
@ -798,6 +849,8 @@ void StoreMetaBlock(const uint8_t* input,
kNumDistanceShortCodes + num_direct_distance_codes +
(48u << distance_postfix_bits);
HuffmanTree* tree = static_cast<HuffmanTree*>(
malloc(kMaxHuffmanTreeSize * sizeof(HuffmanTree)));
BlockEncoder literal_enc(256,
mb.literal_split.num_types,
mb.literal_split.types,
@ -811,9 +864,9 @@ void StoreMetaBlock(const uint8_t* input,
mb.distance_split.types,
mb.distance_split.lengths);
literal_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage);
command_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage);
distance_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage);
literal_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
command_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
distance_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
WriteBits(2, distance_postfix_bits, storage_ix, storage);
WriteBits(4, num_direct_distance_codes >> distance_postfix_bits,
@ -824,37 +877,36 @@ void StoreMetaBlock(const uint8_t* input,
size_t num_literal_histograms = mb.literal_histograms.size();
if (mb.literal_context_map.empty()) {
StoreTrivialContextMap(num_literal_histograms, kLiteralContextBits,
StoreTrivialContextMap(num_literal_histograms, kLiteralContextBits, tree,
storage_ix, storage);
} else {
EncodeContextMap(mb.literal_context_map, num_literal_histograms,
EncodeContextMap(mb.literal_context_map, num_literal_histograms, tree,
storage_ix, storage);
}
size_t num_dist_histograms = mb.distance_histograms.size();
if (mb.distance_context_map.empty()) {
StoreTrivialContextMap(num_dist_histograms, kDistanceContextBits,
StoreTrivialContextMap(num_dist_histograms, kDistanceContextBits, tree,
storage_ix, storage);
} else {
EncodeContextMap(mb.distance_context_map, num_dist_histograms,
EncodeContextMap(mb.distance_context_map, num_dist_histograms, tree,
storage_ix, storage);
}
literal_enc.BuildAndStoreEntropyCodes(mb.literal_histograms,
literal_enc.BuildAndStoreEntropyCodes(mb.literal_histograms, tree,
storage_ix, storage);
command_enc.BuildAndStoreEntropyCodes(mb.command_histograms,
command_enc.BuildAndStoreEntropyCodes(mb.command_histograms, tree,
storage_ix, storage);
distance_enc.BuildAndStoreEntropyCodes(mb.distance_histograms,
distance_enc.BuildAndStoreEntropyCodes(mb.distance_histograms, tree,
storage_ix, storage);
free(tree);
size_t pos = start_pos;
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
size_t cmd_code = cmd.cmd_prefix_;
uint32_t lennumextra = static_cast<uint32_t>(cmd.cmd_extra_ >> 48);
uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffUL;
command_enc.StoreSymbol(cmd_code, storage_ix, storage);
WriteBits(lennumextra, lenextra, storage_ix, storage);
StoreCommandExtra(cmd, storage_ix, storage);
if (mb.literal_context_map.empty()) {
for (size_t j = cmd.insert_len_; j != 0; --j) {
literal_enc.StoreSymbol(input[pos & mask], storage_ix, storage);
@ -871,8 +923,8 @@ void StoreMetaBlock(const uint8_t* input,
++pos;
}
}
pos += cmd.copy_len_;
if (cmd.copy_len_ > 0) {
pos += cmd.copy_len();
if (cmd.copy_len()) {
prev_byte2 = input[(pos - 2) & mask];
prev_byte = input[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
@ -911,8 +963,8 @@ void BuildHistograms(const uint8_t* input,
lit_histo->Add(input[pos & mask]);
++pos;
}
pos += cmd.copy_len_;
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
pos += cmd.copy_len();
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
dist_histo->Add(cmd.dist_prefix_);
}
}
@ -935,17 +987,15 @@ void StoreDataWithHuffmanCodes(const uint8_t* input,
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
const size_t cmd_code = cmd.cmd_prefix_;
const uint32_t lennumextra = static_cast<uint32_t>(cmd.cmd_extra_ >> 48);
const uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffUL;
WriteBits(cmd_depth[cmd_code], cmd_bits[cmd_code], storage_ix, storage);
WriteBits(lennumextra, lenextra, storage_ix, storage);
StoreCommandExtra(cmd, storage_ix, storage);
for (size_t j = cmd.insert_len_; j != 0; --j) {
const uint8_t literal = input[pos & mask];
WriteBits(lit_depth[literal], lit_bits[literal], storage_ix, storage);
++pos;
}
pos += cmd.copy_len_;
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
pos += cmd.copy_len();
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
const size_t dist_code = cmd.dist_prefix_;
const uint32_t distnumextra = cmd.dist_extra_ >> 24;
const uint32_t distextra = cmd.dist_extra_ & 0xffffff;
@ -983,15 +1033,18 @@ void StoreMetaBlockTrivial(const uint8_t* input,
std::vector<uint8_t> dist_depth(64);
std::vector<uint16_t> dist_bits(64);
BuildAndStoreHuffmanTree(&lit_histo.data_[0], 256,
HuffmanTree* tree = static_cast<HuffmanTree*>(
malloc(kMaxHuffmanTreeSize * sizeof(HuffmanTree)));
BuildAndStoreHuffmanTree(&lit_histo.data_[0], 256, tree,
&lit_depth[0], &lit_bits[0],
storage_ix, storage);
BuildAndStoreHuffmanTree(&cmd_histo.data_[0], kNumCommandPrefixes,
BuildAndStoreHuffmanTree(&cmd_histo.data_[0], kNumCommandPrefixes, tree,
&cmd_depth[0], &cmd_bits[0],
storage_ix, storage);
BuildAndStoreHuffmanTree(&dist_histo.data_[0], 64,
BuildAndStoreHuffmanTree(&dist_histo.data_[0], 64, tree,
&dist_depth[0], &dist_bits[0],
storage_ix, storage);
free(tree);
StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
n_commands, &lit_depth[0], &lit_bits[0],
&cmd_depth[0], &cmd_bits[0],
@ -1026,7 +1079,7 @@ void StoreMetaBlockFast(const uint8_t* input,
++pos;
}
num_literals += cmd.insert_len_;
pos += cmd.copy_len_;
pos += cmd.copy_len();
}
uint8_t lit_depth[256] = { 0 };
uint16_t lit_bits[256] = { 0 };

View File

@ -48,6 +48,7 @@ void StoreUncompressedMetaBlockHeader(size_t length,
// Stores a context map where the histogram type is always the block type.
void StoreTrivialContextMap(size_t num_types,
size_t context_bits,
HuffmanTree* tree,
size_t* storage_ix,
uint8_t* storage);
@ -57,13 +58,14 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
size_t *storage_ix,
uint8_t *storage);
void StoreHuffmanTree(const uint8_t* depths, size_t num,
void StoreHuffmanTree(const uint8_t* depths, size_t num, HuffmanTree* tree,
size_t *storage_ix, uint8_t *storage);
// Builds a Huffman tree from histogram[0:length] into depth[0:length] and
// bits[0:length] and stores the encoded tree to the bit stream.
void BuildAndStoreHuffmanTree(const uint32_t *histogram,
const size_t length,
HuffmanTree* tree,
uint8_t* depth,
uint16_t* bits,
size_t* storage_ix,
@ -81,6 +83,7 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
// histogram ids is given by num_clusters.
void EncodeContextMap(const std::vector<uint32_t>& context_map,
size_t num_clusters,
HuffmanTree* tree,
size_t* storage_ix, uint8_t* storage);
// Data structure that stores everything that is needed to encode each block
@ -92,8 +95,8 @@ struct BlockSplitCode {
std::vector<uint32_t> length_extra;
std::vector<uint8_t> type_depths;
std::vector<uint16_t> type_bits;
std::vector<uint8_t> length_depths;
std::vector<uint16_t> length_bits;
uint8_t length_depths[kNumBlockLenPrefixes];
uint16_t length_bits[kNumBlockLenPrefixes];
};
// Builds a BlockSplitCode data structure from the block split given by the

View File

@ -11,7 +11,6 @@
#include <math.h>
#include <algorithm>
#include <map>
#include <utility>
#include <vector>
@ -52,7 +51,9 @@ template<typename HistogramType>
void CompareAndPushToQueue(const HistogramType* out,
const uint32_t* cluster_size,
uint32_t idx1, uint32_t idx2,
std::vector<HistogramPair>* pairs) {
size_t max_num_pairs,
HistogramPair* pairs,
size_t* num_pairs) {
if (idx1 == idx2) {
return;
}
@ -76,8 +77,8 @@ void CompareAndPushToQueue(const HistogramType* out,
p.cost_combo = out[idx1].bit_cost_;
store_pair = true;
} else {
double threshold = pairs->empty() ? 1e99 :
std::max(0.0, (*pairs)[0].cost_diff);
double threshold = *num_pairs == 0 ? 1e99 :
std::max(0.0, pairs[0].cost_diff);
HistogramType combo = out[idx1];
combo.AddHistogram(out[idx2]);
double cost_combo = PopulationCost(combo);
@ -88,42 +89,44 @@ void CompareAndPushToQueue(const HistogramType* out,
}
if (store_pair) {
p.cost_diff += p.cost_combo;
if (!pairs->empty() && (pairs->front() < p)) {
if (*num_pairs > 0 && pairs[0] < p) {
// Replace the top of the queue if needed.
pairs->push_back(pairs->front());
pairs->front() = p;
} else {
pairs->push_back(p);
if (*num_pairs < max_num_pairs) {
pairs[*num_pairs] = pairs[0];
++(*num_pairs);
}
pairs[0] = p;
} else if (*num_pairs < max_num_pairs) {
pairs[*num_pairs] = p;
++(*num_pairs);
}
}
}
template<typename HistogramType>
void HistogramCombine(HistogramType* out,
size_t HistogramCombine(HistogramType* out,
uint32_t* cluster_size,
uint32_t* symbols,
uint32_t* clusters,
HistogramPair* pairs,
size_t num_clusters,
size_t symbols_size,
size_t max_clusters) {
size_t max_clusters,
size_t max_num_pairs) {
double cost_diff_threshold = 0.0;
size_t min_cluster_size = 1;
// Uniquify the list of symbols.
std::vector<uint32_t> clusters(symbols, symbols + symbols_size);
std::sort(clusters.begin(), clusters.end());
std::vector<uint32_t>::iterator last =
std::unique(clusters.begin(), clusters.end());
clusters.resize(static_cast<size_t>(last - clusters.begin()));
// We maintain a heap of histogram pairs, ordered by the bit cost reduction.
std::vector<HistogramPair> pairs;
for (size_t idx1 = 0; idx1 < clusters.size(); ++idx1) {
for (size_t idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) {
// We maintain a vector of histogram pairs, with the property that the pair
// with the maximum bit cost reduction is the first.
size_t num_pairs = 0;
for (size_t idx1 = 0; idx1 < num_clusters; ++idx1) {
for (size_t idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
CompareAndPushToQueue(out, cluster_size, clusters[idx1], clusters[idx2],
&pairs);
max_num_pairs, &pairs[0], &num_pairs);
}
}
while (clusters.size() > min_cluster_size) {
while (num_clusters > min_cluster_size) {
if (pairs[0].cost_diff >= cost_diff_threshold) {
cost_diff_threshold = 1e99;
min_cluster_size = max_clusters;
@ -140,40 +143,42 @@ void HistogramCombine(HistogramType* out,
symbols[i] = best_idx1;
}
}
for (std::vector<uint32_t>::iterator cluster = clusters.begin();
cluster != clusters.end(); ++cluster) {
if (*cluster >= best_idx2) {
clusters.erase(cluster);
for (size_t i = 0; i < num_clusters; ++i) {
if (clusters[i] == best_idx2) {
memmove(&clusters[i], &clusters[i + 1],
(num_clusters - i - 1) * sizeof(clusters[0]));
break;
}
}
--num_clusters;
// Remove pairs intersecting the just combined best pair.
size_t copy_to_idx = 0;
for (size_t i = 0; i < pairs.size(); ++i) {
for (size_t i = 0; i < num_pairs; ++i) {
HistogramPair& p = pairs[i];
if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
p.idx1 == best_idx2 || p.idx2 == best_idx2) {
// Remove invalid pair from the queue.
continue;
}
if (pairs.front() < p) {
if (pairs[0] < p) {
// Replace the top of the queue if needed.
HistogramPair front = pairs.front();
pairs.front() = p;
HistogramPair front = pairs[0];
pairs[0] = p;
pairs[copy_to_idx] = front;
} else {
pairs[copy_to_idx] = p;
}
++copy_to_idx;
}
pairs.resize(copy_to_idx);
num_pairs = copy_to_idx;
// Push new pairs formed with the combined histogram to the heap.
for (size_t i = 0; i < clusters.size(); ++i) {
CompareAndPushToQueue(out, cluster_size, best_idx1, clusters[i], &pairs);
for (size_t i = 0; i < num_clusters; ++i) {
CompareAndPushToQueue(out, cluster_size, best_idx1, clusters[i],
max_num_pairs, &pairs[0], &num_pairs);
}
}
return num_clusters;
}
// -----------------------------------------------------------------------------
@ -192,61 +197,69 @@ double HistogramBitCostDistance(const HistogramType& histogram,
}
// Find the best 'out' histogram for each of the 'in' histograms.
// When called, clusters[0..num_clusters) contains the unique values from
// symbols[0..in_size), but this property is not preserved in this function.
// Note: we assume that out[]->bit_cost_ is already up-to-date.
template<typename HistogramType>
void HistogramRemap(const HistogramType* in, size_t in_size,
const uint32_t* clusters, size_t num_clusters,
HistogramType* out, uint32_t* symbols) {
// Uniquify the list of symbols.
std::vector<uint32_t> all_symbols(symbols, symbols + in_size);
std::sort(all_symbols.begin(), all_symbols.end());
std::vector<uint32_t>::iterator last =
std::unique(all_symbols.begin(), all_symbols.end());
all_symbols.resize(static_cast<size_t>(last - all_symbols.begin()));
for (size_t i = 0; i < in_size; ++i) {
uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
for (std::vector<uint32_t>::const_iterator k = all_symbols.begin();
k != all_symbols.end(); ++k) {
const double cur_bits = HistogramBitCostDistance(in[i], out[*k]);
for (size_t j = 0; j < num_clusters; ++j) {
const double cur_bits = HistogramBitCostDistance(in[i], out[clusters[j]]);
if (cur_bits < best_bits) {
best_bits = cur_bits;
best_out = *k;
best_out = clusters[j];
}
}
symbols[i] = best_out;
}
// Recompute each out based on raw and symbols.
for (std::vector<uint32_t>::const_iterator k = all_symbols.begin();
k != all_symbols.end(); ++k) {
out[*k].Clear();
for (size_t j = 0; j < num_clusters; ++j) {
out[clusters[j]].Clear();
}
for (size_t i = 0; i < in_size; ++i) {
out[symbols[i]].AddHistogram(in[i]);
}
}
// Reorder histograms in *out so that the new symbols in *symbols come in
// Reorders elements of the out[0..length) array and changes values in
// symbols[0..length) array in the following way:
// * when called, symbols[] contains indexes into out[], and has N unique
// values (possibly N < length)
// * on return, symbols'[i] = f(symbols[i]) and
// out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
// where f is a bijection between the range of symbols[] and [0..N), and
// the first occurrences of values in symbols'[i] come in consecutive
// increasing order.
// Returns N, the number of unique values in symbols[].
template<typename HistogramType>
void HistogramReindex(std::vector<HistogramType>* out,
std::vector<uint32_t>* symbols) {
std::vector<HistogramType> tmp(*out);
std::map<uint32_t, uint32_t> new_index;
size_t HistogramReindex(HistogramType* out, uint32_t* symbols, size_t length) {
static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
std::vector<uint32_t> new_index(length, kInvalidIndex);
uint32_t next_index = 0;
for (size_t i = 0; i < symbols->size(); ++i) {
if (new_index.find((*symbols)[i]) == new_index.end()) {
new_index[(*symbols)[i]] = next_index;
(*out)[next_index] = tmp[(*symbols)[i]];
for (size_t i = 0; i < length; ++i) {
if (new_index[symbols[i]] == kInvalidIndex) {
new_index[symbols[i]] = next_index;
++next_index;
}
}
out->resize(next_index);
for (size_t i = 0; i < symbols->size(); ++i) {
(*symbols)[i] = new_index[(*symbols)[i]];
std::vector<HistogramType> tmp(next_index);
next_index = 0;
for (size_t i = 0; i < length; ++i) {
if (new_index[symbols[i]] == next_index) {
tmp[next_index] = out[symbols[i]];
++next_index;
}
symbols[i] = new_index[symbols[i]];
}
for (size_t i = 0; i < next_index; ++i) {
out[i] = tmp[i];
}
return next_index;
}
// Clusters similar histograms in 'in' together, the selected histograms are
@ -261,6 +274,8 @@ void ClusterHistograms(const std::vector<HistogramType>& in,
const size_t in_size = num_contexts * num_blocks;
assert(in_size == in.size());
std::vector<uint32_t> cluster_size(in_size, 1);
std::vector<uint32_t> clusters(in_size);
size_t num_clusters = 0;
out->resize(in_size);
histogram_symbols->resize(in_size);
for (size_t i = 0; i < in_size; ++i) {
@ -269,29 +284,47 @@ void ClusterHistograms(const std::vector<HistogramType>& in,
(*histogram_symbols)[i] = static_cast<uint32_t>(i);
}
const size_t max_input_histograms = 64;
// For the first pass of clustering, we allow all pairs.
size_t max_num_pairs = max_input_histograms * max_input_histograms / 2;
std::vector<HistogramPair> pairs(max_num_pairs + 1);
for (size_t i = 0; i < in_size; i += max_input_histograms) {
size_t num_to_combine = std::min(in_size - i, max_input_histograms);
HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[i], num_to_combine,
max_histograms);
for (size_t j = 0; j < num_to_combine; ++j) {
clusters[num_clusters + j] = static_cast<uint32_t>(i + j);
}
size_t num_new_clusters =
HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[i],
&clusters[num_clusters], &pairs[0],
num_to_combine, num_to_combine,
max_histograms, max_num_pairs);
num_clusters += num_new_clusters;
}
// For the second pass, we limit the total number of histogram pairs.
// After this limit is reached, we only keep searching for the best pair.
max_num_pairs =
std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
pairs.resize(max_num_pairs + 1);
// Collapse similar histograms.
HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[0], in_size,
max_histograms);
num_clusters = HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[0], &clusters[0],
&pairs[0], num_clusters, in_size,
max_histograms, max_num_pairs);
// Find the optimal map from original histograms to the final ones.
HistogramRemap(&in[0], in_size, &(*out)[0], &(*histogram_symbols)[0]);
HistogramRemap(&in[0], in_size, &clusters[0], num_clusters,
&(*out)[0], &(*histogram_symbols)[0]);
// Convert the context map to a canonical form.
HistogramReindex(out, histogram_symbols);
size_t num_histograms =
HistogramReindex(&(*out)[0], &(*histogram_symbols)[0], in_size);
out->resize(num_histograms);
}
} // namespace brotli
#endif // BROTLI_ENC_CLUSTER_H_

View File

@ -73,35 +73,47 @@ static inline uint16_t CombineLengthCodes(
static inline void GetLengthCode(size_t insertlen, size_t copylen,
bool use_last_distance,
uint16_t* code, uint64_t* extra) {
uint16_t* code) {
uint16_t inscode = GetInsertLengthCode(insertlen);
uint16_t copycode = GetCopyLengthCode(copylen);
uint64_t insnumextra = kInsExtra[inscode];
uint64_t numextra = insnumextra + kCopyExtra[copycode];
uint64_t insextraval = insertlen - kInsBase[inscode];
uint64_t copyextraval = copylen - kCopyBase[copycode];
*code = CombineLengthCodes(inscode, copycode, use_last_distance);
*extra = (numextra << 48) | (copyextraval << insnumextra) | insextraval;
}
// Looks up the base value for insert length code |inscode| in kInsBase.
// This is the value that is subtracted from an insert length to obtain the
// payload of its extra bits.
static inline uint32_t GetInsertBase(uint16_t inscode) {
  const uint32_t insert_base = kInsBase[inscode];
  return insert_base;
}
// Looks up the entry for insert length code |inscode| in kInsExtra, i.e. the
// number of extra bits that accompany this insert length code.
static inline uint32_t GetInsertExtra(uint16_t inscode) {
  const uint32_t insert_extra_bits = kInsExtra[inscode];
  return insert_extra_bits;
}
static inline uint32_t GetCopyBase(uint16_t copycode) {
return kCopyBase[copycode];
}
// Looks up the entry for copy length code |copycode| in kCopyExtra, i.e. the
// number of extra bits that accompany this copy length code.
static inline uint32_t GetCopyExtra(uint16_t copycode) {
  const uint32_t copy_extra_bits = kCopyExtra[copycode];
  return copy_extra_bits;
}
struct Command {
// distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1.
Command(size_t insertlen, size_t copylen, size_t copylen_code,
size_t distance_code)
: insert_len_(static_cast<uint32_t>(insertlen))
, copy_len_(static_cast<uint32_t>(copylen)) {
: insert_len_(static_cast<uint32_t>(insertlen)) {
copy_len_ = static_cast<uint32_t>(
copylen | ((copylen_code ^ copylen) << 24));
// The distance prefix and extra bits are stored in this Command as if
// npostfix and ndirect were 0, they are only recomputed later after the
// clustering if needed.
PrefixEncodeCopyDistance(distance_code, 0, 0, &dist_prefix_, &dist_extra_);
GetLengthCode(insertlen, copylen_code, dist_prefix_ == 0,
&cmd_prefix_, &cmd_extra_);
&cmd_prefix_);
}
explicit Command(size_t insertlen)
: insert_len_(static_cast<uint32_t>(insertlen))
, copy_len_(0), dist_extra_(0), dist_prefix_(16) {
GetLengthCode(insertlen, 4, dist_prefix_ == 0, &cmd_prefix_, &cmd_extra_);
, copy_len_(4 << 24), dist_extra_(0), dist_prefix_(16) {
GetLengthCode(insertlen, 4, dist_prefix_ == 0, &cmd_prefix_);
}
uint32_t DistanceCode(void) const {
@ -123,9 +135,17 @@ struct Command {
return 3;
}
// Returns the copy length of this command, which is kept in the low 24 bits
// of copy_len_ (the high 8 bits carry the copy length code delta).
inline uint32_t copy_len(void) const {
  const uint32_t kCopyLenMask = 0xFFFFFF;
  return copy_len_ & kCopyLenMask;
}
// Returns the copy length code of this command. The constructor stores
// (copylen_code XOR copylen) in the high 8 bits of copy_len_, so XOR-ing
// those bits back onto the 24-bit copy length recovers the code.
inline uint32_t copy_len_code(void) const {
  const uint32_t length = copy_len_ & 0xFFFFFF;
  const uint32_t code_delta = copy_len_ >> 24;
  return length ^ code_delta;
}
uint32_t insert_len_;
/* Stores copy_len in low 24 bits and copy_len XOR copy_code in high 8 bits. */
uint32_t copy_len_;
uint64_t cmd_extra_;
uint32_t dist_extra_;
uint16_t cmd_prefix_;
uint16_t dist_prefix_;

View File

@ -105,8 +105,11 @@ void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
uint8_t depth[128], uint16_t bits[128],
size_t* storage_ix, uint8_t* storage) {
CreateHuffmanTree(histogram, 64, 15, depth);
CreateHuffmanTree(&histogram[64], 64, 14, &depth[64]);
// Tree size for building a tree over 64 symbols is 2 * 64 + 1.
static const size_t kTreeSize = 129;
HuffmanTree tree[kTreeSize];
CreateHuffmanTree(histogram, 64, 15, tree, depth);
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
// We have to jump through a few hoops here in order to compute
// the command bits because the symbols are in a different order than in
// the full alphabet. This looks complicated, but having the symbols
@ -141,9 +144,9 @@ void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
cmd_depth[256 + 8 * i] = depth[48 + i];
cmd_depth[448 + 8 * i] = depth[56 + i];
}
StoreHuffmanTree(cmd_depth, 704, storage_ix, storage);
StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
}
StoreHuffmanTree(&depth[64], 64, storage_ix, storage);
StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}
// REQUIRES: insertlen < 6210
@ -452,7 +455,8 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
assert(table_size <= (1u << 31));
assert((table_size & (table_size - 1)) == 0); // table must be power of two
const size_t shift = 64u - Log2FloorNonZero(table_size);
assert(static_cast<size_t>(0xffffffffffffffffU >> shift) == table_size - 1);
assert(table_size - 1 == static_cast<size_t>(
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
const uint8_t* ip_end = input + block_size;
int last_distance = -1;

View File

@ -57,8 +57,11 @@ static void BuildAndStoreCommandPrefixCode(
const uint32_t histogram[128],
uint8_t depth[128], uint16_t bits[128],
size_t* storage_ix, uint8_t* storage) {
CreateHuffmanTree(histogram, 64, 15, depth);
CreateHuffmanTree(&histogram[64], 64, 14, &depth[64]);
// Tree size for building a tree over 64 symbols is 2 * 64 + 1.
static const size_t kTreeSize = 129;
HuffmanTree tree[kTreeSize];
CreateHuffmanTree(histogram, 64, 15, tree, depth);
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
// We have to jump through a few hoops here in order to compute
// the command bits because the symbols are in a different order than in
// the full alphabet. This looks complicated, but having the symbols
@ -93,9 +96,9 @@ static void BuildAndStoreCommandPrefixCode(
cmd_depth[256 + 8 * i] = depth[8 + i];
cmd_depth[448 + 8 * i] = depth[16 + i];
}
StoreHuffmanTree(cmd_depth, 704, storage_ix, storage);
StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
}
StoreHuffmanTree(&depth[64], 64, storage_ix, storage);
StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}
inline void EmitInsertLen(uint32_t insertlen, uint32_t** commands) {
@ -227,7 +230,8 @@ void CreateCommands(const uint8_t* input, size_t block_size, size_t input_size,
assert(table_size <= (1u << 31));
assert((table_size & (table_size - 1)) == 0); // table must be power of two
const size_t shift = 64u - Log2FloorNonZero(table_size);
assert(static_cast<size_t>(0xffffffffffffffffU >> shift) == table_size - 1);
assert(table_size - 1 == static_cast<size_t>(
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
const uint8_t* ip_end = input + block_size;
// "next_emit" is a pointer to the first byte that is not covered by a
// previous copy. Bytes between "next_emit" and the start of the next copy or

View File

@ -38,7 +38,7 @@ static const int kMinQualityForContextModeling = 5;
static const int kMinQualityForOptimizeHistograms = 4;
// For quality 2 there is no block splitting, so we buffer at most this much
// literals and commands.
static const int kMaxNumDelayedSymbols = 0x2fff;
static const size_t kMaxNumDelayedSymbols = 0x2fff;
#define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));
@ -51,7 +51,7 @@ void RecomputeDistancePrefixes(Command* cmds,
}
for (size_t i = 0; i < num_commands; ++i) {
Command* cmd = &cmds[i];
if (cmd->copy_len_ > 0 && cmd->cmd_prefix_ >= 128) {
if (cmd->copy_len() && cmd->cmd_prefix_ >= 128) {
PrefixEncodeCopyDistance(cmd->DistanceCode(),
num_direct_distance_codes,
distance_postfix_bits,
@ -180,6 +180,250 @@ void InitCommandPrefixCodes(uint8_t cmd_depths[128],
*cmd_code_numbits = kDefaultCommandCodeNumBits;
}
// Chooses how many literal contexts to use (1, 2 or 3) and, for 2 or 3, which
// static context map goes with them. The choice is driven by how well the
// UTF8 prefix class of the previous byte predicts the prefix class of the
// next byte.
//
// |bigram_histo| holds 9 counters laid out as a 3x3 table: entry 3 * a + b
// counts class a followed by class b. Three models are compared:
//   entropy1: no context, only the class of the current byte;
//   entropy2: two contexts, rows 0 and 2 of the table merged into one;
//   entropy3: three contexts, one per row of the table.
// Predictive power is measured as Shannon entropy rather than 'BitsEntropy',
// since the prefix will be encoded together with the remaining 6 bits of the
// following byte, whereas BitsEntropy assumes the symbol is stored alone
// using Huffman coding.
//
// On return *num_literal_contexts is always set. *literal_context_map is set
// only when 2 or 3 contexts are chosen and is left untouched otherwise.
// REQUIRES: the counters in |bigram_histo| do not sum to zero.
void ChooseContextMap(int quality,
                      uint32_t* bigram_histo,
                      size_t* num_literal_contexts,
                      const uint32_t** literal_context_map) {
  static const uint32_t kStaticContextMapContinuation[64] = {
    1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  static const uint32_t kStaticContextMapSimpleUTF8[64] = {
    0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };

  // Project the 3x3 table onto the two coarser models in a single pass.
  uint32_t current_class_histo[3] = { 0 };
  uint32_t merged_rows_histo[6] = { 0 };
  size_t total = 0;
  for (size_t idx = 0; idx < 9; ++idx) {
    const uint32_t count = bigram_histo[idx];
    total += count;
    current_class_histo[idx % 3] += count;
    // The third row (indexes 6..8) is folded onto the first row (0..2).
    merged_rows_histo[idx < 6 ? idx : idx - 6] += count;
  }

  // ShannonEntropy also reports a total through its last argument; that value
  // is not needed here.
  size_t ignored;
  double entropy1 = ShannonEntropy(current_class_histo, 3, &ignored);
  const double merged_first = ShannonEntropy(merged_rows_histo, 3, &ignored);
  const double merged_second =
      ShannonEntropy(merged_rows_histo + 3, 3, &ignored);
  double entropy2 = merged_first + merged_second;
  double entropy3 = 0;
  for (size_t row_start = 0; row_start < 9; row_start += 3) {
    entropy3 += ShannonEntropy(bigram_histo + row_start, 3, &ignored);
  }

  // Normalize to a per-sample cost.
  assert(total != 0);
  const double scale = 1.0 / static_cast<double>(total);
  entropy1 *= scale;
  entropy2 *= scale;
  entropy3 *= scale;

  if (quality < 7) {
    // 3 context models is a bit slower, don't use it at lower qualities.
    entropy3 = entropy1 * 10;
  }

  // If expected savings by symbol are less than 0.2 bits, skip the
  // context modeling -- in exchange for faster decoding speed.
  if (entropy1 - entropy2 < 0.2 &&
      entropy1 - entropy3 < 0.2) {
    *num_literal_contexts = 1;
    return;
  }
  // The third context has to buy at least 0.02 bits per symbol over two.
  if (entropy2 - entropy3 < 0.02) {
    *num_literal_contexts = 2;
    *literal_context_map = kStaticContextMapSimpleUTF8;
    return;
  }
  *num_literal_contexts = 3;
  *literal_context_map = kStaticContextMapContinuation;
}
// Samples the input and decides whether literals should be coded with
// context modeling, and with which static context map.
//
// Does nothing (all outputs are left untouched) when |quality| is below
// kMinQualityForContextModeling or when |length| is less than 64. Otherwise
// sets *literal_context_mode to CONTEXT_UTF8 and lets ChooseContextMap fill
// in *num_literal_contexts and, possibly, *literal_context_map.
//
// |input| is read through |mask|, i.e. byte p is input[p & mask].
void DecideOverLiteralContextModeling(const uint8_t* input,
                                      size_t start_pos,
                                      size_t length,
                                      size_t mask,
                                      int quality,
                                      ContextType* literal_context_mode,
                                      size_t* num_literal_contexts,
                                      const uint32_t** literal_context_map) {
  // Maps the top two bits of a byte to one of three prefix classes; bytes
  // whose top bits are 00 or 01 share class 0.
  static const int kPrefixClass[4] = { 0, 0, 1, 2 };
  const size_t kStrideLength = 64;
  const size_t kStrideInterval = 4096;

  if (quality < kMinQualityForContextModeling) {
    return;
  }
  if (length < kStrideLength) {
    return;
  }

  // Gather bigram data of the UTF8 byte prefixes. To make the analysis of
  // UTF8 data faster we only examine 64 byte long strides at every 4kB
  // intervals.
  const size_t end_pos = start_pos + length;
  uint32_t bigram_prefix_histo[9] = { 0 };
  size_t stride_begin = start_pos;
  while (stride_begin + kStrideLength <= end_pos) {
    const size_t stride_end = stride_begin + kStrideLength;
    // |row| is the histogram row (3 * class) selected by the previous byte.
    int row = kPrefixClass[input[stride_begin & mask] >> 6] * 3;
    for (size_t pos = stride_begin + 1; pos < stride_end; ++pos) {
      const int current_class = kPrefixClass[input[pos & mask] >> 6];
      ++bigram_prefix_histo[row + current_class];
      row = current_class * 3;
    }
    stride_begin += kStrideInterval;
  }

  *literal_context_mode = CONTEXT_UTF8;
  ChooseContextMap(quality, bigram_prefix_histo, num_literal_contexts,
                   literal_context_map);
}
// Heuristic that tells whether it is worth trying to compress the |bytes|
// bytes starting at |last_flush_pos|. Returns false only when all of the
// following hold:
//   * there are few commands relative to the input size,
//   * more than 99% of the input is covered by literals, and
//   * the entropy of a sample of the input (every 13th byte) is above
//     7.92 bits per sampled byte.
// |data| is read through |mask|, i.e. byte p is data[p & mask].
bool ShouldCompress(const uint8_t* data,
                    const size_t mask,
                    const uint64_t last_flush_pos,
                    const size_t bytes,
                    const size_t num_literals,
                    const size_t num_commands) {
  static const uint32_t kSampleRate = 13;
  static const double kMinEntropy = 7.92;

  // Enough commands: the backward references found are likely to pay off.
  if (num_commands >= (bytes >> 8) + 2) {
    return true;
  }
  // A noticeable share of the input is covered by copies.
  if (!(num_literals > 0.99 * static_cast<double>(bytes))) {
    return true;
  }

  // Build a histogram from every kSampleRate-th byte of the input.
  uint32_t literal_histo[256] = { 0 };
  const size_t num_samples = (bytes + kSampleRate - 1) / kSampleRate;
  uint32_t sample_pos = static_cast<uint32_t>(last_flush_pos);
  for (size_t remaining = num_samples; remaining != 0; --remaining) {
    ++literal_histo[data[sample_pos & mask]];
    sample_pos += kSampleRate;
  }

  // Compress unless the sampled bytes look close to incompressible.
  const double bit_cost_threshold =
      static_cast<double>(bytes) * kMinEntropy / kSampleRate;
  return !(BitsEntropy(literal_histo, 256) > bit_cost_threshold);
}
// Writes one meta-block covering the |bytes| input bytes that start at
// |last_flush_pos| to the bit stream (|storage|, bit position |*storage_ix|).
//
// Depending on the input and on |quality| the meta-block is stored as
//   * an empty meta-block (bytes == 0),
//   * an uncompressed meta-block (ShouldCompress() said no, or the compressed
//     form turned out to be more than 4 bytes larger than the input), or
//   * a compressed meta-block built from |commands|.
// Whenever the uncompressed form is chosen, |dist_cache| is reset from
// |saved_dist_cache| (4 entries are copied).
// If font_mode is set and quality > 9, the distance prefixes stored in
// |commands| are recomputed in place.
void WriteMetaBlockInternal(const uint8_t* data,
const size_t mask,
const uint64_t last_flush_pos,
const size_t bytes,
const bool is_last,
const int quality,
const bool font_mode,
const uint8_t prev_byte,
const uint8_t prev_byte2,
const size_t num_literals,
const size_t num_commands,
Command* commands,
const int* saved_dist_cache,
int* dist_cache,
size_t* storage_ix,
uint8_t* storage) {
if (bytes == 0) {
// Write the ISLAST and ISEMPTY bits.
// NOTE(review): is_last is not consulted on this path; presumably callers
// pass bytes == 0 only for the final meta-block -- confirm.
WriteBits(2, 3, storage_ix, storage);
// Round the bit position up to the next byte boundary.
*storage_ix = (*storage_ix + 7u) & ~7u;
return;
}
if (!ShouldCompress(data, mask, last_flush_pos, bytes,
num_literals, num_commands)) {
// Restore the distance cache, as its last update by
// CreateBackwardReferences is now unused.
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
StoreUncompressedMetaBlock(is_last, data,
WrapPosition(last_flush_pos), mask, bytes,
storage_ix, storage);
return;
}
// Snapshot the first storage byte and the bit position so that the output
// can be rewound if the compressed meta-block ends up too large.
// NOTE(review): only the low 8 bits of *storage_ix are kept; this presumably
// relies on the bit position being small on entry -- confirm against caller.
const uint8_t last_byte = storage[0];
const uint8_t last_byte_bits = static_cast<uint8_t>(*storage_ix & 0xff);
uint32_t num_direct_distance_codes = 0;
uint32_t distance_postfix_bits = 0;
if (quality > 9 && font_mode) {
// Font mode uses non-default distance parameters, so the distance prefixes
// stored in the commands have to be recomputed for them.
num_direct_distance_codes = 12;
distance_postfix_bits = 1;
RecomputeDistancePrefixes(commands,
num_commands,
num_direct_distance_codes,
distance_postfix_bits);
}
// Pick the meta-block writer according to the quality level.
if (quality == 2) {
StoreMetaBlockFast(data, WrapPosition(last_flush_pos),
bytes, mask, is_last,
commands, num_commands,
storage_ix, storage);
} else if (quality < kMinQualityForBlockSplit) {
StoreMetaBlockTrivial(data, WrapPosition(last_flush_pos),
bytes, mask, is_last,
commands, num_commands,
storage_ix, storage);
} else {
MetaBlockSplit mb;
ContextType literal_context_mode = CONTEXT_UTF8;
if (quality <= 9) {
// Greedy block splitting. literal_context_map stays NULL unless
// DecideOverLiteralContextModeling selects a static context map.
size_t num_literal_contexts = 1;
const uint32_t* literal_context_map = NULL;
DecideOverLiteralContextModeling(data, WrapPosition(last_flush_pos),
bytes, mask,
quality,
&literal_context_mode,
&num_literal_contexts,
&literal_context_map);
if (literal_context_map == NULL) {
BuildMetaBlockGreedy(data, WrapPosition(last_flush_pos), mask,
commands, num_commands, &mb);
} else {
BuildMetaBlockGreedyWithContexts(data, WrapPosition(last_flush_pos),
mask,
prev_byte, prev_byte2,
literal_context_mode,
num_literal_contexts,
literal_context_map,
commands, num_commands,
&mb);
}
} else {
// Quality above 9: use CONTEXT_SIGNED for input that is not mostly UTF8.
if (!IsMostlyUTF8(data, WrapPosition(last_flush_pos), mask, bytes,
kMinUTF8Ratio)) {
literal_context_mode = CONTEXT_SIGNED;
}
BuildMetaBlock(data, WrapPosition(last_flush_pos), mask,
prev_byte, prev_byte2,
commands, num_commands,
literal_context_mode,
&mb);
}
if (quality >= kMinQualityForOptimizeHistograms) {
OptimizeHistograms(num_direct_distance_codes,
distance_postfix_bits,
&mb);
}
StoreMetaBlock(data, WrapPosition(last_flush_pos), bytes, mask,
prev_byte, prev_byte2,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands, num_commands,
mb,
storage_ix, storage);
}
// If the output now occupies more than bytes + 4 bytes, compression did not
// pay off: rewind to the snapshot and store the data uncompressed instead.
if (bytes + 4 < (*storage_ix >> 3)) {
// Restore the distance cache and last byte.
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
storage[0] = last_byte;
*storage_ix = last_byte_bits;
StoreUncompressedMetaBlock(is_last, data,
WrapPosition(last_flush_pos), mask,
bytes, storage_ix, storage);
}
}
BrotliCompressor::BrotliCompressor(BrotliParams params)
: params_(params),
hashers_(new Hashers()),
@ -211,7 +455,7 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
} else if (params_.lgblock == 0) {
params_.lgblock = 16;
if (params_.quality >= 9 && params_.lgwin > params_.lgblock) {
params_.lgblock = std::min(21, params_.lgwin);
params_.lgblock = std::min(18, params_.lgwin);
}
} else {
params_.lgblock = std::min(kMaxInputBlockBits,
@ -403,9 +647,13 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
&num_literals_);
size_t max_length = std::min<size_t>(mask + 1, 1u << kMaxInputBlockBits);
const size_t max_literals = max_length / 8;
const size_t max_commands = max_length / 8;
if (!is_last && !force_flush &&
(params_.quality >= kMinQualityForBlockSplit ||
(num_literals_ + num_commands_ < kMaxNumDelayedSymbols)) &&
num_literals_ < max_literals &&
num_commands_ < max_commands &&
input_pos_ + input_block_size() <= last_flush_pos_ + max_length) {
// Merge with next input block. Everything will happen later.
last_processed_pos_ = input_pos_;
@ -421,253 +669,36 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
last_insert_len_ = 0;
}
WriteMetaBlockInternal(is_last, out_size, output);
return true;
}
// Picks the number of literal contexts (1, 2 or 3) and, when more than one
// is used, the static context map that goes with them. The choice is based on
// how well the UTF8 prefix class of the previous byte predicts the prefix
// class of the next byte. That predictive power is measured as Shannon
// entropy rather than 'BitsEntropy': the prefix will be encoded together with
// the remaining 6 bits of the following byte, whereas BitsEntropy would
// assume that the symbol is stored alone using Huffman coding.
//
// bigram_histo holds 9 counts, laid out as three rows of three. When a single
// context is chosen, *literal_context_map is left untouched.
void ChooseContextMap(int quality,
                      uint32_t* bigram_histo,
                      size_t* num_literal_contexts,
                      const uint32_t** literal_context_map) {
  static const uint32_t kStaticContextMapContinuation[64] = {
    1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  static const uint32_t kStaticContextMapSimpleUTF8[64] = {
    0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };

  // Fold the 3x3 bigram table into a 3-bucket and a 2x3-bucket histogram.
  uint32_t monogram_histo[3] = { 0 };
  uint32_t two_prefix_histo[6] = { 0 };
  size_t total = 0;
  for (size_t idx = 0; idx < 9; ++idx) {
    const uint32_t count = bigram_histo[idx];
    total += count;
    monogram_histo[idx % 3] += count;
    // The last row is merged into the first one.
    two_prefix_histo[idx < 6 ? idx : idx - 6] += count;
  }

  size_t unused;
  double entropy1 = ShannonEntropy(monogram_histo, 3, &unused);
  double entropy2 = (ShannonEntropy(two_prefix_histo, 3, &unused) +
                     ShannonEntropy(two_prefix_histo + 3, 3, &unused));
  double entropy3 = 0;
  for (size_t row = 0; row < 3; ++row) {
    entropy3 += ShannonEntropy(bigram_histo + 3 * row, 3, &unused);
  }

  // Normalize to bits per symbol.
  assert(total != 0);
  const double scale = 1.0 / static_cast<double>(total);
  entropy1 = entropy1 * scale;
  entropy2 = entropy2 * scale;
  entropy3 = entropy3 * scale;

  if (quality < 7) {
    // 3 context models is a bit slower, don't use it at lower qualities.
    entropy3 = entropy1 * 10;
  }

  // If expected savings by symbol are less than 0.2 bits, skip the
  // context modeling -- in exchange for faster decoding speed.
  const bool too_little_gain =
      (entropy1 - entropy2 < 0.2) && (entropy1 - entropy3 < 0.2);
  if (too_little_gain) {
    *num_literal_contexts = 1;
    return;
  }
  if (entropy2 - entropy3 < 0.02) {
    *num_literal_contexts = 2;
    *literal_context_map = kStaticContextMapSimpleUTF8;
    return;
  }
  *num_literal_contexts = 3;
  *literal_context_map = kStaticContextMapContinuation;
}
// Samples the input to decide whether literal context modeling is worth
// using and, if so, with how many contexts.
//
// Does nothing (all outputs are left untouched) when quality is below
// kMinQualityForContextModeling or when fewer than 64 bytes are available.
// Otherwise sets *literal_context_mode to CONTEXT_UTF8 and lets
// ChooseContextMap fill in *num_literal_contexts and *literal_context_map.
void DecideOverLiteralContextModeling(const uint8_t* input,
                                      size_t start_pos,
                                      size_t length,
                                      size_t mask,
                                      int quality,
                                      ContextType* literal_context_mode,
                                      size_t* num_literal_contexts,
                                      const uint32_t** literal_context_map) {
  const bool worth_analyzing =
      quality >= kMinQualityForContextModeling && length >= 64;
  if (!worth_analyzing) {
    return;
  }

  // Maps the top two bits of a byte to one of three prefix classes.
  static const int kPrefixClass[4] = { 0, 0, 1, 2 };

  // Gather bigram data of the UTF8 byte prefixes. To make the analysis of
  // UTF8 data faster we only examine 64 byte long strides at every 4kB
  // intervals.
  const size_t end_pos = start_pos + length;
  uint32_t bigram_prefix_histo[9] = { 0 };
  for (size_t stride = start_pos; stride + 64 <= end_pos; stride += 4096) {
    // Row offset (3 * class of the previous byte) into the 3x3 table.
    int row = kPrefixClass[input[stride & mask] >> 6] * 3;
    for (size_t offset = 1; offset < 64; ++offset) {
      const uint8_t literal = input[(stride + offset) & mask];
      const int cls = kPrefixClass[literal >> 6];
      ++bigram_prefix_histo[row + cls];
      row = cls * 3;
    }
  }

  *literal_context_mode = CONTEXT_UTF8;
  ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
                   literal_context_map);
}
void BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
size_t* out_size,
uint8_t** output) {
if (!is_last && input_pos_ == last_flush_pos_) {
// We have no new input data and we don't have to finish the stream, so
// nothing to do.
*out_size = 0;
return;
return true;
}
assert(input_pos_ >= last_flush_pos_);
assert(input_pos_ > last_flush_pos_ || is_last);
assert(input_pos_ - last_flush_pos_ <= 1u << 24);
const uint32_t bytes = static_cast<uint32_t>(input_pos_ - last_flush_pos_);
const uint8_t* data = ringbuffer_->start();
const uint32_t mask = ringbuffer_->mask();
const size_t max_out_size = 2 * bytes + 500;
const uint32_t metablock_size =
static_cast<uint32_t>(input_pos_ - last_flush_pos_);
const size_t max_out_size = 2 * metablock_size + 500;
uint8_t* storage = GetBrotliStorage(max_out_size);
storage[0] = last_byte_;
size_t storage_ix = last_byte_bits_;
bool uncompressed = false;
if (num_commands_ < (bytes >> 8) + 2) {
if (num_literals_ > 0.99 * static_cast<double>(bytes)) {
uint32_t literal_histo[256] = { 0 };
static const uint32_t kSampleRate = 13;
static const double kMinEntropy = 7.92;
const double bit_cost_threshold =
static_cast<double>(bytes) * kMinEntropy / kSampleRate;
size_t t = (bytes + kSampleRate - 1) / kSampleRate;
uint32_t pos = static_cast<uint32_t>(last_flush_pos_);
for (size_t i = 0; i < t; i++) {
++literal_histo[data[pos & mask]];
pos += kSampleRate;
}
if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
uncompressed = true;
}
}
}
if (bytes == 0) {
// Write the ISLAST and ISEMPTY bits.
WriteBits(2, 3, &storage_ix, &storage[0]);
storage_ix = (storage_ix + 7u) & ~7u;
} else if (uncompressed) {
// Restore the distance cache, as its last update by
// CreateBackwardReferences is now unused.
memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
StoreUncompressedMetaBlock(is_last, data,
WrapPosition(last_flush_pos_), mask, bytes,
&storage_ix,
&storage[0]);
} else {
uint32_t num_direct_distance_codes = 0;
uint32_t distance_postfix_bits = 0;
if (params_.quality > 9 && params_.mode == BrotliParams::MODE_FONT) {
num_direct_distance_codes = 12;
distance_postfix_bits = 1;
RecomputeDistancePrefixes(commands_,
num_commands_,
num_direct_distance_codes,
distance_postfix_bits);
}
if (params_.quality == 2) {
StoreMetaBlockFast(data, WrapPosition(last_flush_pos_),
bytes, mask, is_last,
commands_, num_commands_,
&storage_ix,
&storage[0]);
} else if (params_.quality < kMinQualityForBlockSplit) {
StoreMetaBlockTrivial(data, WrapPosition(last_flush_pos_),
bytes, mask, is_last,
commands_, num_commands_,
&storage_ix,
&storage[0]);
} else {
MetaBlockSplit mb;
ContextType literal_context_mode = CONTEXT_UTF8;
if (params_.quality <= 9) {
size_t num_literal_contexts = 1;
const uint32_t* literal_context_map = NULL;
DecideOverLiteralContextModeling(data, WrapPosition(last_flush_pos_),
bytes, mask,
params_.quality,
&literal_context_mode,
&num_literal_contexts,
&literal_context_map);
if (literal_context_map == NULL) {
BuildMetaBlockGreedy(data, WrapPosition(last_flush_pos_), mask,
commands_, num_commands_,
&mb);
} else {
BuildMetaBlockGreedyWithContexts(data, WrapPosition(last_flush_pos_),
mask,
prev_byte_, prev_byte2_,
literal_context_mode,
num_literal_contexts,
literal_context_map,
commands_, num_commands_,
&mb);
}
} else {
if (!IsMostlyUTF8(
data, WrapPosition(last_flush_pos_), mask, bytes, kMinUTF8Ratio)) {
literal_context_mode = CONTEXT_SIGNED;
}
BuildMetaBlock(data, WrapPosition(last_flush_pos_), mask,
prev_byte_, prev_byte2_,
commands_, num_commands_,
literal_context_mode,
&mb);
}
if (params_.quality >= kMinQualityForOptimizeHistograms) {
OptimizeHistograms(num_direct_distance_codes,
distance_postfix_bits,
&mb);
}
StoreMetaBlock(data, WrapPosition(last_flush_pos_), bytes, mask,
prev_byte_, prev_byte2_,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands_, num_commands_,
mb,
&storage_ix,
&storage[0]);
}
if (bytes + 4 < (storage_ix >> 3)) {
// Restore the distance cache and last byte.
memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
storage[0] = last_byte_;
storage_ix = last_byte_bits_;
StoreUncompressedMetaBlock(is_last, data,
WrapPosition(last_flush_pos_), mask,
bytes, &storage_ix, &storage[0]);
}
}
bool font_mode = params_.mode == BrotliParams::MODE_FONT;
WriteMetaBlockInternal(
data, mask, last_flush_pos_, metablock_size, is_last, params_.quality,
font_mode, prev_byte_, prev_byte2_, num_literals_, num_commands_,
commands_, saved_dist_cache_, dist_cache_, &storage_ix, storage);
last_byte_ = storage[storage_ix >> 3];
last_byte_bits_ = storage_ix & 7u;
last_flush_pos_ = input_pos_;
last_processed_pos_ = input_pos_;
if (last_flush_pos_ > 0) {
prev_byte_ = data[(static_cast<uint32_t>(last_flush_pos_) - 1) & mask];
}
if (last_flush_pos_ > 1) {
prev_byte2_ = data[(static_cast<uint32_t>(last_flush_pos_) - 2) & mask];
}
num_commands_ = 0;
num_literals_ = 0;
// Save the state of the distance cache in case we need to restore it for
@ -675,6 +706,7 @@ void BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
*output = &storage[0];
*out_size = storage_ix >> 3;
return true;
}
bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
@ -739,6 +771,177 @@ bool BrotliCompressor::FinishStream(
return WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer);
}
// One-shot, in-memory compression path for quality 10.
//
// Compresses input_buffer[0, input_size) meta-block by meta-block, addressing
// the input directly instead of copying it through a ring buffer.
//
//   lgwin          - log2 of the window size (the visible caller clamps it to
//                    the range [16, 24]).
//   input_size     - number of input bytes.
//   input_buffer   - the data to compress.
//   encoded_size   - in: capacity of encoded_buffer; out: total number of
//                    bytes produced so far (larger than the capacity when the
//                    call fails).
//   encoded_buffer - receives the compressed stream.
//
// Returns 1 on success and 0 if the output did not fit into encoded_buffer.
int BrotliCompressBufferQuality10(int lgwin,
                                  size_t input_size,
                                  const uint8_t* input_buffer,
                                  size_t* encoded_size,
                                  uint8_t* encoded_buffer) {
  // The input is addressed linearly, so the mask must never wrap a position.
  const size_t mask = std::numeric_limits<size_t>::max() >> 1;
  assert(input_size <= mask + 1);
  const size_t max_backward_limit = (1 << lgwin) - 16;
  int dist_cache[4] = { 4, 11, 15, 16 };
  int saved_dist_cache[4] = { 4, 11, 15, 16 };
  int ok = 1;
  const size_t max_out_size = *encoded_size;
  size_t total_out_size = 0;
  // Partially filled last output byte, carried from one meta-block to the
  // next; it starts out holding the encoded window bits.
  uint8_t last_byte;
  uint8_t last_byte_bits;
  EncodeWindowBits(lgwin, &last_byte, &last_byte_bits);

  Hashers::H10* hasher = new Hashers::H10;
  const size_t hasher_eff_size = std::min(input_size, max_backward_limit + 16);
  hasher->Init(lgwin, 0, hasher_eff_size, true);

  const int lgblock = std::min(18, lgwin);
  const int lgmetablock = std::min(24, lgwin + 1);
  const size_t max_block_size = static_cast<size_t>(1) << lgblock;
  const size_t max_metablock_size = static_cast<size_t>(1) << lgmetablock;
  const size_t max_literals_per_metablock = max_metablock_size / 8;
  const size_t max_commands_per_metablock = max_metablock_size / 8;
  size_t metablock_start = 0;
  uint8_t prev_byte = 0;
  uint8_t prev_byte2 = 0;
  while (ok && metablock_start < input_size) {
    const size_t metablock_end =
        std::min(input_size, metablock_start + max_metablock_size);
    const size_t expected_num_commands =
        (metablock_end - metablock_start) / 12 + 16;
    Command* commands = 0;
    size_t num_commands = 0;
    size_t last_insert_len = 0;
    size_t num_literals = 0;
    size_t metablock_size = 0;
    size_t cmd_alloc_size = 0;

    // Run the zopfli search block by block, appending the resulting commands
    // to the command buffer of the current meta-block.
    for (size_t block_start = metablock_start; block_start < metablock_end; ) {
      size_t block_size = std::min(metablock_end - block_start, max_block_size);
      ZopfliNode* nodes = new ZopfliNode[block_size + 1];
      std::vector<uint32_t> path;
      hasher->StitchToPreviousBlock(block_size, block_start,
                                    input_buffer, mask);
      ZopfliComputeShortestPath(block_size, block_start, input_buffer, mask,
                                max_backward_limit, dist_cache,
                                hasher, nodes, &path);
      // We allocate a command buffer in the first iteration of this loop that
      // will be likely big enough for the whole metablock, so that for most
      // inputs we will not have to reallocate in later iterations. We do the
      // allocation here and not before the loop, because if the input is small,
      // this will be allocated after the zopfli cost model is freed, so this
      // will not increase peak memory usage.
      // TODO: If the first allocation is too small, increase command
      // buffer size exponentially.
      size_t new_cmd_alloc_size = std::max(expected_num_commands,
                                           num_commands + path.size() + 1);
      if (cmd_alloc_size != new_cmd_alloc_size) {
        cmd_alloc_size = new_cmd_alloc_size;
        commands = static_cast<Command*>(
            realloc(commands, cmd_alloc_size * sizeof(Command)));
      }
      ZopfliCreateCommands(block_size, block_start, max_backward_limit, path,
                           &nodes[0], dist_cache, &last_insert_len,
                           &commands[num_commands], &num_literals);
      num_commands += path.size();
      block_start += block_size;
      metablock_size += block_size;
      delete[] nodes;
      // Close the meta-block early once it holds too many symbols.
      if (num_literals > max_literals_per_metablock ||
          num_commands > max_commands_per_metablock) {
        break;
      }
    }

    // Trailing literals that are not followed by a copy get their own
    // insert-only command (the "+ 1" in the allocation above makes room).
    if (last_insert_len > 0) {
      Command cmd(last_insert_len);
      commands[num_commands++] = cmd;
      num_literals += last_insert_len;
    }

    const bool is_last = (metablock_start + metablock_size == input_size);
    uint8_t* storage = NULL;
    size_t storage_ix = last_byte_bits;

    if (metablock_size == 0) {
      // Write the ISLAST and ISEMPTY bits.
      storage = new uint8_t[16];
      storage[0] = last_byte;
      WriteBits(2, 3, &storage_ix, storage);
      storage_ix = (storage_ix + 7u) & ~7u;
    } else if (!ShouldCompress(input_buffer, mask, metablock_start,
                               metablock_size, num_literals, num_commands)) {
      // Restore the distance cache, as its last update by
      // CreateBackwardReferences is now unused.
      memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
      storage = new uint8_t[metablock_size + 16];
      storage[0] = last_byte;
      StoreUncompressedMetaBlock(is_last, input_buffer,
                                 metablock_start, mask, metablock_size,
                                 &storage_ix, storage);
    } else {
      uint32_t num_direct_distance_codes = 0;
      uint32_t distance_postfix_bits = 0;
      MetaBlockSplit mb;
      ContextType literal_context_mode = CONTEXT_UTF8;
      if (!IsMostlyUTF8(
              input_buffer, metablock_start, mask, metablock_size,
              kMinUTF8Ratio)) {
        literal_context_mode = CONTEXT_SIGNED;
      }
      BuildMetaBlock(input_buffer, metablock_start, mask,
                     prev_byte, prev_byte2,
                     commands, num_commands,
                     literal_context_mode,
                     &mb);
      OptimizeHistograms(num_direct_distance_codes,
                         distance_postfix_bits,
                         &mb);
      const size_t max_out_metablock_size = 2 * metablock_size + 500;
      storage = new uint8_t[max_out_metablock_size];
      storage[0] = last_byte;
      StoreMetaBlock(input_buffer, metablock_start, metablock_size, mask,
                     prev_byte, prev_byte2,
                     is_last,
                     num_direct_distance_codes,
                     distance_postfix_bits,
                     literal_context_mode,
                     commands, num_commands,
                     mb,
                     &storage_ix, storage);
      if (metablock_size + 4 < (storage_ix >> 3)) {
        // The compressed form came out larger than the raw data: fall back to
        // an uncompressed meta-block.
        // Restore the distance cache and last byte.
        memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
        storage[0] = last_byte;
        storage_ix = last_byte_bits;
        StoreUncompressedMetaBlock(is_last, input_buffer,
                                   metablock_start, mask,
                                   metablock_size, &storage_ix, storage);
      }
    }

    last_byte = storage[storage_ix >> 3];
    last_byte_bits = storage_ix & 7u;
    metablock_start += metablock_size;
    // Remember the last two bytes as context for the next meta-block. The
    // reads are guarded so that a very short input (e.g. a single byte) does
    // not index before the start of input_buffer.
    if (metablock_start > 0) {
      prev_byte = input_buffer[metablock_start - 1];
    }
    if (metablock_start > 1) {
      prev_byte2 = input_buffer[metablock_start - 2];
    }
    // Save the state of the distance cache in case we need to restore it for
    // emitting an uncompressed block.
    memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));

    // Hand the meta-block over to the caller's buffer, if it still fits.
    const size_t out_size = storage_ix >> 3;
    total_out_size += out_size;
    if (total_out_size <= max_out_size) {
      memcpy(encoded_buffer, storage, out_size);
      encoded_buffer += out_size;
    } else {
      ok = 0;
    }
    delete[] storage;
    free(commands);
  }

  *encoded_size = total_out_size;
  delete hasher;
  return ok;
}
int BrotliCompressBuffer(BrotliParams params,
size_t input_size,
const uint8_t* input_buffer,
@ -748,6 +951,18 @@ int BrotliCompressBuffer(BrotliParams params,
// Output buffer needs at least one byte.
return 0;
}
if (input_size == 0) {
// Handle the special case of empty input.
*encoded_size = 1;
*encoded_buffer = 6;
return 1;
}
if (params.quality == 10) {
// TODO(user) Implement this direct path for all quality levels.
const int lgwin = std::min(24, std::max(16, params.lgwin));
return BrotliCompressBufferQuality10(lgwin, input_size, input_buffer,
encoded_size, encoded_buffer);
}
BrotliMemIn in(input_buffer, input_size);
BrotliMemOut out(encoded_buffer, *encoded_size);
if (!BrotliCompress(params, &in, &out)) {

View File

@ -140,10 +140,6 @@ class BrotliCompressor {
int* GetHashTable(int quality,
size_t input_size, size_t* table_size);
void WriteMetaBlockInternal(const bool is_last,
size_t* out_size,
uint8_t** output);
BrotliParams params_;
Hashers* hashers_;
int hash_type_;

View File

@ -40,7 +40,7 @@ void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
}
for (size_t i = 0; i < num_commands; ++i) {
Command* cmd = &cmds[i];
if (cmd->copy_len_ > 0 && cmd->cmd_prefix_ >= 128) {
if (cmd->copy_len() && cmd->cmd_prefix_ >= 128) {
PrefixEncodeCopyDistance(cmd->DistanceCode(),
num_direct_distance_codes,
distance_postfix_bits,

View File

@ -10,7 +10,6 @@
#include <algorithm>
#include <limits>
#include <vector>
#include <cstdlib>
#include "./histogram.h"
@ -32,6 +31,15 @@ void SetDepth(const HuffmanTree &p,
}
}
// Ordering used to sort the root nodes, least popular first. Nodes with equal
// counts are ordered by descending index_right_or_value_.
static inline bool SortHuffmanTree(const HuffmanTree& v0,
                                   const HuffmanTree& v1) {
  if (v0.total_count_ == v1.total_count_) {
    return v1.index_right_or_value_ < v0.index_right_or_value_;
  }
  return v0.total_count_ < v1.total_count_;
}
// This function will create a Huffman tree.
//
// The catch here is that the tree cannot be arbitrarily deep.
@ -50,30 +58,28 @@ void SetDepth(const HuffmanTree &p,
void CreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
uint8_t *depth) {
// For block sizes below 64 kB, we never need to do a second iteration
// of this loop. Probably all of our block sizes will be smaller than
// that, so this loop is mostly of academic interest. If we actually
// would need this, we would be better off with the Katajainen algorithm.
for (uint32_t count_limit = 1; ; count_limit *= 2) {
std::vector<HuffmanTree> tree;
tree.reserve(2 * length + 1);
size_t n = 0;
for (size_t i = length; i != 0;) {
--i;
if (data[i]) {
const uint32_t count = std::max(data[i], count_limit);
tree.push_back(HuffmanTree(count, -1, static_cast<int16_t>(i)));
tree[n++] = HuffmanTree(count, -1, static_cast<int16_t>(i));
}
}
const size_t n = tree.size();
if (n == 1) {
depth[tree[0].index_right_or_value_] = 1; // Only one element.
break;
}
std::stable_sort(tree.begin(), tree.end(), SortHuffmanTree);
std::sort(tree, tree + n, SortHuffmanTree);
// The nodes are:
// [0, n): the sorted leaf nodes that we start with.
@ -83,8 +89,8 @@ void CreateHuffmanTree(const uint32_t *data,
// [2n]: we add a sentinel at the end as well.
// There will be (2n+1) elements at the end.
const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
tree.push_back(sentinel);
tree.push_back(sentinel);
tree[n] = sentinel;
tree[n + 1] = sentinel;
size_t i = 0; // Points to the next leaf node.
size_t j = n + 1; // Points to the next non-leaf node.
@ -106,16 +112,15 @@ void CreateHuffmanTree(const uint32_t *data,
}
// The sentinel node becomes the parent node.
size_t j_end = tree.size() - 1;
size_t j_end = 2 * n - k;
tree[j_end].total_count_ =
tree[left].total_count_ + tree[right].total_count_;
tree[j_end].index_left_ = static_cast<int16_t>(left);
tree[j_end].index_right_or_value_ = static_cast<int16_t>(right);
// Add back the last sentinel node.
tree.push_back(sentinel);
tree[j_end + 1] = sentinel;
}
assert(tree.size() == 2 * n + 1);
SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
// We need to pack the Huffman tree in tree_limit bits.
@ -127,12 +132,12 @@ void CreateHuffmanTree(const uint32_t *data,
}
}
void Reverse(std::vector<uint8_t>* v, size_t start, size_t end) {
void Reverse(uint8_t* v, size_t start, size_t end) {
--end;
while (start < end) {
uint8_t tmp = (*v)[start];
(*v)[start] = (*v)[end];
(*v)[end] = tmp;
uint8_t tmp = v[start];
v[start] = v[end];
v[end] = tmp;
++start;
--end;
}
@ -142,79 +147,88 @@ void WriteHuffmanTreeRepetitions(
const uint8_t previous_value,
const uint8_t value,
size_t repetitions,
std::vector<uint8_t> *tree,
std::vector<uint8_t> *extra_bits_data) {
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
assert(repetitions > 0);
if (previous_value != value) {
tree->push_back(value);
extra_bits_data->push_back(0);
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
if (repetitions == 7) {
tree->push_back(value);
extra_bits_data->push_back(0);
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
if (repetitions < 3) {
for (size_t i = 0; i < repetitions; ++i) {
tree->push_back(value);
extra_bits_data->push_back(0);
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
}
} else {
repetitions -= 3;
size_t start = tree->size();
size_t start = *tree_size;
while (true) {
tree->push_back(16);
extra_bits_data->push_back(repetitions & 0x3);
tree[*tree_size] = 16;
extra_bits_data[*tree_size] = repetitions & 0x3;
++(*tree_size);
repetitions >>= 2;
if (repetitions == 0) {
break;
}
--repetitions;
}
Reverse(tree, start, tree->size());
Reverse(extra_bits_data, start, tree->size());
Reverse(tree, start, *tree_size);
Reverse(extra_bits_data, start, *tree_size);
}
}
void WriteHuffmanTreeRepetitionsZeros(
size_t repetitions,
std::vector<uint8_t> *tree,
std::vector<uint8_t> *extra_bits_data) {
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
if (repetitions == 11) {
tree->push_back(0);
extra_bits_data->push_back(0);
tree[*tree_size] = 0;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
if (repetitions < 3) {
for (size_t i = 0; i < repetitions; ++i) {
tree->push_back(0);
extra_bits_data->push_back(0);
tree[*tree_size] = 0;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
}
} else {
repetitions -= 3;
size_t start = tree->size();
size_t start = *tree_size;
while (true) {
tree->push_back(17);
extra_bits_data->push_back(repetitions & 0x7);
tree[*tree_size] = 17;
extra_bits_data[*tree_size] = repetitions & 0x7;
++(*tree_size);
repetitions >>= 3;
if (repetitions == 0) {
break;
}
--repetitions;
}
Reverse(tree, start, tree->size());
Reverse(extra_bits_data, start, tree->size());
Reverse(tree, start, *tree_size);
Reverse(extra_bits_data, start, *tree_size);
}
}
bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) {
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
uint8_t* good_for_rle) {
size_t nonzero_count = 0;
size_t stride;
size_t limit;
size_t sum;
const size_t streak_limit = 1240;
uint8_t* good_for_rle;
// Let's make the Huffman code more compatible with rle encoding.
size_t i;
for (i = 0; i < length; i++) {
@ -223,13 +237,13 @@ bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) {
}
}
if (nonzero_count < 16) {
return 1;
return;
}
while (length != 0 && counts[length - 1] == 0) {
--length;
}
if (length == 0) {
return 1; // All zeros.
return; // All zeros.
}
// Now counts[0..length - 1] does not have trailing zeros.
{
@ -245,7 +259,7 @@ bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) {
}
if (nonzeros < 5) {
// Small histogram will model it well.
return 1;
return;
}
size_t zeros = length - nonzeros;
if (smallest_nonzero < 4) {
@ -258,15 +272,12 @@ bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) {
}
}
if (nonzeros < 28) {
return 1;
return;
}
}
// 2) Let's mark all population counts that already can be encoded
// with an rle code.
good_for_rle = (uint8_t*)calloc(length, 1);
if (good_for_rle == NULL) {
return 0;
}
memset(good_for_rle, 0, length);
{
// Let's not spoil any of the existing good rle codes.
// Mark any seq of 0's that is longer as 5 as a good_for_rle.
@ -340,8 +351,6 @@ bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) {
}
}
}
free(good_for_rle);
return 1;
}
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
@ -373,8 +382,9 @@ static void DecideOverRleUse(const uint8_t* depth, const size_t length,
void WriteHuffmanTree(const uint8_t* depth,
size_t length,
std::vector<uint8_t> *tree,
std::vector<uint8_t> *extra_bits_data) {
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
uint8_t previous_value = 8;
// Throw away trailing zeros.
@ -408,10 +418,11 @@ void WriteHuffmanTree(const uint8_t* depth,
}
}
if (value == 0) {
WriteHuffmanTreeRepetitionsZeros(reps, tree, extra_bits_data);
WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
} else {
WriteHuffmanTreeRepetitions(previous_value,
value, reps, tree, extra_bits_data);
value, reps, tree_size,
tree, extra_bits_data);
previous_value = value;
}
i += reps;

View File

@ -10,7 +10,6 @@
#define BROTLI_ENC_ENTROPY_ENCODE_H_
#include <string.h>
#include <vector>
#include "./histogram.h"
#include "./prefix.h"
#include "./types.h"
@ -19,6 +18,7 @@ namespace brotli {
// A node of a Huffman tree.
struct HuffmanTree {
HuffmanTree() {}
HuffmanTree(uint32_t count, int16_t left, int16_t right)
: total_count_(count),
index_left_(left),
@ -29,11 +29,6 @@ struct HuffmanTree {
int16_t index_right_or_value_;
};
// Sort the root nodes, least popular first.
inline bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) {
return v0.total_count_ < v1.total_count_;
}
void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
uint8_t *depth, uint8_t level);
@ -45,10 +40,14 @@ void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
// The depth contains the tree, i.e., how many bits are used for
// the symbol.
//
// The actual Huffman tree is constructed in the tree[] array, which has to
// be at least 2 * length + 1 long.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
uint8_t *depth);
// Change the population counts in a way that the consequent
@ -57,15 +56,18 @@ void CreateHuffmanTree(const uint32_t *data,
//
// length contains the size of the histogram.
// counts contains the population counts.
bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts);
// good_for_rle is a buffer of at least length size
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
uint8_t* good_for_rle);
// Write a Huffman tree from bit depths into the bitstream representation
// of a Huffman tree. The generated Huffman tree is to be compressed once
// more using a Huffman tree
void WriteHuffmanTree(const uint8_t* depth,
size_t num,
std::vector<uint8_t> *tree,
std::vector<uint8_t> *extra_bits_data);
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data);
// Get the actual bit values for a tree of bit depths.
void ConvertBitDepthsToSymbols(const uint8_t *depth,

View File

@ -78,7 +78,7 @@ static const uint32_t kCodeLengthBits[18] = {
};
inline void StoreStaticCodeLengthCode(size_t* storage_ix, uint8_t* storage) {
WriteBits(40, 0x000000ff55555554U, storage_ix, storage);
WriteBits(40, MAKE_UINT64_T(0xff, 0x55555554), storage_ix, storage);
}
static const uint64_t kZeroRepsBits[704] = {

View File

@ -14,7 +14,6 @@
#include <algorithm>
#include <cstring>
#include <limits>
#include <vector>
#include "./dictionary_hash.h"
#include "./fast_log.h"
@ -278,7 +277,7 @@ class HashLongestMatchQuickly {
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
const size_t transform_id = kCutoffTransforms[len - matchlen];
const size_t word_id =
transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]) +
dist;
const size_t backward = max_backward + word_id + 1;
const double score = BackwardReferenceScore(matchlen, backward);
@ -574,8 +573,10 @@ class HashLongestMatch {
}
buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
++num_[key];
std::vector<uint32_t> dict_matches(kMaxDictionaryMatchLen + 1,
kInvalidMatch);
uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
dict_matches[i] = kInvalidMatch;
}
size_t minlen = std::max<size_t>(4, best_len + 1);
if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
&dict_matches[0])) {
@ -706,8 +707,10 @@ class HashToBinaryTree {
matches = StoreAndFindMatches(data, cur_ix, ring_buffer_mask,
max_length, &best_len, matches);
}
std::vector<uint32_t> dict_matches(kMaxDictionaryMatchLen + 1,
kInvalidMatch);
uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
dict_matches[i] = kInvalidMatch;
}
size_t minlen = std::max<size_t>(4, best_len + 1);
if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
&dict_matches[0])) {
@ -725,15 +728,34 @@ class HashToBinaryTree {
// Stores the hash of the next 4 bytes and re-roots the binary tree at the
// current sequence, without returning any matches.
// REQUIRES: cur_ix + kMaxTreeCompLength <= end-of-current-block
void Store(const uint8_t* data,
const size_t ring_buffer_mask,
const size_t cur_ix,
const size_t max_length) {
const size_t cur_ix) {
size_t best_len = 0;
StoreAndFindMatches(data, cur_ix, ring_buffer_mask, max_length,
StoreAndFindMatches(data, cur_ix, ring_buffer_mask, kMaxTreeCompLength,
&best_len, NULL);
}
// Connects the hasher state to a new block of num_bytes bytes that starts at
// `position`: stores the last `kMaxTreeCompLength - 1` positions of the
// previous block in the hasher. These could not be stored before, since doing
// so requires knowledge of both the previous and the current block.
void StitchToPreviousBlock(size_t num_bytes,
                           size_t position,
                           const uint8_t* ringbuffer,
                           size_t ringbuffer_mask) {
  if (num_bytes < 3 || position < kMaxTreeCompLength) {
    return;
  }
  const size_t first = position - kMaxTreeCompLength + 1;
  const size_t limit = std::min(position, first + num_bytes);
  // For every stored position `cur` we know that
  // cur + kMaxTreeCompLength <= position + num_bytes, i.e. the end of the
  // current block, and that we have at least kMaxTreeCompLength tail in the
  // ringbuffer.
  for (size_t cur = first; cur < limit; ++cur) {
    Store(ringbuffer, ringbuffer_mask, cur);
  }
}
static const size_t kMaxNumMatches = 64 + kMaxTreeSearchDepth;
private:
@ -928,8 +950,7 @@ struct Hashers {
case 10:
hash_h10->Init(lgwin, 0, size, false);
for (size_t i = 0; i + kMaxTreeCompLength - 1 < size; ++i) {
hash_h10->Store(dict, std::numeric_limits<size_t>::max(),
i, size - i);
hash_h10->Store(dict, std::numeric_limits<size_t>::max(), i);
}
break;
default: break;

View File

@ -50,8 +50,8 @@ void BuildHistograms(
prev_byte = ringbuffer[pos & mask];
++pos;
}
pos += cmd.copy_len_;
if (cmd.copy_len_ > 0) {
pos += cmd.copy_len();
if (cmd.copy_len()) {
prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {

View File

@ -258,8 +258,8 @@ void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
lit_blocks.AddSymbol(ringbuffer[pos & mask]);
++pos;
}
pos += cmd.copy_len_;
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
pos += cmd.copy_len();
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
dist_blocks.AddSymbol(cmd.dist_prefix_);
}
}
@ -488,8 +488,8 @@ void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
prev_byte = literal;
++pos;
}
pos += cmd.copy_len_;
if (cmd.copy_len_ > 0) {
pos += cmd.copy_len();
if (cmd.copy_len()) {
prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
@ -515,20 +515,25 @@ void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
void OptimizeHistograms(size_t num_direct_distance_codes,
size_t distance_postfix_bits,
MetaBlockSplit* mb) {
uint8_t* good_for_rle = new uint8_t[kNumCommandPrefixes];
for (size_t i = 0; i < mb->literal_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0]);
OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0],
good_for_rle);
}
for (size_t i = 0; i < mb->command_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
&mb->command_histograms[i].data_[0]);
&mb->command_histograms[i].data_[0],
good_for_rle);
}
size_t num_distance_codes =
kNumDistanceShortCodes + num_direct_distance_codes +
(48u << distance_postfix_bits);
for (size_t i = 0; i < mb->distance_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(num_distance_codes,
&mb->distance_histograms[i].data_[0]);
&mb->distance_histograms[i].data_[0],
good_for_rle);
}
delete[] good_for_rle;
}
} // namespace brotli

View File

@ -60,7 +60,7 @@ inline void PrefixEncodeCopyDistance(size_t distance_code,
return;
}
distance_code -= kNumDistanceShortCodes + num_direct_codes; /* >= 0 */
distance_code += (1 << (postfix_bits + 2)); /* > 0 */
distance_code += (1u << (postfix_bits + 2u)); /* > 0 */
size_t bucket = Log2FloorNonZero(distance_code) - 1;
size_t postfix_mask = (1 << postfix_bits) - 1;
size_t postfix = distance_code & postfix_mask;

View File

@ -31,24 +31,52 @@ class RingBuffer {
: size_(1u << window_bits),
mask_((1u << window_bits) - 1),
tail_size_(1u << tail_bits),
pos_(0) {
static const size_t kSlackForEightByteHashingEverywhere = 7;
const size_t buflen = size_ + tail_size_;
data_ = new uint8_t[2 + buflen + kSlackForEightByteHashingEverywhere];
buffer_ = data_ + 2;
for (size_t i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
buffer_[buflen + i] = 0;
}
// Initialize the last two bytes and their copy to zero.
buffer_[-2] = buffer_[size_ - 2] = 0;
buffer_[-1] = buffer_[size_ - 1] = 0;
}
total_size_(size_ + tail_size_),
cur_size_(0),
pos_(0),
data_(0),
buffer_(0) {}
// Releases the backing allocation. data_ starts out as 0 in the constructor
// and is only ever assigned from realloc() in InitBuffer, so free() is the
// matching deallocator (free() of a null pointer does nothing).
// NOTE(review): this span is a rendered diff without +/- markers. The
// `delete [] data_;` line appears to be the pre-change statement that paired
// with the removed `new uint8_t[...]`; only one of the two releases may exist
// in the real source -- confirm.
~RingBuffer(void) {
delete [] data_;
free(data_);
}
// Allocates or re-allocates data_ so that it holds buflen bytes plus a slack
// region before (2 bytes) and after (kSlackForEightByteHashingEverywhere
// bytes) the buffer, and fills both slack regions with zeros.
//
// On return buffer_ points 2 bytes into data_ and cur_size_ == buflen.
// Because the storage is grown with realloc(), bytes already present in the
// buffer are carried over to the new allocation.
//
// If the allocation fails the process is aborted: the previous code stored
// the null result directly in data_ (leaking the old block) and then wrote
// through it, which is undefined behavior.
inline void InitBuffer(const uint32_t buflen) {
  static const size_t kSlackForEightByteHashingEverywhere = 7;
  // Do the whole size computation in size_t so that the "2 + buflen" part
  // cannot wrap around in 32-bit arithmetic.
  const size_t alloc_size =
      static_cast<size_t>(buflen) + 2 + kSlackForEightByteHashingEverywhere;
  // Keep the old pointer until realloc() is known to have succeeded; on
  // failure realloc() leaves the original block untouched.
  uint8_t* const new_data = static_cast<uint8_t*>(realloc(data_, alloc_size));
  if (new_data == 0) {
    abort();
  }
  data_ = new_data;
  cur_size_ = buflen;
  buffer_ = data_ + 2;
  // Leading slack: the two bytes just before buffer_.
  buffer_[-2] = buffer_[-1] = 0;
  // Trailing slack: the bytes just past the end of the buffer.
  for (size_t i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
    buffer_[cur_size_ + i] = 0;
  }
}
// Push bytes into the ring buffer.
void Write(const uint8_t *bytes, size_t n) {
if (pos_ == 0 && n < tail_size_) {
// Special case for the first write: to process the first block, we don't
// need to allocate the whole ringbuffer and we don't need the tail
// either. However, we do this memory usage optimization only if the
// first write is less than the tail size, which is also the input block
// size, otherwise it is likely that other blocks will follow and we
// will need to reallocate to the full size anyway.
pos_ = static_cast<uint32_t>(n);
InitBuffer(pos_);
memcpy(buffer_, bytes, n);
return;
}
if (cur_size_ < total_size_) {
// Lazily allocate the full buffer.
InitBuffer(total_size_);
// Initialize the last two bytes to zero, so that we don't have to worry
// later when we copy the last two bytes to the first two positions.
buffer_[size_ - 2] = 0;
buffer_[size_ - 1] = 0;
}
const size_t masked_pos = pos_ & mask_;
// The length of the writes is limited so that we do not need to worry
// about a write
@ -60,7 +88,7 @@ class RingBuffer {
// Split into two writes.
// Copy into the end of the buffer, including the tail buffer.
memcpy(&buffer_[masked_pos], bytes,
std::min(n, (size_ + tail_size_) - masked_pos));
std::min(n, total_size_ - masked_pos));
// Copy into the beginning of the buffer
memcpy(&buffer_[0], bytes + (size_ - masked_pos),
n - (size_ - masked_pos));
@ -100,7 +128,9 @@ class RingBuffer {
const uint32_t size_;
const uint32_t mask_;
const uint32_t tail_size_;
const uint32_t total_size_;
uint32_t cur_size_;
// Position to write in the ring buffer.
uint32_t pos_;
// The actual ring buffer containing the copy of the last two bytes, the data,

View File

@ -197,7 +197,7 @@ static size_t ToUpperCase(uint8_t *p, size_t len) {
inline std::string TransformWord(
WordTransformType transform_type, const uint8_t* word, size_t len) {
if (transform_type <= kOmitLast9) {
if (len <= transform_type) {
if (len <= static_cast<size_t>(transform_type)) {
return std::string();
}
return std::string(word, word + len - transform_type);

View File

@ -24,4 +24,6 @@ typedef __int64 int64_t;
#include <stdint.h>
#endif /* defined(_MSC_VER) && (_MSC_VER < 1600) */
/* Builds a 64-bit value: `high` is converted to uint64_t and shifted into the
   upper 32 bits, then OR-ed with `low`. Both arguments are parenthesized so
   that an operator expression passed as an argument (e.g. `c ? a : b`) is
   evaluated as a unit before being combined. */
#define MAKE_UINT64_T(high, low) ((((uint64_t)(high)) << 32) | (low))
#endif /* BROTLI_ENC_TYPES_H_ */