mirror of
https://github.com/google/brotli.git
synced 2024-11-25 21:10:05 +00:00
Reduce memory usage of brotli encoder at quality 10 and 11.
This commit is contained in:
parent
cfba2db7b3
commit
b820c39bd9
@ -21,8 +21,6 @@ namespace brotli {
|
||||
// The maximum length for which the zopflification uses distinct distances.
|
||||
static const uint16_t kMaxZopfliLen = 325;
|
||||
|
||||
static const double kInfinity = std::numeric_limits<double>::infinity();
|
||||
|
||||
// Histogram based cost model for zopflification.
|
||||
class ZopfliCostModel {
|
||||
public:
|
||||
@ -42,7 +40,7 @@ class ZopfliCostModel {
|
||||
size_t pos = position - last_insert_len;
|
||||
for (size_t i = 0; i < num_commands; i++) {
|
||||
size_t inslength = commands[i].insert_len_;
|
||||
size_t copylength = commands[i].copy_len_;
|
||||
size_t copylength = commands[i].copy_len();
|
||||
size_t distcode = commands[i].dist_prefix_;
|
||||
size_t cmdcode = commands[i].cmd_prefix_;
|
||||
|
||||
@ -56,7 +54,7 @@ class ZopfliCostModel {
|
||||
pos += inslength + copylength;
|
||||
}
|
||||
|
||||
std::vector<double> cost_literal;
|
||||
std::vector<float> cost_literal;
|
||||
Set(histogram_literal, &cost_literal);
|
||||
Set(histogram_cmd, &cost_cmd_);
|
||||
Set(histogram_dist, &cost_dist_);
|
||||
@ -77,26 +75,25 @@ class ZopfliCostModel {
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask) {
|
||||
std::vector<float> literal_cost(num_bytes + 1);
|
||||
literal_costs_.resize(num_bytes + 2);
|
||||
EstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
|
||||
ringbuffer, &literal_cost[0]);
|
||||
literal_costs_.resize(num_bytes + 1);
|
||||
ringbuffer, &literal_costs_[1]);
|
||||
literal_costs_[0] = 0.0;
|
||||
for (size_t i = 0; i < num_bytes; ++i) {
|
||||
literal_costs_[i + 1] = literal_costs_[i] + literal_cost[i];
|
||||
literal_costs_[i + 1] += literal_costs_[i];
|
||||
}
|
||||
cost_cmd_.resize(kNumCommandPrefixes);
|
||||
cost_dist_.resize(kNumDistancePrefixes);
|
||||
for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
|
||||
cost_cmd_[i] = FastLog2(11 + i);
|
||||
cost_cmd_[i] = static_cast<float>(FastLog2(11 + i));
|
||||
}
|
||||
for (uint32_t i = 0; i < kNumDistancePrefixes; ++i) {
|
||||
cost_dist_[i] = FastLog2(20 + i);
|
||||
cost_dist_[i] = static_cast<float>(FastLog2(20 + i));
|
||||
}
|
||||
min_cost_cmd_ = FastLog2(11);
|
||||
min_cost_cmd_ = static_cast<float>(FastLog2(11));
|
||||
}
|
||||
|
||||
double GetCommandCost(
|
||||
float GetCommandCost(
|
||||
size_t dist_code, size_t length_code, size_t insert_length) const {
|
||||
uint16_t inscode = GetInsertLengthCode(insert_length);
|
||||
uint16_t copycode = GetCopyLengthCode(length_code);
|
||||
@ -106,29 +103,29 @@ class ZopfliCostModel {
|
||||
PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
|
||||
uint32_t distnumextra = distextra >> 24;
|
||||
|
||||
double result = static_cast<double>(
|
||||
kInsExtra[inscode] + kCopyExtra[copycode] + distnumextra);
|
||||
float result = static_cast<float>(
|
||||
GetInsertExtra(inscode) + GetCopyExtra(copycode) + distnumextra);
|
||||
result += cost_cmd_[cmdcode];
|
||||
if (cmdcode >= 128) result += cost_dist_[dist_symbol];
|
||||
return result;
|
||||
}
|
||||
|
||||
double GetLiteralCosts(size_t from, size_t to) const {
|
||||
float GetLiteralCosts(size_t from, size_t to) const {
|
||||
return literal_costs_[to] - literal_costs_[from];
|
||||
}
|
||||
|
||||
double GetMinCostCmd(void) const {
|
||||
float GetMinCostCmd(void) const {
|
||||
return min_cost_cmd_;
|
||||
}
|
||||
|
||||
private:
|
||||
void Set(const std::vector<uint32_t>& histogram, std::vector<double>* cost) {
|
||||
void Set(const std::vector<uint32_t>& histogram, std::vector<float>* cost) {
|
||||
cost->resize(histogram.size());
|
||||
size_t sum = 0;
|
||||
for (size_t i = 0; i < histogram.size(); i++) {
|
||||
sum += histogram[i];
|
||||
}
|
||||
double log2sum = FastLog2(sum);
|
||||
float log2sum = static_cast<float>(FastLog2(sum));
|
||||
for (size_t i = 0; i < histogram.size(); i++) {
|
||||
if (histogram[i] == 0) {
|
||||
(*cost)[i] = log2sum + 2;
|
||||
@ -136,33 +133,20 @@ class ZopfliCostModel {
|
||||
}
|
||||
|
||||
// Shannon bits for this symbol.
|
||||
(*cost)[i] = log2sum - FastLog2(histogram[i]);
|
||||
(*cost)[i] = log2sum - static_cast<float>(FastLog2(histogram[i]));
|
||||
|
||||
// Cannot be coded with less than 1 bit
|
||||
if ((*cost)[i] < 1) (*cost)[i] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<double> cost_cmd_; // The insert and copy length symbols.
|
||||
std::vector<double> cost_dist_;
|
||||
std::vector<float> cost_cmd_; // The insert and copy length symbols.
|
||||
std::vector<float> cost_dist_;
|
||||
// Cumulative costs of literals per position in the stream.
|
||||
std::vector<double> literal_costs_;
|
||||
double min_cost_cmd_;
|
||||
std::vector<float> literal_costs_;
|
||||
float min_cost_cmd_;
|
||||
};
|
||||
|
||||
inline void SetDistanceCache(size_t distance,
|
||||
size_t distance_code,
|
||||
size_t max_distance,
|
||||
const int* dist_cache,
|
||||
int* result_dist_cache) {
|
||||
if (distance <= max_distance && distance_code > 0) {
|
||||
result_dist_cache[0] = static_cast<int>(distance);
|
||||
memcpy(&result_dist_cache[1], dist_cache, 3 * sizeof(dist_cache[0]));
|
||||
} else {
|
||||
memcpy(result_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
|
||||
}
|
||||
}
|
||||
|
||||
inline size_t ComputeDistanceCode(size_t distance,
|
||||
size_t max_distance,
|
||||
int quality,
|
||||
@ -194,47 +178,28 @@ inline size_t ComputeDistanceCode(size_t distance,
|
||||
return distance + 15;
|
||||
}
|
||||
|
||||
struct ZopfliNode {
|
||||
ZopfliNode() : length(1),
|
||||
distance(0),
|
||||
distance_code(0),
|
||||
length_code(0),
|
||||
insert_length(0),
|
||||
cost(kInfinity) {}
|
||||
|
||||
// best length to get up to this byte (not including this byte itself)
|
||||
uint32_t length;
|
||||
// distance associated with the length
|
||||
uint32_t distance;
|
||||
uint32_t distance_code;
|
||||
int distance_cache[4];
|
||||
// length code associated with the length - usually the same as length,
|
||||
// except in case of length-changing dictionary transformation.
|
||||
uint32_t length_code;
|
||||
// number of literal inserts before this copy
|
||||
uint32_t insert_length;
|
||||
// smallest cost to get to this byte from the beginning, as found so far
|
||||
double cost;
|
||||
};
|
||||
|
||||
// REQUIRES: len >= 2, start_pos <= pos
|
||||
// REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity
|
||||
// Maintains the "ZopfliNode array invariant".
|
||||
inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
|
||||
size_t len, size_t len_code, size_t dist,
|
||||
size_t dist_code, size_t max_dist,
|
||||
const int* dist_cache, double cost) {
|
||||
size_t short_code, float cost) {
|
||||
ZopfliNode& next = nodes[pos + len];
|
||||
next.length = static_cast<uint32_t>(len);
|
||||
next.length_code = static_cast<uint32_t>(len_code);
|
||||
next.distance = static_cast<uint32_t>(dist);
|
||||
next.distance_code = static_cast<uint32_t>(dist_code);
|
||||
next.length = static_cast<uint32_t>(len | ((len + 9u - len_code) << 24));
|
||||
next.distance = static_cast<uint32_t>(dist | (short_code << 25));
|
||||
next.insert_length = static_cast<uint32_t>(pos - start_pos);
|
||||
next.cost = cost;
|
||||
SetDistanceCache(dist, dist_code, max_dist, dist_cache,
|
||||
&next.distance_cache[0]);
|
||||
}
|
||||
|
||||
// Maintains the smallest 2^k cost differences together with their positions
|
||||
class StartPosQueue {
|
||||
public:
|
||||
struct PosData {
|
||||
size_t pos;
|
||||
int distance_cache[4];
|
||||
float costdiff;
|
||||
};
|
||||
|
||||
explicit StartPosQueue(int bits)
|
||||
: mask_((1u << bits) - 1), q_(1 << bits), idx_(0) {}
|
||||
|
||||
@ -242,21 +207,15 @@ class StartPosQueue {
|
||||
idx_ = 0;
|
||||
}
|
||||
|
||||
void Push(size_t pos, double costdiff) {
|
||||
if (costdiff == kInfinity) {
|
||||
// We can't start a command from an unreachable start position.
|
||||
// E.g. position 1 in a stream is always unreachable, because all commands
|
||||
// have a copy of at least length 2.
|
||||
return;
|
||||
}
|
||||
size_t offset = -idx_ & mask_;
|
||||
void Push(const StartPosQueue::PosData& posdata) {
|
||||
size_t offset = ~idx_ & mask_;
|
||||
++idx_;
|
||||
size_t len = size();
|
||||
q_[offset] = std::make_pair(pos, costdiff);
|
||||
q_[offset] = posdata;
|
||||
/* Restore the sorted order. In the list of |len| items at most |len - 1|
|
||||
adjacent element comparisons / swaps are required. */
|
||||
for (size_t i = 1; i < len; ++i) {
|
||||
if (q_[offset & mask_].second > q_[(offset + 1) & mask_].second) {
|
||||
if (q_[offset & mask_].costdiff > q_[(offset + 1) & mask_].costdiff) {
|
||||
std::swap(q_[offset & mask_], q_[(offset + 1) & mask_]);
|
||||
}
|
||||
++offset;
|
||||
@ -265,32 +224,32 @@ class StartPosQueue {
|
||||
|
||||
size_t size(void) const { return std::min(idx_, mask_ + 1); }
|
||||
|
||||
size_t GetStartPos(size_t k) const {
|
||||
return q_[(k + 1 - idx_) & mask_].first;
|
||||
const StartPosQueue::PosData& GetStartPosData(size_t k) const {
|
||||
return q_[(k - idx_) & mask_];
|
||||
}
|
||||
|
||||
private:
|
||||
const size_t mask_;
|
||||
std::vector<std::pair<size_t, double> > q_;
|
||||
std::vector<PosData> q_;
|
||||
size_t idx_;
|
||||
};
|
||||
|
||||
// Returns the minimum possible copy length that can improve the cost of any
|
||||
// future position.
|
||||
size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
|
||||
const std::vector<ZopfliNode>& nodes,
|
||||
const ZopfliNode* nodes,
|
||||
const ZopfliCostModel& model,
|
||||
size_t pos,
|
||||
double min_cost_cmd) {
|
||||
const size_t num_bytes,
|
||||
const size_t pos) {
|
||||
// Compute the minimum possible cost of reaching any future position.
|
||||
const size_t start0 = queue.GetStartPos(0);
|
||||
double min_cost = (nodes[start0].cost +
|
||||
const size_t start0 = queue.GetStartPosData(0).pos;
|
||||
float min_cost = (nodes[start0].cost +
|
||||
model.GetLiteralCosts(start0, pos) +
|
||||
min_cost_cmd);
|
||||
model.GetMinCostCmd());
|
||||
size_t len = 2;
|
||||
size_t next_len_bucket = 4;
|
||||
size_t next_len_offset = 10;
|
||||
while (pos + len < nodes.size() && nodes[pos + len].cost <= min_cost) {
|
||||
while (pos + len <= num_bytes && nodes[pos + len].cost <= min_cost) {
|
||||
// We already reached (pos + len) with no more cost than the minimum
|
||||
// possible cost of reaching anything from this pos, so there is no point in
|
||||
// looking for lengths <= len.
|
||||
@ -298,7 +257,7 @@ size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
|
||||
if (len == next_len_offset) {
|
||||
// We reached the next copy length code bucket, so we add one more
|
||||
// extra bit to the minimum cost.
|
||||
min_cost += 1.0;
|
||||
min_cost += static_cast<float>(1.0);
|
||||
next_len_offset += next_len_bucket;
|
||||
next_len_bucket *= 2;
|
||||
}
|
||||
@ -306,56 +265,85 @@ size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
|
||||
return len;
|
||||
}
|
||||
|
||||
void ZopfliIterate(size_t num_bytes,
|
||||
size_t position,
|
||||
// Fills in dist_cache[0..3] with the last four distances (as defined by
|
||||
// Section 4. of the Spec) that would be used at (block_start + pos) if we
|
||||
// used the shortest path of commands from block_start, computed from
|
||||
// nodes[0..pos]. The last four distances at block_start are in
|
||||
// starting_dist_cache[0..3].
|
||||
// REQUIRES: nodes[pos].cost < kInfinity
|
||||
// REQUIRES: nodes[0..pos] satisfies the "ZopfliNode array invariant".
|
||||
void ComputeDistanceCache(const size_t block_start,
|
||||
const size_t pos,
|
||||
const size_t max_backward,
|
||||
const int* starting_dist_cache,
|
||||
const ZopfliNode* nodes,
|
||||
int* dist_cache) {
|
||||
int idx = 0;
|
||||
size_t p = pos;
|
||||
// Because of prerequisite, does at most (pos + 1) / 2 iterations.
|
||||
while (idx < 4 && p > 0) {
|
||||
const size_t clen = nodes[p].copy_length();
|
||||
const size_t ilen = nodes[p].insert_length;
|
||||
const size_t dist = nodes[p].copy_distance();
|
||||
// Since block_start + p is the end position of the command, the copy part
|
||||
// starts from block_start + p - clen. Distances that are greater than this
|
||||
// or greater than max_backward are static dictionary references, and do
|
||||
// not update the last distances. Also distance code 0 (last distance)
|
||||
// does not update the last distances.
|
||||
if (dist + clen <= block_start + p && dist <= max_backward &&
|
||||
nodes[p].distance_code() > 0) {
|
||||
dist_cache[idx++] = static_cast<int>(dist);
|
||||
}
|
||||
// Because of prerequisite, p >= clen + ilen >= 2.
|
||||
p -= clen + ilen;
|
||||
}
|
||||
for (; idx < 4; ++idx) {
|
||||
dist_cache[idx] = *starting_dist_cache++;
|
||||
}
|
||||
}
|
||||
|
||||
void UpdateNodes(const size_t num_bytes,
|
||||
const size_t block_start,
|
||||
const size_t pos,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const size_t ringbuffer_mask,
|
||||
const size_t max_backward_limit,
|
||||
const ZopfliCostModel& model,
|
||||
const std::vector<uint32_t>& num_matches,
|
||||
const std::vector<BackwardMatch>& matches,
|
||||
int* dist_cache,
|
||||
size_t* last_insert_len,
|
||||
Command* commands,
|
||||
size_t* num_commands,
|
||||
size_t* num_literals) {
|
||||
const Command * const orig_commands = commands;
|
||||
|
||||
std::vector<ZopfliNode> nodes(num_bytes + 1);
|
||||
nodes[0].length = 0;
|
||||
nodes[0].cost = 0;
|
||||
memcpy(nodes[0].distance_cache, dist_cache, 4 * sizeof(dist_cache[0]));
|
||||
|
||||
StartPosQueue queue(3);
|
||||
const double min_cost_cmd = model.GetMinCostCmd();
|
||||
|
||||
size_t cur_match_pos = 0;
|
||||
for (size_t i = 0; i + 3 < num_bytes; i++) {
|
||||
size_t cur_ix = position + i;
|
||||
const int* starting_dist_cache,
|
||||
const size_t num_matches,
|
||||
const BackwardMatch* matches,
|
||||
const ZopfliCostModel* model,
|
||||
StartPosQueue* queue,
|
||||
ZopfliNode* nodes) {
|
||||
size_t cur_ix = block_start + pos;
|
||||
size_t cur_ix_masked = cur_ix & ringbuffer_mask;
|
||||
size_t max_distance = std::min(cur_ix, max_backward_limit);
|
||||
size_t max_length = num_bytes - i;
|
||||
|
||||
queue.Push(i, nodes[i].cost - model.GetLiteralCosts(0, i));
|
||||
if (nodes[pos].cost <= model->GetLiteralCosts(0, pos)) {
|
||||
StartPosQueue::PosData posdata;
|
||||
posdata.pos = pos;
|
||||
posdata.costdiff = nodes[pos].cost - model->GetLiteralCosts(0, pos);
|
||||
ComputeDistanceCache(block_start, pos, max_backward_limit,
|
||||
starting_dist_cache, nodes, posdata.distance_cache);
|
||||
queue->Push(posdata);
|
||||
}
|
||||
|
||||
const size_t min_len = ComputeMinimumCopyLength(queue, nodes, model,
|
||||
i, min_cost_cmd);
|
||||
const size_t min_len = ComputeMinimumCopyLength(
|
||||
*queue, nodes, *model, num_bytes, pos);
|
||||
|
||||
// Go over the command starting positions in order of increasing cost
|
||||
// difference.
|
||||
for (size_t k = 0; k < 5 && k < queue.size(); ++k) {
|
||||
const size_t start = queue.GetStartPos(k);
|
||||
const double start_costdiff =
|
||||
nodes[start].cost - model.GetLiteralCosts(0, start);
|
||||
const int* dist_cache2 = &nodes[start].distance_cache[0];
|
||||
for (size_t k = 0; k < 5 && k < queue->size(); ++k) {
|
||||
const StartPosQueue::PosData& posdata = queue->GetStartPosData(k);
|
||||
const size_t start = posdata.pos;
|
||||
const float start_costdiff = posdata.costdiff;
|
||||
|
||||
// Look for last distance matches using the distance cache from this
|
||||
// starting position.
|
||||
size_t best_len = min_len - 1;
|
||||
for (size_t j = 0; j < kNumDistanceShortCodes; ++j) {
|
||||
const size_t idx = kDistanceCacheIndex[j];
|
||||
const size_t backward =
|
||||
static_cast<size_t>(dist_cache2[idx] + kDistanceCacheOffset[j]);
|
||||
const size_t backward = static_cast<size_t>(posdata.distance_cache[idx] +
|
||||
kDistanceCacheOffset[j]);
|
||||
size_t prev_ix = cur_ix - backward;
|
||||
if (prev_ix >= cur_ix) {
|
||||
continue;
|
||||
@ -374,14 +362,13 @@ void ZopfliIterate(size_t num_bytes,
|
||||
const size_t len =
|
||||
FindMatchLengthWithLimit(&ringbuffer[prev_ix],
|
||||
&ringbuffer[cur_ix_masked],
|
||||
max_length);
|
||||
num_bytes - pos);
|
||||
for (size_t l = best_len + 1; l <= len; ++l) {
|
||||
const size_t inslen = i - start;
|
||||
double cmd_cost = model.GetCommandCost(j, l, inslen);
|
||||
double cost = start_costdiff + cmd_cost + model.GetLiteralCosts(0, i);
|
||||
if (cost < nodes[i + l].cost) {
|
||||
UpdateZopfliNode(&nodes[0], i, start, l, l, backward, j,
|
||||
max_distance, dist_cache2, cost);
|
||||
const size_t inslen = pos - start;
|
||||
float cmd_cost = model->GetCommandCost(j, l, inslen);
|
||||
float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos);
|
||||
if (cost < nodes[pos + l].cost) {
|
||||
UpdateZopfliNode(&nodes[0], pos, start, l, l, backward, j + 1, cost);
|
||||
}
|
||||
best_len = l;
|
||||
}
|
||||
@ -394,8 +381,8 @@ void ZopfliIterate(size_t num_bytes,
|
||||
|
||||
// Loop through all possible copy lengths at this position.
|
||||
size_t len = min_len;
|
||||
for (size_t j = 0; j < num_matches[i]; ++j) {
|
||||
BackwardMatch match = matches[cur_match_pos + j];
|
||||
for (size_t j = 0; j < num_matches; ++j) {
|
||||
BackwardMatch match = matches[j];
|
||||
size_t dist = match.distance;
|
||||
bool is_dictionary_match = dist > max_distance;
|
||||
// We already tried all possible last distance matches, so we can use
|
||||
@ -410,60 +397,62 @@ void ZopfliIterate(size_t num_bytes,
|
||||
}
|
||||
for (; len <= max_len; ++len) {
|
||||
size_t len_code = is_dictionary_match ? match.length_code() : len;
|
||||
const size_t inslen = i - start;
|
||||
double cmd_cost = model.GetCommandCost(dist_code, len_code, inslen);
|
||||
double cost = start_costdiff + cmd_cost + model.GetLiteralCosts(0, i);
|
||||
if (cost < nodes[i + len].cost) {
|
||||
UpdateZopfliNode(&nodes[0], i, start, len, len_code, dist,
|
||||
dist_code, max_distance, dist_cache2, cost);
|
||||
const size_t inslen = pos - start;
|
||||
float cmd_cost = model->GetCommandCost(dist_code, len_code, inslen);
|
||||
float cost = start_costdiff + cmd_cost + model->GetLiteralCosts(0, pos);
|
||||
if (cost < nodes[pos + len].cost) {
|
||||
UpdateZopfliNode(&nodes[0], pos, start, len, len_code, dist, 0, cost);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cur_match_pos += num_matches[i];
|
||||
|
||||
// The zopflification can be too slow in case of very long lengths, so in
|
||||
// such case skip it all, it does not cost a lot of compression ratio.
|
||||
if (num_matches[i] == 1 &&
|
||||
matches[cur_match_pos - 1].length() > kMaxZopfliLen) {
|
||||
i += matches[cur_match_pos - 1].length() - 1;
|
||||
queue.Clear();
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<uint32_t> backwards;
|
||||
void ComputeShortestPathFromNodes(size_t num_bytes,
|
||||
const ZopfliNode* nodes,
|
||||
std::vector<uint32_t>* path) {
|
||||
std::vector<uint32_t> backwards(num_bytes / 2 + 1);
|
||||
size_t index = num_bytes;
|
||||
while (nodes[index].cost == kInfinity) --index;
|
||||
size_t num_commands = 0;
|
||||
while (index != 0) {
|
||||
size_t len = nodes[index].length + nodes[index].insert_length;
|
||||
backwards.push_back(static_cast<uint32_t>(len));
|
||||
size_t len = nodes[index].command_length();
|
||||
backwards[num_commands++] = static_cast<uint32_t>(len);
|
||||
index -= len;
|
||||
}
|
||||
|
||||
std::vector<uint32_t> path;
|
||||
for (size_t i = backwards.size(); i > 0; i--) {
|
||||
path.push_back(backwards[i - 1]);
|
||||
path->resize(num_commands);
|
||||
for (size_t i = num_commands, j = 0; i > 0; --i, ++j) {
|
||||
(*path)[j] = backwards[i - 1];
|
||||
}
|
||||
}
|
||||
|
||||
void ZopfliCreateCommands(const size_t num_bytes,
|
||||
const size_t block_start,
|
||||
const size_t max_backward_limit,
|
||||
const std::vector<uint32_t>& path,
|
||||
const ZopfliNode* nodes,
|
||||
int* dist_cache,
|
||||
size_t* last_insert_len,
|
||||
Command* commands,
|
||||
size_t* num_literals) {
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < path.size(); i++) {
|
||||
const ZopfliNode& next = nodes[pos + path[i]];
|
||||
size_t copy_length = next.length;
|
||||
size_t copy_length = next.copy_length();
|
||||
size_t insert_length = next.insert_length;
|
||||
pos += insert_length;
|
||||
if (i == 0) {
|
||||
insert_length += *last_insert_len;
|
||||
*last_insert_len = 0;
|
||||
}
|
||||
size_t distance = next.distance;
|
||||
size_t len_code = next.length_code;
|
||||
size_t max_distance = std::min(position + pos, max_backward_limit);
|
||||
size_t distance = next.copy_distance();
|
||||
size_t len_code = next.length_code();
|
||||
size_t max_distance = std::min(block_start + pos, max_backward_limit);
|
||||
bool is_dictionary = (distance > max_distance);
|
||||
size_t dist_code = next.distance_code;
|
||||
size_t dist_code = next.distance_code();
|
||||
|
||||
Command cmd(insert_length, copy_length, len_code, dist_code);
|
||||
*commands++ = cmd;
|
||||
commands[i] = cmd;
|
||||
|
||||
if (!is_dictionary && dist_code > 0) {
|
||||
dist_cache[3] = dist_cache[2];
|
||||
@ -473,11 +462,85 @@ void ZopfliIterate(size_t num_bytes,
|
||||
}
|
||||
|
||||
*num_literals += insert_length;
|
||||
insert_length = 0;
|
||||
pos += copy_length;
|
||||
}
|
||||
*last_insert_len += num_bytes - pos;
|
||||
*num_commands += static_cast<size_t>(commands - orig_commands);
|
||||
}
|
||||
|
||||
void ZopfliIterate(size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const size_t max_backward_limit,
|
||||
const int* dist_cache,
|
||||
const ZopfliCostModel& model,
|
||||
const std::vector<uint32_t>& num_matches,
|
||||
const std::vector<BackwardMatch>& matches,
|
||||
ZopfliNode* nodes,
|
||||
std::vector<uint32_t>* path) {
|
||||
nodes[0].length = 0;
|
||||
nodes[0].cost = 0;
|
||||
StartPosQueue queue(3);
|
||||
size_t cur_match_pos = 0;
|
||||
for (size_t i = 0; i + 3 < num_bytes; i++) {
|
||||
UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
|
||||
max_backward_limit, dist_cache, num_matches[i],
|
||||
&matches[cur_match_pos], &model, &queue, &nodes[0]);
|
||||
cur_match_pos += num_matches[i];
|
||||
// The zopflification can be too slow in case of very long lengths, so in
|
||||
// such case skip it all, it does not cost a lot of compression ratio.
|
||||
if (num_matches[i] == 1 &&
|
||||
matches[cur_match_pos - 1].length() > kMaxZopfliLen) {
|
||||
i += matches[cur_match_pos - 1].length() - 1;
|
||||
queue.Clear();
|
||||
}
|
||||
}
|
||||
ComputeShortestPathFromNodes(num_bytes, &nodes[0], path);
|
||||
}
|
||||
|
||||
|
||||
void ZopfliComputeShortestPath(size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const size_t max_backward_limit,
|
||||
const int* dist_cache,
|
||||
Hashers::H10* hasher,
|
||||
ZopfliNode* nodes,
|
||||
std::vector<uint32_t>* path) {
|
||||
nodes[0].length = 0;
|
||||
nodes[0].cost = 0;
|
||||
ZopfliCostModel* model = new ZopfliCostModel;
|
||||
model->SetFromLiteralCosts(num_bytes, position,
|
||||
ringbuffer, ringbuffer_mask);
|
||||
StartPosQueue queue(3);
|
||||
BackwardMatch matches[Hashers::H10::kMaxNumMatches];
|
||||
for (size_t i = 0; i + 3 < num_bytes; i++) {
|
||||
const size_t max_distance = std::min(position + i, max_backward_limit);
|
||||
size_t num_matches = hasher->FindAllMatches(
|
||||
ringbuffer, ringbuffer_mask, position + i, num_bytes - i, max_distance,
|
||||
matches);
|
||||
if (num_matches > 0 &&
|
||||
matches[num_matches - 1].length() > kMaxZopfliLen) {
|
||||
matches[0] = matches[num_matches - 1];
|
||||
num_matches = 1;
|
||||
}
|
||||
UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
|
||||
max_backward_limit, dist_cache, num_matches, matches,
|
||||
model, &queue, nodes);
|
||||
if (num_matches == 1 && matches[0].length() > kMaxZopfliLen) {
|
||||
for (size_t j = 1; j < matches[0].length() && i + 4 < num_bytes; ++j) {
|
||||
++i;
|
||||
if (matches[0].length() - j < 64 &&
|
||||
num_bytes - i >= kMaxTreeCompLength) {
|
||||
hasher->Store(ringbuffer, ringbuffer_mask, position + i);
|
||||
}
|
||||
}
|
||||
queue.Clear();
|
||||
}
|
||||
}
|
||||
delete model;
|
||||
ComputeShortestPathFromNodes(num_bytes, nodes, path);
|
||||
}
|
||||
|
||||
template<typename Hasher>
|
||||
@ -527,7 +590,7 @@ void CreateBackwardReferences(size_t num_bytes,
|
||||
size_t apply_random_heuristics = i + random_heuristics_window_size;
|
||||
|
||||
// Minimum score to accept a backward reference.
|
||||
const int kMinScore = 4.0;
|
||||
const double kMinScore = 4.0;
|
||||
|
||||
while (i + Hasher::kHashTypeLength - 1 < i_end) {
|
||||
size_t max_length = i_end - i;
|
||||
@ -649,16 +712,23 @@ void CreateBackwardReferences(size_t num_bytes,
|
||||
if (zopflify) {
|
||||
Hashers::H10* hasher = hashers->hash_h10;
|
||||
hasher->Init(lgwin, position, num_bytes, is_last);
|
||||
if (num_bytes >= 3 && position >= kMaxTreeCompLength) {
|
||||
// Store the last `kMaxTreeCompLength - 1` positions in the hasher.
|
||||
// These could not be calculated before, since they require knowledge
|
||||
// of both the previous and the current block.
|
||||
for (size_t i = position - kMaxTreeCompLength + 1; i < position; ++i) {
|
||||
hasher->Store(ringbuffer, ringbuffer_mask, i, num_bytes + position - i);
|
||||
}
|
||||
}
|
||||
hasher->StitchToPreviousBlock(num_bytes, position,
|
||||
ringbuffer, ringbuffer_mask);
|
||||
// Set maximum distance, see section 9.1. of the spec.
|
||||
const size_t max_backward_limit = (1 << lgwin) - 16;
|
||||
if (quality == 10) {
|
||||
std::vector<ZopfliNode> nodes(num_bytes + 1);
|
||||
std::vector<uint32_t> path;
|
||||
ZopfliComputeShortestPath(num_bytes, position,
|
||||
ringbuffer, ringbuffer_mask,
|
||||
max_backward_limit, dist_cache, hasher,
|
||||
&nodes[0], &path);
|
||||
ZopfliCreateCommands(num_bytes, position, max_backward_limit, path,
|
||||
&nodes[0], dist_cache, last_insert_len, commands,
|
||||
num_literals);
|
||||
*num_commands += path.size();
|
||||
return;
|
||||
}
|
||||
std::vector<uint32_t> num_matches(num_bytes);
|
||||
std::vector<BackwardMatch> matches(4 * num_bytes);
|
||||
size_t cur_match_pos = 0;
|
||||
@ -686,9 +756,8 @@ void CreateBackwardReferences(size_t num_bytes,
|
||||
num_matches[i] = 1;
|
||||
for (size_t j = 1; j < match_len; ++j) {
|
||||
++i;
|
||||
if (match_len - j < 64) {
|
||||
hasher->Store(ringbuffer, ringbuffer_mask, position + i,
|
||||
num_bytes - i);
|
||||
if (match_len - j < 64 && num_bytes - i >= kMaxTreeCompLength) {
|
||||
hasher->Store(ringbuffer, ringbuffer_mask, position + i);
|
||||
}
|
||||
num_matches[i] = 0;
|
||||
}
|
||||
@ -719,9 +788,15 @@ void CreateBackwardReferences(size_t num_bytes,
|
||||
*num_literals = orig_num_literals;
|
||||
*last_insert_len = orig_last_insert_len;
|
||||
memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
|
||||
std::vector<ZopfliNode> nodes(num_bytes + 1);
|
||||
std::vector<uint32_t> path;
|
||||
ZopfliIterate(num_bytes, position, ringbuffer, ringbuffer_mask,
|
||||
max_backward_limit, model, num_matches, matches, dist_cache,
|
||||
last_insert_len, commands, num_commands, num_literals);
|
||||
max_backward_limit, dist_cache, model, num_matches, matches,
|
||||
&nodes[0], &path);
|
||||
ZopfliCreateCommands(num_bytes, position, max_backward_limit, path,
|
||||
&nodes[0], dist_cache, last_insert_len, commands,
|
||||
num_literals);
|
||||
*num_commands += path.size();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -9,6 +9,8 @@
|
||||
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
|
||||
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "./hash.h"
|
||||
#include "./command.h"
|
||||
#include "./types.h"
|
||||
@ -34,6 +36,81 @@ void CreateBackwardReferences(size_t num_bytes,
|
||||
size_t* num_commands,
|
||||
size_t* num_literals);
|
||||
|
||||
static const float kInfinity = std::numeric_limits<float>::infinity();
|
||||
|
||||
struct ZopfliNode {
|
||||
ZopfliNode(void) : length(1),
|
||||
distance(0),
|
||||
insert_length(0),
|
||||
cost(kInfinity) {}
|
||||
|
||||
inline uint32_t copy_length() const {
|
||||
return length & 0xffffff;
|
||||
}
|
||||
|
||||
inline uint32_t length_code() const {
|
||||
const uint32_t modifier = length >> 24;
|
||||
return copy_length() + 9u - modifier;
|
||||
}
|
||||
|
||||
inline uint32_t copy_distance() const {
|
||||
return distance & 0x1ffffff;
|
||||
}
|
||||
|
||||
inline uint32_t distance_code() const {
|
||||
const uint32_t short_code = distance >> 25;
|
||||
return short_code == 0 ? copy_distance() + 15 : short_code - 1;
|
||||
}
|
||||
|
||||
inline uint32_t command_length() const {
|
||||
return copy_length() + insert_length;
|
||||
}
|
||||
|
||||
// best length to get up to this byte (not including this byte itself)
|
||||
// the highest 8 bits are used to reconstruct the length code
|
||||
uint32_t length;
|
||||
// distance associated with the length
|
||||
// the highest 7 bits contain the distance short code + 1 (or zero if no short code)
|
||||
uint32_t distance;
|
||||
// number of literal inserts before this copy
|
||||
uint32_t insert_length;
|
||||
// smallest cost to get to this byte from the beginning, as found so far
|
||||
float cost;
|
||||
};
|
||||
|
||||
// Computes the shortest path of commands from position to at most
|
||||
// position + num_bytes.
|
||||
//
|
||||
// On return, path->size() is the number of commands found and path[i] is the
|
||||
// length of the ith command (copy length plus insert length).
|
||||
// Note that the sum of the lengths of all commands can be less than num_bytes.
|
||||
//
|
||||
// On return, the nodes[0..num_bytes] array will have the following
|
||||
// "ZopfliNode array invariant":
|
||||
// For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
|
||||
// (1) nodes[i].copy_length() >= 2
|
||||
// (2) nodes[i].command_length() <= i and
|
||||
// (3) nodes[i - nodes[i].command_length()].cost < kInfinity
|
||||
void ZopfliComputeShortestPath(size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const size_t max_backward_limit,
|
||||
const int* dist_cache,
|
||||
Hashers::H10* hasher,
|
||||
ZopfliNode* nodes,
|
||||
std::vector<uint32_t>* path);
|
||||
|
||||
void ZopfliCreateCommands(const size_t num_bytes,
|
||||
const size_t block_start,
|
||||
const size_t max_backward_limit,
|
||||
const std::vector<uint32_t>& path,
|
||||
const ZopfliNode* nodes,
|
||||
int* dist_cache,
|
||||
size_t* last_insert_len,
|
||||
Command* commands,
|
||||
size_t* num_literals);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_BACKWARD_REFERENCES_H_
|
||||
|
@ -48,38 +48,62 @@ static inline double BitsEntropy(const uint32_t *population, size_t size) {
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
template<int kSize>
|
||||
double PopulationCost(const Histogram<kSize>& histogram) {
|
||||
static const double kOneSymbolHistogramCost = 12;
|
||||
static const double kTwoSymbolHistogramCost = 20;
|
||||
static const double kThreeSymbolHistogramCost = 28;
|
||||
static const double kFourSymbolHistogramCost = 37;
|
||||
if (histogram.total_count_ == 0) {
|
||||
return 12;
|
||||
return kOneSymbolHistogramCost;
|
||||
}
|
||||
int count = 0;
|
||||
int s[5];
|
||||
for (int i = 0; i < kSize; ++i) {
|
||||
if (histogram.data_[i] > 0) {
|
||||
s[count] = i;
|
||||
++count;
|
||||
if (count > 4) break;
|
||||
}
|
||||
}
|
||||
if (count == 1) {
|
||||
return 12;
|
||||
return kOneSymbolHistogramCost;
|
||||
}
|
||||
if (count == 2) {
|
||||
return static_cast<double>(20 + histogram.total_count_);
|
||||
return (kTwoSymbolHistogramCost +
|
||||
static_cast<double>(histogram.total_count_));
|
||||
}
|
||||
double bits = 0;
|
||||
uint8_t depth_array[kSize] = { 0 };
|
||||
if (count <= 4) {
|
||||
// For very low symbol count we build the Huffman tree.
|
||||
CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth_array);
|
||||
for (int i = 0; i < kSize; ++i) {
|
||||
bits += histogram.data_[i] * depth_array[i];
|
||||
if (count == 3) {
|
||||
const uint32_t histo0 = histogram.data_[s[0]];
|
||||
const uint32_t histo1 = histogram.data_[s[1]];
|
||||
const uint32_t histo2 = histogram.data_[s[2]];
|
||||
const uint32_t histomax = std::max(histo0, std::max(histo1, histo2));
|
||||
return (kThreeSymbolHistogramCost +
|
||||
2 * (histo0 + histo1 + histo2) - histomax);
|
||||
}
|
||||
return count == 3 ? bits + 28 : bits + 37;
|
||||
if (count == 4) {
|
||||
uint32_t histo[4];
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
histo[i] = histogram.data_[s[i]];
|
||||
}
|
||||
// Sort
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int j = i + 1; j < 4; ++j) {
|
||||
if (histo[j] > histo[i]) {
|
||||
std::swap(histo[j], histo[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
const uint32_t h23 = histo[2] + histo[3];
|
||||
const uint32_t histomax = std::max(h23, histo[0]);
|
||||
return (kFourSymbolHistogramCost +
|
||||
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
|
||||
}
|
||||
|
||||
// In this loop we compute the entropy of the histogram and simultaneously
|
||||
// build a simplified histogram of the code length codes where we use the
|
||||
// zero repeat code 17, but we don't use the non-zero repeat code 16.
|
||||
double bits = 0;
|
||||
size_t max_depth = 1;
|
||||
uint32_t depth_histo[kCodeLengthCodes] = { 0 };
|
||||
const double log2total = FastLog2(histogram.total_count_);
|
||||
|
@ -13,7 +13,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "./cluster.h"
|
||||
#include "./command.h"
|
||||
@ -70,20 +70,7 @@ void CopyLiteralsToByteArray(const Command* cmds,
|
||||
memcpy(&(*literals)[pos], data + from_pos, insert_len);
|
||||
pos += insert_len;
|
||||
}
|
||||
from_pos = (from_pos + insert_len + cmds[i].copy_len_) & mask;
|
||||
}
|
||||
}
|
||||
|
||||
void CopyCommandsToByteArray(const Command* cmds,
|
||||
const size_t num_commands,
|
||||
std::vector<uint16_t>* insert_and_copy_codes,
|
||||
std::vector<uint16_t>* distance_prefixes) {
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
const Command& cmd = cmds[i];
|
||||
insert_and_copy_codes->push_back(cmd.cmd_prefix_);
|
||||
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
|
||||
distance_prefixes->push_back(cmd.dist_prefix_);
|
||||
}
|
||||
from_pos = (from_pos + insert_len + cmds[i].copy_len()) & mask;
|
||||
}
|
||||
}
|
||||
|
||||
@ -97,27 +84,23 @@ inline static unsigned int MyRand(unsigned int* seed) {
|
||||
|
||||
template<typename HistogramType, typename DataType>
|
||||
void InitialEntropyCodes(const DataType* data, size_t length,
|
||||
size_t literals_per_histogram,
|
||||
size_t max_histograms,
|
||||
size_t stride,
|
||||
std::vector<HistogramType>* vec) {
|
||||
size_t total_histograms = length / literals_per_histogram + 1;
|
||||
if (total_histograms > max_histograms) {
|
||||
total_histograms = max_histograms;
|
||||
size_t num_histograms,
|
||||
HistogramType* histograms) {
|
||||
for (size_t i = 0; i < num_histograms; ++i) {
|
||||
histograms[i].Clear();
|
||||
}
|
||||
unsigned int seed = 7;
|
||||
size_t block_length = length / total_histograms;
|
||||
for (size_t i = 0; i < total_histograms; ++i) {
|
||||
size_t pos = length * i / total_histograms;
|
||||
size_t block_length = length / num_histograms;
|
||||
for (size_t i = 0; i < num_histograms; ++i) {
|
||||
size_t pos = length * i / num_histograms;
|
||||
if (i != 0) {
|
||||
pos += MyRand(&seed) % block_length;
|
||||
}
|
||||
if (pos + stride >= length) {
|
||||
pos = length - stride - 1;
|
||||
}
|
||||
HistogramType histo;
|
||||
histo.Add(data + pos, stride);
|
||||
vec->push_back(histo);
|
||||
histograms[i].Add(data + pos, stride);
|
||||
}
|
||||
}
|
||||
|
||||
@ -140,16 +123,17 @@ void RandomSample(unsigned int* seed,
|
||||
template<typename HistogramType, typename DataType>
|
||||
void RefineEntropyCodes(const DataType* data, size_t length,
|
||||
size_t stride,
|
||||
std::vector<HistogramType>* vec) {
|
||||
size_t num_histograms,
|
||||
HistogramType* histograms) {
|
||||
size_t iters =
|
||||
kIterMulForRefining * length / stride + kMinItersForRefining;
|
||||
unsigned int seed = 7;
|
||||
iters = ((iters + vec->size() - 1) / vec->size()) * vec->size();
|
||||
iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
|
||||
for (size_t iter = 0; iter < iters; ++iter) {
|
||||
HistogramType sample;
|
||||
RandomSample(&seed, data, length, stride, &sample);
|
||||
size_t ix = iter % vec->size();
|
||||
(*vec)[ix].AddHistogram(sample);
|
||||
size_t ix = iter % num_histograms;
|
||||
histograms[ix].AddHistogram(sample);
|
||||
}
|
||||
}
|
||||
|
||||
@ -157,34 +141,40 @@ inline static double BitCost(size_t count) {
|
||||
return count == 0 ? -2.0 : FastLog2(count);
|
||||
}
|
||||
|
||||
// Assigns a block id from the range [0, num_histograms) to each data element
|
||||
// in data[0..length) and fills in block_id[0..length) with the assigned values.
|
||||
// Returns the number of blocks, i.e. one plus the number of block switches.
|
||||
template<typename DataType, int kSize>
|
||||
void FindBlocks(const DataType* data, const size_t length,
|
||||
size_t FindBlocks(const DataType* data, const size_t length,
|
||||
const double block_switch_bitcost,
|
||||
const std::vector<Histogram<kSize> > &vec,
|
||||
const size_t num_histograms,
|
||||
const Histogram<kSize>* histograms,
|
||||
double* insert_cost,
|
||||
double* cost,
|
||||
uint8_t* switch_signal,
|
||||
uint8_t *block_id) {
|
||||
if (vec.size() <= 1) {
|
||||
if (num_histograms <= 1) {
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
block_id[i] = 0;
|
||||
}
|
||||
return;
|
||||
return 1;
|
||||
}
|
||||
size_t vecsize = vec.size();
|
||||
assert(vecsize <= 256);
|
||||
double* insert_cost = new double[kSize * vecsize];
|
||||
memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * vecsize);
|
||||
for (size_t j = 0; j < vecsize; ++j) {
|
||||
insert_cost[j] = FastLog2(static_cast<uint32_t>(vec[j].total_count_));
|
||||
const size_t bitmaplen = (num_histograms + 7) >> 3;
|
||||
assert(num_histograms <= 256);
|
||||
memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * num_histograms);
|
||||
for (size_t j = 0; j < num_histograms; ++j) {
|
||||
insert_cost[j] = FastLog2(static_cast<uint32_t>(
|
||||
histograms[j].total_count_));
|
||||
}
|
||||
for (size_t i = kSize; i != 0;) {
|
||||
--i;
|
||||
for (size_t j = 0; j < vecsize; ++j) {
|
||||
insert_cost[i * vecsize + j] = insert_cost[j] - BitCost(vec[j].data_[i]);
|
||||
for (size_t j = 0; j < num_histograms; ++j) {
|
||||
insert_cost[i * num_histograms + j] =
|
||||
insert_cost[j] - BitCost(histograms[j].data_[i]);
|
||||
}
|
||||
}
|
||||
double *cost = new double[vecsize];
|
||||
memset(cost, 0, sizeof(cost[0]) * vecsize);
|
||||
bool* switch_signal = new bool[length * vecsize];
|
||||
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * vecsize);
|
||||
memset(cost, 0, sizeof(cost[0]) * num_histograms);
|
||||
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
|
||||
// After each iteration of this loop, cost[k] will contain the difference
|
||||
// between the minimum cost of arriving at the current byte position using
|
||||
// entropy code k, and the minimum cost of arriving at the current byte
|
||||
@ -192,10 +182,10 @@ void FindBlocks(const DataType* data, const size_t length,
|
||||
// reaches block switch cost, it means that when we trace back from the last
|
||||
// position, we need to switch here.
|
||||
for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
|
||||
size_t ix = byte_ix * vecsize;
|
||||
size_t insert_cost_ix = data[byte_ix] * vecsize;
|
||||
size_t ix = byte_ix * bitmaplen;
|
||||
size_t insert_cost_ix = data[byte_ix] * num_histograms;
|
||||
double min_cost = 1e99;
|
||||
for (size_t k = 0; k < vecsize; ++k) {
|
||||
for (size_t k = 0; k < num_histograms; ++k) {
|
||||
// We are coding the symbol in data[byte_ix] with entropy code k.
|
||||
cost[k] += insert_cost[insert_cost_ix + k];
|
||||
if (cost[k] < min_cost) {
|
||||
@ -208,110 +198,200 @@ void FindBlocks(const DataType* data, const size_t length,
|
||||
if (byte_ix < 2000) {
|
||||
block_switch_cost *= 0.77 + 0.07 * static_cast<double>(byte_ix) / 2000;
|
||||
}
|
||||
for (size_t k = 0; k < vecsize; ++k) {
|
||||
for (size_t k = 0; k < num_histograms; ++k) {
|
||||
cost[k] -= min_cost;
|
||||
if (cost[k] >= block_switch_cost) {
|
||||
cost[k] = block_switch_cost;
|
||||
switch_signal[ix + k] = true;
|
||||
const uint8_t mask = static_cast<uint8_t>(1u << (k & 7));
|
||||
assert((k >> 3) < bitmaplen);
|
||||
switch_signal[ix + (k >> 3)] |= mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Now trace back from the last position and switch at the marked places.
|
||||
size_t byte_ix = length - 1;
|
||||
size_t ix = byte_ix * vecsize;
|
||||
size_t ix = byte_ix * bitmaplen;
|
||||
uint8_t cur_id = block_id[byte_ix];
|
||||
size_t num_blocks = 1;
|
||||
while (byte_ix > 0) {
|
||||
--byte_ix;
|
||||
ix -= vecsize;
|
||||
if (switch_signal[ix + cur_id]) {
|
||||
ix -= bitmaplen;
|
||||
const uint8_t mask = static_cast<uint8_t>(1u << (cur_id & 7));
|
||||
assert((static_cast<size_t>(cur_id) >> 3) < bitmaplen);
|
||||
if (switch_signal[ix + (cur_id >> 3)] & mask) {
|
||||
if (cur_id != block_id[byte_ix]) {
|
||||
cur_id = block_id[byte_ix];
|
||||
++num_blocks;
|
||||
}
|
||||
}
|
||||
block_id[byte_ix] = cur_id;
|
||||
}
|
||||
delete[] insert_cost;
|
||||
delete[] cost;
|
||||
delete[] switch_signal;
|
||||
return num_blocks;
|
||||
}
|
||||
|
||||
size_t RemapBlockIds(uint8_t* block_ids, const size_t length) {
|
||||
std::map<uint8_t, uint8_t> new_id;
|
||||
size_t next_id = 0;
|
||||
size_t RemapBlockIds(uint8_t* block_ids, const size_t length,
|
||||
uint16_t* new_id, const size_t num_histograms) {
|
||||
static const uint16_t kInvalidId = 256;
|
||||
for (size_t i = 0; i < num_histograms; ++i) {
|
||||
new_id[i] = kInvalidId;
|
||||
}
|
||||
uint16_t next_id = 0;
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
if (new_id.find(block_ids[i]) == new_id.end()) {
|
||||
new_id[block_ids[i]] = static_cast<uint8_t>(next_id);
|
||||
++next_id;
|
||||
assert(block_ids[i] < num_histograms);
|
||||
if (new_id[block_ids[i]] == kInvalidId) {
|
||||
new_id[block_ids[i]] = next_id++;
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
block_ids[i] = new_id[block_ids[i]];
|
||||
block_ids[i] = static_cast<uint8_t>(new_id[block_ids[i]]);
|
||||
assert(block_ids[i] < num_histograms);
|
||||
}
|
||||
assert(next_id <= num_histograms);
|
||||
return next_id;
|
||||
}
|
||||
|
||||
template<typename HistogramType, typename DataType>
|
||||
void BuildBlockHistograms(const DataType* data, const size_t length,
|
||||
uint8_t* block_ids,
|
||||
std::vector<HistogramType>* histograms) {
|
||||
size_t num_types = RemapBlockIds(block_ids, length);
|
||||
assert(num_types <= 256);
|
||||
histograms->clear();
|
||||
histograms->resize(num_types);
|
||||
const uint8_t* block_ids,
|
||||
const size_t num_histograms,
|
||||
HistogramType* histograms) {
|
||||
for (size_t i = 0; i < num_histograms; ++i) {
|
||||
histograms[i].Clear();
|
||||
}
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
(*histograms)[block_ids[i]].Add(data[i]);
|
||||
histograms[block_ids[i]].Add(data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename HistogramType, typename DataType>
|
||||
void ClusterBlocks(const DataType* data, const size_t length,
|
||||
uint8_t* block_ids) {
|
||||
std::vector<HistogramType> histograms;
|
||||
std::vector<uint32_t> block_index(length);
|
||||
uint32_t cur_idx = 0;
|
||||
HistogramType cur_histogram;
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
bool block_boundary = (i + 1 == length || block_ids[i] != block_ids[i + 1]);
|
||||
block_index[i] = cur_idx;
|
||||
cur_histogram.Add(data[i]);
|
||||
if (block_boundary) {
|
||||
histograms.push_back(cur_histogram);
|
||||
cur_histogram.Clear();
|
||||
++cur_idx;
|
||||
}
|
||||
}
|
||||
std::vector<HistogramType> clustered_histograms;
|
||||
std::vector<uint32_t> histogram_symbols;
|
||||
// Block ids need to fit in one byte.
|
||||
const size_t num_blocks,
|
||||
uint8_t* block_ids,
|
||||
BlockSplit* split) {
|
||||
static const size_t kMaxNumberOfBlockTypes = 256;
|
||||
ClusterHistograms(histograms, 1, histograms.size(),
|
||||
kMaxNumberOfBlockTypes,
|
||||
&clustered_histograms,
|
||||
&histogram_symbols);
|
||||
static const size_t kHistogramsPerBatch = 64;
|
||||
static const size_t kClustersPerBatch = 16;
|
||||
std::vector<uint32_t> histogram_symbols(num_blocks);
|
||||
std::vector<uint32_t> block_lengths(num_blocks);
|
||||
|
||||
size_t block_idx = 0;
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
block_ids[i] = static_cast<uint8_t>(histogram_symbols[block_index[i]]);
|
||||
assert(block_idx < num_blocks);
|
||||
++block_lengths[block_idx];
|
||||
if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
|
||||
++block_idx;
|
||||
}
|
||||
}
|
||||
assert(block_idx == num_blocks);
|
||||
|
||||
const size_t expected_num_clusters =
|
||||
kClustersPerBatch *
|
||||
(num_blocks + kHistogramsPerBatch - 1) / kHistogramsPerBatch;
|
||||
std::vector<HistogramType> all_histograms;
|
||||
std::vector<uint32_t> cluster_size;
|
||||
all_histograms.reserve(expected_num_clusters);
|
||||
cluster_size.reserve(expected_num_clusters);
|
||||
size_t num_clusters = 0;
|
||||
std::vector<HistogramType> histograms(
|
||||
std::min(num_blocks, kHistogramsPerBatch));
|
||||
size_t max_num_pairs = kHistogramsPerBatch * kHistogramsPerBatch / 2;
|
||||
std::vector<HistogramPair> pairs(max_num_pairs + 1);
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < num_blocks; i += kHistogramsPerBatch) {
|
||||
const size_t num_to_combine = std::min(num_blocks - i, kHistogramsPerBatch);
|
||||
uint32_t sizes[kHistogramsPerBatch];
|
||||
uint32_t clusters[kHistogramsPerBatch];
|
||||
uint32_t symbols[kHistogramsPerBatch];
|
||||
uint32_t remap[kHistogramsPerBatch];
|
||||
for (size_t j = 0; j < num_to_combine; ++j) {
|
||||
histograms[j].Clear();
|
||||
for (size_t k = 0; k < block_lengths[i + j]; ++k) {
|
||||
histograms[j].Add(data[pos++]);
|
||||
}
|
||||
histograms[j].bit_cost_ = PopulationCost(histograms[j]);
|
||||
symbols[j] = clusters[j] = static_cast<uint32_t>(j);
|
||||
sizes[j] = 1;
|
||||
}
|
||||
size_t num_new_clusters = HistogramCombine(
|
||||
&histograms[0], sizes, symbols, clusters, &pairs[0], num_to_combine,
|
||||
num_to_combine, kHistogramsPerBatch, max_num_pairs);
|
||||
for (size_t j = 0; j < num_new_clusters; ++j) {
|
||||
all_histograms.push_back(histograms[clusters[j]]);
|
||||
cluster_size.push_back(sizes[clusters[j]]);
|
||||
remap[clusters[j]] = static_cast<uint32_t>(j);
|
||||
}
|
||||
for (size_t j = 0; j < num_to_combine; ++j) {
|
||||
histogram_symbols[i + j] =
|
||||
static_cast<uint32_t>(num_clusters) + remap[symbols[j]];
|
||||
}
|
||||
num_clusters += num_new_clusters;
|
||||
assert(num_clusters == cluster_size.size());
|
||||
assert(num_clusters == all_histograms.size());
|
||||
}
|
||||
|
||||
void BuildBlockSplit(const std::vector<uint8_t>& block_ids, BlockSplit* split) {
|
||||
uint8_t cur_id = block_ids[0];
|
||||
uint8_t max_type = cur_id;
|
||||
uint32_t cur_length = 1;
|
||||
for (size_t i = 1; i < block_ids.size(); ++i) {
|
||||
uint8_t next_id = block_ids[i];
|
||||
if (next_id != cur_id) {
|
||||
split->types.push_back(cur_id);
|
||||
split->lengths.push_back(cur_length);
|
||||
max_type = std::max(max_type, next_id);
|
||||
cur_id = next_id;
|
||||
max_num_pairs =
|
||||
std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
|
||||
pairs.resize(max_num_pairs + 1);
|
||||
|
||||
std::vector<uint32_t> clusters(num_clusters);
|
||||
for (size_t i = 0; i < num_clusters; ++i) {
|
||||
clusters[i] = static_cast<uint32_t>(i);
|
||||
}
|
||||
size_t num_final_clusters =
|
||||
HistogramCombine(&all_histograms[0], &cluster_size[0],
|
||||
&histogram_symbols[0],
|
||||
&clusters[0], &pairs[0], num_clusters,
|
||||
num_blocks, kMaxNumberOfBlockTypes, max_num_pairs);
|
||||
|
||||
static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
|
||||
std::vector<uint32_t> new_index(num_clusters, kInvalidIndex);
|
||||
uint32_t next_index = 0;
|
||||
pos = 0;
|
||||
for (size_t i = 0; i < num_blocks; ++i) {
|
||||
HistogramType histo;
|
||||
for (size_t j = 0; j < block_lengths[i]; ++j) {
|
||||
histo.Add(data[pos++]);
|
||||
}
|
||||
uint32_t best_out =
|
||||
i == 0 ? histogram_symbols[0] : histogram_symbols[i - 1];
|
||||
double best_bits = HistogramBitCostDistance(
|
||||
histo, all_histograms[best_out]);
|
||||
for (size_t j = 0; j < num_final_clusters; ++j) {
|
||||
const double cur_bits = HistogramBitCostDistance(
|
||||
histo, all_histograms[clusters[j]]);
|
||||
if (cur_bits < best_bits) {
|
||||
best_bits = cur_bits;
|
||||
best_out = clusters[j];
|
||||
}
|
||||
}
|
||||
histogram_symbols[i] = best_out;
|
||||
if (new_index[best_out] == kInvalidIndex) {
|
||||
new_index[best_out] = next_index++;
|
||||
}
|
||||
}
|
||||
uint8_t max_type = 0;
|
||||
uint32_t cur_length = 0;
|
||||
block_idx = 0;
|
||||
split->types.resize(num_blocks);
|
||||
split->lengths.resize(num_blocks);
|
||||
for (size_t i = 0; i < num_blocks; ++i) {
|
||||
cur_length += block_lengths[i];
|
||||
if (i + 1 == num_blocks ||
|
||||
histogram_symbols[i] != histogram_symbols[i + 1]) {
|
||||
const uint8_t id = static_cast<uint8_t>(new_index[histogram_symbols[i]]);
|
||||
split->types[block_idx] = id;
|
||||
split->lengths[block_idx] = cur_length;
|
||||
max_type = std::max(max_type, id);
|
||||
cur_length = 0;
|
||||
++block_idx;
|
||||
}
|
||||
++cur_length;
|
||||
}
|
||||
split->types.push_back(cur_id);
|
||||
split->lengths.push_back(cur_length);
|
||||
split->types.resize(block_idx);
|
||||
split->lengths.resize(block_idx);
|
||||
split->num_types = static_cast<size_t>(max_type) + 1;
|
||||
}
|
||||
|
||||
template<typename HistogramType, typename DataType>
|
||||
template<int kSize, typename DataType>
|
||||
void SplitByteVector(const std::vector<DataType>& data,
|
||||
const size_t literals_per_histogram,
|
||||
const size_t max_histograms,
|
||||
@ -327,27 +407,44 @@ void SplitByteVector(const std::vector<DataType>& data,
|
||||
split->lengths.push_back(static_cast<uint32_t>(data.size()));
|
||||
return;
|
||||
}
|
||||
std::vector<HistogramType> histograms;
|
||||
size_t num_histograms = data.size() / literals_per_histogram + 1;
|
||||
if (num_histograms > max_histograms) {
|
||||
num_histograms = max_histograms;
|
||||
}
|
||||
Histogram<kSize>* histograms = new Histogram<kSize>[num_histograms];
|
||||
// Find good entropy codes.
|
||||
InitialEntropyCodes(&data[0], data.size(),
|
||||
literals_per_histogram,
|
||||
max_histograms,
|
||||
sampling_stride_length,
|
||||
&histograms);
|
||||
num_histograms, histograms);
|
||||
RefineEntropyCodes(&data[0], data.size(),
|
||||
sampling_stride_length,
|
||||
&histograms);
|
||||
num_histograms, histograms);
|
||||
// Find a good path through literals with the good entropy codes.
|
||||
std::vector<uint8_t> block_ids(data.size());
|
||||
size_t num_blocks;
|
||||
const size_t bitmaplen = (num_histograms + 7) >> 3;
|
||||
double* insert_cost = new double[kSize * num_histograms];
|
||||
double *cost = new double[num_histograms];
|
||||
uint8_t* switch_signal = new uint8_t[data.size() * bitmaplen];
|
||||
uint16_t* new_id = new uint16_t[num_histograms];
|
||||
for (size_t i = 0; i < 10; ++i) {
|
||||
FindBlocks(&data[0], data.size(),
|
||||
num_blocks = FindBlocks(&data[0], data.size(),
|
||||
block_switch_cost,
|
||||
histograms,
|
||||
num_histograms, histograms,
|
||||
insert_cost, cost, switch_signal,
|
||||
&block_ids[0]);
|
||||
BuildBlockHistograms(&data[0], data.size(), &block_ids[0], &histograms);
|
||||
num_histograms = RemapBlockIds(&block_ids[0], data.size(),
|
||||
new_id, num_histograms);
|
||||
BuildBlockHistograms(&data[0], data.size(), &block_ids[0],
|
||||
num_histograms, histograms);
|
||||
}
|
||||
ClusterBlocks<HistogramType>(&data[0], data.size(), &block_ids[0]);
|
||||
BuildBlockSplit(block_ids, split);
|
||||
delete[] insert_cost;
|
||||
delete[] cost;
|
||||
delete[] switch_signal;
|
||||
delete[] new_id;
|
||||
delete[] histograms;
|
||||
ClusterBlocks<Histogram<kSize> >(&data[0], data.size(), num_blocks,
|
||||
&block_ids[0], split);
|
||||
}
|
||||
|
||||
void SplitBlock(const Command* cmds,
|
||||
@ -358,32 +455,51 @@ void SplitBlock(const Command* cmds,
|
||||
BlockSplit* literal_split,
|
||||
BlockSplit* insert_and_copy_split,
|
||||
BlockSplit* dist_split) {
|
||||
{
|
||||
// Create a continuous array of literals.
|
||||
std::vector<uint8_t> literals;
|
||||
CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, &literals);
|
||||
|
||||
// Compute prefix codes for commands.
|
||||
std::vector<uint16_t> insert_and_copy_codes;
|
||||
std::vector<uint16_t> distance_prefixes;
|
||||
CopyCommandsToByteArray(cmds, num_commands,
|
||||
&insert_and_copy_codes,
|
||||
&distance_prefixes);
|
||||
|
||||
SplitByteVector<HistogramLiteral>(
|
||||
// Create the block split on the array of literals.
|
||||
// Literal histograms have alphabet size 256.
|
||||
SplitByteVector<256>(
|
||||
literals,
|
||||
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
|
||||
kLiteralStrideLength, kLiteralBlockSwitchCost,
|
||||
literal_split);
|
||||
SplitByteVector<HistogramCommand>(
|
||||
}
|
||||
|
||||
{
|
||||
// Compute prefix codes for commands.
|
||||
std::vector<uint16_t> insert_and_copy_codes(num_commands);
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
|
||||
}
|
||||
// Create the block split on the array of command prefixes.
|
||||
SplitByteVector<kNumCommandPrefixes>(
|
||||
insert_and_copy_codes,
|
||||
kSymbolsPerCommandHistogram, kMaxCommandHistograms,
|
||||
kCommandStrideLength, kCommandBlockSwitchCost,
|
||||
insert_and_copy_split);
|
||||
SplitByteVector<HistogramDistance>(
|
||||
}
|
||||
|
||||
{
|
||||
// Create a continuous array of distance prefixes.
|
||||
std::vector<uint16_t> distance_prefixes(num_commands);
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
const Command& cmd = cmds[i];
|
||||
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
|
||||
distance_prefixes[pos++] = cmd.dist_prefix_;
|
||||
}
|
||||
}
|
||||
distance_prefixes.resize(pos);
|
||||
// Create the block split on the array of distance prefixes.
|
||||
SplitByteVector<kNumDistancePrefixes>(
|
||||
distance_prefixes,
|
||||
kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
|
||||
kCommandStrideLength, kDistanceBlockSwitchCost,
|
||||
dist_split);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
|
@ -28,6 +28,12 @@ namespace brotli {
|
||||
|
||||
namespace {
|
||||
|
||||
static const size_t kMaxHuffmanTreeSize = 2 * kNumCommandPrefixes + 1;
|
||||
// Context map alphabet has 256 context id symbols plus max 16 rle symbols.
|
||||
static const size_t kContextMapAlphabetSize = 256 + 16;
|
||||
// Block type alphabet has 256 block id symbols plus 2 special symbols.
|
||||
static const size_t kBlockTypeAlphabetSize = 256 + 2;
|
||||
|
||||
// nibblesbits represents the 2 bits to encode MNIBBLES (0-3)
|
||||
// REQUIRES: length > 0
|
||||
// REQUIRES: length <= (1 << 24)
|
||||
@ -45,6 +51,18 @@ void EncodeMlen(size_t length, uint64_t* bits,
|
||||
*bits = length;
|
||||
}
|
||||
|
||||
static inline void StoreCommandExtra(
|
||||
const Command& cmd, size_t* storage_ix, uint8_t* storage) {
|
||||
uint32_t copylen_code = cmd.copy_len_code();
|
||||
uint16_t inscode = GetInsertLengthCode(cmd.insert_len_);
|
||||
uint16_t copycode = GetCopyLengthCode(copylen_code);
|
||||
uint32_t insnumextra = GetInsertExtra(inscode);
|
||||
uint64_t insextraval = cmd.insert_len_ - GetInsertBase(inscode);
|
||||
uint64_t copyextraval = copylen_code - GetCopyBase(copycode);
|
||||
uint64_t bits = (copyextraval << insnumextra) | insextraval;
|
||||
WriteBits(insnumextra + GetCopyExtra(copycode), bits, storage_ix, storage);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) {
|
||||
@ -148,13 +166,14 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
|
||||
}
|
||||
|
||||
void StoreHuffmanTreeToBitMask(
|
||||
const std::vector<uint8_t> &huffman_tree,
|
||||
const std::vector<uint8_t> &huffman_tree_extra_bits,
|
||||
const size_t huffman_tree_size,
|
||||
const uint8_t* huffman_tree,
|
||||
const uint8_t* huffman_tree_extra_bits,
|
||||
const uint8_t* code_length_bitdepth,
|
||||
const std::vector<uint16_t> &code_length_bitdepth_symbols,
|
||||
const uint16_t* code_length_bitdepth_symbols,
|
||||
size_t * __restrict storage_ix,
|
||||
uint8_t * __restrict storage) {
|
||||
for (size_t i = 0; i < huffman_tree.size(); ++i) {
|
||||
for (size_t i = 0; i < huffman_tree_size; ++i) {
|
||||
size_t ix = huffman_tree[i];
|
||||
WriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix],
|
||||
storage_ix, storage);
|
||||
@ -208,18 +227,21 @@ void StoreSimpleHuffmanTree(const uint8_t* depths,
|
||||
// num = alphabet size
|
||||
// depths = symbol depths
|
||||
void StoreHuffmanTree(const uint8_t* depths, size_t num,
|
||||
HuffmanTree* tree,
|
||||
size_t *storage_ix, uint8_t *storage) {
|
||||
// Write the Huffman tree into the brotli-representation.
|
||||
std::vector<uint8_t> huffman_tree;
|
||||
std::vector<uint8_t> huffman_tree_extra_bits;
|
||||
// TODO: Consider allocating these from stack.
|
||||
huffman_tree.reserve(256);
|
||||
huffman_tree_extra_bits.reserve(256);
|
||||
WriteHuffmanTree(depths, num, &huffman_tree, &huffman_tree_extra_bits);
|
||||
// The command alphabet is the largest, so this allocation will fit all
|
||||
// alphabets.
|
||||
assert(num <= kNumCommandPrefixes);
|
||||
uint8_t huffman_tree[kNumCommandPrefixes];
|
||||
uint8_t huffman_tree_extra_bits[kNumCommandPrefixes];
|
||||
size_t huffman_tree_size = 0;
|
||||
WriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree,
|
||||
huffman_tree_extra_bits);
|
||||
|
||||
// Calculate the statistics of the Huffman tree in brotli-representation.
|
||||
uint32_t huffman_tree_histogram[kCodeLengthCodes] = { 0 };
|
||||
for (size_t i = 0; i < huffman_tree.size(); ++i) {
|
||||
for (size_t i = 0; i < huffman_tree_size; ++i) {
|
||||
++huffman_tree_histogram[huffman_tree[i]];
|
||||
}
|
||||
|
||||
@ -239,11 +261,10 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
|
||||
|
||||
// Calculate another Huffman tree to use for compressing the
|
||||
// earlier Huffman tree.
|
||||
// TODO: Consider allocating these from stack.
|
||||
uint8_t code_length_bitdepth[kCodeLengthCodes] = { 0 };
|
||||
std::vector<uint16_t> code_length_bitdepth_symbols(kCodeLengthCodes);
|
||||
uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = { 0 };
|
||||
CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes,
|
||||
5, &code_length_bitdepth[0]);
|
||||
5, tree, &code_length_bitdepth[0]);
|
||||
ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes,
|
||||
&code_length_bitdepth_symbols[0]);
|
||||
|
||||
@ -256,16 +277,17 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
|
||||
}
|
||||
|
||||
// Store the real huffman tree now.
|
||||
StoreHuffmanTreeToBitMask(huffman_tree,
|
||||
StoreHuffmanTreeToBitMask(huffman_tree_size,
|
||||
huffman_tree,
|
||||
huffman_tree_extra_bits,
|
||||
&code_length_bitdepth[0],
|
||||
code_length_bitdepth_symbols,
|
||||
storage_ix, storage);
|
||||
}
|
||||
|
||||
|
||||
void BuildAndStoreHuffmanTree(const uint32_t *histogram,
|
||||
const size_t length,
|
||||
HuffmanTree* tree,
|
||||
uint8_t* depth,
|
||||
uint16_t* bits,
|
||||
size_t* storage_ix,
|
||||
@ -296,16 +318,21 @@ void BuildAndStoreHuffmanTree(const uint32_t *histogram,
|
||||
return;
|
||||
}
|
||||
|
||||
CreateHuffmanTree(histogram, length, 15, depth);
|
||||
CreateHuffmanTree(histogram, length, 15, tree, depth);
|
||||
ConvertBitDepthsToSymbols(depth, length, bits);
|
||||
|
||||
if (count <= 4) {
|
||||
StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage);
|
||||
} else {
|
||||
StoreHuffmanTree(depth, length, storage_ix, storage);
|
||||
StoreHuffmanTree(depth, length, tree, storage_ix, storage);
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool SortHuffmanTree(const HuffmanTree& v0,
|
||||
const HuffmanTree& v1) {
|
||||
return v0.total_count_ < v1.total_count_;
|
||||
}
|
||||
|
||||
void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
|
||||
const size_t histogram_total,
|
||||
const size_t max_bits,
|
||||
@ -467,52 +494,58 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
|
||||
}
|
||||
}
|
||||
|
||||
size_t IndexOf(const std::vector<uint32_t>& v, uint32_t value) {
|
||||
size_t IndexOf(const uint8_t* v, size_t v_size, uint8_t value) {
|
||||
size_t i = 0;
|
||||
for (; i < v.size(); ++i) {
|
||||
for (; i < v_size; ++i) {
|
||||
if (v[i] == value) return i;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
void MoveToFront(std::vector<uint32_t>* v, size_t index) {
|
||||
uint32_t value = (*v)[index];
|
||||
void MoveToFront(uint8_t* v, size_t index) {
|
||||
uint8_t value = v[index];
|
||||
for (size_t i = index; i != 0; --i) {
|
||||
(*v)[i] = (*v)[i - 1];
|
||||
v[i] = v[i - 1];
|
||||
}
|
||||
(*v)[0] = value;
|
||||
v[0] = value;
|
||||
}
|
||||
|
||||
std::vector<uint32_t> MoveToFrontTransform(const std::vector<uint32_t>& v) {
|
||||
if (v.empty()) return v;
|
||||
uint32_t max_value = *std::max_element(v.begin(), v.end());
|
||||
std::vector<uint32_t> mtf(max_value + 1);
|
||||
for (uint32_t i = 0; i <= max_value; ++i) mtf[i] = i;
|
||||
std::vector<uint32_t> result(v.size());
|
||||
for (size_t i = 0; i < v.size(); ++i) {
|
||||
size_t index = IndexOf(mtf, v[i]);
|
||||
assert(index < mtf.size());
|
||||
result[i] = static_cast<uint32_t>(index);
|
||||
MoveToFront(&mtf, index);
|
||||
void MoveToFrontTransform(const uint32_t* __restrict v_in,
|
||||
const size_t v_size,
|
||||
uint32_t* v_out) {
|
||||
if (v_size == 0) {
|
||||
return;
|
||||
}
|
||||
uint32_t max_value = *std::max_element(v_in, v_in + v_size);
|
||||
assert(max_value < 256u);
|
||||
uint8_t mtf[256];
|
||||
size_t mtf_size = max_value + 1;
|
||||
for (uint32_t i = 0; i <= max_value; ++i) {
|
||||
mtf[i] = static_cast<uint8_t>(i);
|
||||
}
|
||||
for (size_t i = 0; i < v_size; ++i) {
|
||||
size_t index = IndexOf(mtf, mtf_size, static_cast<uint8_t>(v_in[i]));
|
||||
assert(index < mtf_size);
|
||||
v_out[i] = static_cast<uint32_t>(index);
|
||||
MoveToFront(mtf, index);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Finds runs of zeros in v_in and replaces them with a prefix code of the run
|
||||
// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
|
||||
// shifted by *max_length_prefix. Will not create prefix codes bigger than the
|
||||
// initial value of *max_run_length_prefix. The prefix code of run length L is
|
||||
// simply Log2Floor(L) and the number of extra bits is the same as the prefix
|
||||
// code.
|
||||
void RunLengthCodeZeros(const std::vector<uint32_t>& v_in,
|
||||
uint32_t* max_run_length_prefix,
|
||||
std::vector<uint32_t>* v_out,
|
||||
std::vector<uint32_t>* extra_bits) {
|
||||
// Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of
|
||||
// the run length plus extra bits (lower 9 bits is the prefix code and the rest
|
||||
// are the extra bits). Non-zero values in v[] are shifted by
|
||||
// *max_length_prefix. Will not create prefix codes bigger than the initial
|
||||
// value of *max_run_length_prefix. The prefix code of run length L is simply
|
||||
// Log2Floor(L) and the number of extra bits is the same as the prefix code.
|
||||
void RunLengthCodeZeros(const size_t in_size,
|
||||
uint32_t* __restrict v,
|
||||
size_t* __restrict out_size,
|
||||
uint32_t* __restrict max_run_length_prefix) {
|
||||
uint32_t max_reps = 0;
|
||||
for (size_t i = 0; i < v_in.size();) {
|
||||
for (; i < v_in.size() && v_in[i] != 0; ++i) ;
|
||||
for (size_t i = 0; i < in_size;) {
|
||||
for (; i < in_size && v[i] != 0; ++i) ;
|
||||
uint32_t reps = 0;
|
||||
for (; i < v_in.size() && v_in[i] == 0; ++i) {
|
||||
for (; i < in_size && v[i] == 0; ++i) {
|
||||
++reps;
|
||||
}
|
||||
max_reps = std::max(reps, max_reps);
|
||||
@ -520,27 +553,31 @@ void RunLengthCodeZeros(const std::vector<uint32_t>& v_in,
|
||||
uint32_t max_prefix = max_reps > 0 ? Log2FloorNonZero(max_reps) : 0;
|
||||
max_prefix = std::min(max_prefix, *max_run_length_prefix);
|
||||
*max_run_length_prefix = max_prefix;
|
||||
for (size_t i = 0; i < v_in.size();) {
|
||||
if (v_in[i] != 0) {
|
||||
v_out->push_back(v_in[i] + *max_run_length_prefix);
|
||||
extra_bits->push_back(0);
|
||||
*out_size = 0;
|
||||
for (size_t i = 0; i < in_size;) {
|
||||
assert(*out_size <= i);
|
||||
if (v[i] != 0) {
|
||||
v[*out_size] = v[i] + *max_run_length_prefix;
|
||||
++i;
|
||||
++(*out_size);
|
||||
} else {
|
||||
uint32_t reps = 1;
|
||||
for (size_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
|
||||
for (size_t k = i + 1; k < in_size && v[k] == 0; ++k) {
|
||||
++reps;
|
||||
}
|
||||
i += reps;
|
||||
while (reps != 0) {
|
||||
if (reps < (2u << max_prefix)) {
|
||||
uint32_t run_length_prefix = Log2FloorNonZero(reps);
|
||||
v_out->push_back(run_length_prefix);
|
||||
extra_bits->push_back(reps - (1u << run_length_prefix));
|
||||
const uint32_t extra_bits = reps - (1u << run_length_prefix);
|
||||
v[*out_size] = run_length_prefix + (extra_bits << 9);
|
||||
++(*out_size);
|
||||
break;
|
||||
} else {
|
||||
v_out->push_back(max_prefix);
|
||||
extra_bits->push_back((1u << max_prefix) - 1u);
|
||||
const uint32_t extra_bits = (1u << max_prefix) - 1u;
|
||||
v[*out_size] = max_prefix + (extra_bits << 9);
|
||||
reps -= (2u << max_prefix) - 1u;
|
||||
++(*out_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -549,6 +586,7 @@ void RunLengthCodeZeros(const std::vector<uint32_t>& v_in,
|
||||
|
||||
void EncodeContextMap(const std::vector<uint32_t>& context_map,
|
||||
size_t num_clusters,
|
||||
HuffmanTree* tree,
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
StoreVarLenUint8(num_clusters - 1, storage_ix, storage);
|
||||
|
||||
@ -556,37 +594,40 @@ void EncodeContextMap(const std::vector<uint32_t>& context_map,
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<uint32_t> transformed_symbols = MoveToFrontTransform(context_map);
|
||||
std::vector<uint32_t> rle_symbols;
|
||||
std::vector<uint32_t> extra_bits;
|
||||
uint32_t* rle_symbols = new uint32_t[context_map.size()];
|
||||
MoveToFrontTransform(&context_map[0], context_map.size(), rle_symbols);
|
||||
uint32_t max_run_length_prefix = 6;
|
||||
RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
|
||||
&rle_symbols, &extra_bits);
|
||||
HistogramContextMap symbol_histogram;
|
||||
for (size_t i = 0; i < rle_symbols.size(); ++i) {
|
||||
symbol_histogram.Add(rle_symbols[i]);
|
||||
size_t num_rle_symbols = 0;
|
||||
RunLengthCodeZeros(context_map.size(), rle_symbols,
|
||||
&num_rle_symbols, &max_run_length_prefix);
|
||||
uint32_t histogram[kContextMapAlphabetSize];
|
||||
memset(histogram, 0, sizeof(histogram));
|
||||
static const int kSymbolBits = 9;
|
||||
static const uint32_t kSymbolMask = (1u << kSymbolBits) - 1u;
|
||||
for (size_t i = 0; i < num_rle_symbols; ++i) {
|
||||
++histogram[rle_symbols[i] & kSymbolMask];
|
||||
}
|
||||
bool use_rle = max_run_length_prefix > 0;
|
||||
WriteBits(1, use_rle, storage_ix, storage);
|
||||
if (use_rle) {
|
||||
WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
|
||||
}
|
||||
EntropyCodeContextMap symbol_code;
|
||||
memset(symbol_code.depth_, 0, sizeof(symbol_code.depth_));
|
||||
memset(symbol_code.bits_, 0, sizeof(symbol_code.bits_));
|
||||
BuildAndStoreHuffmanTree(symbol_histogram.data_,
|
||||
num_clusters + max_run_length_prefix,
|
||||
symbol_code.depth_, symbol_code.bits_,
|
||||
storage_ix, storage);
|
||||
for (size_t i = 0; i < rle_symbols.size(); ++i) {
|
||||
WriteBits(symbol_code.depth_[rle_symbols[i]],
|
||||
symbol_code.bits_[rle_symbols[i]],
|
||||
storage_ix, storage);
|
||||
if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
|
||||
WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
|
||||
uint8_t depths[kContextMapAlphabetSize];
|
||||
uint16_t bits[kContextMapAlphabetSize];
|
||||
memset(depths, 0, sizeof(depths));
|
||||
memset(bits, 0, sizeof(bits));
|
||||
BuildAndStoreHuffmanTree(histogram, num_clusters + max_run_length_prefix,
|
||||
tree, depths, bits, storage_ix, storage);
|
||||
for (size_t i = 0; i < num_rle_symbols; ++i) {
|
||||
const uint32_t rle_symbol = rle_symbols[i] & kSymbolMask;
|
||||
const uint32_t extra_bits_val = rle_symbols[i] >> kSymbolBits;
|
||||
WriteBits(depths[rle_symbol], bits[rle_symbol], storage_ix, storage);
|
||||
if (rle_symbol > 0 && rle_symbol <= max_run_length_prefix) {
|
||||
WriteBits(rle_symbol, extra_bits_val, storage_ix, storage);
|
||||
}
|
||||
}
|
||||
WriteBits(1, 1, storage_ix, storage); // use move-to-front
|
||||
delete[] rle_symbols;
|
||||
}
|
||||
|
||||
void StoreBlockSwitch(const BlockSplitCode& code,
|
||||
@ -608,12 +649,15 @@ void StoreBlockSwitch(const BlockSplitCode& code,
|
||||
void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
|
||||
const std::vector<uint32_t>& lengths,
|
||||
const size_t num_types,
|
||||
HuffmanTree* tree,
|
||||
BlockSplitCode* code,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
const size_t num_blocks = types.size();
|
||||
std::vector<uint32_t> type_histo(num_types + 2);
|
||||
std::vector<uint32_t> length_histo(26);
|
||||
uint32_t type_histo[kBlockTypeAlphabetSize];
|
||||
uint32_t length_histo[kNumBlockLenPrefixes];
|
||||
memset(type_histo, 0, (num_types + 2) * sizeof(type_histo[0]));
|
||||
memset(length_histo, 0, sizeof(length_histo));
|
||||
size_t last_type = 1;
|
||||
size_t second_last_type = 0;
|
||||
code->type_code.resize(num_blocks);
|
||||
@ -622,8 +666,8 @@ void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
|
||||
code->length_extra.resize(num_blocks);
|
||||
code->type_depths.resize(num_types + 2);
|
||||
code->type_bits.resize(num_types + 2);
|
||||
code->length_depths.resize(26);
|
||||
code->length_bits.resize(26);
|
||||
memset(code->length_depths, 0, sizeof(code->length_depths));
|
||||
memset(code->length_bits, 0, sizeof(code->length_bits));
|
||||
for (size_t i = 0; i < num_blocks; ++i) {
|
||||
size_t type = types[i];
|
||||
size_t type_code = (type == last_type + 1 ? 1 :
|
||||
@ -641,10 +685,10 @@ void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
|
||||
}
|
||||
StoreVarLenUint8(num_types - 1, storage_ix, storage);
|
||||
if (num_types > 1) {
|
||||
BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2,
|
||||
BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, tree,
|
||||
&code->type_depths[0], &code->type_bits[0],
|
||||
storage_ix, storage);
|
||||
BuildAndStoreHuffmanTree(&length_histo[0], 26,
|
||||
BuildAndStoreHuffmanTree(&length_histo[0], kNumBlockLenPrefixes, tree,
|
||||
&code->length_depths[0], &code->length_bits[0],
|
||||
storage_ix, storage);
|
||||
StoreBlockSwitch(*code, 0, storage_ix, storage);
|
||||
@ -653,6 +697,7 @@ void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
|
||||
|
||||
void StoreTrivialContextMap(size_t num_types,
|
||||
size_t context_bits,
|
||||
HuffmanTree* tree,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
StoreVarLenUint8(num_types - 1, storage_ix, storage);
|
||||
@ -660,9 +705,12 @@ void StoreTrivialContextMap(size_t num_types,
|
||||
size_t repeat_code = context_bits - 1u;
|
||||
size_t repeat_bits = (1u << repeat_code) - 1u;
|
||||
size_t alphabet_size = num_types + repeat_code;
|
||||
std::vector<uint32_t> histogram(alphabet_size);
|
||||
std::vector<uint8_t> depths(alphabet_size);
|
||||
std::vector<uint16_t> bits(alphabet_size);
|
||||
uint32_t histogram[kContextMapAlphabetSize];
|
||||
uint8_t depths[kContextMapAlphabetSize];
|
||||
uint16_t bits[kContextMapAlphabetSize];
|
||||
memset(histogram, 0, alphabet_size * sizeof(histogram[0]));
|
||||
memset(depths, 0, alphabet_size * sizeof(depths[0]));
|
||||
memset(bits, 0, alphabet_size * sizeof(bits[0]));
|
||||
// Write RLEMAX.
|
||||
WriteBits(1, 1, storage_ix, storage);
|
||||
WriteBits(4, repeat_code - 1, storage_ix, storage);
|
||||
@ -671,7 +719,7 @@ void StoreTrivialContextMap(size_t num_types,
|
||||
for (size_t i = context_bits; i < alphabet_size; ++i) {
|
||||
histogram[i] = 1;
|
||||
}
|
||||
BuildAndStoreHuffmanTree(&histogram[0], alphabet_size,
|
||||
BuildAndStoreHuffmanTree(&histogram[0], alphabet_size, tree,
|
||||
&depths[0], &bits[0],
|
||||
storage_ix, storage);
|
||||
for (size_t i = 0; i < num_types; ++i) {
|
||||
@ -702,11 +750,12 @@ class BlockEncoder {
|
||||
|
||||
// Creates entropy codes of block lengths and block types and stores them
|
||||
// to the bit stream.
|
||||
void BuildAndStoreBlockSwitchEntropyCodes(size_t* storage_ix,
|
||||
void BuildAndStoreBlockSwitchEntropyCodes(HuffmanTree* tree,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
BuildAndStoreBlockSplitCode(
|
||||
block_types_, block_lengths_, num_block_types_,
|
||||
&block_split_code_, storage_ix, storage);
|
||||
tree, &block_split_code_, storage_ix, storage);
|
||||
}
|
||||
|
||||
// Creates entropy codes for all block types and stores them to the bit
|
||||
@ -714,12 +763,14 @@ class BlockEncoder {
|
||||
template<int kSize>
|
||||
void BuildAndStoreEntropyCodes(
|
||||
const std::vector<Histogram<kSize> >& histograms,
|
||||
HuffmanTree* tree,
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
depths_.resize(histograms.size() * alphabet_size_);
|
||||
bits_.resize(histograms.size() * alphabet_size_);
|
||||
for (size_t i = 0; i < histograms.size(); ++i) {
|
||||
size_t ix = i * alphabet_size_;
|
||||
BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size_,
|
||||
tree,
|
||||
&depths_[ix], &bits_[ix],
|
||||
storage_ix, storage);
|
||||
}
|
||||
@ -798,6 +849,8 @@ void StoreMetaBlock(const uint8_t* input,
|
||||
kNumDistanceShortCodes + num_direct_distance_codes +
|
||||
(48u << distance_postfix_bits);
|
||||
|
||||
HuffmanTree* tree = static_cast<HuffmanTree*>(
|
||||
malloc(kMaxHuffmanTreeSize * sizeof(HuffmanTree)));
|
||||
BlockEncoder literal_enc(256,
|
||||
mb.literal_split.num_types,
|
||||
mb.literal_split.types,
|
||||
@ -811,9 +864,9 @@ void StoreMetaBlock(const uint8_t* input,
|
||||
mb.distance_split.types,
|
||||
mb.distance_split.lengths);
|
||||
|
||||
literal_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage);
|
||||
command_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage);
|
||||
distance_enc.BuildAndStoreBlockSwitchEntropyCodes(storage_ix, storage);
|
||||
literal_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
|
||||
command_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
|
||||
distance_enc.BuildAndStoreBlockSwitchEntropyCodes(tree, storage_ix, storage);
|
||||
|
||||
WriteBits(2, distance_postfix_bits, storage_ix, storage);
|
||||
WriteBits(4, num_direct_distance_codes >> distance_postfix_bits,
|
||||
@ -824,37 +877,36 @@ void StoreMetaBlock(const uint8_t* input,
|
||||
|
||||
size_t num_literal_histograms = mb.literal_histograms.size();
|
||||
if (mb.literal_context_map.empty()) {
|
||||
StoreTrivialContextMap(num_literal_histograms, kLiteralContextBits,
|
||||
StoreTrivialContextMap(num_literal_histograms, kLiteralContextBits, tree,
|
||||
storage_ix, storage);
|
||||
} else {
|
||||
EncodeContextMap(mb.literal_context_map, num_literal_histograms,
|
||||
EncodeContextMap(mb.literal_context_map, num_literal_histograms, tree,
|
||||
storage_ix, storage);
|
||||
}
|
||||
|
||||
size_t num_dist_histograms = mb.distance_histograms.size();
|
||||
if (mb.distance_context_map.empty()) {
|
||||
StoreTrivialContextMap(num_dist_histograms, kDistanceContextBits,
|
||||
StoreTrivialContextMap(num_dist_histograms, kDistanceContextBits, tree,
|
||||
storage_ix, storage);
|
||||
} else {
|
||||
EncodeContextMap(mb.distance_context_map, num_dist_histograms,
|
||||
EncodeContextMap(mb.distance_context_map, num_dist_histograms, tree,
|
||||
storage_ix, storage);
|
||||
}
|
||||
|
||||
literal_enc.BuildAndStoreEntropyCodes(mb.literal_histograms,
|
||||
literal_enc.BuildAndStoreEntropyCodes(mb.literal_histograms, tree,
|
||||
storage_ix, storage);
|
||||
command_enc.BuildAndStoreEntropyCodes(mb.command_histograms,
|
||||
command_enc.BuildAndStoreEntropyCodes(mb.command_histograms, tree,
|
||||
storage_ix, storage);
|
||||
distance_enc.BuildAndStoreEntropyCodes(mb.distance_histograms,
|
||||
distance_enc.BuildAndStoreEntropyCodes(mb.distance_histograms, tree,
|
||||
storage_ix, storage);
|
||||
free(tree);
|
||||
|
||||
size_t pos = start_pos;
|
||||
for (size_t i = 0; i < n_commands; ++i) {
|
||||
const Command cmd = commands[i];
|
||||
size_t cmd_code = cmd.cmd_prefix_;
|
||||
uint32_t lennumextra = static_cast<uint32_t>(cmd.cmd_extra_ >> 48);
|
||||
uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffUL;
|
||||
command_enc.StoreSymbol(cmd_code, storage_ix, storage);
|
||||
WriteBits(lennumextra, lenextra, storage_ix, storage);
|
||||
StoreCommandExtra(cmd, storage_ix, storage);
|
||||
if (mb.literal_context_map.empty()) {
|
||||
for (size_t j = cmd.insert_len_; j != 0; --j) {
|
||||
literal_enc.StoreSymbol(input[pos & mask], storage_ix, storage);
|
||||
@ -871,8 +923,8 @@ void StoreMetaBlock(const uint8_t* input,
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
pos += cmd.copy_len_;
|
||||
if (cmd.copy_len_ > 0) {
|
||||
pos += cmd.copy_len();
|
||||
if (cmd.copy_len()) {
|
||||
prev_byte2 = input[(pos - 2) & mask];
|
||||
prev_byte = input[(pos - 1) & mask];
|
||||
if (cmd.cmd_prefix_ >= 128) {
|
||||
@ -911,8 +963,8 @@ void BuildHistograms(const uint8_t* input,
|
||||
lit_histo->Add(input[pos & mask]);
|
||||
++pos;
|
||||
}
|
||||
pos += cmd.copy_len_;
|
||||
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
|
||||
pos += cmd.copy_len();
|
||||
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
|
||||
dist_histo->Add(cmd.dist_prefix_);
|
||||
}
|
||||
}
|
||||
@ -935,17 +987,15 @@ void StoreDataWithHuffmanCodes(const uint8_t* input,
|
||||
for (size_t i = 0; i < n_commands; ++i) {
|
||||
const Command cmd = commands[i];
|
||||
const size_t cmd_code = cmd.cmd_prefix_;
|
||||
const uint32_t lennumextra = static_cast<uint32_t>(cmd.cmd_extra_ >> 48);
|
||||
const uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffUL;
|
||||
WriteBits(cmd_depth[cmd_code], cmd_bits[cmd_code], storage_ix, storage);
|
||||
WriteBits(lennumextra, lenextra, storage_ix, storage);
|
||||
StoreCommandExtra(cmd, storage_ix, storage);
|
||||
for (size_t j = cmd.insert_len_; j != 0; --j) {
|
||||
const uint8_t literal = input[pos & mask];
|
||||
WriteBits(lit_depth[literal], lit_bits[literal], storage_ix, storage);
|
||||
++pos;
|
||||
}
|
||||
pos += cmd.copy_len_;
|
||||
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
|
||||
pos += cmd.copy_len();
|
||||
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
|
||||
const size_t dist_code = cmd.dist_prefix_;
|
||||
const uint32_t distnumextra = cmd.dist_extra_ >> 24;
|
||||
const uint32_t distextra = cmd.dist_extra_ & 0xffffff;
|
||||
@ -983,15 +1033,18 @@ void StoreMetaBlockTrivial(const uint8_t* input,
|
||||
std::vector<uint8_t> dist_depth(64);
|
||||
std::vector<uint16_t> dist_bits(64);
|
||||
|
||||
BuildAndStoreHuffmanTree(&lit_histo.data_[0], 256,
|
||||
HuffmanTree* tree = static_cast<HuffmanTree*>(
|
||||
malloc(kMaxHuffmanTreeSize * sizeof(HuffmanTree)));
|
||||
BuildAndStoreHuffmanTree(&lit_histo.data_[0], 256, tree,
|
||||
&lit_depth[0], &lit_bits[0],
|
||||
storage_ix, storage);
|
||||
BuildAndStoreHuffmanTree(&cmd_histo.data_[0], kNumCommandPrefixes,
|
||||
BuildAndStoreHuffmanTree(&cmd_histo.data_[0], kNumCommandPrefixes, tree,
|
||||
&cmd_depth[0], &cmd_bits[0],
|
||||
storage_ix, storage);
|
||||
BuildAndStoreHuffmanTree(&dist_histo.data_[0], 64,
|
||||
BuildAndStoreHuffmanTree(&dist_histo.data_[0], 64, tree,
|
||||
&dist_depth[0], &dist_bits[0],
|
||||
storage_ix, storage);
|
||||
free(tree);
|
||||
StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
|
||||
n_commands, &lit_depth[0], &lit_bits[0],
|
||||
&cmd_depth[0], &cmd_bits[0],
|
||||
@ -1026,7 +1079,7 @@ void StoreMetaBlockFast(const uint8_t* input,
|
||||
++pos;
|
||||
}
|
||||
num_literals += cmd.insert_len_;
|
||||
pos += cmd.copy_len_;
|
||||
pos += cmd.copy_len();
|
||||
}
|
||||
uint8_t lit_depth[256] = { 0 };
|
||||
uint16_t lit_bits[256] = { 0 };
|
||||
|
@ -48,6 +48,7 @@ void StoreUncompressedMetaBlockHeader(size_t length,
|
||||
// Stores a context map where the histogram type is always the block type.
|
||||
void StoreTrivialContextMap(size_t num_types,
|
||||
size_t context_bits,
|
||||
HuffmanTree* tree,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
@ -57,13 +58,14 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
|
||||
size_t *storage_ix,
|
||||
uint8_t *storage);
|
||||
|
||||
void StoreHuffmanTree(const uint8_t* depths, size_t num,
|
||||
void StoreHuffmanTree(const uint8_t* depths, size_t num, HuffmanTree* tree,
|
||||
size_t *storage_ix, uint8_t *storage);
|
||||
|
||||
// Builds a Huffman tree from histogram[0:length] into depth[0:length] and
|
||||
// bits[0:length] and stores the encoded tree to the bit stream.
|
||||
void BuildAndStoreHuffmanTree(const uint32_t *histogram,
|
||||
const size_t length,
|
||||
HuffmanTree* tree,
|
||||
uint8_t* depth,
|
||||
uint16_t* bits,
|
||||
size_t* storage_ix,
|
||||
@ -81,6 +83,7 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
|
||||
// histogram ids is given by num_clusters.
|
||||
void EncodeContextMap(const std::vector<uint32_t>& context_map,
|
||||
size_t num_clusters,
|
||||
HuffmanTree* tree,
|
||||
size_t* storage_ix, uint8_t* storage);
|
||||
|
||||
// Data structure that stores everything that is needed to encode each block
|
||||
@ -92,8 +95,8 @@ struct BlockSplitCode {
|
||||
std::vector<uint32_t> length_extra;
|
||||
std::vector<uint8_t> type_depths;
|
||||
std::vector<uint16_t> type_bits;
|
||||
std::vector<uint8_t> length_depths;
|
||||
std::vector<uint16_t> length_bits;
|
||||
uint8_t length_depths[kNumBlockLenPrefixes];
|
||||
uint16_t length_bits[kNumBlockLenPrefixes];
|
||||
};
|
||||
|
||||
// Builds a BlockSplitCode data structure from the block split given by the
|
||||
|
181
enc/cluster.h
181
enc/cluster.h
@ -11,7 +11,6 @@
|
||||
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
@ -52,7 +51,9 @@ template<typename HistogramType>
|
||||
void CompareAndPushToQueue(const HistogramType* out,
|
||||
const uint32_t* cluster_size,
|
||||
uint32_t idx1, uint32_t idx2,
|
||||
std::vector<HistogramPair>* pairs) {
|
||||
size_t max_num_pairs,
|
||||
HistogramPair* pairs,
|
||||
size_t* num_pairs) {
|
||||
if (idx1 == idx2) {
|
||||
return;
|
||||
}
|
||||
@ -76,8 +77,8 @@ void CompareAndPushToQueue(const HistogramType* out,
|
||||
p.cost_combo = out[idx1].bit_cost_;
|
||||
store_pair = true;
|
||||
} else {
|
||||
double threshold = pairs->empty() ? 1e99 :
|
||||
std::max(0.0, (*pairs)[0].cost_diff);
|
||||
double threshold = *num_pairs == 0 ? 1e99 :
|
||||
std::max(0.0, pairs[0].cost_diff);
|
||||
HistogramType combo = out[idx1];
|
||||
combo.AddHistogram(out[idx2]);
|
||||
double cost_combo = PopulationCost(combo);
|
||||
@ -88,42 +89,44 @@ void CompareAndPushToQueue(const HistogramType* out,
|
||||
}
|
||||
if (store_pair) {
|
||||
p.cost_diff += p.cost_combo;
|
||||
if (!pairs->empty() && (pairs->front() < p)) {
|
||||
if (*num_pairs > 0 && pairs[0] < p) {
|
||||
// Replace the top of the queue if needed.
|
||||
pairs->push_back(pairs->front());
|
||||
pairs->front() = p;
|
||||
} else {
|
||||
pairs->push_back(p);
|
||||
if (*num_pairs < max_num_pairs) {
|
||||
pairs[*num_pairs] = pairs[0];
|
||||
++(*num_pairs);
|
||||
}
|
||||
pairs[0] = p;
|
||||
} else if (*num_pairs < max_num_pairs) {
|
||||
pairs[*num_pairs] = p;
|
||||
++(*num_pairs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename HistogramType>
|
||||
void HistogramCombine(HistogramType* out,
|
||||
size_t HistogramCombine(HistogramType* out,
|
||||
uint32_t* cluster_size,
|
||||
uint32_t* symbols,
|
||||
uint32_t* clusters,
|
||||
HistogramPair* pairs,
|
||||
size_t num_clusters,
|
||||
size_t symbols_size,
|
||||
size_t max_clusters) {
|
||||
size_t max_clusters,
|
||||
size_t max_num_pairs) {
|
||||
double cost_diff_threshold = 0.0;
|
||||
size_t min_cluster_size = 1;
|
||||
|
||||
// Uniquify the list of symbols.
|
||||
std::vector<uint32_t> clusters(symbols, symbols + symbols_size);
|
||||
std::sort(clusters.begin(), clusters.end());
|
||||
std::vector<uint32_t>::iterator last =
|
||||
std::unique(clusters.begin(), clusters.end());
|
||||
clusters.resize(static_cast<size_t>(last - clusters.begin()));
|
||||
|
||||
// We maintain a heap of histogram pairs, ordered by the bit cost reduction.
|
||||
std::vector<HistogramPair> pairs;
|
||||
for (size_t idx1 = 0; idx1 < clusters.size(); ++idx1) {
|
||||
for (size_t idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) {
|
||||
// We maintain a vector of histogram pairs, with the property that the pair
|
||||
// with the maximum bit cost reduction is the first.
|
||||
size_t num_pairs = 0;
|
||||
for (size_t idx1 = 0; idx1 < num_clusters; ++idx1) {
|
||||
for (size_t idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
|
||||
CompareAndPushToQueue(out, cluster_size, clusters[idx1], clusters[idx2],
|
||||
&pairs);
|
||||
max_num_pairs, &pairs[0], &num_pairs);
|
||||
}
|
||||
}
|
||||
|
||||
while (clusters.size() > min_cluster_size) {
|
||||
while (num_clusters > min_cluster_size) {
|
||||
if (pairs[0].cost_diff >= cost_diff_threshold) {
|
||||
cost_diff_threshold = 1e99;
|
||||
min_cluster_size = max_clusters;
|
||||
@ -140,40 +143,42 @@ void HistogramCombine(HistogramType* out,
|
||||
symbols[i] = best_idx1;
|
||||
}
|
||||
}
|
||||
for (std::vector<uint32_t>::iterator cluster = clusters.begin();
|
||||
cluster != clusters.end(); ++cluster) {
|
||||
if (*cluster >= best_idx2) {
|
||||
clusters.erase(cluster);
|
||||
for (size_t i = 0; i < num_clusters; ++i) {
|
||||
if (clusters[i] == best_idx2) {
|
||||
memmove(&clusters[i], &clusters[i + 1],
|
||||
(num_clusters - i - 1) * sizeof(clusters[0]));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
--num_clusters;
|
||||
// Remove pairs intersecting the just combined best pair.
|
||||
size_t copy_to_idx = 0;
|
||||
for (size_t i = 0; i < pairs.size(); ++i) {
|
||||
for (size_t i = 0; i < num_pairs; ++i) {
|
||||
HistogramPair& p = pairs[i];
|
||||
if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
|
||||
p.idx1 == best_idx2 || p.idx2 == best_idx2) {
|
||||
// Remove invalid pair from the queue.
|
||||
continue;
|
||||
}
|
||||
if (pairs.front() < p) {
|
||||
if (pairs[0] < p) {
|
||||
// Replace the top of the queue if needed.
|
||||
HistogramPair front = pairs.front();
|
||||
pairs.front() = p;
|
||||
HistogramPair front = pairs[0];
|
||||
pairs[0] = p;
|
||||
pairs[copy_to_idx] = front;
|
||||
} else {
|
||||
pairs[copy_to_idx] = p;
|
||||
}
|
||||
++copy_to_idx;
|
||||
}
|
||||
pairs.resize(copy_to_idx);
|
||||
num_pairs = copy_to_idx;
|
||||
|
||||
// Push new pairs formed with the combined histogram to the heap.
|
||||
for (size_t i = 0; i < clusters.size(); ++i) {
|
||||
CompareAndPushToQueue(out, cluster_size, best_idx1, clusters[i], &pairs);
|
||||
for (size_t i = 0; i < num_clusters; ++i) {
|
||||
CompareAndPushToQueue(out, cluster_size, best_idx1, clusters[i],
|
||||
max_num_pairs, &pairs[0], &num_pairs);
|
||||
}
|
||||
}
|
||||
return num_clusters;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
@ -192,61 +197,69 @@ double HistogramBitCostDistance(const HistogramType& histogram,
|
||||
}
|
||||
|
||||
// Find the best 'out' histogram for each of the 'in' histograms.
|
||||
// When called, clusters[0..num_clusters) contains the unique values from
|
||||
// symbols[0..in_size), but this property is not preserved in this function.
|
||||
// Note: we assume that out[]->bit_cost_ is already up-to-date.
|
||||
template<typename HistogramType>
|
||||
void HistogramRemap(const HistogramType* in, size_t in_size,
|
||||
const uint32_t* clusters, size_t num_clusters,
|
||||
HistogramType* out, uint32_t* symbols) {
|
||||
// Uniquify the list of symbols.
|
||||
std::vector<uint32_t> all_symbols(symbols, symbols + in_size);
|
||||
std::sort(all_symbols.begin(), all_symbols.end());
|
||||
std::vector<uint32_t>::iterator last =
|
||||
std::unique(all_symbols.begin(), all_symbols.end());
|
||||
all_symbols.resize(static_cast<size_t>(last - all_symbols.begin()));
|
||||
|
||||
for (size_t i = 0; i < in_size; ++i) {
|
||||
uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
|
||||
double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
|
||||
for (std::vector<uint32_t>::const_iterator k = all_symbols.begin();
|
||||
k != all_symbols.end(); ++k) {
|
||||
const double cur_bits = HistogramBitCostDistance(in[i], out[*k]);
|
||||
for (size_t j = 0; j < num_clusters; ++j) {
|
||||
const double cur_bits = HistogramBitCostDistance(in[i], out[clusters[j]]);
|
||||
if (cur_bits < best_bits) {
|
||||
best_bits = cur_bits;
|
||||
best_out = *k;
|
||||
best_out = clusters[j];
|
||||
}
|
||||
}
|
||||
symbols[i] = best_out;
|
||||
}
|
||||
|
||||
|
||||
// Recompute each out based on raw and symbols.
|
||||
for (std::vector<uint32_t>::const_iterator k = all_symbols.begin();
|
||||
k != all_symbols.end(); ++k) {
|
||||
out[*k].Clear();
|
||||
for (size_t j = 0; j < num_clusters; ++j) {
|
||||
out[clusters[j]].Clear();
|
||||
}
|
||||
for (size_t i = 0; i < in_size; ++i) {
|
||||
out[symbols[i]].AddHistogram(in[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Reorder histograms in *out so that the new symbols in *symbols come in
|
||||
// Reorders elements of the out[0..length) array and changes values in
|
||||
// symbols[0..length) array in the following way:
|
||||
// * when called, symbols[] contains indexes into out[], and has N unique
|
||||
// values (possibly N < length)
|
||||
// * on return, symbols'[i] = f(symbols[i]) and
|
||||
// out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
|
||||
// where f is a bijection between the range of symbols[] and [0..N), and
|
||||
// the first occurrences of values in symbols'[i] come in consecutive
|
||||
// increasing order.
|
||||
// Returns N, the number of unique values in symbols[].
|
||||
template<typename HistogramType>
|
||||
void HistogramReindex(std::vector<HistogramType>* out,
|
||||
std::vector<uint32_t>* symbols) {
|
||||
std::vector<HistogramType> tmp(*out);
|
||||
std::map<uint32_t, uint32_t> new_index;
|
||||
size_t HistogramReindex(HistogramType* out, uint32_t* symbols, size_t length) {
|
||||
static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
|
||||
std::vector<uint32_t> new_index(length, kInvalidIndex);
|
||||
uint32_t next_index = 0;
|
||||
for (size_t i = 0; i < symbols->size(); ++i) {
|
||||
if (new_index.find((*symbols)[i]) == new_index.end()) {
|
||||
new_index[(*symbols)[i]] = next_index;
|
||||
(*out)[next_index] = tmp[(*symbols)[i]];
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
if (new_index[symbols[i]] == kInvalidIndex) {
|
||||
new_index[symbols[i]] = next_index;
|
||||
++next_index;
|
||||
}
|
||||
}
|
||||
out->resize(next_index);
|
||||
for (size_t i = 0; i < symbols->size(); ++i) {
|
||||
(*symbols)[i] = new_index[(*symbols)[i]];
|
||||
std::vector<HistogramType> tmp(next_index);
|
||||
next_index = 0;
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
if (new_index[symbols[i]] == next_index) {
|
||||
tmp[next_index] = out[symbols[i]];
|
||||
++next_index;
|
||||
}
|
||||
symbols[i] = new_index[symbols[i]];
|
||||
}
|
||||
for (size_t i = 0; i < next_index; ++i) {
|
||||
out[i] = tmp[i];
|
||||
}
|
||||
return next_index;
|
||||
}
|
||||
|
||||
// Clusters similar histograms in 'in' together, the selected histograms are
|
||||
@ -261,6 +274,8 @@ void ClusterHistograms(const std::vector<HistogramType>& in,
|
||||
const size_t in_size = num_contexts * num_blocks;
|
||||
assert(in_size == in.size());
|
||||
std::vector<uint32_t> cluster_size(in_size, 1);
|
||||
std::vector<uint32_t> clusters(in_size);
|
||||
size_t num_clusters = 0;
|
||||
out->resize(in_size);
|
||||
histogram_symbols->resize(in_size);
|
||||
for (size_t i = 0; i < in_size; ++i) {
|
||||
@ -269,29 +284,47 @@ void ClusterHistograms(const std::vector<HistogramType>& in,
|
||||
(*histogram_symbols)[i] = static_cast<uint32_t>(i);
|
||||
}
|
||||
|
||||
|
||||
const size_t max_input_histograms = 64;
|
||||
// For the first pass of clustering, we allow all pairs.
|
||||
size_t max_num_pairs = max_input_histograms * max_input_histograms / 2;
|
||||
std::vector<HistogramPair> pairs(max_num_pairs + 1);
|
||||
|
||||
for (size_t i = 0; i < in_size; i += max_input_histograms) {
|
||||
size_t num_to_combine = std::min(in_size - i, max_input_histograms);
|
||||
HistogramCombine(&(*out)[0], &cluster_size[0],
|
||||
&(*histogram_symbols)[i], num_to_combine,
|
||||
max_histograms);
|
||||
for (size_t j = 0; j < num_to_combine; ++j) {
|
||||
clusters[num_clusters + j] = static_cast<uint32_t>(i + j);
|
||||
}
|
||||
size_t num_new_clusters =
|
||||
HistogramCombine(&(*out)[0], &cluster_size[0],
|
||||
&(*histogram_symbols)[i],
|
||||
&clusters[num_clusters], &pairs[0],
|
||||
num_to_combine, num_to_combine,
|
||||
max_histograms, max_num_pairs);
|
||||
num_clusters += num_new_clusters;
|
||||
}
|
||||
|
||||
// For the second pass, we limit the total number of histogram pairs.
|
||||
// After this limit is reached, we only keep searching for the best pair.
|
||||
max_num_pairs =
|
||||
std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
|
||||
pairs.resize(max_num_pairs + 1);
|
||||
|
||||
// Collapse similar histograms.
|
||||
HistogramCombine(&(*out)[0], &cluster_size[0],
|
||||
&(*histogram_symbols)[0], in_size,
|
||||
max_histograms);
|
||||
num_clusters = HistogramCombine(&(*out)[0], &cluster_size[0],
|
||||
&(*histogram_symbols)[0], &clusters[0],
|
||||
&pairs[0], num_clusters, in_size,
|
||||
max_histograms, max_num_pairs);
|
||||
|
||||
// Find the optimal map from original histograms to the final ones.
|
||||
HistogramRemap(&in[0], in_size, &(*out)[0], &(*histogram_symbols)[0]);
|
||||
HistogramRemap(&in[0], in_size, &clusters[0], num_clusters,
|
||||
&(*out)[0], &(*histogram_symbols)[0]);
|
||||
|
||||
// Convert the context map to a canonical form.
|
||||
HistogramReindex(out, histogram_symbols);
|
||||
|
||||
size_t num_histograms =
|
||||
HistogramReindex(&(*out)[0], &(*histogram_symbols)[0], in_size);
|
||||
out->resize(num_histograms);
|
||||
}
|
||||
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_CLUSTER_H_
|
||||
|
@ -73,35 +73,47 @@ static inline uint16_t CombineLengthCodes(
|
||||
|
||||
static inline void GetLengthCode(size_t insertlen, size_t copylen,
|
||||
bool use_last_distance,
|
||||
uint16_t* code, uint64_t* extra) {
|
||||
uint16_t* code) {
|
||||
uint16_t inscode = GetInsertLengthCode(insertlen);
|
||||
uint16_t copycode = GetCopyLengthCode(copylen);
|
||||
uint64_t insnumextra = kInsExtra[inscode];
|
||||
uint64_t numextra = insnumextra + kCopyExtra[copycode];
|
||||
uint64_t insextraval = insertlen - kInsBase[inscode];
|
||||
uint64_t copyextraval = copylen - kCopyBase[copycode];
|
||||
*code = CombineLengthCodes(inscode, copycode, use_last_distance);
|
||||
*extra = (numextra << 48) | (copyextraval << insnumextra) | insextraval;
|
||||
}
|
||||
|
||||
// Returns the kInsBase table entry for insert-length code `inscode`.
// NOTE(review): presumably the smallest insert length covered by the code
// (GetLengthCode subtracts it from the insert length) -- confirm against the
// table definition. `inscode` is not range-checked here.
static inline uint32_t GetInsertBase(uint16_t inscode) {
|
||||
return kInsBase[inscode];
|
||||
}
|
||||
|
||||
// Returns the kInsExtra table entry for insert-length code `inscode`.
// NOTE(review): presumably the number of extra bits that follow the code --
// confirm against the table definition. `inscode` is not range-checked here.
static inline uint32_t GetInsertExtra(uint16_t inscode) {
|
||||
return kInsExtra[inscode];
|
||||
}
|
||||
|
||||
// Returns the kCopyBase table entry for copy-length code `copycode`.
// NOTE(review): presumably the smallest copy length covered by the code --
// confirm against the table definition. `copycode` is not range-checked here.
static inline uint32_t GetCopyBase(uint16_t copycode) {
|
||||
return kCopyBase[copycode];
|
||||
}
|
||||
|
||||
// Returns the kCopyExtra table entry for copy-length code `copycode`.
// NOTE(review): presumably the number of extra bits that follow the code --
// confirm against the table definition. `copycode` is not range-checked here.
static inline uint32_t GetCopyExtra(uint16_t copycode) {
|
||||
return kCopyExtra[copycode];
|
||||
}
|
||||
|
||||
struct Command {
|
||||
// distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1.
|
||||
Command(size_t insertlen, size_t copylen, size_t copylen_code,
|
||||
size_t distance_code)
|
||||
: insert_len_(static_cast<uint32_t>(insertlen))
|
||||
, copy_len_(static_cast<uint32_t>(copylen)) {
|
||||
: insert_len_(static_cast<uint32_t>(insertlen)) {
|
||||
copy_len_ = static_cast<uint32_t>(
|
||||
copylen | ((copylen_code ^ copylen) << 24));
|
||||
// The distance prefix and extra bits are stored in this Command as if
|
||||
// npostfix and ndirect were 0, they are only recomputed later after the
|
||||
// clustering if needed.
|
||||
PrefixEncodeCopyDistance(distance_code, 0, 0, &dist_prefix_, &dist_extra_);
|
||||
GetLengthCode(insertlen, copylen_code, dist_prefix_ == 0,
|
||||
&cmd_prefix_, &cmd_extra_);
|
||||
&cmd_prefix_);
|
||||
}
|
||||
|
||||
explicit Command(size_t insertlen)
|
||||
: insert_len_(static_cast<uint32_t>(insertlen))
|
||||
, copy_len_(0), dist_extra_(0), dist_prefix_(16) {
|
||||
GetLengthCode(insertlen, 4, dist_prefix_ == 0, &cmd_prefix_, &cmd_extra_);
|
||||
, copy_len_(4 << 24), dist_extra_(0), dist_prefix_(16) {
|
||||
GetLengthCode(insertlen, 4, dist_prefix_ == 0, &cmd_prefix_);
|
||||
}
|
||||
|
||||
uint32_t DistanceCode(void) const {
|
||||
@ -123,9 +135,17 @@ struct Command {
|
||||
return 3;
|
||||
}
|
||||
|
||||
// Returns the copy length, i.e. the low 24 bits of the packed copy_len_
// field (the high 8 bits carry other data and are masked off).
inline uint32_t copy_len(void) const {
|
||||
return copy_len_ & 0xFFFFFF;
|
||||
}
|
||||
|
||||
// Returns the low 24 bits of copy_len_ XORed with its high 8 bits.
// The constructor stores (copylen_code ^ copylen) in the high byte, so this
// recovers the copy length code as long as that XOR fits in 8 bits.
inline uint32_t copy_len_code(void) const {
|
||||
return (copy_len_ & 0xFFFFFF) ^ (copy_len_ >> 24);
|
||||
}
|
||||
|
||||
uint32_t insert_len_;
|
||||
/* Stores copy_len in low 24 bits and copy_len XOR copy_code in high 8 bit. */
|
||||
uint32_t copy_len_;
|
||||
uint64_t cmd_extra_;
|
||||
uint32_t dist_extra_;
|
||||
uint16_t cmd_prefix_;
|
||||
uint16_t dist_prefix_;
|
||||
|
@ -105,8 +105,11 @@ void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
|
||||
void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
|
||||
uint8_t depth[128], uint16_t bits[128],
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
CreateHuffmanTree(histogram, 64, 15, depth);
|
||||
CreateHuffmanTree(&histogram[64], 64, 14, &depth[64]);
|
||||
// Tree size for building a tree over 64 symbols is 2 * 64 + 1.
|
||||
static const size_t kTreeSize = 129;
|
||||
HuffmanTree tree[kTreeSize];
|
||||
CreateHuffmanTree(histogram, 64, 15, tree, depth);
|
||||
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
|
||||
// We have to jump through a few hoops here in order to compute
|
||||
// the command bits because the symbols are in a different order than in
|
||||
// the full alphabet. This looks complicated, but having the symbols
|
||||
@ -141,9 +144,9 @@ void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
|
||||
cmd_depth[256 + 8 * i] = depth[48 + i];
|
||||
cmd_depth[448 + 8 * i] = depth[56 + i];
|
||||
}
|
||||
StoreHuffmanTree(cmd_depth, 704, storage_ix, storage);
|
||||
StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
|
||||
}
|
||||
StoreHuffmanTree(&depth[64], 64, storage_ix, storage);
|
||||
StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
|
||||
}
|
||||
|
||||
// REQUIRES: insertlen < 6210
|
||||
@ -452,7 +455,8 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
|
||||
assert(table_size <= (1u << 31));
|
||||
assert((table_size & (table_size - 1)) == 0); // table must be power of two
|
||||
const size_t shift = 64u - Log2FloorNonZero(table_size);
|
||||
assert(static_cast<size_t>(0xffffffffffffffffU >> shift) == table_size - 1);
|
||||
assert(table_size - 1 == static_cast<size_t>(
|
||||
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
|
||||
const uint8_t* ip_end = input + block_size;
|
||||
|
||||
int last_distance = -1;
|
||||
|
@ -57,8 +57,11 @@ static void BuildAndStoreCommandPrefixCode(
|
||||
const uint32_t histogram[128],
|
||||
uint8_t depth[128], uint16_t bits[128],
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
CreateHuffmanTree(histogram, 64, 15, depth);
|
||||
CreateHuffmanTree(&histogram[64], 64, 14, &depth[64]);
|
||||
// Tree size for building a tree over 64 symbols is 2 * 64 + 1.
|
||||
static const size_t kTreeSize = 129;
|
||||
HuffmanTree tree[kTreeSize];
|
||||
CreateHuffmanTree(histogram, 64, 15, tree, depth);
|
||||
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
|
||||
// We have to jump through a few hoops here in order to compute
|
||||
// the command bits because the symbols are in a different order than in
|
||||
// the full alphabet. This looks complicated, but having the symbols
|
||||
@ -93,9 +96,9 @@ static void BuildAndStoreCommandPrefixCode(
|
||||
cmd_depth[256 + 8 * i] = depth[8 + i];
|
||||
cmd_depth[448 + 8 * i] = depth[16 + i];
|
||||
}
|
||||
StoreHuffmanTree(cmd_depth, 704, storage_ix, storage);
|
||||
StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
|
||||
}
|
||||
StoreHuffmanTree(&depth[64], 64, storage_ix, storage);
|
||||
StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
|
||||
}
|
||||
|
||||
inline void EmitInsertLen(uint32_t insertlen, uint32_t** commands) {
|
||||
@ -227,7 +230,8 @@ void CreateCommands(const uint8_t* input, size_t block_size, size_t input_size,
|
||||
assert(table_size <= (1u << 31));
|
||||
assert((table_size & (table_size - 1)) == 0); // table must be power of two
|
||||
const size_t shift = 64u - Log2FloorNonZero(table_size);
|
||||
assert(static_cast<size_t>(0xffffffffffffffffU >> shift) == table_size - 1);
|
||||
assert(table_size - 1 == static_cast<size_t>(
|
||||
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
|
||||
const uint8_t* ip_end = input + block_size;
|
||||
// "next_emit" is a pointer to the first byte that is not covered by a
|
||||
// previous copy. Bytes between "next_emit" and the start of the next copy or
|
||||
|
681
enc/encode.cc
681
enc/encode.cc
@ -38,7 +38,7 @@ static const int kMinQualityForContextModeling = 5;
|
||||
static const int kMinQualityForOptimizeHistograms = 4;
|
||||
// For quality 2 there is no block splitting, so we buffer at most this much
|
||||
// literals and commands.
|
||||
static const int kMaxNumDelayedSymbols = 0x2fff;
|
||||
static const size_t kMaxNumDelayedSymbols = 0x2fff;
|
||||
|
||||
#define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));
|
||||
|
||||
@ -51,7 +51,7 @@ void RecomputeDistancePrefixes(Command* cmds,
|
||||
}
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
Command* cmd = &cmds[i];
|
||||
if (cmd->copy_len_ > 0 && cmd->cmd_prefix_ >= 128) {
|
||||
if (cmd->copy_len() && cmd->cmd_prefix_ >= 128) {
|
||||
PrefixEncodeCopyDistance(cmd->DistanceCode(),
|
||||
num_direct_distance_codes,
|
||||
distance_postfix_bits,
|
||||
@ -180,6 +180,250 @@ void InitCommandPrefixCodes(uint8_t cmd_depths[128],
|
||||
*cmd_code_numbits = kDefaultCommandCodeNumBits;
|
||||
}
|
||||
|
||||
// Decides on the literal context map based on how well the UTF8-prefix
|
||||
// class of the previous byte predicts the class of the next byte. The
|
||||
// predictive power is measured as Shannon entropy. Here we need
|
||||
// Shannon entropy instead of 'BitsEntropy' since the prefix will be
|
||||
// encoded with the remaining 6 bits of the following byte, and
|
||||
// BitsEntropy will assume that symbol to be stored alone using Huffman
|
||||
// coding.
|
||||
//
// bigram_histo holds 9 counts; the caller in this file fills it with index
// 3 * (class of previous byte) + (class of current byte).
// On return *num_literal_contexts is 1, 2 or 3. *literal_context_map is set
// to a static 64-entry map only when 2 or 3 contexts are chosen; it is left
// untouched when a single context is chosen.
// The sum of all 9 counts must be non-zero (asserted below).
void ChooseContextMap(int quality,
|
||||
uint32_t* bigram_histo,
|
||||
size_t* num_literal_contexts,
|
||||
const uint32_t** literal_context_map) {
|
||||
// Histogram of the current-byte class alone (previous class ignored).
uint32_t monogram_histo[3] = { 0 };
|
||||
// Two groups of three bins: rows 0 and 2 of the bigram table are merged
// into bins 0..2, row 1 goes to bins 3..5.
uint32_t two_prefix_histo[6] = { 0 };
|
||||
size_t total = 0;
|
||||
for (size_t i = 0; i < 9; ++i) {
|
||||
total += bigram_histo[i];
|
||||
monogram_histo[i % 3] += bigram_histo[i];
|
||||
size_t j = i;
|
||||
if (j >= 6) {
|
||||
j -= 6;
|
||||
}
|
||||
two_prefix_histo[j] += bigram_histo[i];
|
||||
}
|
||||
// Third argument of ShannonEntropy is an output we do not use.
size_t dummy;
|
||||
// entropy1: one context; entropy2: two contexts; entropy3: three contexts.
double entropy1 = ShannonEntropy(monogram_histo, 3, &dummy);
|
||||
double entropy2 = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
|
||||
ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
|
||||
double entropy3 = 0;
|
||||
for (size_t k = 0; k < 3; ++k) {
|
||||
entropy3 += ShannonEntropy(bigram_histo + 3 * k, 3, &dummy);
|
||||
}
|
||||
|
||||
// Divide by the sample count so the thresholds below are per-symbol values.
assert(total != 0);
|
||||
double scale = 1.0 / static_cast<double>(total);
|
||||
entropy1 *= scale;
|
||||
entropy2 *= scale;
|
||||
entropy3 *= scale;
|
||||
|
||||
static const uint32_t kStaticContextMapContinuation[64] = {
|
||||
1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
static const uint32_t kStaticContextMapSimpleUTF8[64] = {
|
||||
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
if (quality < 7) {
|
||||
// 3 context models is a bit slower, don't use it at lower qualities.
|
||||
// Inflating entropy3 makes the three-context branch below unattractive.
entropy3 = entropy1 * 10;
|
||||
}
|
||||
// If expected savings by symbol are less than 0.2 bits, skip the
|
||||
// context modeling -- in exchange for faster decoding speed.
|
||||
if (entropy1 - entropy2 < 0.2 &&
|
||||
entropy1 - entropy3 < 0.2) {
|
||||
*num_literal_contexts = 1;
|
||||
} else if (entropy2 - entropy3 < 0.02) {
|
||||
// A third context would gain less than 0.02 bits per symbol: use two.
*num_literal_contexts = 2;
|
||||
*literal_context_map = kStaticContextMapSimpleUTF8;
|
||||
} else {
|
||||
*num_literal_contexts = 3;
|
||||
*literal_context_map = kStaticContextMapContinuation;
|
||||
}
|
||||
}
|
||||
|
||||
// Samples the input and lets ChooseContextMap pick the number of literal
// contexts and the static context map.
//
// input / mask: byte buffer, indexed as input[pos & mask].
// start_pos / length: the range of positions to analyse.
// If quality < kMinQualityForContextModeling or length < 64 the function
// returns without touching any of the three output parameters, so callers
// must pre-initialise them. Otherwise *literal_context_mode is set to
// CONTEXT_UTF8 and the other two outputs are set by ChooseContextMap.
void DecideOverLiteralContextModeling(const uint8_t* input,
|
||||
size_t start_pos,
|
||||
size_t length,
|
||||
size_t mask,
|
||||
int quality,
|
||||
ContextType* literal_context_mode,
|
||||
size_t* num_literal_contexts,
|
||||
const uint32_t** literal_context_map) {
|
||||
if (quality < kMinQualityForContextModeling || length < 64) {
|
||||
return;
|
||||
}
|
||||
// Gather bigram data of the UTF8 byte prefixes. To make the analysis of
|
||||
// UTF8 data faster we only examine 64 byte long strides at every 4kB
|
||||
// intervals.
|
||||
const size_t end_pos = start_pos + length;
|
||||
// 3x3 table flattened as 3 * (previous class) + (current class).
uint32_t bigram_prefix_histo[9] = { 0 };
|
||||
for (; start_pos + 64 <= end_pos; start_pos += 4096) {
|
||||
// Maps the top two bits of a byte to a class: 00 and 01 -> 0, 10 -> 1,
// 11 -> 2.
static const int lut[4] = { 0, 0, 1, 2 };
|
||||
const size_t stride_end_pos = start_pos + 64;
|
||||
// prev is kept pre-multiplied by 3 so it can be used as a row offset.
int prev = lut[input[start_pos & mask] >> 6] * 3;
|
||||
for (size_t pos = start_pos + 1; pos < stride_end_pos; ++pos) {
|
||||
const uint8_t literal = input[pos & mask];
|
||||
++bigram_prefix_histo[prev + lut[literal >> 6]];
|
||||
prev = lut[literal >> 6] * 3;
|
||||
}
|
||||
}
|
||||
*literal_context_mode = CONTEXT_UTF8;
|
||||
ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
|
||||
literal_context_map);
|
||||
}
|
||||
|
||||
// Heuristic that decides whether a meta-block is worth compressing.
//
// Returns false only when all of the following hold:
//   - there are few commands: num_commands < (bytes >> 8) + 2,
//   - more than 99% of the bytes are literals,
//   - BitsEntropy of a histogram built from every 13th byte exceeds
//     bytes * 7.92 / 13.
// In every other case returns true.
// data / mask: byte buffer, indexed as data[pos & mask], sampled starting at
// last_flush_pos.
bool ShouldCompress(const uint8_t* data,
|
||||
const size_t mask,
|
||||
const uint64_t last_flush_pos,
|
||||
const size_t bytes,
|
||||
const size_t num_literals,
|
||||
const size_t num_commands) {
|
||||
if (num_commands < (bytes >> 8) + 2) {
|
||||
if (num_literals > 0.99 * static_cast<double>(bytes)) {
|
||||
uint32_t literal_histo[256] = { 0 };
|
||||
static const uint32_t kSampleRate = 13;
|
||||
static const double kMinEntropy = 7.92;
|
||||
// The threshold is scaled down by the sample rate because only one byte
// in kSampleRate contributes to the histogram.
const double bit_cost_threshold =
|
||||
static_cast<double>(bytes) * kMinEntropy / kSampleRate;
|
||||
// Number of samples, rounded up.
size_t t = (bytes + kSampleRate - 1) / kSampleRate;
|
||||
// NOTE(review): the 64-bit position is truncated to 32 bits here. That is
// harmless while mask fits in 32 bits, but with a wider mask and positions
// of 4 GiB or more the samples would come from the wrong offsets -- confirm
// whether such callers exist.
uint32_t pos = static_cast<uint32_t>(last_flush_pos);
|
||||
for (size_t i = 0; i < t; i++) {
|
||||
++literal_histo[data[pos & mask]];
|
||||
pos += kSampleRate;
|
||||
}
|
||||
if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Serialises one meta-block into `storage`, starting at bit position
// *storage_ix and advancing *storage_ix past the written bits.
//
// data / mask / last_flush_pos / bytes: the input bytes of the meta-block
//   (positions are passed through WrapPosition before being handed to the
//   Store* / Build* helpers).
// is_last: forwarded to the Store* helpers.
// quality / font_mode: select the encoding strategy (see branches below).
// prev_byte / prev_byte2: forwarded to the context-aware builders.
// commands / num_commands / num_literals: the command stream to encode;
//   commands may be modified by RecomputeDistancePrefixes.
// saved_dist_cache / dist_cache: 4-entry arrays; dist_cache is overwritten
//   with saved_dist_cache whenever the block ends up stored uncompressed.
// On entry storage[0] must hold the partially filled byte left over from the
// previous output, since it is saved here and restored on fallback.
void WriteMetaBlockInternal(const uint8_t* data,
|
||||
const size_t mask,
|
||||
const uint64_t last_flush_pos,
|
||||
const size_t bytes,
|
||||
const bool is_last,
|
||||
const int quality,
|
||||
const bool font_mode,
|
||||
const uint8_t prev_byte,
|
||||
const uint8_t prev_byte2,
|
||||
const size_t num_literals,
|
||||
const size_t num_commands,
|
||||
Command* commands,
|
||||
const int* saved_dist_cache,
|
||||
int* dist_cache,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
if (bytes == 0) {
|
||||
// Write the ISLAST and ISEMPTY bits.
|
||||
// NOTE(review): is_last is not consulted here; presumably callers only
// pass bytes == 0 together with is_last -- confirm.
WriteBits(2, 3, storage_ix, storage);
|
||||
// Round the bit position up to the next byte boundary.
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!ShouldCompress(data, mask, last_flush_pos, bytes,
|
||||
num_literals, num_commands)) {
|
||||
// Restore the distance cache, as its last update by
|
||||
// CreateBackwardReferences is now unused.
|
||||
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
|
||||
StoreUncompressedMetaBlock(is_last, data,
|
||||
WrapPosition(last_flush_pos), mask, bytes,
|
||||
storage_ix, storage);
|
||||
return;
|
||||
}
|
||||
|
||||
// Remember the output state so the compressed attempt can be rolled back
// if it turns out larger than the raw data (see the end of the function).
const uint8_t last_byte = storage[0];
|
||||
const uint8_t last_byte_bits = static_cast<uint8_t>(*storage_ix & 0xff);
|
||||
uint32_t num_direct_distance_codes = 0;
|
||||
uint32_t distance_postfix_bits = 0;
|
||||
if (quality > 9 && font_mode) {
|
||||
num_direct_distance_codes = 12;
|
||||
distance_postfix_bits = 1;
|
||||
RecomputeDistancePrefixes(commands,
|
||||
num_commands,
|
||||
num_direct_distance_codes,
|
||||
distance_postfix_bits);
|
||||
}
|
||||
if (quality == 2) {
|
||||
StoreMetaBlockFast(data, WrapPosition(last_flush_pos),
|
||||
bytes, mask, is_last,
|
||||
commands, num_commands,
|
||||
storage_ix, storage);
|
||||
} else if (quality < kMinQualityForBlockSplit) {
|
||||
StoreMetaBlockTrivial(data, WrapPosition(last_flush_pos),
|
||||
bytes, mask, is_last,
|
||||
commands, num_commands,
|
||||
storage_ix, storage);
|
||||
} else {
|
||||
MetaBlockSplit mb;
|
||||
ContextType literal_context_mode = CONTEXT_UTF8;
|
||||
if (quality <= 9) {
|
||||
// Defaults used when DecideOverLiteralContextModeling returns early or
// chooses a single context: the map stays NULL and the greedy builder
// without contexts is used.
size_t num_literal_contexts = 1;
|
||||
const uint32_t* literal_context_map = NULL;
|
||||
DecideOverLiteralContextModeling(data, WrapPosition(last_flush_pos),
|
||||
bytes, mask,
|
||||
quality,
|
||||
&literal_context_mode,
|
||||
&num_literal_contexts,
|
||||
&literal_context_map);
|
||||
if (literal_context_map == NULL) {
|
||||
BuildMetaBlockGreedy(data, WrapPosition(last_flush_pos), mask,
|
||||
commands, num_commands, &mb);
|
||||
} else {
|
||||
BuildMetaBlockGreedyWithContexts(data, WrapPosition(last_flush_pos),
|
||||
mask,
|
||||
prev_byte, prev_byte2,
|
||||
literal_context_mode,
|
||||
num_literal_contexts,
|
||||
literal_context_map,
|
||||
commands, num_commands,
|
||||
&mb);
|
||||
}
|
||||
} else {
|
||||
// Quality above 9: use CONTEXT_SIGNED unless the data is mostly UTF8.
if (!IsMostlyUTF8(data, WrapPosition(last_flush_pos), mask, bytes,
|
||||
kMinUTF8Ratio)) {
|
||||
literal_context_mode = CONTEXT_SIGNED;
|
||||
}
|
||||
BuildMetaBlock(data, WrapPosition(last_flush_pos), mask,
|
||||
prev_byte, prev_byte2,
|
||||
commands, num_commands,
|
||||
literal_context_mode,
|
||||
&mb);
|
||||
}
|
||||
if (quality >= kMinQualityForOptimizeHistograms) {
|
||||
OptimizeHistograms(num_direct_distance_codes,
|
||||
distance_postfix_bits,
|
||||
&mb);
|
||||
}
|
||||
StoreMetaBlock(data, WrapPosition(last_flush_pos), bytes, mask,
|
||||
prev_byte, prev_byte2,
|
||||
is_last,
|
||||
num_direct_distance_codes,
|
||||
distance_postfix_bits,
|
||||
literal_context_mode,
|
||||
commands, num_commands,
|
||||
mb,
|
||||
storage_ix, storage);
|
||||
}
|
||||
// If the output now exceeds the input size by more than 4 bytes, discard
// the compressed attempt and store the block uncompressed instead.
if (bytes + 4 < (*storage_ix >> 3)) {
|
||||
// Restore the distance cache and last byte.
|
||||
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
|
||||
storage[0] = last_byte;
|
||||
*storage_ix = last_byte_bits;
|
||||
StoreUncompressedMetaBlock(is_last, data,
|
||||
WrapPosition(last_flush_pos), mask,
|
||||
bytes, storage_ix, storage);
|
||||
}
|
||||
}
|
||||
|
||||
BrotliCompressor::BrotliCompressor(BrotliParams params)
|
||||
: params_(params),
|
||||
hashers_(new Hashers()),
|
||||
@ -211,7 +455,7 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
|
||||
} else if (params_.lgblock == 0) {
|
||||
params_.lgblock = 16;
|
||||
if (params_.quality >= 9 && params_.lgwin > params_.lgblock) {
|
||||
params_.lgblock = std::min(21, params_.lgwin);
|
||||
params_.lgblock = std::min(18, params_.lgwin);
|
||||
}
|
||||
} else {
|
||||
params_.lgblock = std::min(kMaxInputBlockBits,
|
||||
@ -403,9 +647,13 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
|
||||
&num_literals_);
|
||||
|
||||
size_t max_length = std::min<size_t>(mask + 1, 1u << kMaxInputBlockBits);
|
||||
const size_t max_literals = max_length / 8;
|
||||
const size_t max_commands = max_length / 8;
|
||||
if (!is_last && !force_flush &&
|
||||
(params_.quality >= kMinQualityForBlockSplit ||
|
||||
(num_literals_ + num_commands_ < kMaxNumDelayedSymbols)) &&
|
||||
num_literals_ < max_literals &&
|
||||
num_commands_ < max_commands &&
|
||||
input_pos_ + input_block_size() <= last_flush_pos_ + max_length) {
|
||||
// Merge with next input block. Everything will happen later.
|
||||
last_processed_pos_ = input_pos_;
|
||||
@ -421,253 +669,36 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
|
||||
last_insert_len_ = 0;
|
||||
}
|
||||
|
||||
WriteMetaBlockInternal(is_last, out_size, output);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Decide about the context map based on the ability of the prediction
|
||||
// ability of the previous byte UTF8-prefix on the next byte. The
|
||||
// prediction ability is calculated as shannon entropy. Here we need
|
||||
// shannon entropy instead of 'BitsEntropy' since the prefix will be
|
||||
// encoded with the remaining 6 bits of the following byte, and
|
||||
// BitsEntropy will assume that symbol to be stored alone using Huffman
|
||||
// coding.
|
||||
void ChooseContextMap(int quality,
|
||||
uint32_t* bigram_histo,
|
||||
size_t* num_literal_contexts,
|
||||
const uint32_t** literal_context_map) {
|
||||
uint32_t monogram_histo[3] = { 0 };
|
||||
uint32_t two_prefix_histo[6] = { 0 };
|
||||
size_t total = 0;
|
||||
for (size_t i = 0; i < 9; ++i) {
|
||||
total += bigram_histo[i];
|
||||
monogram_histo[i % 3] += bigram_histo[i];
|
||||
size_t j = i;
|
||||
if (j >= 6) {
|
||||
j -= 6;
|
||||
}
|
||||
two_prefix_histo[j] += bigram_histo[i];
|
||||
}
|
||||
size_t dummy;
|
||||
double entropy1 = ShannonEntropy(monogram_histo, 3, &dummy);
|
||||
double entropy2 = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
|
||||
ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
|
||||
double entropy3 = 0;
|
||||
for (size_t k = 0; k < 3; ++k) {
|
||||
entropy3 += ShannonEntropy(bigram_histo + 3 * k, 3, &dummy);
|
||||
}
|
||||
|
||||
assert(total != 0);
|
||||
double scale = 1.0 / static_cast<double>(total);
|
||||
entropy1 *= scale;
|
||||
entropy2 *= scale;
|
||||
entropy3 *= scale;
|
||||
|
||||
static const uint32_t kStaticContextMapContinuation[64] = {
|
||||
1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
static const uint32_t kStaticContextMapSimpleUTF8[64] = {
|
||||
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
if (quality < 7) {
|
||||
// 3 context models is a bit slower, don't use it at lower qualities.
|
||||
entropy3 = entropy1 * 10;
|
||||
}
|
||||
// If expected savings by symbol are less than 0.2 bits, skip the
|
||||
// context modeling -- in exchange for faster decoding speed.
|
||||
if (entropy1 - entropy2 < 0.2 &&
|
||||
entropy1 - entropy3 < 0.2) {
|
||||
*num_literal_contexts = 1;
|
||||
} else if (entropy2 - entropy3 < 0.02) {
|
||||
*num_literal_contexts = 2;
|
||||
*literal_context_map = kStaticContextMapSimpleUTF8;
|
||||
} else {
|
||||
*num_literal_contexts = 3;
|
||||
*literal_context_map = kStaticContextMapContinuation;
|
||||
}
|
||||
}
|
||||
|
||||
void DecideOverLiteralContextModeling(const uint8_t* input,
|
||||
size_t start_pos,
|
||||
size_t length,
|
||||
size_t mask,
|
||||
int quality,
|
||||
ContextType* literal_context_mode,
|
||||
size_t* num_literal_contexts,
|
||||
const uint32_t** literal_context_map) {
|
||||
if (quality < kMinQualityForContextModeling || length < 64) {
|
||||
return;
|
||||
}
|
||||
// Gather bigram data of the UTF8 byte prefixes. To make the analysis of
|
||||
// UTF8 data faster we only examine 64 byte long strides at every 4kB
|
||||
// intervals.
|
||||
const size_t end_pos = start_pos + length;
|
||||
uint32_t bigram_prefix_histo[9] = { 0 };
|
||||
for (; start_pos + 64 <= end_pos; start_pos += 4096) {
|
||||
static const int lut[4] = { 0, 0, 1, 2 };
|
||||
const size_t stride_end_pos = start_pos + 64;
|
||||
int prev = lut[input[start_pos & mask] >> 6] * 3;
|
||||
for (size_t pos = start_pos + 1; pos < stride_end_pos; ++pos) {
|
||||
const uint8_t literal = input[pos & mask];
|
||||
++bigram_prefix_histo[prev + lut[literal >> 6]];
|
||||
prev = lut[literal >> 6] * 3;
|
||||
}
|
||||
}
|
||||
*literal_context_mode = CONTEXT_UTF8;
|
||||
ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
|
||||
literal_context_map);
|
||||
}
|
||||
|
||||
void BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
||||
size_t* out_size,
|
||||
uint8_t** output) {
|
||||
if (!is_last && input_pos_ == last_flush_pos_) {
|
||||
// We have no new input data and we don't have to finish the stream, so
|
||||
// nothing to do.
|
||||
*out_size = 0;
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
assert(input_pos_ >= last_flush_pos_);
|
||||
assert(input_pos_ > last_flush_pos_ || is_last);
|
||||
assert(input_pos_ - last_flush_pos_ <= 1u << 24);
|
||||
const uint32_t bytes = static_cast<uint32_t>(input_pos_ - last_flush_pos_);
|
||||
const uint8_t* data = ringbuffer_->start();
|
||||
const uint32_t mask = ringbuffer_->mask();
|
||||
const size_t max_out_size = 2 * bytes + 500;
|
||||
const uint32_t metablock_size =
|
||||
static_cast<uint32_t>(input_pos_ - last_flush_pos_);
|
||||
const size_t max_out_size = 2 * metablock_size + 500;
|
||||
uint8_t* storage = GetBrotliStorage(max_out_size);
|
||||
storage[0] = last_byte_;
|
||||
size_t storage_ix = last_byte_bits_;
|
||||
|
||||
bool uncompressed = false;
|
||||
if (num_commands_ < (bytes >> 8) + 2) {
|
||||
if (num_literals_ > 0.99 * static_cast<double>(bytes)) {
|
||||
uint32_t literal_histo[256] = { 0 };
|
||||
static const uint32_t kSampleRate = 13;
|
||||
static const double kMinEntropy = 7.92;
|
||||
const double bit_cost_threshold =
|
||||
static_cast<double>(bytes) * kMinEntropy / kSampleRate;
|
||||
size_t t = (bytes + kSampleRate - 1) / kSampleRate;
|
||||
uint32_t pos = static_cast<uint32_t>(last_flush_pos_);
|
||||
for (size_t i = 0; i < t; i++) {
|
||||
++literal_histo[data[pos & mask]];
|
||||
pos += kSampleRate;
|
||||
}
|
||||
if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
|
||||
uncompressed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bytes == 0) {
|
||||
// Write the ISLAST and ISEMPTY bits.
|
||||
WriteBits(2, 3, &storage_ix, &storage[0]);
|
||||
storage_ix = (storage_ix + 7u) & ~7u;
|
||||
} else if (uncompressed) {
|
||||
// Restore the distance cache, as its last update by
|
||||
// CreateBackwardReferences is now unused.
|
||||
memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
|
||||
StoreUncompressedMetaBlock(is_last, data,
|
||||
WrapPosition(last_flush_pos_), mask, bytes,
|
||||
&storage_ix,
|
||||
&storage[0]);
|
||||
} else {
|
||||
uint32_t num_direct_distance_codes = 0;
|
||||
uint32_t distance_postfix_bits = 0;
|
||||
if (params_.quality > 9 && params_.mode == BrotliParams::MODE_FONT) {
|
||||
num_direct_distance_codes = 12;
|
||||
distance_postfix_bits = 1;
|
||||
RecomputeDistancePrefixes(commands_,
|
||||
num_commands_,
|
||||
num_direct_distance_codes,
|
||||
distance_postfix_bits);
|
||||
}
|
||||
if (params_.quality == 2) {
|
||||
StoreMetaBlockFast(data, WrapPosition(last_flush_pos_),
|
||||
bytes, mask, is_last,
|
||||
commands_, num_commands_,
|
||||
&storage_ix,
|
||||
&storage[0]);
|
||||
} else if (params_.quality < kMinQualityForBlockSplit) {
|
||||
StoreMetaBlockTrivial(data, WrapPosition(last_flush_pos_),
|
||||
bytes, mask, is_last,
|
||||
commands_, num_commands_,
|
||||
&storage_ix,
|
||||
&storage[0]);
|
||||
} else {
|
||||
MetaBlockSplit mb;
|
||||
ContextType literal_context_mode = CONTEXT_UTF8;
|
||||
if (params_.quality <= 9) {
|
||||
size_t num_literal_contexts = 1;
|
||||
const uint32_t* literal_context_map = NULL;
|
||||
DecideOverLiteralContextModeling(data, WrapPosition(last_flush_pos_),
|
||||
bytes, mask,
|
||||
params_.quality,
|
||||
&literal_context_mode,
|
||||
&num_literal_contexts,
|
||||
&literal_context_map);
|
||||
if (literal_context_map == NULL) {
|
||||
BuildMetaBlockGreedy(data, WrapPosition(last_flush_pos_), mask,
|
||||
commands_, num_commands_,
|
||||
&mb);
|
||||
} else {
|
||||
BuildMetaBlockGreedyWithContexts(data, WrapPosition(last_flush_pos_),
|
||||
mask,
|
||||
prev_byte_, prev_byte2_,
|
||||
literal_context_mode,
|
||||
num_literal_contexts,
|
||||
literal_context_map,
|
||||
commands_, num_commands_,
|
||||
&mb);
|
||||
}
|
||||
} else {
|
||||
if (!IsMostlyUTF8(
|
||||
data, WrapPosition(last_flush_pos_), mask, bytes, kMinUTF8Ratio)) {
|
||||
literal_context_mode = CONTEXT_SIGNED;
|
||||
}
|
||||
BuildMetaBlock(data, WrapPosition(last_flush_pos_), mask,
|
||||
prev_byte_, prev_byte2_,
|
||||
commands_, num_commands_,
|
||||
literal_context_mode,
|
||||
&mb);
|
||||
}
|
||||
if (params_.quality >= kMinQualityForOptimizeHistograms) {
|
||||
OptimizeHistograms(num_direct_distance_codes,
|
||||
distance_postfix_bits,
|
||||
&mb);
|
||||
}
|
||||
StoreMetaBlock(data, WrapPosition(last_flush_pos_), bytes, mask,
|
||||
prev_byte_, prev_byte2_,
|
||||
is_last,
|
||||
num_direct_distance_codes,
|
||||
distance_postfix_bits,
|
||||
literal_context_mode,
|
||||
commands_, num_commands_,
|
||||
mb,
|
||||
&storage_ix,
|
||||
&storage[0]);
|
||||
}
|
||||
if (bytes + 4 < (storage_ix >> 3)) {
|
||||
// Restore the distance cache and last byte.
|
||||
memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
|
||||
storage[0] = last_byte_;
|
||||
storage_ix = last_byte_bits_;
|
||||
StoreUncompressedMetaBlock(is_last, data,
|
||||
WrapPosition(last_flush_pos_), mask,
|
||||
bytes, &storage_ix, &storage[0]);
|
||||
}
|
||||
}
|
||||
bool font_mode = params_.mode == BrotliParams::MODE_FONT;
|
||||
WriteMetaBlockInternal(
|
||||
data, mask, last_flush_pos_, metablock_size, is_last, params_.quality,
|
||||
font_mode, prev_byte_, prev_byte2_, num_literals_, num_commands_,
|
||||
commands_, saved_dist_cache_, dist_cache_, &storage_ix, storage);
|
||||
last_byte_ = storage[storage_ix >> 3];
|
||||
last_byte_bits_ = storage_ix & 7u;
|
||||
last_flush_pos_ = input_pos_;
|
||||
last_processed_pos_ = input_pos_;
|
||||
if (last_flush_pos_ > 0) {
|
||||
prev_byte_ = data[(static_cast<uint32_t>(last_flush_pos_) - 1) & mask];
|
||||
}
|
||||
if (last_flush_pos_ > 1) {
|
||||
prev_byte2_ = data[(static_cast<uint32_t>(last_flush_pos_) - 2) & mask];
|
||||
}
|
||||
num_commands_ = 0;
|
||||
num_literals_ = 0;
|
||||
// Save the state of the distance cache in case we need to restore it for
|
||||
@ -675,6 +706,7 @@ void BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
||||
memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
|
||||
*output = &storage[0];
|
||||
*out_size = storage_ix >> 3;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
|
||||
@ -739,6 +771,177 @@ bool BrotliCompressor::FinishStream(
|
||||
return WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer);
|
||||
}
|
||||
|
||||
int BrotliCompressBufferQuality10(int lgwin,
|
||||
size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer) {
|
||||
const size_t mask = std::numeric_limits<size_t>::max() >> 1;
|
||||
assert(input_size <= mask + 1);
|
||||
const size_t max_backward_limit = (1 << lgwin) - 16;
|
||||
int dist_cache[4] = { 4, 11, 15, 16 };
|
||||
int saved_dist_cache[4] = { 4, 11, 15, 16 };
|
||||
int ok = 1;
|
||||
const size_t max_out_size = *encoded_size;
|
||||
size_t total_out_size = 0;
|
||||
uint8_t last_byte;
|
||||
uint8_t last_byte_bits;
|
||||
EncodeWindowBits(lgwin, &last_byte, &last_byte_bits);
|
||||
|
||||
Hashers::H10* hasher = new Hashers::H10;
|
||||
const size_t hasher_eff_size = std::min(input_size, max_backward_limit + 16);
|
||||
hasher->Init(lgwin, 0, hasher_eff_size, true);
|
||||
|
||||
const int lgblock = std::min(18, lgwin);
|
||||
const int lgmetablock = std::min(24, lgwin + 1);
|
||||
const size_t max_block_size = static_cast<size_t>(1) << lgblock;
|
||||
const size_t max_metablock_size = static_cast<size_t>(1) << lgmetablock;
|
||||
const size_t max_literals_per_metablock = max_metablock_size / 8;
|
||||
const size_t max_commands_per_metablock = max_metablock_size / 8;
|
||||
size_t metablock_start = 0;
|
||||
uint8_t prev_byte = 0;
|
||||
uint8_t prev_byte2 = 0;
|
||||
while (ok && metablock_start < input_size) {
|
||||
const size_t metablock_end =
|
||||
std::min(input_size, metablock_start + max_metablock_size);
|
||||
const size_t expected_num_commands =
|
||||
(metablock_end - metablock_start) / 12 + 16;
|
||||
Command* commands = 0;
|
||||
size_t num_commands = 0;
|
||||
size_t last_insert_len = 0;
|
||||
size_t num_literals = 0;
|
||||
size_t metablock_size = 0;
|
||||
size_t cmd_alloc_size = 0;
|
||||
|
||||
for (size_t block_start = metablock_start; block_start < metablock_end; ) {
|
||||
size_t block_size = std::min(metablock_end - block_start, max_block_size);
|
||||
ZopfliNode* nodes = new ZopfliNode[block_size + 1];
|
||||
std::vector<uint32_t> path;
|
||||
hasher->StitchToPreviousBlock(block_size, block_start,
|
||||
input_buffer, mask);
|
||||
ZopfliComputeShortestPath(block_size, block_start, input_buffer, mask,
|
||||
max_backward_limit, dist_cache,
|
||||
hasher, nodes, &path);
|
||||
// We allocate a command buffer in the first iteration of this loop that
|
||||
// will be likely big enough for the whole metablock, so that for most
|
||||
// inputs we will not have to reallocate in later iterations. We do the
|
||||
// allocation here and not before the loop, because if the input is small,
|
||||
// this will be allocated after the zopfli cost model is freed, so this
|
||||
// will not increase peak memory usage.
|
||||
// TODO: If the first allocation is too small, increase command
|
||||
// buffer size exponentially.
|
||||
size_t new_cmd_alloc_size = std::max(expected_num_commands,
|
||||
num_commands + path.size() + 1);
|
||||
if (cmd_alloc_size != new_cmd_alloc_size) {
|
||||
cmd_alloc_size = new_cmd_alloc_size;
|
||||
commands = static_cast<Command*>(
|
||||
realloc(commands, cmd_alloc_size * sizeof(Command)));
|
||||
}
|
||||
ZopfliCreateCommands(block_size, block_start, max_backward_limit, path,
|
||||
&nodes[0], dist_cache, &last_insert_len,
|
||||
&commands[num_commands], &num_literals);
|
||||
num_commands += path.size();
|
||||
block_start += block_size;
|
||||
metablock_size += block_size;
|
||||
delete[] nodes;
|
||||
if (num_literals > max_literals_per_metablock ||
|
||||
num_commands > max_commands_per_metablock) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (last_insert_len > 0) {
|
||||
Command cmd(last_insert_len);
|
||||
commands[num_commands++] = cmd;
|
||||
num_literals += last_insert_len;
|
||||
}
|
||||
|
||||
const bool is_last = (metablock_start + metablock_size == input_size);
|
||||
uint8_t* storage = NULL;
|
||||
size_t storage_ix = last_byte_bits;
|
||||
|
||||
if (metablock_size == 0) {
|
||||
// Write the ISLAST and ISEMPTY bits.
|
||||
storage = new uint8_t[16];
|
||||
storage[0] = last_byte;
|
||||
WriteBits(2, 3, &storage_ix, storage);
|
||||
storage_ix = (storage_ix + 7u) & ~7u;
|
||||
} else if (!ShouldCompress(input_buffer, mask, metablock_start,
|
||||
metablock_size, num_literals, num_commands)) {
|
||||
// Restore the distance cache, as its last update by
|
||||
// CreateBackwardReferences is now unused.
|
||||
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
|
||||
storage = new uint8_t[metablock_size + 16];
|
||||
storage[0] = last_byte;
|
||||
StoreUncompressedMetaBlock(is_last, input_buffer,
|
||||
metablock_start, mask, metablock_size,
|
||||
&storage_ix, storage);
|
||||
} else {
|
||||
uint32_t num_direct_distance_codes = 0;
|
||||
uint32_t distance_postfix_bits = 0;
|
||||
MetaBlockSplit mb;
|
||||
ContextType literal_context_mode = CONTEXT_UTF8;
|
||||
if (!IsMostlyUTF8(
|
||||
input_buffer, metablock_start, mask, metablock_size,
|
||||
kMinUTF8Ratio)) {
|
||||
literal_context_mode = CONTEXT_SIGNED;
|
||||
}
|
||||
BuildMetaBlock(input_buffer, metablock_start, mask,
|
||||
prev_byte, prev_byte2,
|
||||
commands, num_commands,
|
||||
literal_context_mode,
|
||||
&mb);
|
||||
OptimizeHistograms(num_direct_distance_codes,
|
||||
distance_postfix_bits,
|
||||
&mb);
|
||||
const size_t max_out_metablock_size = 2 * metablock_size + 500;
|
||||
storage = new uint8_t[max_out_metablock_size];
|
||||
storage[0] = last_byte;
|
||||
StoreMetaBlock(input_buffer, metablock_start, metablock_size, mask,
|
||||
prev_byte, prev_byte2,
|
||||
is_last,
|
||||
num_direct_distance_codes,
|
||||
distance_postfix_bits,
|
||||
literal_context_mode,
|
||||
commands, num_commands,
|
||||
mb,
|
||||
&storage_ix, storage);
|
||||
if (metablock_size + 4 < (storage_ix >> 3)) {
|
||||
// Restore the distance cache and last byte.
|
||||
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
|
||||
storage[0] = last_byte;
|
||||
storage_ix = last_byte_bits;
|
||||
StoreUncompressedMetaBlock(is_last, input_buffer,
|
||||
metablock_start, mask,
|
||||
metablock_size, &storage_ix, storage);
|
||||
}
|
||||
}
|
||||
last_byte = storage[storage_ix >> 3];
|
||||
last_byte_bits = storage_ix & 7u;
|
||||
metablock_start += metablock_size;
|
||||
prev_byte = input_buffer[metablock_start - 1];
|
||||
prev_byte2 = input_buffer[metablock_start - 2];
|
||||
// Save the state of the distance cache in case we need to restore it for
|
||||
// emitting an uncompressed block.
|
||||
memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
|
||||
|
||||
const size_t out_size = storage_ix >> 3;
|
||||
total_out_size += out_size;
|
||||
if (total_out_size <= max_out_size) {
|
||||
memcpy(encoded_buffer, storage, out_size);
|
||||
encoded_buffer += out_size;
|
||||
} else {
|
||||
ok = 0;
|
||||
}
|
||||
delete[] storage;
|
||||
free(commands);
|
||||
}
|
||||
|
||||
*encoded_size = total_out_size;
|
||||
delete hasher;
|
||||
return ok;
|
||||
}
|
||||
|
||||
int BrotliCompressBuffer(BrotliParams params,
|
||||
size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
@ -748,6 +951,18 @@ int BrotliCompressBuffer(BrotliParams params,
|
||||
// Output buffer needs at least one byte.
|
||||
return 0;
|
||||
}
|
||||
if (input_size == 0) {
|
||||
// Handle the special case of empty input.
|
||||
*encoded_size = 1;
|
||||
*encoded_buffer = 6;
|
||||
return 1;
|
||||
}
|
||||
if (params.quality == 10) {
|
||||
// TODO(user) Implement this direct path for all quality levels.
|
||||
const int lgwin = std::min(24, std::max(16, params.lgwin));
|
||||
return BrotliCompressBufferQuality10(lgwin, input_size, input_buffer,
|
||||
encoded_size, encoded_buffer);
|
||||
}
|
||||
BrotliMemIn in(input_buffer, input_size);
|
||||
BrotliMemOut out(encoded_buffer, *encoded_size);
|
||||
if (!BrotliCompress(params, &in, &out)) {
|
||||
|
@ -140,10 +140,6 @@ class BrotliCompressor {
|
||||
int* GetHashTable(int quality,
|
||||
size_t input_size, size_t* table_size);
|
||||
|
||||
void WriteMetaBlockInternal(const bool is_last,
|
||||
size_t* out_size,
|
||||
uint8_t** output);
|
||||
|
||||
BrotliParams params_;
|
||||
Hashers* hashers_;
|
||||
int hash_type_;
|
||||
|
@ -40,7 +40,7 @@ void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
|
||||
}
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
Command* cmd = &cmds[i];
|
||||
if (cmd->copy_len_ > 0 && cmd->cmd_prefix_ >= 128) {
|
||||
if (cmd->copy_len() && cmd->cmd_prefix_ >= 128) {
|
||||
PrefixEncodeCopyDistance(cmd->DistanceCode(),
|
||||
num_direct_distance_codes,
|
||||
distance_postfix_bits,
|
||||
|
@ -10,7 +10,6 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "./histogram.h"
|
||||
@ -32,6 +31,15 @@ void SetDepth(const HuffmanTree &p,
|
||||
}
|
||||
}
|
||||
|
||||
// Sort the root nodes, least popular first.
|
||||
static inline bool SortHuffmanTree(const HuffmanTree& v0,
|
||||
const HuffmanTree& v1) {
|
||||
if (v0.total_count_ != v1.total_count_) {
|
||||
return v0.total_count_ < v1.total_count_;
|
||||
}
|
||||
return v0.index_right_or_value_ > v1.index_right_or_value_;
|
||||
}
|
||||
|
||||
// This function will create a Huffman tree.
|
||||
//
|
||||
// The catch here is that the tree cannot be arbitrarily deep.
|
||||
@ -50,30 +58,28 @@ void SetDepth(const HuffmanTree &p,
|
||||
void CreateHuffmanTree(const uint32_t *data,
|
||||
const size_t length,
|
||||
const int tree_limit,
|
||||
HuffmanTree* tree,
|
||||
uint8_t *depth) {
|
||||
// For block sizes below 64 kB, we never need to do a second iteration
|
||||
// of this loop. Probably all of our block sizes will be smaller than
|
||||
// that, so this loop is mostly of academic interest. If we actually
|
||||
// would need this, we would be better off with the Katajainen algorithm.
|
||||
for (uint32_t count_limit = 1; ; count_limit *= 2) {
|
||||
std::vector<HuffmanTree> tree;
|
||||
tree.reserve(2 * length + 1);
|
||||
|
||||
size_t n = 0;
|
||||
for (size_t i = length; i != 0;) {
|
||||
--i;
|
||||
if (data[i]) {
|
||||
const uint32_t count = std::max(data[i], count_limit);
|
||||
tree.push_back(HuffmanTree(count, -1, static_cast<int16_t>(i)));
|
||||
tree[n++] = HuffmanTree(count, -1, static_cast<int16_t>(i));
|
||||
}
|
||||
}
|
||||
|
||||
const size_t n = tree.size();
|
||||
if (n == 1) {
|
||||
depth[tree[0].index_right_or_value_] = 1; // Only one element.
|
||||
break;
|
||||
}
|
||||
|
||||
std::stable_sort(tree.begin(), tree.end(), SortHuffmanTree);
|
||||
std::sort(tree, tree + n, SortHuffmanTree);
|
||||
|
||||
// The nodes are:
|
||||
// [0, n): the sorted leaf nodes that we start with.
|
||||
@ -83,8 +89,8 @@ void CreateHuffmanTree(const uint32_t *data,
|
||||
// [2n]: we add a sentinel at the end as well.
|
||||
// There will be (2n+1) elements at the end.
|
||||
const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
|
||||
tree.push_back(sentinel);
|
||||
tree.push_back(sentinel);
|
||||
tree[n] = sentinel;
|
||||
tree[n + 1] = sentinel;
|
||||
|
||||
size_t i = 0; // Points to the next leaf node.
|
||||
size_t j = n + 1; // Points to the next non-leaf node.
|
||||
@ -106,16 +112,15 @@ void CreateHuffmanTree(const uint32_t *data,
|
||||
}
|
||||
|
||||
// The sentinel node becomes the parent node.
|
||||
size_t j_end = tree.size() - 1;
|
||||
size_t j_end = 2 * n - k;
|
||||
tree[j_end].total_count_ =
|
||||
tree[left].total_count_ + tree[right].total_count_;
|
||||
tree[j_end].index_left_ = static_cast<int16_t>(left);
|
||||
tree[j_end].index_right_or_value_ = static_cast<int16_t>(right);
|
||||
|
||||
// Add back the last sentinel node.
|
||||
tree.push_back(sentinel);
|
||||
tree[j_end + 1] = sentinel;
|
||||
}
|
||||
assert(tree.size() == 2 * n + 1);
|
||||
SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
|
||||
|
||||
// We need to pack the Huffman tree in tree_limit bits.
|
||||
@ -127,12 +132,12 @@ void CreateHuffmanTree(const uint32_t *data,
|
||||
}
|
||||
}
|
||||
|
||||
void Reverse(std::vector<uint8_t>* v, size_t start, size_t end) {
|
||||
void Reverse(uint8_t* v, size_t start, size_t end) {
|
||||
--end;
|
||||
while (start < end) {
|
||||
uint8_t tmp = (*v)[start];
|
||||
(*v)[start] = (*v)[end];
|
||||
(*v)[end] = tmp;
|
||||
uint8_t tmp = v[start];
|
||||
v[start] = v[end];
|
||||
v[end] = tmp;
|
||||
++start;
|
||||
--end;
|
||||
}
|
||||
@ -142,79 +147,88 @@ void WriteHuffmanTreeRepetitions(
|
||||
const uint8_t previous_value,
|
||||
const uint8_t value,
|
||||
size_t repetitions,
|
||||
std::vector<uint8_t> *tree,
|
||||
std::vector<uint8_t> *extra_bits_data) {
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data) {
|
||||
assert(repetitions > 0);
|
||||
if (previous_value != value) {
|
||||
tree->push_back(value);
|
||||
extra_bits_data->push_back(0);
|
||||
tree[*tree_size] = value;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
--repetitions;
|
||||
}
|
||||
if (repetitions == 7) {
|
||||
tree->push_back(value);
|
||||
extra_bits_data->push_back(0);
|
||||
tree[*tree_size] = value;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
--repetitions;
|
||||
}
|
||||
if (repetitions < 3) {
|
||||
for (size_t i = 0; i < repetitions; ++i) {
|
||||
tree->push_back(value);
|
||||
extra_bits_data->push_back(0);
|
||||
tree[*tree_size] = value;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
}
|
||||
} else {
|
||||
repetitions -= 3;
|
||||
size_t start = tree->size();
|
||||
size_t start = *tree_size;
|
||||
while (true) {
|
||||
tree->push_back(16);
|
||||
extra_bits_data->push_back(repetitions & 0x3);
|
||||
tree[*tree_size] = 16;
|
||||
extra_bits_data[*tree_size] = repetitions & 0x3;
|
||||
++(*tree_size);
|
||||
repetitions >>= 2;
|
||||
if (repetitions == 0) {
|
||||
break;
|
||||
}
|
||||
--repetitions;
|
||||
}
|
||||
Reverse(tree, start, tree->size());
|
||||
Reverse(extra_bits_data, start, tree->size());
|
||||
Reverse(tree, start, *tree_size);
|
||||
Reverse(extra_bits_data, start, *tree_size);
|
||||
}
|
||||
}
|
||||
|
||||
void WriteHuffmanTreeRepetitionsZeros(
|
||||
size_t repetitions,
|
||||
std::vector<uint8_t> *tree,
|
||||
std::vector<uint8_t> *extra_bits_data) {
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data) {
|
||||
if (repetitions == 11) {
|
||||
tree->push_back(0);
|
||||
extra_bits_data->push_back(0);
|
||||
tree[*tree_size] = 0;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
--repetitions;
|
||||
}
|
||||
if (repetitions < 3) {
|
||||
for (size_t i = 0; i < repetitions; ++i) {
|
||||
tree->push_back(0);
|
||||
extra_bits_data->push_back(0);
|
||||
tree[*tree_size] = 0;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
}
|
||||
} else {
|
||||
repetitions -= 3;
|
||||
size_t start = tree->size();
|
||||
size_t start = *tree_size;
|
||||
while (true) {
|
||||
tree->push_back(17);
|
||||
extra_bits_data->push_back(repetitions & 0x7);
|
||||
tree[*tree_size] = 17;
|
||||
extra_bits_data[*tree_size] = repetitions & 0x7;
|
||||
++(*tree_size);
|
||||
repetitions >>= 3;
|
||||
if (repetitions == 0) {
|
||||
break;
|
||||
}
|
||||
--repetitions;
|
||||
}
|
||||
Reverse(tree, start, tree->size());
|
||||
Reverse(extra_bits_data, start, tree->size());
|
||||
Reverse(tree, start, *tree_size);
|
||||
Reverse(extra_bits_data, start, *tree_size);
|
||||
}
|
||||
}
|
||||
|
||||
bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) {
|
||||
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
|
||||
uint8_t* good_for_rle) {
|
||||
size_t nonzero_count = 0;
|
||||
size_t stride;
|
||||
size_t limit;
|
||||
size_t sum;
|
||||
const size_t streak_limit = 1240;
|
||||
uint8_t* good_for_rle;
|
||||
// Let's make the Huffman code more compatible with rle encoding.
|
||||
size_t i;
|
||||
for (i = 0; i < length; i++) {
|
||||
@ -223,13 +237,13 @@ bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) {
|
||||
}
|
||||
}
|
||||
if (nonzero_count < 16) {
|
||||
return 1;
|
||||
return;
|
||||
}
|
||||
while (length != 0 && counts[length - 1] == 0) {
|
||||
--length;
|
||||
}
|
||||
if (length == 0) {
|
||||
return 1; // All zeros.
|
||||
return; // All zeros.
|
||||
}
|
||||
// Now counts[0..length - 1] does not have trailing zeros.
|
||||
{
|
||||
@ -245,7 +259,7 @@ bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) {
|
||||
}
|
||||
if (nonzeros < 5) {
|
||||
// Small histogram will model it well.
|
||||
return 1;
|
||||
return;
|
||||
}
|
||||
size_t zeros = length - nonzeros;
|
||||
if (smallest_nonzero < 4) {
|
||||
@ -258,15 +272,12 @@ bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) {
|
||||
}
|
||||
}
|
||||
if (nonzeros < 28) {
|
||||
return 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
// 2) Let's mark all population counts that already can be encoded
|
||||
// with an rle code.
|
||||
good_for_rle = (uint8_t*)calloc(length, 1);
|
||||
if (good_for_rle == NULL) {
|
||||
return 0;
|
||||
}
|
||||
memset(good_for_rle, 0, length);
|
||||
{
|
||||
// Let's not spoil any of the existing good rle codes.
|
||||
// Mark any sequence of 0's that is longer than 5 as good_for_rle.
|
||||
@ -340,8 +351,6 @@ bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) {
|
||||
}
|
||||
}
|
||||
}
|
||||
free(good_for_rle);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
|
||||
@ -373,8 +382,9 @@ static void DecideOverRleUse(const uint8_t* depth, const size_t length,
|
||||
|
||||
void WriteHuffmanTree(const uint8_t* depth,
|
||||
size_t length,
|
||||
std::vector<uint8_t> *tree,
|
||||
std::vector<uint8_t> *extra_bits_data) {
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data) {
|
||||
uint8_t previous_value = 8;
|
||||
|
||||
// Throw away trailing zeros.
|
||||
@ -408,10 +418,11 @@ void WriteHuffmanTree(const uint8_t* depth,
|
||||
}
|
||||
}
|
||||
if (value == 0) {
|
||||
WriteHuffmanTreeRepetitionsZeros(reps, tree, extra_bits_data);
|
||||
WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
|
||||
} else {
|
||||
WriteHuffmanTreeRepetitions(previous_value,
|
||||
value, reps, tree, extra_bits_data);
|
||||
value, reps, tree_size,
|
||||
tree, extra_bits_data);
|
||||
previous_value = value;
|
||||
}
|
||||
i += reps;
|
||||
|
@ -10,7 +10,6 @@
|
||||
#define BROTLI_ENC_ENTROPY_ENCODE_H_
|
||||
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
#include "./histogram.h"
|
||||
#include "./prefix.h"
|
||||
#include "./types.h"
|
||||
@ -19,6 +18,7 @@ namespace brotli {
|
||||
|
||||
// A node of a Huffman tree.
|
||||
struct HuffmanTree {
|
||||
HuffmanTree() {}
|
||||
HuffmanTree(uint32_t count, int16_t left, int16_t right)
|
||||
: total_count_(count),
|
||||
index_left_(left),
|
||||
@ -29,11 +29,6 @@ struct HuffmanTree {
|
||||
int16_t index_right_or_value_;
|
||||
};
|
||||
|
||||
// Sort the root nodes, least popular first.
|
||||
inline bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) {
|
||||
return v0.total_count_ < v1.total_count_;
|
||||
}
|
||||
|
||||
void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
|
||||
uint8_t *depth, uint8_t level);
|
||||
|
||||
@ -45,10 +40,14 @@ void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
|
||||
// The depth contains the tree, i.e., how many bits are used for
|
||||
// the symbol.
|
||||
//
|
||||
// The actual Huffman tree is constructed in the tree[] array, which has to
|
||||
// be at least 2 * length + 1 long.
|
||||
//
|
||||
// See http://en.wikipedia.org/wiki/Huffman_coding
|
||||
void CreateHuffmanTree(const uint32_t *data,
|
||||
const size_t length,
|
||||
const int tree_limit,
|
||||
HuffmanTree* tree,
|
||||
uint8_t *depth);
|
||||
|
||||
// Change the population counts in a way that the consequent
|
||||
@ -57,15 +56,18 @@ void CreateHuffmanTree(const uint32_t *data,
|
||||
//
|
||||
// length contains the size of the histogram.
|
||||
// counts contains the population counts.
|
||||
bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts);
|
||||
// good_for_rle is a buffer of at least length size
|
||||
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
|
||||
uint8_t* good_for_rle);
|
||||
|
||||
// Write a Huffman tree from bit depths into the bitstream representation
|
||||
// of a Huffman tree. The generated Huffman tree is to be compressed once
|
||||
// more using a Huffman tree
|
||||
void WriteHuffmanTree(const uint8_t* depth,
|
||||
size_t num,
|
||||
std::vector<uint8_t> *tree,
|
||||
std::vector<uint8_t> *extra_bits_data);
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data);
|
||||
|
||||
// Get the actual bit values for a tree of bit depths.
|
||||
void ConvertBitDepthsToSymbols(const uint8_t *depth,
|
||||
|
@ -78,7 +78,7 @@ static const uint32_t kCodeLengthBits[18] = {
|
||||
};
|
||||
|
||||
inline void StoreStaticCodeLengthCode(size_t* storage_ix, uint8_t* storage) {
|
||||
WriteBits(40, 0x000000ff55555554U, storage_ix, storage);
|
||||
WriteBits(40, MAKE_UINT64_T(0xff, 0x55555554), storage_ix, storage);
|
||||
}
|
||||
|
||||
static const uint64_t kZeroRepsBits[704] = {
|
||||
|
43
enc/hash.h
43
enc/hash.h
@ -14,7 +14,6 @@
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "./dictionary_hash.h"
|
||||
#include "./fast_log.h"
|
||||
@ -278,7 +277,7 @@ class HashLongestMatchQuickly {
|
||||
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
|
||||
const size_t transform_id = kCutoffTransforms[len - matchlen];
|
||||
const size_t word_id =
|
||||
transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
|
||||
transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]) +
|
||||
dist;
|
||||
const size_t backward = max_backward + word_id + 1;
|
||||
const double score = BackwardReferenceScore(matchlen, backward);
|
||||
@ -574,8 +573,10 @@ class HashLongestMatch {
|
||||
}
|
||||
buckets_[key][num_[key] & kBlockMask] = static_cast<uint32_t>(cur_ix);
|
||||
++num_[key];
|
||||
std::vector<uint32_t> dict_matches(kMaxDictionaryMatchLen + 1,
|
||||
kInvalidMatch);
|
||||
uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
|
||||
for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
|
||||
dict_matches[i] = kInvalidMatch;
|
||||
}
|
||||
size_t minlen = std::max<size_t>(4, best_len + 1);
|
||||
if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
|
||||
&dict_matches[0])) {
|
||||
@ -706,8 +707,10 @@ class HashToBinaryTree {
|
||||
matches = StoreAndFindMatches(data, cur_ix, ring_buffer_mask,
|
||||
max_length, &best_len, matches);
|
||||
}
|
||||
std::vector<uint32_t> dict_matches(kMaxDictionaryMatchLen + 1,
|
||||
kInvalidMatch);
|
||||
uint32_t dict_matches[kMaxDictionaryMatchLen + 1];
|
||||
for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
|
||||
dict_matches[i] = kInvalidMatch;
|
||||
}
|
||||
size_t minlen = std::max<size_t>(4, best_len + 1);
|
||||
if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
|
||||
&dict_matches[0])) {
|
||||
@ -725,15 +728,34 @@ class HashToBinaryTree {
|
||||
|
||||
// Stores the hash of the next 4 bytes and re-roots the binary tree at the
|
||||
// current sequence, without returning any matches.
|
||||
// REQUIRES: cur_ix + kMaxTreeCompLength <= end-of-current-block
|
||||
void Store(const uint8_t* data,
|
||||
const size_t ring_buffer_mask,
|
||||
const size_t cur_ix,
|
||||
const size_t max_length) {
|
||||
const size_t cur_ix) {
|
||||
size_t best_len = 0;
|
||||
StoreAndFindMatches(data, cur_ix, ring_buffer_mask, max_length,
|
||||
StoreAndFindMatches(data, cur_ix, ring_buffer_mask, kMaxTreeCompLength,
|
||||
&best_len, NULL);
|
||||
}
|
||||
|
||||
void StitchToPreviousBlock(size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask) {
|
||||
if (num_bytes >= 3 && position >= kMaxTreeCompLength) {
|
||||
// Store the last `kMaxTreeCompLength - 1` positions in the hasher.
|
||||
// These could not be calculated before, since they require knowledge
|
||||
// of both the previous and the current block.
|
||||
const size_t i_start = position - kMaxTreeCompLength + 1;
|
||||
const size_t i_end = std::min(position, i_start + num_bytes);
|
||||
for (size_t i = i_start; i < i_end; ++i) {
|
||||
// We know that i + kMaxTreeCompLength <= position + num_bytes, i.e. the
|
||||
// end of the current block and that we have at least
|
||||
// kMaxTreeCompLength tail in the ringbuffer.
|
||||
Store(ringbuffer, ringbuffer_mask, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static const size_t kMaxNumMatches = 64 + kMaxTreeSearchDepth;
|
||||
|
||||
private:
|
||||
@ -928,8 +950,7 @@ struct Hashers {
|
||||
case 10:
|
||||
hash_h10->Init(lgwin, 0, size, false);
|
||||
for (size_t i = 0; i + kMaxTreeCompLength - 1 < size; ++i) {
|
||||
hash_h10->Store(dict, std::numeric_limits<size_t>::max(),
|
||||
i, size - i);
|
||||
hash_h10->Store(dict, std::numeric_limits<size_t>::max(), i);
|
||||
}
|
||||
break;
|
||||
default: break;
|
||||
|
@ -50,8 +50,8 @@ void BuildHistograms(
|
||||
prev_byte = ringbuffer[pos & mask];
|
||||
++pos;
|
||||
}
|
||||
pos += cmd.copy_len_;
|
||||
if (cmd.copy_len_ > 0) {
|
||||
pos += cmd.copy_len();
|
||||
if (cmd.copy_len()) {
|
||||
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
||||
prev_byte = ringbuffer[(pos - 1) & mask];
|
||||
if (cmd.cmd_prefix_ >= 128) {
|
||||
|
@ -258,8 +258,8 @@ void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
|
||||
lit_blocks.AddSymbol(ringbuffer[pos & mask]);
|
||||
++pos;
|
||||
}
|
||||
pos += cmd.copy_len_;
|
||||
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
|
||||
pos += cmd.copy_len();
|
||||
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
|
||||
dist_blocks.AddSymbol(cmd.dist_prefix_);
|
||||
}
|
||||
}
|
||||
@ -488,8 +488,8 @@ void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
|
||||
prev_byte = literal;
|
||||
++pos;
|
||||
}
|
||||
pos += cmd.copy_len_;
|
||||
if (cmd.copy_len_ > 0) {
|
||||
pos += cmd.copy_len();
|
||||
if (cmd.copy_len()) {
|
||||
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
||||
prev_byte = ringbuffer[(pos - 1) & mask];
|
||||
if (cmd.cmd_prefix_ >= 128) {
|
||||
@ -515,20 +515,25 @@ void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
|
||||
void OptimizeHistograms(size_t num_direct_distance_codes,
|
||||
size_t distance_postfix_bits,
|
||||
MetaBlockSplit* mb) {
|
||||
uint8_t* good_for_rle = new uint8_t[kNumCommandPrefixes];
|
||||
for (size_t i = 0; i < mb->literal_histograms.size(); ++i) {
|
||||
OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0]);
|
||||
OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0],
|
||||
good_for_rle);
|
||||
}
|
||||
for (size_t i = 0; i < mb->command_histograms.size(); ++i) {
|
||||
OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
|
||||
&mb->command_histograms[i].data_[0]);
|
||||
&mb->command_histograms[i].data_[0],
|
||||
good_for_rle);
|
||||
}
|
||||
size_t num_distance_codes =
|
||||
kNumDistanceShortCodes + num_direct_distance_codes +
|
||||
(48u << distance_postfix_bits);
|
||||
for (size_t i = 0; i < mb->distance_histograms.size(); ++i) {
|
||||
OptimizeHuffmanCountsForRle(num_distance_codes,
|
||||
&mb->distance_histograms[i].data_[0]);
|
||||
&mb->distance_histograms[i].data_[0],
|
||||
good_for_rle);
|
||||
}
|
||||
delete[] good_for_rle;
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
|
@ -60,7 +60,7 @@ inline void PrefixEncodeCopyDistance(size_t distance_code,
|
||||
return;
|
||||
}
|
||||
distance_code -= kNumDistanceShortCodes + num_direct_codes; /* >= 0 */
|
||||
distance_code += (1 << (postfix_bits + 2)); /* > 0 */
|
||||
distance_code += (1u << (postfix_bits + 2u)); /* > 0 */
|
||||
size_t bucket = Log2FloorNonZero(distance_code) - 1;
|
||||
size_t postfix_mask = (1 << postfix_bits) - 1;
|
||||
size_t postfix = distance_code & postfix_mask;
|
||||
|
@ -31,24 +31,52 @@ class RingBuffer {
|
||||
: size_(1u << window_bits),
|
||||
mask_((1u << window_bits) - 1),
|
||||
tail_size_(1u << tail_bits),
|
||||
pos_(0) {
|
||||
static const size_t kSlackForEightByteHashingEverywhere = 7;
|
||||
const size_t buflen = size_ + tail_size_;
|
||||
data_ = new uint8_t[2 + buflen + kSlackForEightByteHashingEverywhere];
|
||||
buffer_ = data_ + 2;
|
||||
for (size_t i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
|
||||
buffer_[buflen + i] = 0;
|
||||
}
|
||||
// Initialize the last two bytes and their copy to zero.
|
||||
buffer_[-2] = buffer_[size_ - 2] = 0;
|
||||
buffer_[-1] = buffer_[size_ - 1] = 0;
|
||||
}
|
||||
total_size_(size_ + tail_size_),
|
||||
cur_size_(0),
|
||||
pos_(0),
|
||||
data_(0),
|
||||
buffer_(0) {}
|
||||
|
||||
~RingBuffer(void) {
|
||||
delete [] data_;
|
||||
free(data_);
|
||||
}
|
||||
|
||||
// Allocates or re-allocates data_ to the given length plus some slack
|
||||
// region before and after. Fills the slack regions with zeros.
|
||||
inline void InitBuffer(const uint32_t buflen) {
  // Seven extra bytes after the data let eight-byte hashing read at any
  // position inside the buffer.
  static const size_t kSlackForEightByteHashingEverywhere = 7;
  // Layout of data_: 2 leading slack bytes, buflen data bytes, trailing slack.
  const size_t alloc_size = 2 + buflen + kSlackForEightByteHashingEverywhere;
  // NOTE(review): the realloc() result is used without a NULL check.
  data_ = static_cast<uint8_t*>(realloc(data_, alloc_size));
  cur_size_ = buflen;
  buffer_ = data_ + 2;
  // Zero the two bytes in front of the buffer.
  buffer_[-1] = 0;
  buffer_[-2] = 0;
  // Zero the slack region behind the buffer.
  uint8_t* const slack_begin = buffer_ + cur_size_;
  for (size_t k = 0; k != kSlackForEightByteHashingEverywhere; ++k) {
    slack_begin[k] = 0;
  }
}
|
||||
|
||||
// Push bytes into the ring buffer.
|
||||
void Write(const uint8_t *bytes, size_t n) {
|
||||
if (pos_ == 0 && n < tail_size_) {
|
||||
// Special case for the first write: to process the first block, we don't
|
||||
// need to allocate the whole ringbuffer and we don't need the tail
|
||||
// either. However, we do this memory usage optimization only if the
|
||||
// first write is less than the tail size, which is also the input block
|
||||
// size, otherwise it is likely that other blocks will follow and we
|
||||
// will need to reallocate to the full size anyway.
|
||||
pos_ = static_cast<uint32_t>(n);
|
||||
InitBuffer(pos_);
|
||||
memcpy(buffer_, bytes, n);
|
||||
return;
|
||||
}
|
||||
if (cur_size_ < total_size_) {
|
||||
// Lazily allocate the full buffer.
|
||||
InitBuffer(total_size_);
|
||||
// Initialize the last two bytes to zero, so that we don't have to worry
|
||||
// later when we copy the last two bytes to the first two positions.
|
||||
buffer_[size_ - 2] = 0;
|
||||
buffer_[size_ - 1] = 0;
|
||||
}
|
||||
const size_t masked_pos = pos_ & mask_;
|
||||
// The length of the writes is limited so that we do not need to worry
|
||||
// about a write
|
||||
@ -60,7 +88,7 @@ class RingBuffer {
|
||||
// Split into two writes.
|
||||
// Copy into the end of the buffer, including the tail buffer.
|
||||
memcpy(&buffer_[masked_pos], bytes,
|
||||
std::min(n, (size_ + tail_size_) - masked_pos));
|
||||
std::min(n, total_size_ - masked_pos));
|
||||
// Copy into the beginning of the buffer
|
||||
memcpy(&buffer_[0], bytes + (size_ - masked_pos),
|
||||
n - (size_ - masked_pos));
|
||||
@ -100,7 +128,9 @@ class RingBuffer {
|
||||
const uint32_t size_;
|
||||
const uint32_t mask_;
|
||||
const uint32_t tail_size_;
|
||||
const uint32_t total_size_;
|
||||
|
||||
uint32_t cur_size_;
|
||||
// Position to write in the ring buffer.
|
||||
uint32_t pos_;
|
||||
// The actual ring buffer containing the copy of the last two bytes, the data,
|
||||
|
@ -197,7 +197,7 @@ static size_t ToUpperCase(uint8_t *p, size_t len) {
|
||||
inline std::string TransformWord(
|
||||
WordTransformType transform_type, const uint8_t* word, size_t len) {
|
||||
if (transform_type <= kOmitLast9) {
|
||||
if (len <= transform_type) {
|
||||
if (len <= static_cast<size_t>(transform_type)) {
|
||||
return std::string();
|
||||
}
|
||||
return std::string(word, word + len - transform_type);
|
||||
|
@ -24,4 +24,6 @@ typedef __int64 int64_t;
|
||||
#include <stdint.h>
|
||||
#endif /* defined(_MSC_VER) && (_MSC_VER < 1600) */
|
||||
|
||||
/* Builds a 64-bit value from two halves: `high` is widened to uint64_t and
   shifted into bits 32..63, then OR-ed with `low`. Both arguments are
   parenthesized so that expression arguments (e.g. a conditional expression)
   are combined as a whole. */
#define MAKE_UINT64_T(high, low) ((((uint64_t)(high)) << 32) | (low))
|
||||
|
||||
#endif /* BROTLI_ENC_TYPES_H_ */
|
||||
|
Loading…
Reference in New Issue
Block a user