Fix more conversion warnings.

Zoltan Szabadka 2016-01-07 16:27:49 +01:00
parent 580db01252
commit 8844b7f0d7
32 changed files with 1471 additions and 1429 deletions

enc/backward_references.cc

@@ -31,22 +31,22 @@ class ZopfliCostModel {
size_t ringbuffer_mask,
const Command* commands,
size_t num_commands,
int last_insert_len) {
std::vector<int> histogram_literal(256, 0);
std::vector<int> histogram_cmd(kNumCommandPrefixes, 0);
std::vector<int> histogram_dist(kNumDistancePrefixes, 0);
size_t last_insert_len) {
std::vector<uint32_t> histogram_literal(256, 0);
std::vector<uint32_t> histogram_cmd(kNumCommandPrefixes, 0);
std::vector<uint32_t> histogram_dist(kNumDistancePrefixes, 0);
size_t pos = position - last_insert_len;
for (size_t i = 0; i < num_commands; i++) {
int inslength = commands[i].insert_len_;
int copylength = commands[i].copy_len_;
int distcode = commands[i].dist_prefix_;
int cmdcode = commands[i].cmd_prefix_;
size_t inslength = commands[i].insert_len_;
size_t copylength = commands[i].copy_len_;
size_t distcode = commands[i].dist_prefix_;
size_t cmdcode = commands[i].cmd_prefix_;
histogram_cmd[cmdcode]++;
if (cmdcode >= 128) histogram_dist[distcode]++;
for (int j = 0; j < inslength; j++) {
for (size_t j = 0; j < inslength; j++) {
histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
}
@@ -58,7 +58,7 @@ class ZopfliCostModel {
Set(histogram_cmd, &cost_cmd_);
Set(histogram_dist, &cost_dist_);
for (int i = 0; i < kNumCommandPrefixes; ++i) {
for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
min_cost_cmd_ = std::min(min_cost_cmd_, cost_cmd_[i]);
}
@@ -84,17 +84,17 @@ class ZopfliCostModel {
}
cost_cmd_.resize(kNumCommandPrefixes);
cost_dist_.resize(kNumDistancePrefixes);
for (int i = 0; i < kNumCommandPrefixes; ++i) {
for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
cost_cmd_[i] = FastLog2(11 + i);
}
for (int i = 0; i < kNumDistancePrefixes; ++i) {
for (uint32_t i = 0; i < kNumDistancePrefixes; ++i) {
cost_dist_[i] = FastLog2(20 + i);
}
min_cost_cmd_ = FastLog2(11);
}
double GetCommandCost(
int dist_code, int length_code, int insert_length) const {
size_t dist_code, size_t length_code, size_t insert_length) const {
uint16_t inscode = GetInsertLengthCode(insert_length);
uint16_t copycode = GetCopyLengthCode(length_code);
uint16_t cmdcode = CombineLengthCodes(inscode, copycode, dist_code == 0);
@@ -103,7 +103,8 @@ class ZopfliCostModel {
PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
uint32_t distnumextra = distextra >> 24;
double result = insextra[inscode] + copyextra[copycode] + distnumextra;
double result = static_cast<double>(
kInsExtra[inscode] + kCopyExtra[copycode] + distnumextra);
result += cost_cmd_[cmdcode];
if (cmdcode >= 128) result += cost_dist_[dist_symbol];
return result;
@@ -118,9 +119,9 @@ class ZopfliCostModel {
}
private:
void Set(const std::vector<int>& histogram, std::vector<double>* cost) {
void Set(const std::vector<uint32_t>& histogram, std::vector<double>* cost) {
cost->resize(histogram.size());
int sum = 0;
size_t sum = 0;
for (size_t i = 0; i < histogram.size(); i++) {
sum += histogram[i];
}
@@ -146,40 +147,41 @@ class ZopfliCostModel {
double min_cost_cmd_;
};
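The uint32_t histograms above feed Set(), whose body is not shown in this hunk. A minimal stand-alone sketch of the usual histogram-to-bit-cost conversion, assuming cost = -log2(P(symbol)) and using std::log2 in place of the codebase's FastLog2 (HistogramToBitCosts and the default cost for unseen symbols are illustrative, not the patched code):

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// Requires a non-empty histogram with at least one nonzero count.
std::vector<double> HistogramToBitCosts(const std::vector<uint32_t>& histogram) {
  size_t sum = 0;
  for (size_t i = 0; i < histogram.size(); ++i) sum += histogram[i];
  const double log2sum = std::log2(static_cast<double>(sum));
  std::vector<double> cost(histogram.size());
  for (size_t i = 0; i < histogram.size(); ++i) {
    // cost[i] = log2(sum) - log2(count) = -log2(count / sum).
    cost[i] = histogram[i] == 0
        ? log2sum + 2.0  // unseen symbols: pessimistic default
        : log2sum - std::log2(static_cast<double>(histogram[i]));
  }
  return cost;
}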
inline void SetDistanceCache(int distance,
int distance_code,
int max_distance,
inline void SetDistanceCache(size_t distance,
size_t distance_code,
size_t max_distance,
const int* dist_cache,
int* result_dist_cache) {
if (distance <= max_distance && distance_code > 0) {
result_dist_cache[0] = distance;
result_dist_cache[0] = static_cast<int>(distance);
memcpy(&result_dist_cache[1], dist_cache, 3 * sizeof(dist_cache[0]));
} else {
memcpy(result_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
}
}
inline int ComputeDistanceCode(int distance,
int max_distance,
int quality,
const int* dist_cache) {
inline size_t ComputeDistanceCode(size_t distance,
size_t max_distance,
int quality,
const int* dist_cache) {
if (distance <= max_distance) {
if (distance == dist_cache[0]) {
if (distance == static_cast<size_t>(dist_cache[0])) {
return 0;
} else if (distance == dist_cache[1]) {
} else if (distance == static_cast<size_t>(dist_cache[1])) {
return 1;
} else if (distance == dist_cache[2]) {
} else if (distance == static_cast<size_t>(dist_cache[2])) {
return 2;
} else if (distance == dist_cache[3]) {
} else if (distance == static_cast<size_t>(dist_cache[3])) {
return 3;
} else if (quality > 3 && distance >= 6) {
for (int k = 4; k < kNumDistanceShortCodes; ++k) {
int idx = kDistanceCacheIndex[k];
int candidate = dist_cache[idx] + kDistanceCacheOffset[k];
static const int kLimits[16] = { 0, 0, 0, 0,
6, 6, 11, 11,
11, 11, 11, 11,
12, 12, 12, 12 };
for (size_t k = 4; k < kNumDistanceShortCodes; ++k) {
size_t idx = kDistanceCacheIndex[k];
size_t candidate =
static_cast<size_t>(dist_cache[idx] + kDistanceCacheOffset[k]);
static const size_t kLimits[16] = { 0, 0, 0, 0,
6, 6, 11, 11,
11, 11, 11, 11,
12, 12, 12, 12 };
if (distance == candidate && distance >= kLimits[k]) {
return k;
}
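Worked example of the short-code path above, assuming the usual brotli kDistanceCacheIndex/kDistanceCacheOffset tables (values quoted here as an assumption): if dist_cache[0] == 100 and the new match distance is 101, codes 0..3 miss, but k == 5 hits because index 0 and offset +1 give candidate = 101; since 101 >= kLimits[5] == 6, the function returns 5 instead of falling through to a full distance code.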
@@ -198,30 +200,30 @@ struct ZopfliNode {
cost(kInfinity) {}
// best length to get up to this byte (not including this byte itself)
int length;
uint32_t length;
// distance associated with the length
int distance;
int distance_code;
uint32_t distance;
uint32_t distance_code;
int distance_cache[4];
// length code associated with the length - usually the same as length,
// except in case of length-changing dictionary transformation.
int length_code;
uint32_t length_code;
// number of literal inserts before this copy
int insert_length;
uint32_t insert_length;
// smallest cost to get to this byte from the beginning, as found so far
double cost;
};
inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
int len, int len_code, int dist, int dist_code,
int max_dist, const int* dist_cache,
double cost) {
size_t len, size_t len_code, size_t dist,
size_t dist_code, size_t max_dist,
const int* dist_cache, double cost) {
ZopfliNode& next = nodes[pos + len];
next.length = len;
next.length_code = len_code;
next.distance = dist;
next.distance_code = dist_code;
next.insert_length = static_cast<int>(pos - start_pos);
next.length = static_cast<uint32_t>(len);
next.length_code = static_cast<uint32_t>(len_code);
next.distance = static_cast<uint32_t>(dist);
next.distance_code = static_cast<uint32_t>(dist_code);
next.insert_length = static_cast<uint32_t>(pos - start_pos);
next.cost = cost;
SetDistanceCache(dist, dist_code, max_dist, dist_cache,
&next.distance_cache[0]);
@@ -231,7 +233,7 @@ inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
class StartPosQueue {
public:
explicit StartPosQueue(int bits)
: mask_((1 << bits) - 1), q_(1 << bits), idx_(0) {}
: mask_((1u << bits) - 1), q_(1 << bits), idx_(0) {}
void Clear() {
idx_ = 0;
@@ -244,43 +246,47 @@ class StartPosQueue {
// have a copy of at least length 2.
return;
}
q_[idx_ & mask_] = std::make_pair(pos, costdiff);
// Restore the sorted order.
for (int i = idx_; i > 0 && i > idx_ - mask_; --i) {
if (q_[i & mask_].second > q_[(i - 1) & mask_].second) {
std::swap(q_[i & mask_], q_[(i - 1) & mask_]);
}
}
size_t offset = -idx_ & mask_;
++idx_;
size_t len = size();
q_[offset] = std::make_pair(pos, costdiff);
/* Restore the sorted order. In the list of |len| items at most |len - 1|
adjacent element comparisons / swaps are required. */
for (size_t i = 1; i < len; ++i) {
if (q_[offset & mask_].second > q_[(offset + 1) & mask_].second) {
std::swap(q_[offset & mask_], q_[(offset + 1) & mask_]);
}
++offset;
}
}
int size() const { return std::min(idx_, mask_ + 1); }
size_t size() const { return std::min(idx_, mask_ + 1); }
size_t GetStartPos(int k) const {
return q_[(idx_ - k - 1) & mask_].first;
size_t GetStartPos(size_t k) const {
return q_[(k + 1 - idx_) & mask_].first;
}
private:
const int mask_;
const size_t mask_;
std::vector<std::pair<size_t, double> > q_;
int idx_;
size_t idx_;
};
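A stand-alone model of the rewritten Push() may make the ring-buffer arithmetic easier to follow: the newest element lands at slot -idx_ & mask_, and one pass of adjacent compares restores sorted order because the remaining elements were already sorted. TinyPosQueue is a hypothetical name for this sketch, not part of the patch:

#include <cstddef>
#include <utility>
#include <vector>

class TinyPosQueue {
 public:
  explicit TinyPosQueue(int bits)
      : mask_((size_t(1) << bits) - 1), q_(size_t(1) << bits), idx_(0) {}
  void Push(size_t pos, double costdiff) {
    size_t offset = -idx_ & mask_;  // slot of the newest element
    ++idx_;
    size_t len = size();
    q_[offset] = std::make_pair(pos, costdiff);
    // One insertion-sort pass: at most len - 1 adjacent swaps.
    for (size_t i = 1; i < len; ++i) {
      if (q_[offset & mask_].second > q_[(offset + 1) & mask_].second) {
        std::swap(q_[offset & mask_], q_[(offset + 1) & mask_]);
      }
      ++offset;
    }
  }
  size_t size() const { return idx_ < mask_ + 1 ? idx_ : mask_ + 1; }
  size_t GetStartPos(size_t k) const {
    return q_[(k + 1 - idx_) & mask_].first;  // same indexing as the patch
  }
 private:
  const size_t mask_;
  std::vector<std::pair<size_t, double> > q_;
  size_t idx_;
};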
// Returns the minimum possible copy length that can improve the cost of any
// future position.
int ComputeMinimumCopyLength(const StartPosQueue& queue,
const std::vector<ZopfliNode>& nodes,
const ZopfliCostModel& model,
size_t pos,
double min_cost_cmd) {
size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
const std::vector<ZopfliNode>& nodes,
const ZopfliCostModel& model,
size_t pos,
double min_cost_cmd) {
// Compute the minimum possible cost of reaching any future position.
const size_t start0 = queue.GetStartPos(0);
double min_cost = (nodes[start0].cost +
model.GetLiteralCosts(start0, pos) +
min_cost_cmd);
int len = 2;
int next_len_bucket = 4;
int next_len_offset = 10;
size_t len = 2;
size_t next_len_bucket = 4;
size_t next_len_offset = 10;
while (pos + len < nodes.size() && nodes[pos + len].cost <= min_cost) {
// We already reached (pos + len) with no more cost than the minimum
// possible cost of reaching anything from this pos, so there is no point in
@@ -303,13 +309,13 @@ void ZopfliIterate(size_t num_bytes,
size_t ringbuffer_mask,
const size_t max_backward_limit,
const ZopfliCostModel& model,
const std::vector<int>& num_matches,
const std::vector<uint32_t>& num_matches,
const std::vector<BackwardMatch>& matches,
int* dist_cache,
int* last_insert_len,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
int* num_literals) {
size_t* num_literals) {
const Command * const orig_commands = commands;
std::vector<ZopfliNode> nodes(num_bytes + 1);
@@ -324,17 +330,17 @@ void ZopfliIterate(size_t num_bytes,
for (size_t i = 0; i + 3 < num_bytes; i++) {
size_t cur_ix = position + i;
size_t cur_ix_masked = cur_ix & ringbuffer_mask;
int max_distance = static_cast<int>(std::min(cur_ix, max_backward_limit));
int max_length = static_cast<int>(num_bytes - i);
size_t max_distance = std::min(cur_ix, max_backward_limit);
size_t max_length = num_bytes - i;
queue.Push(i, nodes[i].cost - model.GetLiteralCosts(0, i));
const int min_len = ComputeMinimumCopyLength(queue, nodes, model,
i, min_cost_cmd);
const size_t min_len = ComputeMinimumCopyLength(queue, nodes, model,
i, min_cost_cmd);
// Go over the command starting positions in order of increasing cost
// difference.
for (int k = 0; k < 5 && k < queue.size(); ++k) {
for (size_t k = 0; k < 5 && k < queue.size(); ++k) {
const size_t start = queue.GetStartPos(k);
const double start_costdiff =
nodes[start].cost - model.GetLiteralCosts(0, start);
@@ -342,10 +348,11 @@ void ZopfliIterate(size_t num_bytes,
// Look for last distance matches using the distance cache from this
// starting position.
int best_len = min_len - 1;
for (int j = 0; j < kNumDistanceShortCodes; ++j) {
const int idx = kDistanceCacheIndex[j];
const int backward = dist_cache2[idx] + kDistanceCacheOffset[j];
size_t best_len = min_len - 1;
for (size_t j = 0; j < kNumDistanceShortCodes; ++j) {
const size_t idx = kDistanceCacheIndex[j];
const size_t backward =
static_cast<size_t>(dist_cache2[idx] + kDistanceCacheOffset[j]);
size_t prev_ix = cur_ix - backward;
if (prev_ix >= cur_ix) {
continue;
@@ -361,12 +368,12 @@ void ZopfliIterate(size_t num_bytes,
ringbuffer[prev_ix + best_len]) {
continue;
}
const int len =
const size_t len =
FindMatchLengthWithLimit(&ringbuffer[prev_ix],
&ringbuffer[cur_ix_masked],
max_length);
for (int l = best_len + 1; l <= len; ++l) {
const int inslen = static_cast<int>(i - start);
for (size_t l = best_len + 1; l <= len; ++l) {
const size_t inslen = i - start;
double cmd_cost = model.GetCommandCost(j, l, inslen);
double cost = start_costdiff + cmd_cost + model.GetLiteralCosts(0, i);
if (cost < nodes[i + l].cost) {
@@ -383,24 +390,24 @@ void ZopfliIterate(size_t num_bytes,
if (k >= 2) continue;
// Loop through all possible copy lengths at this position.
int len = min_len;
for (int j = 0; j < num_matches[i]; ++j) {
size_t len = min_len;
for (size_t j = 0; j < num_matches[i]; ++j) {
BackwardMatch match = matches[cur_match_pos + j];
int dist = match.distance;
size_t dist = match.distance;
bool is_dictionary_match = dist > max_distance;
// We already tried all possible last distance matches, so we can use
// normal distance code here.
int dist_code = dist + 15;
size_t dist_code = dist + 15;
// Try all copy lengths up until the maximum copy length corresponding
// to this distance. If the distance refers to the static dictionary, or
// the maximum length is long enough, try only one maximum length.
int max_len = match.length();
size_t max_len = match.length();
if (len < max_len && (is_dictionary_match || max_len > kMaxZopfliLen)) {
len = max_len;
}
for (; len <= max_len; ++len) {
int len_code = is_dictionary_match ? match.length_code() : len;
const int inslen = static_cast<int>(i - start);
size_t len_code = is_dictionary_match ? match.length_code() : len;
const size_t inslen = i - start;
double cmd_cost = model.GetCommandCost(dist_code, len_code, inslen);
double cost = start_costdiff + cmd_cost + model.GetLiteralCosts(0, i);
if (cost < nodes[i + len].cost) {
@@ -422,16 +429,16 @@ void ZopfliIterate(size_t num_bytes,
}
}
std::vector<int> backwards;
std::vector<uint32_t> backwards;
size_t index = num_bytes;
while (nodes[index].cost == kInfinity) --index;
while (index > 0) {
int len = nodes[index].length + nodes[index].insert_length;
backwards.push_back(len);
while (index != 0) {
size_t len = nodes[index].length + nodes[index].insert_length;
backwards.push_back(static_cast<uint32_t>(len));
index -= len;
}
std::vector<int> path;
std::vector<uint32_t> path;
for (size_t i = backwards.size(); i > 0; i--) {
path.push_back(backwards[i - 1]);
}
@@ -439,19 +446,18 @@ void ZopfliIterate(size_t num_bytes,
size_t pos = 0;
for (size_t i = 0; i < path.size(); i++) {
const ZopfliNode& next = nodes[pos + path[i]];
int copy_length = next.length;
int insert_length = next.insert_length;
size_t copy_length = next.length;
size_t insert_length = next.insert_length;
pos += insert_length;
if (i == 0) {
insert_length += *last_insert_len;
*last_insert_len = 0;
}
int distance = next.distance;
int len_code = next.length_code;
int max_distance =
static_cast<int>(std::min(position + pos, max_backward_limit));
size_t distance = next.distance;
size_t len_code = next.length_code;
size_t max_distance = std::min(position + pos, max_backward_limit);
bool is_dictionary = (distance > max_distance);
int dist_code = next.distance_code;
size_t dist_code = next.distance_code;
Command cmd(insert_length, copy_length, len_code, dist_code);
*commands++ = cmd;
@@ -460,30 +466,31 @@ void ZopfliIterate(size_t num_bytes,
dist_cache[3] = dist_cache[2];
dist_cache[2] = dist_cache[1];
dist_cache[1] = dist_cache[0];
dist_cache[0] = distance;
dist_cache[0] = static_cast<int>(distance);
}
*num_literals += insert_length;
insert_length = 0;
pos += copy_length;
}
*last_insert_len += static_cast<int>(num_bytes - pos);
*num_commands += (commands - orig_commands);
*last_insert_len += num_bytes - pos;
*num_commands += static_cast<size_t>(commands - orig_commands);
}
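The backtracking step in the function above can be read in isolation: every reachable node records how many bytes its best incoming command spans (insert_length plus copy length), so hop sizes walked from the end describe the command chain in reverse. A hypothetical stand-alone version (BacktrackPath and hop_len are illustrative names, not from the patch):

#include <cstddef>
#include <cstdint>
#include <vector>

// hop_len[i] = insert_length + copy length of the best command ending at i;
// end = last byte position reached with finite cost.
std::vector<uint32_t> BacktrackPath(const std::vector<uint32_t>& hop_len,
                                    size_t end) {
  std::vector<uint32_t> backwards;
  for (size_t index = end; index != 0; index -= hop_len[index]) {
    backwards.push_back(hop_len[index]);
  }
  // The loop walks end -> 0, so reverse to get the forward path.
  return std::vector<uint32_t>(backwards.rbegin(), backwards.rend());
}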
template<typename Hasher>
void CreateBackwardReferences(size_t num_bytes,
size_t position,
bool is_last,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
const int quality,
Hasher* hasher,
int* dist_cache,
int* last_insert_len,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
int* num_literals) {
size_t* num_literals) {
if (num_bytes >= 3 && position >= 3) {
// Prepare the hashes for three last bytes of the last write.
// These could not be calculated before, since they require knowledge
@@ -496,7 +503,7 @@ void CreateBackwardReferences(size_t num_bytes,
static_cast<uint32_t>(position - 1));
}
const Command * const orig_commands = commands;
int insert_length = *last_insert_len;
size_t insert_length = *last_insert_len;
size_t i = position & ringbuffer_mask;
const size_t i_diff = position - i;
const size_t i_end = i + num_bytes;
@@ -509,12 +516,11 @@ void CreateBackwardReferences(size_t num_bytes,
const double kMinScore = 4.0;
while (i + Hasher::kHashTypeLength - 1 < i_end) {
int max_length = static_cast<int>(i_end - i);
int max_distance =
static_cast<int>(std::min(i + i_diff, max_backward_limit));
int best_len = 0;
int best_len_code = 0;
int best_dist = 0;
size_t max_length = i_end - i;
size_t max_distance = std::min(i + i_diff, max_backward_limit);
size_t best_len = 0;
size_t best_len_code = 0;
size_t best_dist = 0;
double best_score = kMinScore;
bool match_found = hasher->FindLongestMatch(
ringbuffer, ringbuffer_mask,
@@ -525,12 +531,12 @@ void CreateBackwardReferences(size_t num_bytes,
int delayed_backward_references_in_row = 0;
for (;;) {
--max_length;
int best_len_2 = quality < 5 ? std::min(best_len - 1, max_length) : 0;
int best_len_code_2 = 0;
int best_dist_2 = 0;
size_t best_len_2 =
quality < 5 ? std::min(best_len - 1, max_length) : 0;
size_t best_len_code_2 = 0;
size_t best_dist_2 = 0;
double best_score_2 = kMinScore;
max_distance =
static_cast<int>(std::min(i + i_diff + 1, max_backward_limit));
max_distance = std::min(i + i_diff + 1, max_backward_limit);
hasher->Store(ringbuffer + i, static_cast<uint32_t>(i + i_diff));
match_found = hasher->FindLongestMatch(
ringbuffer, ringbuffer_mask,
@@ -555,15 +561,15 @@ void CreateBackwardReferences(size_t num_bytes,
}
apply_random_heuristics =
i + 2 * best_len + random_heuristics_window_size;
max_distance = static_cast<int>(std::min(i + i_diff, max_backward_limit));
max_distance = std::min(i + i_diff, max_backward_limit);
// The first 16 codes are special shortcodes, and the minimum offset is 1.
int distance_code =
size_t distance_code =
ComputeDistanceCode(best_dist, max_distance, quality, dist_cache);
if (best_dist <= max_distance && distance_code > 0) {
dist_cache[3] = dist_cache[2];
dist_cache[2] = dist_cache[1];
dist_cache[1] = dist_cache[0];
dist_cache[0] = best_dist;
dist_cache[0] = static_cast<int>(best_dist);
}
Command cmd(insert_length, best_len, best_len_code, distance_code);
*commands++ = cmd;
@@ -571,7 +577,7 @@ void CreateBackwardReferences(size_t num_bytes,
insert_length = 0;
// Put the hash keys into the table, if there are enough
// bytes left.
for (int j = 1; j < best_len; ++j) {
for (size_t j = 1; j < best_len; ++j) {
hasher->Store(&ringbuffer[i + j],
static_cast<uint32_t>(i + i_diff + j));
}
@@ -608,13 +614,14 @@ void CreateBackwardReferences(size_t num_bytes,
}
}
}
insert_length += static_cast<int>(i_end - i);
insert_length += i_end - i;
*last_insert_len = insert_length;
*num_commands += commands - orig_commands;
*num_commands += static_cast<size_t>(commands - orig_commands);
}
void CreateBackwardReferences(size_t num_bytes,
size_t position,
bool is_last,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
@@ -622,10 +629,10 @@ void CreateBackwardReferences(size_t num_bytes,
Hashers* hashers,
int hash_type,
int* dist_cache,
int* last_insert_len,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
int* num_literals) {
size_t* num_literals) {
bool zopflify = quality > 9;
if (zopflify) {
Hashers::H9* hasher = hashers->hash_h9;
@@ -640,28 +647,27 @@ void CreateBackwardReferences(size_t num_bytes,
hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask],
static_cast<uint32_t>(position - 1));
}
std::vector<int> num_matches(num_bytes);
std::vector<uint32_t> num_matches(num_bytes);
std::vector<BackwardMatch> matches(3 * num_bytes);
size_t cur_match_pos = 0;
for (size_t i = 0; i + 3 < num_bytes; ++i) {
int max_distance =
static_cast<int>(std::min(position + i, max_backward_limit));
int max_length = static_cast<int>(num_bytes - i);
size_t max_distance = std::min(position + i, max_backward_limit);
size_t max_length = num_bytes - i;
// Ensure that we have at least kMaxZopfliLen free slots.
if (matches.size() < cur_match_pos + kMaxZopfliLen) {
matches.resize(cur_match_pos + kMaxZopfliLen);
}
hasher->FindAllMatches(
ringbuffer, ringbuffer_mask,
static_cast<uint32_t>(position + i), max_length, max_distance,
&num_matches[i], &matches[cur_match_pos]);
size_t num_found_matches = hasher->FindAllMatches(
ringbuffer, ringbuffer_mask, position + i, max_length, max_distance,
&matches[cur_match_pos]);
num_matches[i] = static_cast<uint32_t>(num_found_matches);
hasher->Store(&ringbuffer[(position + i) & ringbuffer_mask],
static_cast<uint32_t>(position + i));
cur_match_pos += num_matches[i];
if (num_matches[i] == 1) {
const int match_len = matches[cur_match_pos - 1].length();
cur_match_pos += num_found_matches;
if (num_found_matches == 1) {
const size_t match_len = matches[cur_match_pos - 1].length();
if (match_len > kMaxZopfliLen) {
for (int j = 1; j < match_len; ++j) {
for (size_t j = 1; j < match_len; ++j) {
++i;
hasher->Store(&ringbuffer[(position + i) & ringbuffer_mask],
static_cast<uint32_t>(position + i));
@@ -670,14 +676,14 @@ void CreateBackwardReferences(size_t num_bytes,
}
}
}
int orig_num_literals = *num_literals;
int orig_last_insert_len = *last_insert_len;
size_t orig_num_literals = *num_literals;
size_t orig_last_insert_len = *last_insert_len;
int orig_dist_cache[4] = {
dist_cache[0], dist_cache[1], dist_cache[2], dist_cache[3]
};
size_t orig_num_commands = *num_commands;
static const int kIterations = 2;
for (int i = 0; i < kIterations; i++) {
static const size_t kIterations = 2;
for (size_t i = 0; i < kIterations; i++) {
ZopfliCostModel model;
if (i == 0) {
model.SetFromLiteralCosts(num_bytes, position,
@@ -702,57 +708,58 @@
switch (hash_type) {
case 1:
CreateBackwardReferences<Hashers::H1>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h1, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h1, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 2:
CreateBackwardReferences<Hashers::H2>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h2, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h2, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 3:
CreateBackwardReferences<Hashers::H3>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h3, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h3, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 4:
CreateBackwardReferences<Hashers::H4>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h4, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h4, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 5:
CreateBackwardReferences<Hashers::H5>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h5, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h5, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 6:
CreateBackwardReferences<Hashers::H6>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h6, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h6, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 7:
CreateBackwardReferences<Hashers::H7>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h7, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h7, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 8:
CreateBackwardReferences<Hashers::H8>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h8, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h8, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 9:
CreateBackwardReferences<Hashers::H9>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h9, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h9, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
default:
break;

enc/backward_references.h

@@ -9,8 +9,6 @@
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
#include <vector>
#include "./hash.h"
#include "./command.h"
#include "./types.h"
@@ -23,6 +21,7 @@ namespace brotli {
// by this call.
void CreateBackwardReferences(size_t num_bytes,
size_t position,
bool is_last,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
@@ -30,10 +29,10 @@ void CreateBackwardReferences(size_t num_bytes,
Hashers* hashers,
int hash_type,
int* dist_cache,
int* last_insert_len,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
int* num_literals);
size_t* num_literals);
} // namespace brotli

enc/bit_cost.h

@@ -17,35 +17,35 @@
namespace brotli {
static inline double ShannonEntropy(const int *population, int size,
int *total) {
int sum = 0;
static inline double ShannonEntropy(const uint32_t *population, size_t size,
size_t *total) {
size_t sum = 0;
double retval = 0;
const int *population_end = population + size;
int p;
const uint32_t *population_end = population + size;
size_t p;
if (size & 1) {
goto odd_number_of_elements_left;
}
while (population < population_end) {
p = *population++;
sum += p;
retval -= p * FastLog2(p);
retval -= static_cast<double>(p) * FastLog2(p);
odd_number_of_elements_left:
p = *population++;
sum += p;
retval -= p * FastLog2(p);
retval -= static_cast<double>(p) * FastLog2(p);
}
if (sum) retval += sum * FastLog2(sum);
if (sum) retval += static_cast<double>(sum) * FastLog2(sum);
*total = sum;
return retval;
}
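As a quick check of the formula (retval accumulates -sum of p*log2(p) over the population and then adds sum*log2(sum)): for population {3, 1}, sum = 4 and the entropy is 4*log2(4) - 3*log2(3) - 1*log2(1), roughly 8 - 4.75 = 3.25 bits for the whole population, about 0.81 bits per symbol.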
static inline double BitsEntropy(const int *population, int size) {
int sum;
static inline double BitsEntropy(const uint32_t *population, size_t size) {
size_t sum;
double retval = ShannonEntropy(population, size, &sum);
if (retval < sum) {
// At least one bit per literal is needed.
retval = sum;
retval = static_cast<double>(sum);
}
return retval;
}
@@ -66,7 +66,7 @@ double PopulationCost(const Histogram<kSize>& histogram) {
return 12;
}
if (count == 2) {
return 20 + histogram.total_count_;
return static_cast<double>(20 + histogram.total_count_);
}
double bits = 0;
uint8_t depth_array[kSize] = { 0 };
@@ -82,16 +82,16 @@ double PopulationCost(const Histogram<kSize>& histogram) {
// In this loop we compute the entropy of the histogram and simultaneously
// build a simplified histogram of the code length codes where we use the
// zero repeat code 17, but we don't use the non-zero repeat code 16.
int max_depth = 1;
int depth_histo[kCodeLengthCodes] = { 0 };
size_t max_depth = 1;
uint32_t depth_histo[kCodeLengthCodes] = { 0 };
const double log2total = FastLog2(histogram.total_count_);
for (int i = 0; i < kSize;) {
for (size_t i = 0; i < kSize;) {
if (histogram.data_[i] > 0) {
// Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
// = log2(total_count) - log2(count(symbol))
double log2p = log2total - FastLog2(histogram.data_[i]);
// Approximate the bit depth by round(-log2(P(symbol)))
int depth = static_cast<int>(log2p + 0.5);
size_t depth = static_cast<size_t>(log2p + 0.5);
bits += histogram.data_[i] * log2p;
if (depth > 15) {
depth = 15;
@@ -104,8 +104,8 @@ double PopulationCost(const Histogram<kSize>& histogram) {
} else {
// Compute the run length of zeros and add the appropriate number of 0 and
// 17 code length codes to the code length code histogram.
int reps = 1;
for (int k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
uint32_t reps = 1;
for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
++reps;
}
i += reps;
@@ -128,7 +128,7 @@ double PopulationCost(const Histogram<kSize>& histogram) {
}
}
// Add the estimated encoding cost of the code length code histogram.
bits += 18 + 2 * max_depth;
bits += static_cast<double>(18 + 2 * max_depth);
// Add the entropy of the code length code histogram.
bits += BitsEntropy(depth_histo, kCodeLengthCodes);
return bits;

enc/block_splitter.cc

@@ -10,11 +10,9 @@
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <cstring>
#include <map>
#include "./cluster.h"
@@ -24,19 +22,19 @@
namespace brotli {
static const int kMaxLiteralHistograms = 100;
static const int kMaxCommandHistograms = 50;
static const size_t kMaxLiteralHistograms = 100;
static const size_t kMaxCommandHistograms = 50;
static const double kLiteralBlockSwitchCost = 28.1;
static const double kCommandBlockSwitchCost = 13.5;
static const double kDistanceBlockSwitchCost = 14.6;
static const int kLiteralStrideLength = 70;
static const int kCommandStrideLength = 40;
static const int kSymbolsPerLiteralHistogram = 544;
static const int kSymbolsPerCommandHistogram = 530;
static const int kSymbolsPerDistanceHistogram = 544;
static const int kMinLengthForBlockSplitting = 128;
static const int kIterMulForRefining = 2;
static const int kMinItersForRefining = 100;
static const size_t kLiteralStrideLength = 70;
static const size_t kCommandStrideLength = 40;
static const size_t kSymbolsPerLiteralHistogram = 544;
static const size_t kSymbolsPerCommandHistogram = 530;
static const size_t kSymbolsPerDistanceHistogram = 544;
static const size_t kMinLengthForBlockSplitting = 128;
static const size_t kIterMulForRefining = 2;
static const size_t kMinItersForRefining = 100;
void CopyLiteralsToByteArray(const Command* cmds,
const size_t num_commands,
@@ -99,17 +97,17 @@ inline static unsigned int MyRand(unsigned int* seed) {
template<typename HistogramType, typename DataType>
void InitialEntropyCodes(const DataType* data, size_t length,
int literals_per_histogram,
int max_histograms,
size_t literals_per_histogram,
size_t max_histograms,
size_t stride,
std::vector<HistogramType>* vec) {
int total_histograms = static_cast<int>(length) / literals_per_histogram + 1;
size_t total_histograms = length / literals_per_histogram + 1;
if (total_histograms > max_histograms) {
total_histograms = max_histograms;
}
unsigned int seed = 7;
size_t block_length = length / total_histograms;
for (int i = 0; i < total_histograms; ++i) {
for (size_t i = 0; i < total_histograms; ++i) {
size_t pos = length * i / total_histograms;
if (i != 0) {
pos += MyRand(&seed) % block_length;
@@ -155,8 +153,8 @@ void RefineEntropyCodes(const DataType* data, size_t length,
}
}
inline static double BitCost(int count) {
return count == 0 ? -2 : FastLog2(count);
inline static double BitCost(size_t count) {
return count == 0 ? -2.0 : FastLog2(count);
}
template<typename DataType, int kSize>
@@ -170,15 +168,16 @@ void FindBlocks(const DataType* data, const size_t length,
}
return;
}
int vecsize = static_cast<int>(vec.size());
size_t vecsize = vec.size();
assert(vecsize <= 256);
double* insert_cost = new double[kSize * vecsize];
memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * vecsize);
for (int j = 0; j < vecsize; ++j) {
insert_cost[j] = FastLog2(vec[j].total_count_);
for (size_t j = 0; j < vecsize; ++j) {
insert_cost[j] = FastLog2(static_cast<uint32_t>(vec[j].total_count_));
}
for (int i = kSize - 1; i >= 0; --i) {
for (int j = 0; j < vecsize; ++j) {
for (size_t i = kSize; i != 0;) {
--i;
for (size_t j = 0; j < vecsize; ++j) {
insert_cost[i * vecsize + j] = insert_cost[j] - BitCost(vec[j].data_[i]);
}
}
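The precomputed table reads as insert_cost[i * vecsize + j] = log2(total_count_j) - log2(count_j[i]), the approximate cost in bits of coding symbol i with entropy code j. For example, a code whose total_count is 1024 and in which symbol i occurs 32 times charges 10 - 5 = 5 bits for that symbol; a zero count is charged log2(total) + 2 bits via BitCost's -2 fallback.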
@@ -194,9 +193,9 @@ void FindBlocks(const DataType* data, const size_t length,
// position, we need to switch here.
for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
size_t ix = byte_ix * vecsize;
int insert_cost_ix = data[byte_ix] * vecsize;
size_t insert_cost_ix = data[byte_ix] * vecsize;
double min_cost = 1e99;
for (int k = 0; k < vecsize; ++k) {
for (size_t k = 0; k < vecsize; ++k) {
// We are coding the symbol in data[byte_ix] with entropy code k.
cost[k] += insert_cost[insert_cost_ix + k];
if (cost[k] < min_cost) {
@@ -207,9 +206,9 @@ void FindBlocks(const DataType* data, const size_t length,
double block_switch_cost = block_switch_bitcost;
// More blocks for the beginning.
if (byte_ix < 2000) {
block_switch_cost *= 0.77 + 0.07 * byte_ix / 2000;
block_switch_cost *= 0.77 + 0.07 * static_cast<double>(byte_ix) / 2000;
}
for (int k = 0; k < vecsize; ++k) {
for (size_t k = 0; k < vecsize; ++k) {
cost[k] -= min_cost;
if (cost[k] >= block_switch_cost) {
cost[k] = block_switch_cost;
@@ -234,9 +233,9 @@ void FindBlocks(const DataType* data, const size_t length,
delete[] switch_signal;
}
int RemapBlockIds(uint8_t* block_ids, const size_t length) {
size_t RemapBlockIds(uint8_t* block_ids, const size_t length) {
std::map<uint8_t, uint8_t> new_id;
int next_id = 0;
size_t next_id = 0;
for (size_t i = 0; i < length; ++i) {
if (new_id.find(block_ids[i]) == new_id.end()) {
new_id[block_ids[i]] = static_cast<uint8_t>(next_id);
@@ -253,7 +252,7 @@ template<typename HistogramType, typename DataType>
void BuildBlockHistograms(const DataType* data, const size_t length,
uint8_t* block_ids,
std::vector<HistogramType>* histograms) {
int num_types = RemapBlockIds(block_ids, length);
size_t num_types = RemapBlockIds(block_ids, length);
assert(num_types <= 256);
histograms->clear();
histograms->resize(num_types);
@@ -266,8 +265,8 @@ template<typename HistogramType, typename DataType>
void ClusterBlocks(const DataType* data, const size_t length,
uint8_t* block_ids) {
std::vector<HistogramType> histograms;
std::vector<int> block_index(length);
int cur_idx = 0;
std::vector<uint32_t> block_index(length);
uint32_t cur_idx = 0;
HistogramType cur_histogram;
for (size_t i = 0; i < length; ++i) {
bool block_boundary = (i + 1 == length || block_ids[i] != block_ids[i + 1]);
@@ -280,10 +279,10 @@ void ClusterBlocks(const DataType* data, const size_t length,
}
}
std::vector<HistogramType> clustered_histograms;
std::vector<int> histogram_symbols;
std::vector<uint32_t> histogram_symbols;
// Block ids need to fit in one byte.
static const size_t kMaxNumberOfBlockTypes = 256;
ClusterHistograms(histograms, 1, static_cast<int>(histograms.size()),
ClusterHistograms(histograms, 1, histograms.size(),
kMaxNumberOfBlockTypes,
&clustered_histograms,
&histogram_symbols);
@@ -293,30 +292,30 @@ void ClusterBlocks(const DataType* data, const size_t length,
}
void BuildBlockSplit(const std::vector<uint8_t>& block_ids, BlockSplit* split) {
int cur_id = block_ids[0];
int cur_length = 1;
split->num_types = -1;
uint8_t cur_id = block_ids[0];
uint8_t max_type = cur_id;
uint32_t cur_length = 1;
for (size_t i = 1; i < block_ids.size(); ++i) {
if (block_ids[i] != cur_id) {
uint8_t next_id = block_ids[i];
if (next_id != cur_id) {
split->types.push_back(cur_id);
split->lengths.push_back(cur_length);
split->num_types = std::max(split->num_types, cur_id);
cur_id = block_ids[i];
max_type = std::max(max_type, next_id);
cur_id = next_id;
cur_length = 0;
}
++cur_length;
}
split->types.push_back(cur_id);
split->lengths.push_back(cur_length);
split->num_types = std::max(split->num_types, cur_id);
++split->num_types;
split->num_types = static_cast<size_t>(max_type) + 1;
}
template<typename HistogramType, typename DataType>
void SplitByteVector(const std::vector<DataType>& data,
const int literals_per_histogram,
const int max_histograms,
const int sampling_stride_length,
const size_t literals_per_histogram,
const size_t max_histograms,
const size_t sampling_stride_length,
const double block_switch_cost,
BlockSplit* split) {
if (data.empty()) {
@@ -325,7 +324,7 @@ void SplitByteVector(const std::vector<DataType>& data,
} else if (data.size() < kMinLengthForBlockSplitting) {
split->num_types = 1;
split->types.push_back(0);
split->lengths.push_back(static_cast<int>(data.size()));
split->lengths.push_back(static_cast<uint32_t>(data.size()));
return;
}
std::vector<HistogramType> histograms;
@@ -340,7 +339,7 @@ void SplitByteVector(const std::vector<DataType>& data,
&histograms);
// Find a good path through literals with the good entropy codes.
std::vector<uint8_t> block_ids(data.size());
for (int i = 0; i < 10; ++i) {
for (size_t i = 0; i < 10; ++i) {
FindBlocks(&data[0], data.size(),
block_switch_cost,
histograms,
@@ -387,27 +386,4 @@ void SplitBlock(const Command* cmds,
dist_split);
}
void SplitBlockByTotalLength(const Command* all_commands,
const size_t num_commands,
int input_size,
int target_length,
std::vector<std::vector<Command> >* blocks) {
int num_blocks = input_size / target_length + 1;
int length_limit = input_size / num_blocks + 1;
int total_length = 0;
std::vector<Command> cur_block;
for (size_t i = 0; i < num_commands; ++i) {
const Command& cmd = all_commands[i];
int cmd_length = cmd.insert_len_ + cmd.copy_len_;
if (total_length > length_limit) {
blocks->push_back(cur_block);
cur_block.clear();
total_length = 0;
}
cur_block.push_back(cmd);
total_length += cmd_length;
}
blocks->push_back(cur_block);
}
} // namespace brotli

enc/block_splitter.h

@@ -9,9 +9,7 @@
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
#define BROTLI_ENC_BLOCK_SPLITTER_H_
#include <string.h>
#include <vector>
#include <utility>
#include "./command.h"
#include "./metablock.h"
@@ -37,9 +35,9 @@ struct BlockSplitIterator {
}
const BlockSplit& split_;
int idx_;
int type_;
int length_;
size_t idx_;
size_t type_;
size_t length_;
};
void CopyLiteralsToByteArray(const Command* cmds,
@@ -58,12 +56,6 @@ void SplitBlock(const Command* cmds,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split);
void SplitBlockByTotalLength(const Command* all_commands,
const size_t num_commands,
int input_size,
int target_length,
std::vector<std::vector<Command> >* blocks);
} // namespace brotli
#endif // BROTLI_ENC_BLOCK_SPLITTER_H_

enc/brotli_bit_stream.cc

@@ -11,6 +11,7 @@
#include "./brotli_bit_stream.h"
#include <algorithm>
#include <cstring>
#include <limits>
#include <vector>
@@ -22,59 +23,53 @@
#include "./write_bits.h"
namespace brotli {
// returns false if fail
namespace {
// nibblesbits represents the 2 bits to encode MNIBBLES (0-3)
bool EncodeMlen(size_t length, int* bits, int* numbits, int* nibblesbits) {
if (length > (1 << 24)) {
return false;
}
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void EncodeMlen(size_t length, uint64_t* bits,
size_t* numbits, uint64_t* nibblesbits) {
assert(length > 0);
assert(length <= (1 << 24));
length--; // MLEN - 1 is encoded
int lg = length == 0 ? 1 : Log2Floor(static_cast<uint32_t>(length)) + 1;
size_t lg = length == 0 ? 1 : Log2FloorNonZero(
static_cast<uint32_t>(length)) + 1;
assert(lg <= 24);
int mnibbles = (lg < 16 ? 16 : (lg + 3)) / 4;
size_t mnibbles = (lg < 16 ? 16 : (lg + 3)) / 4;
*nibblesbits = mnibbles - 4;
*numbits = mnibbles * 4;
*bits = static_cast<int>(length);
return true;
*bits = length;
}
void StoreVarLenUint8(int n, int* storage_ix, uint8_t* storage) {
} // namespace
void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) {
if (n == 0) {
WriteBits(1, 0, storage_ix, storage);
} else {
WriteBits(1, 1, storage_ix, storage);
int nbits = Log2Floor(n);
size_t nbits = Log2FloorNonZero(n);
WriteBits(3, nbits, storage_ix, storage);
WriteBits(nbits, n - (1 << nbits), storage_ix, storage);
}
}
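For example, StoreVarLenUint8(5, ...) writes a 1 bit (nonzero), then nbits = Log2FloorNonZero(5) = 2 in three bits, then 5 - 4 = 1 in two bits, six bits in total; n = 0 costs a single 0 bit.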
bool StoreCompressedMetaBlockHeader(bool final_block,
void StoreCompressedMetaBlockHeader(bool final_block,
size_t length,
int* storage_ix,
size_t* storage_ix,
uint8_t* storage) {
// Write ISLAST bit.
WriteBits(1, final_block, storage_ix, storage);
// Write ISEMPTY bit.
if (final_block) {
WriteBits(1, length == 0, storage_ix, storage);
if (length == 0) {
return true;
}
}
if (length == 0) {
// Only the last meta-block can be empty.
return false;
}
int lenbits;
int nlenbits;
int nibblesbits;
if (!EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits)) {
return false;
WriteBits(1, 0, storage_ix, storage);
}
uint64_t lenbits;
size_t nlenbits;
uint64_t nibblesbits;
EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
WriteBits(2, nibblesbits, storage_ix, storage);
WriteBits(nlenbits, lenbits, storage_ix, storage);
@@ -82,31 +77,27 @@ bool StoreCompressedMetaBlockHeader(bool final_block,
// Write ISUNCOMPRESSED bit.
WriteBits(1, 0, storage_ix, storage);
}
return true;
}
bool StoreUncompressedMetaBlockHeader(size_t length,
int* storage_ix,
void StoreUncompressedMetaBlockHeader(size_t length,
size_t* storage_ix,
uint8_t* storage) {
// Write ISLAST bit. Uncompressed block cannot be the last one, so set to 0.
WriteBits(1, 0, storage_ix, storage);
int lenbits;
int nlenbits;
int nibblesbits;
if (!EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits)) {
return false;
}
uint64_t lenbits;
size_t nlenbits;
uint64_t nibblesbits;
EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
WriteBits(2, nibblesbits, storage_ix, storage);
WriteBits(nlenbits, lenbits, storage_ix, storage);
// Write ISUNCOMPRESSED bit.
WriteBits(1, 1, storage_ix, storage);
return true;
}
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
const int num_codes,
const uint8_t *code_length_bitdepth,
int *storage_ix,
size_t *storage_ix,
uint8_t *storage) {
static const uint8_t kStorageOrder[kCodeLengthCodes] = {
1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15
@@ -129,7 +120,7 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
};
// Throw away trailing zeros:
int codes_to_store = kCodeLengthCodes;
size_t codes_to_store = kCodeLengthCodes;
if (num_codes > 1) {
for (; codes_to_store > 0; --codes_to_store) {
if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
@ -137,7 +128,7 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
}
}
}
int skip_some = 0; // skips none.
size_t skip_some = 0; // skips none.
if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
code_length_bitdepth[kStorageOrder[1]] == 0) {
skip_some = 2; // skips two.
@@ -146,8 +137,8 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
}
}
WriteBits(2, skip_some, storage_ix, storage);
for (int i = skip_some; i < codes_to_store; ++i) {
uint8_t l = code_length_bitdepth[kStorageOrder[i]];
for (size_t i = skip_some; i < codes_to_store; ++i) {
size_t l = code_length_bitdepth[kStorageOrder[i]];
WriteBits(kHuffmanBitLengthHuffmanCodeBitLengths[l],
kHuffmanBitLengthHuffmanCodeSymbols[l], storage_ix, storage);
}
@@ -158,10 +149,10 @@ void StoreHuffmanTreeToBitMask(
const std::vector<uint8_t> &huffman_tree_extra_bits,
const uint8_t *code_length_bitdepth,
const std::vector<uint16_t> &code_length_bitdepth_symbols,
int * __restrict storage_ix,
size_t * __restrict storage_ix,
uint8_t * __restrict storage) {
for (size_t i = 0; i < huffman_tree.size(); ++i) {
int ix = huffman_tree[i];
size_t ix = huffman_tree[i];
WriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix],
storage_ix, storage);
// Extra bits
@@ -177,17 +168,17 @@ void StoreHuffmanTreeToBitMask(
}
void StoreSimpleHuffmanTree(const uint8_t* depths,
int symbols[4],
int num_symbols,
int max_bits,
int *storage_ix, uint8_t *storage) {
size_t symbols[4],
size_t num_symbols,
size_t max_bits,
size_t *storage_ix, uint8_t *storage) {
// value of 1 indicates a simple Huffman code
WriteBits(2, 1, storage_ix, storage);
WriteBits(2, num_symbols - 1, storage_ix, storage); // NSYM - 1
// Sort
for (int i = 0; i < num_symbols; i++) {
for (int j = i + 1; j < num_symbols; j++) {
for (size_t i = 0; i < num_symbols; i++) {
for (size_t j = i + 1; j < num_symbols; j++) {
if (depths[symbols[j]] < depths[symbols[i]]) {
std::swap(symbols[j], symbols[i]);
}
@@ -213,8 +204,8 @@ void StoreSimpleHuffmanTree(const uint8_t* depths,
// num = alphabet size
// depths = symbol depths
void StoreHuffmanTree(const uint8_t* depths, int num,
int *storage_ix, uint8_t *storage) {
void StoreHuffmanTree(const uint8_t* depths, size_t num,
size_t *storage_ix, uint8_t *storage) {
// Write the Huffman tree into the brotli-representation.
std::vector<uint8_t> huffman_tree;
std::vector<uint8_t> huffman_tree_extra_bits;
@ -224,7 +215,7 @@ void StoreHuffmanTree(const uint8_t* depths, int num,
WriteHuffmanTree(depths, num, &huffman_tree, &huffman_tree_extra_bits);
// Calculate the statistics of the Huffman tree in brotli-representation.
int huffman_tree_histogram[kCodeLengthCodes] = { 0 };
uint32_t huffman_tree_histogram[kCodeLengthCodes] = { 0 };
for (size_t i = 0; i < huffman_tree.size(); ++i) {
++huffman_tree_histogram[huffman_tree[i]];
}
@@ -270,15 +261,15 @@ void StoreHuffmanTree(const uint8_t* depths, int num,
}
void BuildAndStoreHuffmanTree(const int *histogram,
const int length,
void BuildAndStoreHuffmanTree(const uint32_t *histogram,
const size_t length,
uint8_t* depth,
uint16_t* bits,
int* storage_ix,
size_t* storage_ix,
uint8_t* storage) {
int count = 0;
int s4[4] = { 0 };
for (int i = 0; i < length; i++) {
size_t count = 0;
size_t s4[4] = { 0 };
for (size_t i = 0; i < length; i++) {
if (histogram[i]) {
if (count < 4) {
s4[count] = i;
@ -289,8 +280,8 @@ void BuildAndStoreHuffmanTree(const int *histogram,
}
}
int max_bits_counter = length - 1;
int max_bits = 0;
size_t max_bits_counter = length - 1;
size_t max_bits = 0;
while (max_bits_counter) {
max_bits_counter >>= 1;
++max_bits;
@@ -312,30 +303,32 @@ void BuildAndStoreHuffmanTree(const int *histogram,
}
}
int IndexOf(const std::vector<int>& v, int value) {
for (int i = 0; i < static_cast<int>(v.size()); ++i) {
size_t IndexOf(const std::vector<uint32_t>& v, uint32_t value) {
size_t i = 0;
for (; i < v.size(); ++i) {
if (v[i] == value) return i;
}
return -1;
return i;
}
void MoveToFront(std::vector<int>* v, int index) {
int value = (*v)[index];
for (int i = index; i > 0; --i) {
void MoveToFront(std::vector<uint32_t>* v, size_t index) {
uint32_t value = (*v)[index];
for (size_t i = index; i != 0; --i) {
(*v)[i] = (*v)[i - 1];
}
(*v)[0] = value;
}
std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
std::vector<uint32_t> MoveToFrontTransform(const std::vector<uint32_t>& v) {
if (v.empty()) return v;
std::vector<int> mtf(*std::max_element(v.begin(), v.end()) + 1);
for (int i = 0; i < static_cast<int>(mtf.size()); ++i) mtf[i] = i;
std::vector<int> result(v.size());
uint32_t max_value = *std::max_element(v.begin(), v.end());
std::vector<uint32_t> mtf(max_value + 1);
for (uint32_t i = 0; i <= max_value; ++i) mtf[i] = i;
std::vector<uint32_t> result(v.size());
for (size_t i = 0; i < v.size(); ++i) {
int index = IndexOf(mtf, v[i]);
assert(index >= 0);
result[i] = index;
size_t index = IndexOf(mtf, v[i]);
assert(index < mtf.size());
result[i] = static_cast<uint32_t>(index);
MoveToFront(&mtf, index);
}
return result;
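A short trace of the transform: for input {1, 1, 0, 2} the front list starts as [0, 1, 2], so the outputs are 1 (then 1 moves to front), 0 (1 is already in front), 1 (0 is now second), and 2, i.e. {1, 0, 1, 2}.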
@@ -347,61 +340,62 @@ std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
// initial value of *max_run_length_prefix. The prefix code of run length L is
// simply Log2Floor(L) and the number of extra bits is the same as the prefix
// code.
void RunLengthCodeZeros(const std::vector<int>& v_in,
int* max_run_length_prefix,
std::vector<int>* v_out,
std::vector<int>* extra_bits) {
int max_reps = 0;
void RunLengthCodeZeros(const std::vector<uint32_t>& v_in,
uint32_t* max_run_length_prefix,
std::vector<uint32_t>* v_out,
std::vector<uint32_t>* extra_bits) {
uint32_t max_reps = 0;
for (size_t i = 0; i < v_in.size();) {
for (; i < v_in.size() && v_in[i] != 0; ++i) ;
int reps = 0;
uint32_t reps = 0;
for (; i < v_in.size() && v_in[i] == 0; ++i) {
++reps;
}
max_reps = std::max(reps, max_reps);
}
int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
*max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
uint32_t max_prefix = max_reps > 0 ? Log2FloorNonZero(max_reps) : 0;
max_prefix = std::min(max_prefix, *max_run_length_prefix);
*max_run_length_prefix = max_prefix;
for (size_t i = 0; i < v_in.size();) {
if (v_in[i] != 0) {
v_out->push_back(v_in[i] + *max_run_length_prefix);
extra_bits->push_back(0);
++i;
} else {
int reps = 1;
uint32_t reps = 1;
for (size_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
++reps;
}
i += reps;
while (reps) {
if (reps < (2 << *max_run_length_prefix)) {
int run_length_prefix = Log2Floor(reps);
while (reps != 0) {
if (reps < (2u << max_prefix)) {
uint32_t run_length_prefix = Log2FloorNonZero(reps);
v_out->push_back(run_length_prefix);
extra_bits->push_back(reps - (1 << run_length_prefix));
extra_bits->push_back(reps - (1u << run_length_prefix));
break;
} else {
v_out->push_back(*max_run_length_prefix);
extra_bits->push_back((1 << *max_run_length_prefix) - 1);
reps -= (2 << *max_run_length_prefix) - 1;
v_out->push_back(max_prefix);
extra_bits->push_back((1u << max_prefix) - 1u);
reps -= (2u << max_prefix) - 1u;
}
}
}
}
}
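Worked example with max_run_length_prefix = 6: a run of 7 zeros satisfies reps < (2 << 6) = 128, so it is coded as prefix Log2FloorNonZero(7) = 2 with extra bits 7 - 4 = 3, while nonzero values v are emitted shifted to v + 6 to make room for the run-length prefix symbols.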
void EncodeContextMap(const std::vector<int>& context_map,
int num_clusters,
int* storage_ix, uint8_t* storage) {
void EncodeContextMap(const std::vector<uint32_t>& context_map,
size_t num_clusters,
size_t* storage_ix, uint8_t* storage) {
StoreVarLenUint8(num_clusters - 1, storage_ix, storage);
if (num_clusters == 1) {
return;
}
std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
std::vector<int> rle_symbols;
std::vector<int> extra_bits;
int max_run_length_prefix = 6;
std::vector<uint32_t> transformed_symbols = MoveToFrontTransform(context_map);
std::vector<uint32_t> rle_symbols;
std::vector<uint32_t> extra_bits;
uint32_t max_run_length_prefix = 6;
RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
&rle_symbols, &extra_bits);
HistogramContextMap symbol_histogram;
@@ -432,32 +426,32 @@ void EncodeContextMap(const std::vector<int>& context_map,
}
void StoreBlockSwitch(const BlockSplitCode& code,
const int block_ix,
int* storage_ix,
const size_t block_ix,
size_t* storage_ix,
uint8_t* storage) {
if (block_ix > 0) {
int typecode = code.type_code[block_ix];
size_t typecode = code.type_code[block_ix];
WriteBits(code.type_depths[typecode], code.type_bits[typecode],
storage_ix, storage);
}
int lencode = code.length_prefix[block_ix];
size_t lencode = code.length_prefix[block_ix];
WriteBits(code.length_depths[lencode], code.length_bits[lencode],
storage_ix, storage);
WriteBits(code.length_nextra[block_ix], code.length_extra[block_ix],
storage_ix, storage);
}
void BuildAndStoreBlockSplitCode(const std::vector<int>& types,
const std::vector<int>& lengths,
const int num_types,
void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
const std::vector<uint32_t>& lengths,
const size_t num_types,
BlockSplitCode* code,
int* storage_ix,
size_t* storage_ix,
uint8_t* storage) {
const int num_blocks = static_cast<int>(types.size());
std::vector<int> type_histo(num_types + 2);
std::vector<int> length_histo(26);
int last_type = 1;
int second_last_type = 0;
const size_t num_blocks = types.size();
std::vector<uint32_t> type_histo(num_types + 2);
std::vector<uint32_t> length_histo(26);
size_t last_type = 1;
size_t second_last_type = 0;
code->type_code.resize(num_blocks);
code->length_prefix.resize(num_blocks);
code->length_nextra.resize(num_blocks);
@@ -466,15 +460,15 @@ void BuildAndStoreBlockSplitCode(const std::vector<int>& types,
code->type_bits.resize(num_types + 2);
code->length_depths.resize(26);
code->length_bits.resize(26);
for (int i = 0; i < num_blocks; ++i) {
int type = types[i];
int type_code = (type == last_type + 1 ? 1 :
for (size_t i = 0; i < num_blocks; ++i) {
size_t type = types[i];
size_t type_code = (type == last_type + 1 ? 1 :
type == second_last_type ? 0 :
type + 2);
second_last_type = last_type;
last_type = type;
code->type_code[i] = type_code;
if (i > 0) ++type_histo[type_code];
code->type_code[i] = static_cast<uint32_t>(type_code);
if (i != 0) ++type_histo[type_code];
GetBlockLengthPrefixCode(lengths[i],
&code->length_prefix[i],
&code->length_nextra[i],
@@ -493,31 +487,31 @@ void BuildAndStoreBlockSplitCode(const std::vector<int>& types,
}
}
void StoreTrivialContextMap(int num_types,
int context_bits,
int* storage_ix,
void StoreTrivialContextMap(size_t num_types,
size_t context_bits,
size_t* storage_ix,
uint8_t* storage) {
StoreVarLenUint8(num_types - 1, storage_ix, storage);
if (num_types > 1) {
int repeat_code = context_bits - 1;
int repeat_bits = (1 << repeat_code) - 1;
int alphabet_size = num_types + repeat_code;
std::vector<int> histogram(alphabet_size);
size_t repeat_code = context_bits - 1;
uint64_t repeat_bits = (1 << repeat_code) - 1;
size_t alphabet_size = num_types + repeat_code;
std::vector<uint32_t> histogram(alphabet_size);
std::vector<uint8_t> depths(alphabet_size);
std::vector<uint16_t> bits(alphabet_size);
// Write RLEMAX.
WriteBits(1, 1, storage_ix, storage);
WriteBits(4, repeat_code - 1, storage_ix, storage);
histogram[repeat_code] = num_types;
histogram[repeat_code] = static_cast<uint32_t>(num_types);
histogram[0] = 1;
for (int i = context_bits; i < alphabet_size; ++i) {
for (size_t i = context_bits; i < alphabet_size; ++i) {
histogram[i] = 1;
}
BuildAndStoreHuffmanTree(&histogram[0], alphabet_size,
&depths[0], &bits[0],
storage_ix, storage);
for (int i = 0; i < num_types; ++i) {
int code = (i == 0 ? 0 : i + context_bits - 1);
for (size_t i = 0; i < num_types; ++i) {
size_t code = (i == 0 ? 0 : i + context_bits - 1);
WriteBits(depths[code], bits[code], storage_ix, storage);
WriteBits(depths[repeat_code], bits[repeat_code], storage_ix, storage);
WriteBits(repeat_code, repeat_bits, storage_ix, storage);
@@ -530,10 +524,10 @@ void StoreTrivialContextMap(int num_types,
// Manages the encoding of one block category (literal, command or distance).
class BlockEncoder {
public:
BlockEncoder(int alphabet_size,
int num_block_types,
const std::vector<int>& block_types,
const std::vector<int>& block_lengths)
BlockEncoder(size_t alphabet_size,
size_t num_block_types,
const std::vector<uint8_t>& block_types,
const std::vector<uint32_t>& block_lengths)
: alphabet_size_(alphabet_size),
num_block_types_(num_block_types),
block_types_(block_types),
@@ -544,7 +538,8 @@ class BlockEncoder {
// Creates entropy codes of block lengths and block types and stores them
// to the bit stream.
void BuildAndStoreBlockSwitchEntropyCodes(int* storage_ix, uint8_t* storage) {
void BuildAndStoreBlockSwitchEntropyCodes(size_t* storage_ix,
uint8_t* storage) {
BuildAndStoreBlockSplitCode(
block_types_, block_lengths_, num_block_types_,
&block_split_code_, storage_ix, storage);
@@ -555,7 +550,7 @@ class BlockEncoder {
template<int kSize>
void BuildAndStoreEntropyCodes(
const std::vector<Histogram<kSize> >& histograms,
int* storage_ix, uint8_t* storage) {
size_t* storage_ix, uint8_t* storage) {
depths_.resize(histograms.size() * alphabet_size_);
bits_.resize(histograms.size() * alphabet_size_);
for (size_t i = 0; i < histograms.size(); ++i) {
@@ -568,7 +563,7 @@ class BlockEncoder {
// Stores the next symbol with the entropy code of the current block type.
// Updates the block type and block length at block boundaries.
void StoreSymbol(int symbol, int* storage_ix, uint8_t* storage) {
void StoreSymbol(size_t symbol, size_t* storage_ix, uint8_t* storage) {
if (block_len_ == 0) {
++block_ix_;
block_len_ = block_lengths_[block_ix_];
@ -576,7 +571,7 @@ class BlockEncoder {
StoreBlockSwitch(block_split_code_, block_ix_, storage_ix, storage);
}
--block_len_;
int ix = entropy_ix_ + symbol;
size_t ix = entropy_ix_ + symbol;
WriteBits(depths_[ix], bits_[ix], storage_ix, storage);
}
@@ -584,67 +579,60 @@ class BlockEncoder {
// context value.
// Updates the block type and block length at block boundaries.
template<int kContextBits>
void StoreSymbolWithContext(int symbol, int context,
const std::vector<int>& context_map,
int* storage_ix, uint8_t* storage) {
void StoreSymbolWithContext(size_t symbol, size_t context,
const std::vector<uint32_t>& context_map,
size_t* storage_ix, uint8_t* storage) {
if (block_len_ == 0) {
++block_ix_;
block_len_ = block_lengths_[block_ix_];
entropy_ix_ = block_types_[block_ix_] << kContextBits;
size_t block_type = block_types_[block_ix_];
entropy_ix_ = block_type << kContextBits;
StoreBlockSwitch(block_split_code_, block_ix_, storage_ix, storage);
}
--block_len_;
int histo_ix = context_map[entropy_ix_ + context];
int ix = histo_ix * alphabet_size_ + symbol;
size_t histo_ix = context_map[entropy_ix_ + context];
size_t ix = histo_ix * alphabet_size_ + symbol;
WriteBits(depths_[ix], bits_[ix], storage_ix, storage);
}
private:
const int alphabet_size_;
const int num_block_types_;
const std::vector<int>& block_types_;
const std::vector<int>& block_lengths_;
const size_t alphabet_size_;
const size_t num_block_types_;
const std::vector<uint8_t>& block_types_;
const std::vector<uint32_t>& block_lengths_;
BlockSplitCode block_split_code_;
int block_ix_;
int block_len_;
int entropy_ix_;
size_t block_ix_;
size_t block_len_;
size_t entropy_ix_;
std::vector<uint8_t> depths_;
std::vector<uint16_t> bits_;
};
void JumpToByteBoundary(int* storage_ix, uint8_t* storage) {
*storage_ix = (*storage_ix + 7) & ~7;
void JumpToByteBoundary(size_t* storage_ix, uint8_t* storage) {
*storage_ix = (*storage_ix + 7u) & ~7u;
storage[*storage_ix >> 3] = 0;
}
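
To sanity-check the byte-alignment arithmetic in isolation, here is a minimal standalone sketch; the helper name and test values are illustrative, and the mask is widened to size_t so the check holds for arbitrary bit positions:

#include <cassert>
#include <cstddef>

// Round a bit position up to the next multiple of 8, as JumpToByteBoundary
// does above.
static size_t RoundUpToByteBoundary(size_t storage_ix) {
  return (storage_ix + 7) & ~static_cast<size_t>(7);
}

int main() {
  assert(RoundUpToByteBoundary(0) == 0);    // already aligned
  assert(RoundUpToByteBoundary(13) == 16);  // mid-byte position rounds up
  assert(RoundUpToByteBoundary(16) == 16);  // byte boundaries are fixed points
  return 0;
}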
bool StoreMetaBlock(const uint8_t* input,
void StoreMetaBlock(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
bool is_last,
int num_direct_distance_codes,
int distance_postfix_bits,
int literal_context_mode,
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits,
ContextType literal_context_mode,
const brotli::Command *commands,
size_t n_commands,
const MetaBlockSplit& mb,
int *storage_ix,
size_t *storage_ix,
uint8_t *storage) {
if (!StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage)) {
return false;
}
StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
if (length == 0) {
// Only the last meta-block can be empty, so jump to next byte.
JumpToByteBoundary(storage_ix, storage);
return true;
}
int num_distance_codes =
size_t num_distance_codes =
kNumDistanceShortCodes + num_direct_distance_codes +
(48 << distance_postfix_bits);
(48u << distance_postfix_bits);
BlockEncoder literal_enc(256,
mb.literal_split.num_types,
@ -666,11 +654,11 @@ bool StoreMetaBlock(const uint8_t* input,
WriteBits(2, distance_postfix_bits, storage_ix, storage);
WriteBits(4, num_direct_distance_codes >> distance_postfix_bits,
storage_ix, storage);
for (int i = 0; i < mb.literal_split.num_types; ++i) {
for (size_t i = 0; i < mb.literal_split.num_types; ++i) {
WriteBits(2, literal_context_mode, storage_ix, storage);
}
int num_literal_histograms = static_cast<int>(mb.literal_histograms.size());
size_t num_literal_histograms = mb.literal_histograms.size();
if (mb.literal_context_map.empty()) {
StoreTrivialContextMap(num_literal_histograms, kLiteralContextBits,
storage_ix, storage);
@ -679,7 +667,7 @@ bool StoreMetaBlock(const uint8_t* input,
storage_ix, storage);
}
int num_dist_histograms = static_cast<int>(mb.distance_histograms.size());
size_t num_dist_histograms = mb.distance_histograms.size();
if (mb.distance_context_map.empty()) {
StoreTrivialContextMap(num_dist_histograms, kDistanceContextBits,
storage_ix, storage);
@ -698,20 +686,19 @@ bool StoreMetaBlock(const uint8_t* input,
size_t pos = start_pos;
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
int cmd_code = cmd.cmd_prefix_;
int lennumextra = static_cast<int>(cmd.cmd_extra_ >> 48);
size_t cmd_code = cmd.cmd_prefix_;
uint32_t lennumextra = static_cast<uint32_t>(cmd.cmd_extra_ >> 48);
uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffUL;
command_enc.StoreSymbol(cmd_code, storage_ix, storage);
WriteBits(lennumextra, lenextra, storage_ix, storage);
if (mb.literal_context_map.empty()) {
for (int j = 0; j < cmd.insert_len_; j++) {
for (size_t j = cmd.insert_len_; j != 0; --j) {
literal_enc.StoreSymbol(input[pos & mask], storage_ix, storage);
++pos;
}
} else {
for (int j = 0; j < cmd.insert_len_; ++j) {
int context = Context(prev_byte, prev_byte2,
literal_context_mode);
for (size_t j = cmd.insert_len_; j != 0; --j) {
size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
uint8_t literal = input[pos & mask];
literal_enc.StoreSymbolWithContext<kLiteralContextBits>(
literal, context, mb.literal_context_map, storage_ix, storage);
@ -725,13 +712,13 @@ bool StoreMetaBlock(const uint8_t* input,
prev_byte2 = input[(pos - 2) & mask];
prev_byte = input[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
int dist_code = cmd.dist_prefix_;
int distnumextra = cmd.dist_extra_ >> 24;
int distextra = cmd.dist_extra_ & 0xffffff;
size_t dist_code = cmd.dist_prefix_;
uint32_t distnumextra = cmd.dist_extra_ >> 24;
uint64_t distextra = cmd.dist_extra_ & 0xffffff;
if (mb.distance_context_map.empty()) {
distance_enc.StoreSymbol(dist_code, storage_ix, storage);
} else {
int context = cmd.DistanceContext();
size_t context = cmd.DistanceContext();
distance_enc.StoreSymbolWithContext<kDistanceContextBits>(
dist_code, context, mb.distance_context_map, storage_ix, storage);
}
@ -742,45 +729,86 @@ bool StoreMetaBlock(const uint8_t* input,
if (is_last) {
JumpToByteBoundary(storage_ix, storage);
}
return true;
}
bool StoreMetaBlockTrivial(const uint8_t* input,
void BuildHistograms(const uint8_t* input,
size_t start_pos,
size_t mask,
const brotli::Command *commands,
size_t n_commands,
HistogramLiteral* lit_histo,
HistogramCommand* cmd_histo,
HistogramDistance* dist_histo) {
size_t pos = start_pos;
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
cmd_histo->Add(cmd.cmd_prefix_);
for (size_t j = cmd.insert_len_; j != 0; --j) {
lit_histo->Add(input[pos & mask]);
++pos;
}
pos += cmd.copy_len_;
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
dist_histo->Add(cmd.dist_prefix_);
}
}
}
void StoreDataWithHuffmanCodes(const uint8_t* input,
size_t start_pos,
size_t mask,
const brotli::Command *commands,
size_t n_commands,
const uint8_t* lit_depth,
const uint16_t* lit_bits,
const uint8_t* cmd_depth,
const uint16_t* cmd_bits,
const uint8_t* dist_depth,
const uint16_t* dist_bits,
size_t* storage_ix,
uint8_t* storage) {
size_t pos = start_pos;
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
const size_t cmd_code = cmd.cmd_prefix_;
const uint32_t lennumextra = static_cast<uint32_t>(cmd.cmd_extra_ >> 48);
const uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffUL;
WriteBits(cmd_depth[cmd_code], cmd_bits[cmd_code], storage_ix, storage);
WriteBits(lennumextra, lenextra, storage_ix, storage);
for (size_t j = cmd.insert_len_; j != 0; --j) {
const uint8_t literal = input[pos & mask];
WriteBits(lit_depth[literal], lit_bits[literal], storage_ix, storage);
++pos;
}
pos += cmd.copy_len_;
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
const size_t dist_code = cmd.dist_prefix_;
const uint32_t distnumextra = cmd.dist_extra_ >> 24;
const uint32_t distextra = cmd.dist_extra_ & 0xffffff;
WriteBits(dist_depth[dist_code], dist_bits[dist_code],
storage_ix, storage);
WriteBits(distnumextra, distextra, storage_ix, storage);
}
}
}
void StoreMetaBlockTrivial(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
bool is_last,
const brotli::Command *commands,
size_t n_commands,
int *storage_ix,
size_t *storage_ix,
uint8_t *storage) {
if (!StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage)) {
return false;
}
if (length == 0) {
// Only the last meta-block can be empty, so jump to next byte.
JumpToByteBoundary(storage_ix, storage);
return true;
}
StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
HistogramLiteral lit_histo;
HistogramCommand cmd_histo;
HistogramDistance dist_histo;
size_t pos = start_pos;
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
cmd_histo.Add(cmd.cmd_prefix_);
for (int j = 0; j < cmd.insert_len_; ++j) {
lit_histo.Add(input[pos & mask]);
++pos;
}
pos += cmd.copy_len_;
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
dist_histo.Add(cmd.dist_prefix_);
}
}
BuildHistograms(input, start_pos, mask, commands, n_commands,
&lit_histo, &cmd_histo, &dist_histo);
WriteBits(13, 0, storage_ix, storage);
@ -800,59 +828,37 @@ bool StoreMetaBlockTrivial(const uint8_t* input,
BuildAndStoreHuffmanTree(&dist_histo.data_[0], 64,
&dist_depth[0], &dist_bits[0],
storage_ix, storage);
pos = start_pos;
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
const int cmd_code = cmd.cmd_prefix_;
const int lennumextra = static_cast<int>(cmd.cmd_extra_ >> 48);
const uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffUL;
WriteBits(cmd_depth[cmd_code], cmd_bits[cmd_code], storage_ix, storage);
WriteBits(lennumextra, lenextra, storage_ix, storage);
for (int j = 0; j < cmd.insert_len_; j++) {
const uint8_t literal = input[pos & mask];
WriteBits(lit_depth[literal], lit_bits[literal], storage_ix, storage);
++pos;
}
pos += cmd.copy_len_;
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
const int dist_code = cmd.dist_prefix_;
const int distnumextra = cmd.dist_extra_ >> 24;
const int distextra = cmd.dist_extra_ & 0xffffff;
WriteBits(dist_depth[dist_code], dist_bits[dist_code],
storage_ix, storage);
WriteBits(distnumextra, distextra, storage_ix, storage);
}
}
StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
n_commands, &lit_depth[0], &lit_bits[0],
&cmd_depth[0], &cmd_bits[0],
&dist_depth[0], &dist_bits[0],
storage_ix, storage);
if (is_last) {
JumpToByteBoundary(storage_ix, storage);
}
return true;
}
// This is for storing uncompressed blocks (simple raw storage of
// bytes-as-bytes).
bool StoreUncompressedMetaBlock(bool final_block,
void StoreUncompressedMetaBlock(bool final_block,
const uint8_t * __restrict input,
size_t position, size_t mask,
size_t len,
int * __restrict storage_ix,
size_t * __restrict storage_ix,
uint8_t * __restrict storage) {
if (!brotli::StoreUncompressedMetaBlockHeader(len, storage_ix, storage)) {
return false;
}
StoreUncompressedMetaBlockHeader(len, storage_ix, storage);
JumpToByteBoundary(storage_ix, storage);
size_t masked_pos = position & mask;
if (masked_pos + len > mask + 1) {
size_t len1 = mask + 1 - masked_pos;
memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len1);
*storage_ix += static_cast<int>(len1 << 3);
*storage_ix += len1 << 3;
len -= len1;
masked_pos = 0;
}
memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len);
*storage_ix += static_cast<int>(len << 3);
*storage_ix += len << 3;
// We need to clear the next 4 bytes to remain compatible with WriteBits.
@ -865,10 +871,9 @@ bool StoreUncompressedMetaBlock(bool final_block,
brotli::WriteBits(1, 1, storage_ix, storage); // isempty
JumpToByteBoundary(storage_ix, storage);
}
return true;
}
void StoreSyncMetaBlock(int * __restrict storage_ix,
void StoreSyncMetaBlock(size_t * __restrict storage_ix,
uint8_t * __restrict storage) {
// Empty metadata meta-block bit pattern:
// 1 bit: is_last (0)

View File

@ -27,53 +27,60 @@ namespace brotli {
// position for the current storage.
// Stores a number between 0 and 255.
void StoreVarLenUint8(int n, int* storage_ix, uint8_t* storage);
void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage);
// Stores the compressed meta-block header.
bool StoreCompressedMetaBlockHeader(bool final_block,
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreCompressedMetaBlockHeader(bool final_block,
size_t length,
int* storage_ix,
size_t* storage_ix,
uint8_t* storage);
// Stores the uncompressed meta-block header.
bool StoreUncompressedMetaBlockHeader(size_t length,
int* storage_ix,
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreUncompressedMetaBlockHeader(size_t length,
size_t* storage_ix,
uint8_t* storage);
// Stores a context map where the histogram type is always the block type.
void StoreTrivialContextMap(int num_types,
int context_bits,
int* storage_ix,
void StoreTrivialContextMap(size_t num_types,
size_t context_bits,
size_t* storage_ix,
uint8_t* storage);
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
const int num_codes,
const uint8_t *code_length_bitdepth,
int *storage_ix,
size_t *storage_ix,
uint8_t *storage);
void StoreHuffmanTree(const uint8_t* depths, size_t num,
size_t *storage_ix, uint8_t *storage);
// Builds a Huffman tree from histogram[0:length] into depth[0:length] and
// bits[0:length] and stores the encoded tree to the bit stream.
void BuildAndStoreHuffmanTree(const int *histogram,
const int length,
void BuildAndStoreHuffmanTree(const uint32_t *histogram,
const size_t length,
uint8_t* depth,
uint16_t* bits,
int* storage_ix,
size_t* storage_ix,
uint8_t* storage);
// Encodes the given context map to the bit stream. The number of different
// histogram ids is given by num_clusters.
void EncodeContextMap(const std::vector<int>& context_map,
int num_clusters,
int* storage_ix, uint8_t* storage);
void EncodeContextMap(const std::vector<uint32_t>& context_map,
size_t num_clusters,
size_t* storage_ix, uint8_t* storage);
// Data structure that stores everything that is needed to encode each block
// switch command.
struct BlockSplitCode {
std::vector<int> type_code;
std::vector<int> length_prefix;
std::vector<int> length_nextra;
std::vector<int> length_extra;
std::vector<uint32_t> type_code;
std::vector<uint32_t> length_prefix;
std::vector<uint32_t> length_nextra;
std::vector<uint32_t> length_extra;
std::vector<uint8_t> type_depths;
std::vector<uint16_t> type_bits;
std::vector<uint8_t> length_depths;
@ -82,58 +89,64 @@ struct BlockSplitCode {
// Builds a BlockSplitCode data structure from the block split given by the
// vector of block types and block lengths and stores it to the bit stream.
void BuildAndStoreBlockSplitCode(const std::vector<int>& types,
const std::vector<int>& lengths,
const int num_types,
void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
const std::vector<uint32_t>& lengths,
const size_t num_types,
BlockSplitCode* code,
int* storage_ix,
size_t* storage_ix,
uint8_t* storage);
// Stores the block switch command with index block_ix to the bit stream.
void StoreBlockSwitch(const BlockSplitCode& code,
const int block_ix,
int* storage_ix,
const size_t block_ix,
size_t* storage_ix,
uint8_t* storage);
bool StoreMetaBlock(const uint8_t* input,
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreMetaBlock(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
bool final_block,
int num_direct_distance_codes,
int distance_postfix_bits,
int literal_context_mode,
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits,
ContextType literal_context_mode,
const brotli::Command *commands,
size_t n_commands,
const MetaBlockSplit& mb,
int *storage_ix,
size_t *storage_ix,
uint8_t *storage);
// Stores the meta-block without doing any block splitting, just collects
// one histogram per block category and uses that for entropy coding.
bool StoreMetaBlockTrivial(const uint8_t* input,
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreMetaBlockTrivial(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
bool is_last,
const brotli::Command *commands,
size_t n_commands,
int *storage_ix,
size_t *storage_ix,
uint8_t *storage);
// This is for storing uncompressed blocks (simple raw storage of
// bytes-as-bytes).
bool StoreUncompressedMetaBlock(bool final_block,
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreUncompressedMetaBlock(bool final_block,
const uint8_t* input,
size_t position, size_t mask,
size_t len,
int* storage_ix,
size_t* storage_ix,
uint8_t* storage);
// Stores an empty metadata meta-block and syncs to a byte boundary.
void StoreSyncMetaBlock(int* storage_ix, uint8_t* storage);
void StoreSyncMetaBlock(size_t* storage_ix, uint8_t* storage);
} // namespace brotli

View File

@ -10,11 +10,8 @@
#define BROTLI_ENC_CLUSTER_H_
#include <math.h>
#include <stdio.h>
#include <algorithm>
#include <complex>
#include <map>
#include <set>
#include <utility>
#include <vector>
@ -28,41 +25,39 @@
namespace brotli {
struct HistogramPair {
int idx1;
int idx2;
bool valid;
uint32_t idx1;
uint32_t idx2;
double cost_combo;
double cost_diff;
};
struct HistogramPairComparator {
bool operator()(const HistogramPair& p1, const HistogramPair& p2) const {
if (p1.cost_diff != p2.cost_diff) {
return p1.cost_diff > p2.cost_diff;
}
return abs(p1.idx1 - p1.idx2) > abs(p2.idx1 - p2.idx2);
inline bool operator<(const HistogramPair& p1, const HistogramPair& p2) {
if (p1.cost_diff != p2.cost_diff) {
return p1.cost_diff > p2.cost_diff;
}
};
return (p1.idx2 - p1.idx1) > (p2.idx2 - p2.idx1);
}
// Returns entropy reduction of the context map when we combine two clusters.
inline double ClusterCostDiff(int size_a, int size_b) {
int size_c = size_a + size_b;
return size_a * FastLog2(size_a) + size_b * FastLog2(size_b) -
size_c * FastLog2(size_c);
inline double ClusterCostDiff(size_t size_a, size_t size_b) {
size_t size_c = size_a + size_b;
return static_cast<double>(size_a) * FastLog2(size_a) +
static_cast<double>(size_b) * FastLog2(size_b) -
static_cast<double>(size_c) * FastLog2(size_c);
}
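
The diff is always negative for non-empty clusters, because (a+b)*log2(a+b) exceeds a*log2(a) + b*log2(b); a standalone check, with std::log2 standing in for the encoder's table-assisted FastLog2 (a substitution made only for this sketch):

#include <cassert>
#include <cmath>
#include <cstddef>

// Same formula as ClusterCostDiff, with std::log2 in place of FastLog2.
static double ClusterCostDiffExact(size_t size_a, size_t size_b) {
  size_t size_c = size_a + size_b;
  return static_cast<double>(size_a) * std::log2(static_cast<double>(size_a)) +
         static_cast<double>(size_b) * std::log2(static_cast<double>(size_b)) -
         static_cast<double>(size_c) * std::log2(static_cast<double>(size_c));
}

int main() {
  // Merging two singletons: 0 + 0 - 2*log2(2) = -2 bits.
  assert(ClusterCostDiffExact(1, 1) == -2.0);
  // Two clusters of size 2: 2*1 + 2*1 - 4*2 = -4 bits.
  assert(ClusterCostDiffExact(2, 2) == -4.0);
  return 0;
}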
// Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
// it is below a threshold, stores the pair (idx1, idx2) in the *pairs heap.
// it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue.
template<typename HistogramType>
void CompareAndPushToHeap(const HistogramType* out,
const int* cluster_size,
int idx1, int idx2,
std::vector<HistogramPair>* pairs) {
void CompareAndPushToQueue(const HistogramType* out,
const uint32_t* cluster_size,
uint32_t idx1, uint32_t idx2,
std::vector<HistogramPair>* pairs) {
if (idx1 == idx2) {
return;
}
if (idx2 < idx1) {
int t = idx2;
uint32_t t = idx2;
idx2 = idx1;
idx1 = t;
}
@ -70,7 +65,6 @@ void CompareAndPushToHeap(const HistogramType* out,
HistogramPair p;
p.idx1 = idx1;
p.idx2 = idx2;
p.valid = true;
p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
p.cost_diff -= out[idx1].bit_cost_;
p.cost_diff -= out[idx2].bit_cost_;
@ -94,37 +88,38 @@ void CompareAndPushToHeap(const HistogramType* out,
}
if (store_pair) {
p.cost_diff += p.cost_combo;
pairs->push_back(p);
std::push_heap(pairs->begin(), pairs->end(), HistogramPairComparator());
if (!pairs->empty() && (pairs->front() < p)) {
// Replace the top of the queue if needed.
pairs->push_back(pairs->front());
pairs->front() = p;
} else {
pairs->push_back(p);
}
}
}
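
With the heap gone, the vector's only ordering guarantee is that its front element is the best pair pushed so far (operator< above orders by descending cost_diff, so `pairs->front() < p` means the current front is worse than p). A minimal model of that discipline over plain doubles, where smaller means better; the helper is illustrative, not the commit's code:

#include <cassert>
#include <vector>

// Push x, keeping v->front() equal to the smallest value pushed so far;
// the rest of the vector stays unordered, mirroring the queue above.
static void PushKeepBestAtFront(std::vector<double>* v, double x) {
  if (!v->empty() && v->front() > x) {
    v->push_back(v->front());  // demote the old front to the tail
    v->front() = x;            // the new value becomes the front
  } else {
    v->push_back(x);
  }
}

int main() {
  std::vector<double> v;
  PushKeepBestAtFront(&v, 3.0);
  PushKeepBestAtFront(&v, 1.0);
  PushKeepBestAtFront(&v, 2.0);
  assert(v.front() == 1.0 && v.size() == 3);  // best candidate stays in front
  return 0;
}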
template<typename HistogramType>
void HistogramCombine(HistogramType* out,
int* cluster_size,
int* symbols,
int symbols_size,
uint32_t* cluster_size,
uint32_t* symbols,
size_t symbols_size,
size_t max_clusters) {
double cost_diff_threshold = 0.0;
size_t min_cluster_size = 1;
std::set<int> all_symbols;
std::vector<int> clusters;
for (int i = 0; i < symbols_size; ++i) {
if (all_symbols.find(symbols[i]) == all_symbols.end()) {
all_symbols.insert(symbols[i]);
if (!clusters.empty()) {
BROTLI_DCHECK(clusters.back() < symbols[i]);
}
clusters.push_back(symbols[i]);
}
}
// Uniquify the list of symbols.
std::vector<uint32_t> clusters(symbols, symbols + symbols_size);
std::sort(clusters.begin(), clusters.end());
std::vector<uint32_t>::iterator last =
std::unique(clusters.begin(), clusters.end());
clusters.resize(static_cast<size_t>(last - clusters.begin()));
// We maintain a heap of histogram pairs, ordered by the bit cost reduction.
std::vector<HistogramPair> pairs;
for (size_t idx1 = 0; idx1 < clusters.size(); ++idx1) {
for (size_t idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) {
CompareAndPushToHeap(out, cluster_size, clusters[idx1], clusters[idx2],
&pairs);
CompareAndPushToQueue(out, cluster_size, clusters[idx1], clusters[idx2],
&pairs);
}
}
@ -135,38 +130,48 @@ void HistogramCombine(HistogramType* out,
continue;
}
// Take the best pair from the top of heap.
int best_idx1 = pairs[0].idx1;
int best_idx2 = pairs[0].idx2;
uint32_t best_idx1 = pairs[0].idx1;
uint32_t best_idx2 = pairs[0].idx2;
out[best_idx1].AddHistogram(out[best_idx2]);
out[best_idx1].bit_cost_ = pairs[0].cost_combo;
cluster_size[best_idx1] += cluster_size[best_idx2];
for (int i = 0; i < symbols_size; ++i) {
for (size_t i = 0; i < symbols_size; ++i) {
if (symbols[i] == best_idx2) {
symbols[i] = best_idx1;
}
}
for (size_t i = 0; i + 1 < clusters.size(); ++i) {
if (clusters[i] >= best_idx2) {
clusters[i] = clusters[i + 1];
for (std::vector<uint32_t>::iterator cluster = clusters.begin();
cluster != clusters.end(); ++cluster) {
if (*cluster >= best_idx2) {
clusters.erase(cluster);
break;
}
}
clusters.pop_back();
// Invalidate pairs intersecting the just combined best pair.
// Remove pairs intersecting the just combined best pair.
size_t copy_to_idx = 0;
for (size_t i = 0; i < pairs.size(); ++i) {
HistogramPair& p = pairs[i];
if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
p.idx1 == best_idx2 || p.idx2 == best_idx2) {
p.valid = false;
// Remove invalid pair from the queue.
continue;
}
if (pairs.front() < p) {
// Replace the top of the queue if needed.
HistogramPair front = pairs.front();
pairs.front() = p;
pairs[copy_to_idx] = front;
} else {
pairs[copy_to_idx] = p;
}
++copy_to_idx;
}
// Pop invalid pairs from the top of the heap.
while (!pairs.empty() && !pairs[0].valid) {
std::pop_heap(pairs.begin(), pairs.end(), HistogramPairComparator());
pairs.pop_back();
}
pairs.resize(copy_to_idx);
// Push new pairs formed with the combined histogram to the heap.
for (size_t i = 0; i < clusters.size(); ++i) {
CompareAndPushToHeap(out, cluster_size, best_idx1, clusters[i], &pairs);
CompareAndPushToQueue(out, cluster_size, best_idx1, clusters[i], &pairs);
}
}
}
@ -189,16 +194,19 @@ double HistogramBitCostDistance(const HistogramType& histogram,
// Find the best 'out' histogram for each of the 'in' histograms.
// Note: we assume that out[]->bit_cost_ is already up-to-date.
template<typename HistogramType>
void HistogramRemap(const HistogramType* in, int in_size,
HistogramType* out, int* symbols) {
std::set<int> all_symbols;
for (int i = 0; i < in_size; ++i) {
all_symbols.insert(symbols[i]);
}
for (int i = 0; i < in_size; ++i) {
int best_out = i == 0 ? symbols[0] : symbols[i - 1];
void HistogramRemap(const HistogramType* in, size_t in_size,
HistogramType* out, uint32_t* symbols) {
// Uniquify the list of symbols.
std::vector<uint32_t> all_symbols(symbols, symbols + in_size);
std::sort(all_symbols.begin(), all_symbols.end());
std::vector<uint32_t>::iterator last =
std::unique(all_symbols.begin(), all_symbols.end());
all_symbols.resize(static_cast<size_t>(last - all_symbols.begin()));
for (size_t i = 0; i < in_size; ++i) {
uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
for (std::set<int>::const_iterator k = all_symbols.begin();
for (std::vector<uint32_t>::const_iterator k = all_symbols.begin();
k != all_symbols.end(); ++k) {
const double cur_bits = HistogramBitCostDistance(in[i], out[*k]);
if (cur_bits < best_bits) {
@ -211,11 +219,11 @@ void HistogramRemap(const HistogramType* in, int in_size,
// Recompute each out based on raw and symbols.
for (std::set<int>::const_iterator k = all_symbols.begin();
for (std::vector<uint32_t>::const_iterator k = all_symbols.begin();
k != all_symbols.end(); ++k) {
out[*k].Clear();
}
for (int i = 0; i < in_size; ++i) {
for (size_t i = 0; i < in_size; ++i) {
out[symbols[i]].AddHistogram(in[i]);
}
}
@ -224,10 +232,10 @@ void HistogramRemap(const HistogramType* in, int in_size,
// increasing order.
template<typename HistogramType>
void HistogramReindex(std::vector<HistogramType>* out,
std::vector<int>* symbols) {
std::vector<uint32_t>* symbols) {
std::vector<HistogramType> tmp(*out);
std::map<int, int> new_index;
int next_index = 0;
std::map<uint32_t, uint32_t> new_index;
uint32_t next_index = 0;
for (size_t i = 0; i < symbols->size(); ++i) {
if (new_index.find((*symbols)[i]) == new_index.end()) {
new_index[(*symbols)[i]] = next_index;
@ -246,25 +254,25 @@ void HistogramReindex(std::vector<HistogramType>* out,
// indicate which of the 'out' histograms is the best approximation.
template<typename HistogramType>
void ClusterHistograms(const std::vector<HistogramType>& in,
int num_contexts, int num_blocks,
size_t num_contexts, size_t num_blocks,
size_t max_histograms,
std::vector<HistogramType>* out,
std::vector<int>* histogram_symbols) {
const int in_size = num_contexts * num_blocks;
BROTLI_DCHECK(in_size == in.size());
std::vector<int> cluster_size(in_size, 1);
std::vector<uint32_t>* histogram_symbols) {
const size_t in_size = num_contexts * num_blocks;
assert(in_size == in.size());
std::vector<uint32_t> cluster_size(in_size, 1);
out->resize(in_size);
histogram_symbols->resize(in_size);
for (int i = 0; i < in_size; ++i) {
for (size_t i = 0; i < in_size; ++i) {
(*out)[i] = in[i];
(*out)[i].bit_cost_ = PopulationCost(in[i]);
(*histogram_symbols)[i] = i;
(*histogram_symbols)[i] = static_cast<uint32_t>(i);
}
const int max_input_histograms = 64;
for (int i = 0; i < in_size; i += max_input_histograms) {
int num_to_combine = std::min(in_size - i, max_input_histograms);
const size_t max_input_histograms = 64;
for (size_t i = 0; i < in_size; i += max_input_histograms) {
size_t num_to_combine = std::min(in_size - i, max_input_histograms);
HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[i], num_to_combine,
max_histograms);

View File

@ -15,21 +15,21 @@
namespace brotli {
static int insbase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66,
98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
static int insextra[] = { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5,
5, 6, 7, 8, 9, 10, 12, 14, 24 };
static int copybase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38,
54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
static int copyextra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4,
4, 5, 5, 6, 7, 8, 9, 10, 24 };
static uint32_t kInsBase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50,
66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
static uint32_t kInsExtra[] = { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
5, 5, 6, 7, 8, 9, 10, 12, 14, 24 };
static uint32_t kCopyBase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30,
38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
static uint32_t kCopyExtra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 7, 8, 9, 10, 24 };
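
The renamed tables keep the tiling property the codes rely on: each code covers [base, base + 2^extra), and consecutive ranges meet exactly. A standalone check for the insert-length pair (copied from above); the copy-length tables satisfy the same identity:

#include <cassert>
#include <cstddef>
#include <cstdint>

// Copies of kInsBase / kInsExtra from above.
static const uint32_t kInsBase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26,
    34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
static const uint32_t kInsExtra[] = { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
    4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24 };

int main() {
  // Code i covers insert lengths [kInsBase[i], kInsBase[i] + 2^kInsExtra[i]),
  // so consecutive ranges must tile with no gaps or overlaps.
  for (size_t i = 0; i + 1 < 24; ++i) {
    assert(kInsBase[i] + (1u << kInsExtra[i]) == kInsBase[i + 1]);
  }
  return 0;
}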
static inline uint16_t GetInsertLengthCode(int insertlen) {
static inline uint16_t GetInsertLengthCode(size_t insertlen) {
if (insertlen < 6) {
return static_cast<uint16_t>(insertlen);
} else if (insertlen < 130) {
insertlen -= 2;
int nbits = Log2FloorNonZero(insertlen) - 1;
uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
return static_cast<uint16_t>((nbits << 1) + (insertlen >> nbits) + 2);
} else if (insertlen < 2114) {
return static_cast<uint16_t>(Log2FloorNonZero(insertlen - 66) + 10);
@ -42,12 +42,12 @@ static inline uint16_t GetInsertLengthCode(int insertlen) {
}
}
static inline uint16_t GetCopyLengthCode(int copylen) {
static inline uint16_t GetCopyLengthCode(size_t copylen) {
if (copylen < 10) {
return static_cast<uint16_t>(copylen - 2);
} else if (copylen < 134) {
copylen -= 6;
int nbits = Log2FloorNonZero(copylen) - 1;
uint32_t nbits = Log2FloorNonZero(copylen) - 1u;
return static_cast<uint16_t>((nbits << 1) + (copylen >> nbits) + 4);
} else if (copylen < 2118) {
return static_cast<uint16_t>(Log2FloorNonZero(copylen - 70) + 12);
@ -71,23 +71,25 @@ static inline uint16_t CombineLengthCodes(
}
}
static inline void GetLengthCode(int insertlen, int copylen,
static inline void GetLengthCode(size_t insertlen, size_t copylen,
bool use_last_distance,
uint16_t* code, uint64_t* extra) {
uint16_t inscode = GetInsertLengthCode(insertlen);
uint16_t copycode = GetCopyLengthCode(copylen);
uint64_t insnumextra = insextra[inscode];
uint64_t numextra = insnumextra + copyextra[copycode];
uint64_t insextraval = insertlen - insbase[inscode];
uint64_t copyextraval = copylen - copybase[copycode];
uint64_t insnumextra = kInsExtra[inscode];
uint64_t numextra = insnumextra + kCopyExtra[copycode];
uint64_t insextraval = insertlen - kInsBase[inscode];
uint64_t copyextraval = copylen - kCopyBase[copycode];
*code = CombineLengthCodes(inscode, copycode, use_last_distance);
*extra = (numextra << 48) | (copyextraval << insnumextra) | insextraval;
}
struct Command {
// distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1.
Command(int insertlen, int copylen, int copylen_code, int distance_code)
: insert_len_(insertlen), copy_len_(copylen) {
Command(size_t insertlen, size_t copylen, size_t copylen_code,
size_t distance_code)
: insert_len_(static_cast<uint32_t>(insertlen))
, copy_len_(static_cast<uint32_t>(copylen)) {
// The distance prefix and extra bits are stored in this Command as if
// npostfix and ndirect were 0, they are only recomputed later after the
// clustering if needed.
@ -96,32 +98,33 @@ struct Command {
&cmd_prefix_, &cmd_extra_);
}
Command(int insertlen)
: insert_len_(insertlen), copy_len_(0), dist_prefix_(16), dist_extra_(0) {
explicit Command(size_t insertlen)
: insert_len_(static_cast<uint32_t>(insertlen))
, copy_len_(0), dist_prefix_(16), dist_extra_(0) {
GetLengthCode(insertlen, 4, dist_prefix_ == 0, &cmd_prefix_, &cmd_extra_);
}
int DistanceCode() const {
uint32_t DistanceCode() const {
if (dist_prefix_ < 16) {
return dist_prefix_;
}
int nbits = dist_extra_ >> 24;
int extra = dist_extra_ & 0xffffff;
int prefix = dist_prefix_ - 12 - 2 * nbits;
uint32_t nbits = dist_extra_ >> 24;
uint32_t extra = dist_extra_ & 0xffffff;
uint32_t prefix = dist_prefix_ - 12 - 2 * nbits;
return (prefix << nbits) + extra + 12;
}
int DistanceContext() const {
int r = cmd_prefix_ >> 6;
int c = cmd_prefix_ & 7;
uint32_t DistanceContext() const {
uint32_t r = cmd_prefix_ >> 6;
uint32_t c = cmd_prefix_ & 7;
if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) {
return c;
}
return 3;
}
int insert_len_;
int copy_len_;
uint32_t insert_len_;
uint32_t copy_len_;
uint16_t cmd_prefix_;
uint16_t dist_prefix_;
uint64_t cmd_extra_;

View File

@ -157,7 +157,7 @@ enum ContextType {
CONTEXT_SIGNED = 3
};
static inline uint8_t Context(uint8_t p1, uint8_t p2, int mode) {
static inline uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
switch (mode) {
case CONTEXT_LSB6:
return p1 & 0x3f;

View File

@ -19,13 +19,13 @@ extern "C" {
extern const uint8_t kBrotliDictionary[122784];
static const int kBrotliDictionaryOffsetsByLength[] = {
static const uint32_t kBrotliDictionaryOffsetsByLength[] = {
0, 0, 0, 0, 0, 4096, 9216, 21504, 35840, 44032,
53248, 63488, 74752, 87040, 93696, 100864, 104704, 106752, 108928, 113536,
115968, 118528, 119872, 121280, 122016,
};
static const int kBrotliDictionarySizeBitsByLength[] = {
static const uint8_t kBrotliDictionarySizeBitsByLength[] = {
0, 0, 0, 0, 10, 10, 11, 11, 10, 10,
10, 10, 10, 9, 9, 8, 7, 7, 8, 7,
7, 6, 6, 5, 5,

View File

@ -9,6 +9,7 @@
#include "./encode.h"
#include <algorithm>
#include <cstring>
#include <limits>
#include "./backward_references.h"
@ -38,8 +39,8 @@ static const int kMaxNumDelayedSymbols = 0x2fff;
void RecomputeDistancePrefixes(Command* cmds,
size_t num_commands,
int num_direct_distance_codes,
int distance_postfix_bits) {
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits) {
if (num_direct_distance_codes == 0 && distance_postfix_bits == 0) {
return;
}
@ -55,6 +56,16 @@ void RecomputeDistancePrefixes(Command* cmds,
}
}
/* Wraps 64-bit input position to 32-bit ringbuffer position preserving
"not-a-first-lap" feature. */
uint32_t WrapPosition(uint64_t position) {
uint32_t result = static_cast<uint32_t>(position);
if (position > (1u << 30)) {
result = (result & ((1u << 30) - 1)) | (1u << 30);
}
return result;
}
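
The wrapping behaviour is easy to exercise on its own: first-lap positions survive the cast unchanged, and anything past the first lap is folded into [2^30, 2^31), so bit 30 keeps recording that a lap happened. A standalone copy with a few hand-picked positions:

#include <cassert>
#include <cstdint>

// Copy of WrapPosition above.
static uint32_t WrapPosition(uint64_t position) {
  uint32_t result = static_cast<uint32_t>(position);
  if (position > (1u << 30)) {
    result = (result & ((1u << 30) - 1)) | (1u << 30);
  }
  return result;
}

int main() {
  assert(WrapPosition(12345) == 12345);          // first lap: unchanged
  assert(WrapPosition(1u << 30) == (1u << 30));  // boundary is preserved
  // A 64-bit position far past the ringbuffer still keeps bit 30 set.
  assert(WrapPosition((uint64_t(1) << 32) + 5) == ((1u << 30) | 5));
  return 0;
}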
uint8_t* BrotliCompressor::GetBrotliStorage(size_t size) {
if (storage_size_ < size) {
delete[] storage_;
@ -64,6 +75,22 @@ uint8_t* BrotliCompressor::GetBrotliStorage(size_t size) {
return storage_;
}
void EncodeWindowBits(int lgwin, uint8_t* last_byte, uint8_t* last_byte_bits) {
if (lgwin == 16) {
*last_byte = 0;
*last_byte_bits = 1;
} else if (lgwin == 17) {
*last_byte = 1;
*last_byte_bits = 7;
} else if (lgwin > 17) {
*last_byte = static_cast<uint8_t>(((lgwin - 17) << 1) | 1);
*last_byte_bits = 4;
} else {
*last_byte = static_cast<uint8_t>(((lgwin - 8) << 4) | 1);
*last_byte_bits = 7;
}
}
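
The factored-out header packing can likewise be checked standalone; the sketch copies EncodeWindowBits verbatim and verifies two representative window sizes (the expected constants are worked out by hand):

#include <cassert>
#include <cstdint>

// Copy of EncodeWindowBits above, for a standalone check of the
// stream-header packing.
static void EncodeWindowBits(int lgwin, uint8_t* last_byte,
                             uint8_t* last_byte_bits) {
  if (lgwin == 16) {
    *last_byte = 0;
    *last_byte_bits = 1;
  } else if (lgwin == 17) {
    *last_byte = 1;
    *last_byte_bits = 7;
  } else if (lgwin > 17) {
    *last_byte = static_cast<uint8_t>(((lgwin - 17) << 1) | 1);
    *last_byte_bits = 4;
  } else {
    *last_byte = static_cast<uint8_t>(((lgwin - 8) << 4) | 1);
    *last_byte_bits = 7;
  }
}

int main() {
  uint8_t byte, bits;
  EncodeWindowBits(22, &byte, &bits);  // a common 4 MiB window
  assert(byte == 0x0B && bits == 4);   // ((22-17)<<1)|1 = 0b1011
  EncodeWindowBits(16, &byte, &bits);  // smallest window: the 1-bit form
  assert(byte == 0 && bits == 1);
  return 0;
}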
BrotliCompressor::BrotliCompressor(BrotliParams params)
: params_(params),
hashers_(new Hashers()),
@ -109,19 +136,7 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
cmd_alloc_size_ = 0;
// Initialize last byte with stream header.
if (params_.lgwin == 16) {
last_byte_ = 0;
last_byte_bits_ = 1;
} else if (params_.lgwin == 17) {
last_byte_ = 1;
last_byte_bits_ = 7;
} else if (params_.lgwin > 17) {
last_byte_ = static_cast<uint8_t>(((params_.lgwin - 17) << 1) | 1);
last_byte_bits_ = 4;
} else {
last_byte_ = static_cast<uint8_t>(((params_.lgwin - 8) << 4) | 1);
last_byte_bits_ = 7;
}
EncodeWindowBits(params_.lgwin, &last_byte_, &last_byte_bits_);
// Initialize distance cache.
dist_cache_[0] = 4;
@ -213,13 +228,14 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
const bool force_flush,
size_t* out_size,
uint8_t** output) {
const size_t bytes = input_pos_ - last_processed_pos_;
const uint64_t delta = input_pos_ - last_processed_pos_;
const uint8_t* data = ringbuffer_->start();
const size_t mask = ringbuffer_->mask();
const uint32_t mask = ringbuffer_->mask();
if (bytes > input_block_size()) {
if (delta > input_block_size() || (delta == 0 && !is_last)) {
return false;
}
const uint32_t bytes = static_cast<uint32_t>(delta);
// Theoretical max number of commands is 1 per 2 bytes.
size_t newsize = num_commands_ + bytes / 2 + 1;
@ -232,7 +248,8 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
static_cast<Command*>(realloc(commands_, sizeof(Command) * newsize));
}
CreateBackwardReferences(bytes, last_processed_pos_, data, mask,
CreateBackwardReferences(bytes, WrapPosition(last_processed_pos_),
is_last, data, mask,
max_backward_distance_,
params_.quality,
hashers_,
@ -262,7 +279,8 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
last_insert_len_ = 0;
}
return WriteMetaBlockInternal(is_last, out_size, output);
WriteMetaBlockInternal(is_last, out_size, output);
return true;
}
// Decide about the context map based on the ability of the prediction
@ -273,42 +291,43 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
// BitsEntropy will assume that symbol to be stored alone using Huffman
// coding.
void ChooseContextMap(int quality,
int* bigram_histo,
int* num_literal_contexts,
const int** literal_context_map) {
int monogram_histo[3] = { 0 };
int two_prefix_histo[6] = { 0 };
int total = 0;
for (int i = 0; i < 9; ++i) {
uint32_t* bigram_histo,
size_t* num_literal_contexts,
const uint32_t** literal_context_map) {
uint32_t monogram_histo[3] = { 0 };
uint32_t two_prefix_histo[6] = { 0 };
size_t total = 0;
for (size_t i = 0; i < 9; ++i) {
total += bigram_histo[i];
monogram_histo[i % 3] += bigram_histo[i];
int j = i;
size_t j = i;
if (j >= 6) {
j -= 6;
}
two_prefix_histo[j] += bigram_histo[i];
}
int dummy;
size_t dummy;
double entropy1 = ShannonEntropy(monogram_histo, 3, &dummy);
double entropy2 = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
double entropy3 = 0;
for (int k = 0; k < 3; ++k) {
for (size_t k = 0; k < 3; ++k) {
entropy3 += ShannonEntropy(bigram_histo + 3 * k, 3, &dummy);
}
assert(total != 0);
entropy1 *= (1.0 / total);
entropy2 *= (1.0 / total);
entropy3 *= (1.0 / total);
double scale = 1.0 / static_cast<double>(total);
entropy1 *= scale;
entropy2 *= scale;
entropy3 *= scale;
static const int kStaticContextMapContinuation[64] = {
static const uint32_t kStaticContextMapContinuation[64] = {
1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
static const int kStaticContextMapSimpleUTF8[64] = {
static const uint32_t kStaticContextMapSimpleUTF8[64] = {
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -337,9 +356,9 @@ void DecideOverLiteralContextModeling(const uint8_t* input,
size_t length,
size_t mask,
int quality,
int* literal_context_mode,
int* num_literal_contexts,
const int** literal_context_map) {
ContextType* literal_context_mode,
size_t* num_literal_contexts,
const uint32_t** literal_context_map) {
if (quality < kMinQualityForContextModeling || length < 64) {
return;
}
@ -347,7 +366,7 @@ void DecideOverLiteralContextModeling(const uint8_t* input,
// UTF8 data faster we only examine 64 byte long strides at every 4kB
// intervals.
const size_t end_pos = start_pos + length;
int bigram_prefix_histo[9] = { 0 };
uint32_t bigram_prefix_histo[9] = { 0 };
for (; start_pos + 64 <= end_pos; start_pos += 4096) {
static const int lut[4] = { 0, 0, 1, 2 };
const size_t stride_end_pos = start_pos + 64;
@ -363,27 +382,33 @@ void DecideOverLiteralContextModeling(const uint8_t* input,
literal_context_map);
}
bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
void BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
size_t* out_size,
uint8_t** output) {
const size_t bytes = input_pos_ - last_flush_pos_;
assert(input_pos_ >= last_flush_pos_);
assert(input_pos_ > last_flush_pos_ || is_last);
assert(input_pos_ - last_flush_pos_ <= 1u << 24);
const uint32_t bytes = static_cast<uint32_t>(input_pos_ - last_flush_pos_);
const uint8_t* data = ringbuffer_->start();
const size_t mask = ringbuffer_->mask();
const uint32_t mask = ringbuffer_->mask();
const size_t max_out_size = 2 * bytes + 500;
uint8_t* storage = GetBrotliStorage(max_out_size);
storage[0] = last_byte_;
int storage_ix = last_byte_bits_;
size_t storage_ix = last_byte_bits_;
bool uncompressed = false;
if (num_commands_ < (bytes >> 8) + 2) {
if (num_literals_ > 0.99 * static_cast<double>(bytes)) {
int literal_histo[256] = { 0 };
static const int kSampleRate = 13;
uint32_t literal_histo[256] = { 0 };
static const uint32_t kSampleRate = 13;
static const double kMinEntropy = 7.92;
const double bit_cost_threshold =
static_cast<double>(bytes) * kMinEntropy / kSampleRate;
for (size_t i = last_flush_pos_; i < input_pos_; i += kSampleRate) {
++literal_histo[data[i & mask]];
size_t t = (bytes + kSampleRate - 1) / kSampleRate;
uint32_t pos = static_cast<uint32_t>(last_flush_pos_);
for (size_t i = 0; i < t; i++) {
++literal_histo[data[pos & mask]];
pos += kSampleRate;
}
if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
uncompressed = true;
@ -392,23 +417,20 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
}
if (bytes == 0) {
if (!StoreCompressedMetaBlockHeader(is_last, 0, &storage_ix, &storage[0])) {
return false;
}
storage_ix = (storage_ix + 7) & ~7;
// Write the ISLAST and ISEMPTY bits.
WriteBits(2, 3, &storage_ix, &storage[0]);
storage_ix = (storage_ix + 7u) & ~7u;
} else if (uncompressed) {
// Restore the distance cache, as its last update by
// CreateBackwardReferences is now unused.
memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
if (!StoreUncompressedMetaBlock(is_last,
data, last_flush_pos_, mask, bytes,
&storage_ix,
&storage[0])) {
return false;
}
StoreUncompressedMetaBlock(is_last, data,
WrapPosition(last_flush_pos_), mask, bytes,
&storage_ix,
&storage[0]);
} else {
int num_direct_distance_codes = 0;
int distance_postfix_bits = 0;
uint32_t num_direct_distance_codes = 0;
uint32_t distance_postfix_bits = 0;
if (params_.quality > 9 && params_.mode == BrotliParams::MODE_FONT) {
num_direct_distance_codes = 12;
distance_postfix_bits = 1;
@ -418,29 +440,30 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
distance_postfix_bits);
}
if (params_.quality < kMinQualityForBlockSplit) {
if (!StoreMetaBlockTrivial(data, last_flush_pos_, bytes, mask, is_last,
commands_, num_commands_,
&storage_ix,
&storage[0])) {
return false;
}
StoreMetaBlockTrivial(data, WrapPosition(last_flush_pos_),
bytes, mask, is_last,
commands_, num_commands_,
&storage_ix,
&storage[0]);
} else {
MetaBlockSplit mb;
int literal_context_mode = CONTEXT_UTF8;
ContextType literal_context_mode = CONTEXT_UTF8;
if (params_.quality <= 9) {
int num_literal_contexts = 1;
const int* literal_context_map = NULL;
DecideOverLiteralContextModeling(data, last_flush_pos_, bytes, mask,
size_t num_literal_contexts = 1;
const uint32_t* literal_context_map = NULL;
DecideOverLiteralContextModeling(data, WrapPosition(last_flush_pos_),
bytes, mask,
params_.quality,
&literal_context_mode,
&num_literal_contexts,
&literal_context_map);
if (literal_context_map == NULL) {
BuildMetaBlockGreedy(data, last_flush_pos_, mask,
BuildMetaBlockGreedy(data, WrapPosition(last_flush_pos_), mask,
commands_, num_commands_,
&mb);
} else {
BuildMetaBlockGreedyWithContexts(data, last_flush_pos_, mask,
BuildMetaBlockGreedyWithContexts(data, WrapPosition(last_flush_pos_),
mask,
prev_byte_, prev_byte2_,
literal_context_mode,
num_literal_contexts,
@ -449,10 +472,11 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
&mb);
}
} else {
if (!IsMostlyUTF8(data, last_flush_pos_, mask, bytes, kMinUTF8Ratio)) {
if (!IsMostlyUTF8(
data, WrapPosition(last_flush_pos_), mask, bytes, kMinUTF8Ratio)) {
literal_context_mode = CONTEXT_SIGNED;
}
BuildMetaBlock(data, last_flush_pos_, mask,
BuildMetaBlock(data, WrapPosition(last_flush_pos_), mask,
prev_byte_, prev_byte2_,
commands_, num_commands_,
literal_context_mode,
@ -463,36 +487,33 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
distance_postfix_bits,
&mb);
}
if (!StoreMetaBlock(data, last_flush_pos_, bytes, mask,
prev_byte_, prev_byte2_,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands_, num_commands_,
mb,
&storage_ix,
&storage[0])) {
return false;
}
StoreMetaBlock(data, WrapPosition(last_flush_pos_), bytes, mask,
prev_byte_, prev_byte2_,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands_, num_commands_,
mb,
&storage_ix,
&storage[0]);
}
if (bytes + 4 < static_cast<size_t>(storage_ix >> 3)) {
if (bytes + 4 < (storage_ix >> 3)) {
// Restore the distance cache and last byte.
memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
storage[0] = last_byte_;
storage_ix = last_byte_bits_;
if (!StoreUncompressedMetaBlock(is_last, data, last_flush_pos_, mask,
bytes, &storage_ix, &storage[0])) {
return false;
}
StoreUncompressedMetaBlock(is_last, data,
WrapPosition(last_flush_pos_), mask,
bytes, &storage_ix, &storage[0]);
}
}
last_byte_ = storage[storage_ix >> 3];
last_byte_bits_ = storage_ix & 7;
last_byte_bits_ = storage_ix & 7u;
last_flush_pos_ = input_pos_;
last_processed_pos_ = input_pos_;
prev_byte_ = data[(last_flush_pos_ - 1) & mask];
prev_byte2_ = data[(last_flush_pos_ - 2) & mask];
prev_byte_ = data[(static_cast<uint32_t>(last_flush_pos_) - 1) & mask];
prev_byte2_ = data[(static_cast<uint32_t>(last_flush_pos_) - 2) & mask];
num_commands_ = 0;
num_literals_ = 0;
// Save the state of the distance cache in case we need to restore it for
@ -500,7 +521,6 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
*output = &storage[0];
*out_size = storage_ix >> 3;
return true;
}
bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
@ -532,21 +552,22 @@ bool BrotliCompressor::WriteMetadata(const size_t input_size,
}
uint64_t hdr_buffer_data[2];
uint8_t* hdr_buffer = reinterpret_cast<uint8_t*>(&hdr_buffer_data[0]);
int storage_ix = last_byte_bits_;
size_t storage_ix = last_byte_bits_;
hdr_buffer[0] = last_byte_;
WriteBits(1, 0, &storage_ix, hdr_buffer);
WriteBits(2, 3, &storage_ix, hdr_buffer);
WriteBits(1, 0, &storage_ix, hdr_buffer);
if (input_size == 0) {
WriteBits(2, 0, &storage_ix, hdr_buffer);
*encoded_size = (storage_ix + 7) >> 3;
*encoded_size = (storage_ix + 7u) >> 3;
memcpy(encoded_buffer, hdr_buffer, *encoded_size);
} else {
int nbits = Log2Floor(static_cast<uint32_t>(input_size) - 1) + 1;
int nbytes = (nbits + 7) / 8;
uint32_t nbits = (input_size == 1) ? 0 : (Log2FloorNonZero(
static_cast<uint32_t>(input_size) - 1) + 1);
uint32_t nbytes = (nbits + 7) / 8;
WriteBits(2, nbytes, &storage_ix, hdr_buffer);
WriteBits(8 * nbytes, input_size - 1, &storage_ix, hdr_buffer);
size_t hdr_size = (storage_ix + 7) >> 3;
size_t hdr_size = (storage_ix + 7u) >> 3;
memcpy(encoded_buffer, hdr_buffer, hdr_size);
memcpy(&encoded_buffer[hdr_size], input_buffer, input_size);
*encoded_size = hdr_size + input_size;
@ -582,38 +603,52 @@ int BrotliCompressBuffer(BrotliParams params,
return 1;
}
size_t CopyOneBlockToRingBuffer(BrotliIn* r, BrotliCompressor* compressor) {
const size_t block_size = compressor->input_block_size();
size_t bytes_read = 0;
const uint8_t* data = reinterpret_cast<const uint8_t*>(
r->Read(block_size, &bytes_read));
if (data == NULL) {
return 0;
}
compressor->CopyInputToRingBuffer(bytes_read, data);
// Read more bytes until block_size is filled or an EOF (data == NULL) is
// received. This is useful to get deterministic compressed output for the
// same input no matter how r->Read splits the input into chunks.
for (size_t remaining = block_size - bytes_read; remaining > 0; ) {
size_t more_bytes_read = 0;
data = reinterpret_cast<const uint8_t*>(
r->Read(remaining, &more_bytes_read));
if (data == NULL) {
break;
}
compressor->CopyInputToRingBuffer(more_bytes_read, data);
bytes_read += more_bytes_read;
remaining -= more_bytes_read;
}
return bytes_read;
}
bool BrotliInIsFinished(BrotliIn* r) {
size_t read_bytes;
return r->Read(0, &read_bytes) == NULL;
}
const uint8_t* BrotliInReadAndCheckEnd(const size_t block_size,
BrotliIn* r,
size_t* bytes_read,
bool* is_last) {
*bytes_read = 0;
const uint8_t* data = reinterpret_cast<const uint8_t*>(
r->Read(block_size, bytes_read));
assert((data == NULL) == (*bytes_read == 0));
*is_last = BrotliInIsFinished(r);
return data;
}
bool CopyOneBlockToRingBuffer(BrotliIn* r,
BrotliCompressor* compressor,
size_t* bytes_read,
bool* is_last) {
const size_t block_size = compressor->input_block_size();
const uint8_t* data = BrotliInReadAndCheckEnd(block_size, r,
bytes_read, is_last);
if (data == NULL) {
return *is_last;
}
compressor->CopyInputToRingBuffer(*bytes_read, data);
// Read more bytes until block_size is filled or an EOF (data == NULL) is
// received. This is useful to get deterministic compressed output for the
// same input no matter how r->Read splits the input into chunks.
for (size_t remaining = block_size - *bytes_read; remaining > 0; ) {
size_t more_bytes_read = 0;
data = BrotliInReadAndCheckEnd(remaining, r, &more_bytes_read, is_last);
if (data == NULL) {
return *is_last;
}
compressor->CopyInputToRingBuffer(more_bytes_read, data);
*bytes_read += more_bytes_read;
remaining -= more_bytes_read;
}
return true;
}
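
The reason for the coalescing loop is easiest to see against a source that fragments its output. In the toy model below, ChunkedSource is invented for this sketch and is not the real BrotliIn interface; it never returns more than 3 bytes per call, yet the loop still fills the block deterministically:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <string>

// A toy source that never returns more than 3 bytes per Read call.
struct ChunkedSource {
  explicit ChunkedSource(const std::string& s) : data(s), pos(0) {}
  const char* Read(size_t n, size_t* nread) {
    *nread = std::min(std::min<size_t>(n, 3), data.size() - pos);
    if (*nread == 0) return NULL;
    const char* p = data.data() + pos;
    pos += *nread;
    return p;
  }
  std::string data;
  size_t pos;
};

int main() {
  ChunkedSource src("0123456789");
  std::string block;
  size_t nread = 0;
  // Coalesce reads until the block is full, as the loop above does, so the
  // block contents do not depend on how the source chops up the input.
  for (size_t remaining = 8; remaining > 0; ) {
    const char* chunk = src.Read(remaining, &nread);
    if (chunk == NULL) break;
    block.append(chunk, nread);
    remaining -= nread;
  }
  assert(block == "01234567");  // full block despite 3-byte chunks
  return 0;
}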
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out) {
return BrotliCompressWithCustomDictionary(0, 0, params, in, out);
}
@ -628,8 +663,9 @@ int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
BrotliCompressor compressor(params);
if (dictsize != 0) compressor.BrotliSetCustomDictionary(dictsize, dict);
while (!final_block) {
in_bytes = CopyOneBlockToRingBuffer(in, &compressor);
final_block = in_bytes == 0 || BrotliInIsFinished(in);
if (!CopyOneBlockToRingBuffer(in, &compressor, &in_bytes, &final_block)) {
return false;
}
out_bytes = 0;
if (!compressor.WriteBrotliData(final_block,
/* force_flush = */ false,

View File

@ -115,9 +115,9 @@ class BrotliCompressor {
// the new output meta-block, or to zero if no new output meta-block was
// created (in this case the processed input data is buffered internally).
// If *out_size is positive, *output points to the start of the output data.
// Returns false if the size of the input data is larger than
// input_block_size() or if there was an error during writing the output.
// If is_last or force_flush is true, an output meta-block is always created.
// Returns false if the size of the input data is larger than
// input_block_size() or if there is no new input data and is_last is false.
bool WriteBrotliData(const bool is_last, const bool force_flush,
size_t* out_size, uint8_t** output);
@ -134,23 +134,23 @@ class BrotliCompressor {
private:
uint8_t* GetBrotliStorage(size_t size);
bool WriteMetaBlockInternal(const bool is_last,
void WriteMetaBlockInternal(const bool is_last,
size_t* out_size,
uint8_t** output);
BrotliParams params_;
int max_backward_distance_;
size_t max_backward_distance_;
Hashers* hashers_;
int hash_type_;
size_t input_pos_;
uint64_t input_pos_;
RingBuffer* ringbuffer_;
size_t cmd_alloc_size_;
Command* commands_;
size_t num_commands_;
int num_literals_;
int last_insert_len_;
size_t last_flush_pos_;
size_t last_processed_pos_;
size_t num_literals_;
size_t last_insert_len_;
uint64_t last_flush_pos_;
uint64_t last_processed_pos_;
int dist_cache_[4];
int saved_dist_cache_[4];
uint8_t last_byte_;

View File

@ -32,8 +32,8 @@ namespace brotli {
namespace {
void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
int num_direct_distance_codes,
int distance_postfix_bits) {
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits) {
if (num_direct_distance_codes == 0 &&
distance_postfix_bits == 0) {
return;
@ -51,21 +51,20 @@ void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
}
bool WriteMetaBlockParallel(const BrotliParams& params,
const size_t block_size,
const uint32_t input_size,
const uint8_t* input_buffer,
const size_t prefix_size,
const uint32_t prefix_size,
const uint8_t* prefix_buffer,
const bool is_first,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer) {
if (block_size == 0) {
if (input_size == 0) {
return false;
}
const size_t input_size = block_size;
// Copy prefix + next input block into a continuous area.
size_t input_pos = prefix_size;
uint32_t input_pos = prefix_size;
// CreateBackwardReferences reads up to 3 bytes past the end of input if the
// mask points past the end of input.
// FindMatchLengthWithLimit could do another 8 bytes look-forward.
@ -75,7 +74,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
// Since we don't have a ringbuffer, masking is a no-op.
// We use one less bit than the full range because some of the code uses
// mask + 1 as the size of the ringbuffer.
const size_t mask = std::numeric_limits<size_t>::max() >> 1;
const uint32_t mask = std::numeric_limits<uint32_t>::max() >> 1;
uint8_t prev_byte = input_pos > 0 ? input[(input_pos - 1) & mask] : 0;
uint8_t prev_byte2 = input_pos > 1 ? input[(input_pos - 2) & mask] : 0;
@ -91,10 +90,10 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
hashers->Init(hash_type);
// Compute backward references.
int last_insert_len = 0;
size_t last_insert_len = 0;
size_t num_commands = 0;
int num_literals = 0;
int max_backward_distance = (1 << params.lgwin) - 16;
size_t num_literals = 0;
uint32_t max_backward_distance = (1 << params.lgwin) - 16;
int dist_cache[4] = { -4, -4, -4, -4 };
Command* commands = static_cast<Command*>(
malloc(sizeof(Command) * ((input_size + 1) >> 1)));
@ -103,7 +102,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
return false;
}
CreateBackwardReferences(
input_size, input_pos,
input_size, input_pos, is_last,
&input[0], mask,
max_backward_distance,
params.quality,
@ -123,10 +122,11 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
// Build the meta-block.
MetaBlockSplit mb;
int num_direct_distance_codes =
uint32_t num_direct_distance_codes =
params.mode == BrotliParams::MODE_FONT ? 12 : 0;
int distance_postfix_bits = params.mode == BrotliParams::MODE_FONT ? 1 : 0;
int literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
uint32_t distance_postfix_bits =
params.mode == BrotliParams::MODE_FONT ? 1 : 0;
ContextType literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
RecomputeDistancePrefixes(commands, num_commands,
num_direct_distance_codes,
distance_postfix_bits);
@ -145,8 +145,8 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
// Set up the temporary output storage.
const size_t max_out_size = 2 * input_size + 500;
std::vector<uint8_t> storage(max_out_size);
int first_byte = 0;
int first_byte_bits = 0;
uint8_t first_byte = 0;
size_t first_byte_bits = 0;
if (is_first) {
if (params.lgwin == 16) {
first_byte = 0;
@ -155,26 +155,23 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
first_byte = 1;
first_byte_bits = 7;
} else {
first_byte = ((params.lgwin - 17) << 1) | 1;
first_byte = static_cast<uint8_t>(((params.lgwin - 17) << 1) | 1);
first_byte_bits = 4;
}
}
storage[0] = static_cast<uint8_t>(first_byte);
int storage_ix = first_byte_bits;
size_t storage_ix = first_byte_bits;
// Store the meta-block to the temporary output.
if (!StoreMetaBlock(&input[0], input_pos, input_size, mask,
prev_byte, prev_byte2,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands, num_commands,
mb,
&storage_ix, &storage[0])) {
free(commands);
return false;
}
StoreMetaBlock(&input[0], input_pos, input_size, mask,
prev_byte, prev_byte2,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands, num_commands,
mb,
&storage_ix, &storage[0]);
free(commands);
// If this is not the last meta-block, store an empty metadata
@ -189,11 +186,9 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
if (input_size + 4 < output_size) {
storage[0] = static_cast<uint8_t>(first_byte);
storage_ix = first_byte_bits;
if (!StoreUncompressedMetaBlock(is_last, &input[0], input_pos, mask,
input_size,
&storage_ix, &storage[0])) {
return false;
}
StoreUncompressedMetaBlock(is_last, &input[0], input_pos, mask,
input_size,
&storage_ix, &storage[0]);
output_size = storage_ix >> 3;
}
@ -239,19 +234,23 @@ int BrotliCompressBufferParallel(BrotliParams params,
params.lgblock = kMaxInputBlockBits;
}
size_t max_input_block_size = 1 << params.lgblock;
size_t max_prefix_size = 1u << params.lgwin;
std::vector<std::vector<uint8_t> > compressed_pieces;
// Compress block-by-block independently.
for (size_t pos = 0; pos < input_size; ) {
size_t input_block_size = std::min(max_input_block_size, input_size - pos);
uint32_t input_block_size =
static_cast<uint32_t>(std::min(max_input_block_size, input_size - pos));
uint32_t prefix_size =
static_cast<uint32_t>(std::min(max_prefix_size, pos));
size_t out_size = input_block_size + (input_block_size >> 3) + 1024;
std::vector<uint8_t> out(out_size);
if (!WriteMetaBlockParallel(params,
input_block_size,
&input_buffer[pos],
pos,
input_buffer,
prefix_size,
&input_buffer[pos - prefix_size],
pos == 0,
pos + input_block_size == input_size,
&out_size,

View File

@ -19,24 +19,6 @@
namespace brotli {
namespace {
struct HuffmanTree {
HuffmanTree(int count, int16_t left, int16_t right)
: total_count_(count),
index_left_(left),
index_right_or_value_(right) {
}
int total_count_;
int16_t index_left_;
int16_t index_right_or_value_;
};
// Sort the root nodes, least popular first.
bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) {
return v0.total_count_ < v1.total_count_;
}
void SetDepth(const HuffmanTree &p,
HuffmanTree *pool,
uint8_t *depth,
@ -50,8 +32,6 @@ void SetDepth(const HuffmanTree &p,
}
}
} // namespace
// This function will create a Huffman tree.
//
// The catch here is that the tree cannot be arbitrarily deep.
@ -67,26 +47,27 @@ void SetDepth(const HuffmanTree &p,
// we are not planning to use this with extremely long blocks.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const int *data,
const int length,
void CreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
uint8_t *depth) {
// For block sizes below 64 kB, we never need to do a second iteration
// of this loop. Probably all of our block sizes will be smaller than
// that, so this loop is mostly of academic interest. If we actually
// would need this, we would be better off with the Katajainen algorithm.
for (int count_limit = 1; ; count_limit *= 2) {
for (uint32_t count_limit = 1; ; count_limit *= 2) {
std::vector<HuffmanTree> tree;
tree.reserve(2 * length + 1);
for (int i = length - 1; i >= 0; --i) {
for (size_t i = length; i != 0;) {
--i;
if (data[i]) {
const int count = std::max(data[i], count_limit);
const uint32_t count = std::max(data[i], count_limit);
tree.push_back(HuffmanTree(count, -1, static_cast<int16_t>(i)));
}
}
const int n = static_cast<int>(tree.size());
const size_t n = tree.size();
if (n == 1) {
depth[tree[0].index_right_or_value_] = 1; // Only one element.
break;
@ -101,14 +82,14 @@ void CreateHuffmanTree(const int *data,
// (n+1). These are naturally in ascending order.
// [2n]: we add a sentinel at the end as well.
// There will be (2n+1) elements at the end.
const HuffmanTree sentinel(std::numeric_limits<int>::max(), -1, -1);
const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
tree.push_back(sentinel);
tree.push_back(sentinel);
int i = 0; // Points to the next leaf node.
int j = n + 1; // Points to the next non-leaf node.
for (int k = n - 1; k > 0; --k) {
int left, right;
size_t i = 0; // Points to the next leaf node.
size_t j = n + 1; // Points to the next non-leaf node.
for (size_t k = n - 1; k != 0; --k) {
size_t left, right;
if (tree[i].total_count_ <= tree[j].total_count_) {
left = i;
++i;
@ -125,7 +106,7 @@ void CreateHuffmanTree(const int *data,
}
// The sentinel node becomes the parent node.
int j_end = static_cast<int>(tree.size()) - 1;
size_t j_end = tree.size() - 1;
tree[j_end].total_count_ =
tree[left].total_count_ + tree[right].total_count_;
tree[j_end].index_left_ = static_cast<int16_t>(left);
@ -134,7 +115,7 @@ void CreateHuffmanTree(const int *data,
// Add back the last sentinel node.
tree.push_back(sentinel);
}
BROTLI_DCHECK(tree.size() == 2 * n + 1);
assert(tree.size() == 2 * n + 1);
SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
// We need to pack the Huffman tree in tree_limit bits.
@ -146,7 +127,7 @@ void CreateHuffmanTree(const int *data,
}
}
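A minimal usage sketch of the retyped pair of entry points (BuildLiteralCode is a hypothetical helper, not part of this commit; assumes these declarations live in entropy_encode.h and a 15-bit depth limit, the deepest code length the format serializes):

#include <stdint.h>
#include <stddef.h>
#include "./entropy_encode.h"

// Build a depth-limited prefix code for a 256-symbol literal alphabet
// using the new unsigned signatures.
void BuildLiteralCode(const uint32_t histogram[256],
                      uint8_t depth[256], uint16_t bits[256]) {
  // Depths come out in [1..15]; 0 marks an absent symbol.
  brotli::CreateHuffmanTree(histogram, 256, 15, depth);
  // Turn the depths into the actual (bit-reversed) code words.
  brotli::ConvertBitDepthsToSymbols(depth, 256, bits);
}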
void Reverse(std::vector<uint8_t>* v, int start, int end) {
void Reverse(std::vector<uint8_t>* v, size_t start, size_t end) {
--end;
while (start < end) {
uint8_t tmp = (*v)[start];
@ -160,9 +141,10 @@ void Reverse(std::vector<uint8_t>* v, int start, int end) {
void WriteHuffmanTreeRepetitions(
const uint8_t previous_value,
const uint8_t value,
int repetitions,
size_t repetitions,
std::vector<uint8_t> *tree,
std::vector<uint8_t> *extra_bits_data) {
assert(repetitions > 0);
if (previous_value != value) {
tree->push_back(value);
extra_bits_data->push_back(0);
@ -174,26 +156,29 @@ void WriteHuffmanTreeRepetitions(
--repetitions;
}
if (repetitions < 3) {
for (int i = 0; i < repetitions; ++i) {
for (size_t i = 0; i < repetitions; ++i) {
tree->push_back(value);
extra_bits_data->push_back(0);
}
} else {
repetitions -= 3;
int start = static_cast<int>(tree->size());
while (repetitions >= 0) {
size_t start = tree->size();
while (true) {
tree->push_back(16);
extra_bits_data->push_back(repetitions & 0x3);
repetitions >>= 2;
if (repetitions == 0) {
break;
}
--repetitions;
}
Reverse(tree, start, static_cast<int>(tree->size()));
Reverse(extra_bits_data, start, static_cast<int>(tree->size()));
Reverse(tree, start, tree->size());
Reverse(extra_bits_data, start, tree->size());
}
}
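As a worked example of the rewritten loop: a fresh run of ten equal non-zero values (previous_value != value) first emits the value itself, leaving repetitions = 9; the else branch then encodes 9 - 3 = 6 as two code-16 symbols with extra bits 2 and 0, generated least-significant digit first and reversed to (16, extra 0), (16, extra 2). Under the format's repeat rule (RFC 7932), the first 16 expands to 3 + 0 = 3 repeats and the consecutive second one to 4 * (3 - 2) + 3 + 2 = 9 in total, giving 1 + 9 = 10 values.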
void WriteHuffmanTreeRepetitionsZeros(
int repetitions,
size_t repetitions,
std::vector<uint8_t> *tree,
std::vector<uint8_t> *extra_bits_data) {
if (repetitions == 11) {
@ -202,32 +187,36 @@ void WriteHuffmanTreeRepetitionsZeros(
--repetitions;
}
if (repetitions < 3) {
for (int i = 0; i < repetitions; ++i) {
for (size_t i = 0; i < repetitions; ++i) {
tree->push_back(0);
extra_bits_data->push_back(0);
}
} else {
repetitions -= 3;
int start = static_cast<int>(tree->size());
while (repetitions >= 0) {
size_t start = tree->size();
while (true) {
tree->push_back(17);
extra_bits_data->push_back(repetitions & 0x7);
repetitions >>= 3;
if (repetitions == 0) {
break;
}
--repetitions;
}
Reverse(tree, start, static_cast<int>(tree->size()));
Reverse(extra_bits_data, start, static_cast<int>(tree->size()));
Reverse(tree, start, tree->size());
Reverse(extra_bits_data, start, tree->size());
}
}
int OptimizeHuffmanCountsForRle(int length, int* counts) {
int nonzero_count = 0;
int stride;
int limit;
int sum;
bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) {
size_t nonzero_count = 0;
size_t stride;
size_t limit;
size_t sum;
const size_t streak_limit = 1240;
uint8_t* good_for_rle;
// Let's make the Huffman code more compatible with rle encoding.
int i;
size_t i;
for (i = 0; i < length; i++) {
if (counts[i]) {
++nonzero_count;
@ -236,18 +225,16 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) {
if (nonzero_count < 16) {
return 1;
}
for (; length >= 0; --length) {
if (length == 0) {
return 1; // All zeros.
}
if (counts[length - 1] != 0) {
// Now counts[0..length - 1] does not have trailing zeros.
break;
}
while (length != 0 && counts[length - 1] == 0) {
--length;
}
if (length == 0) {
return 1; // All zeros.
}
// Now counts[0..length - 1] does not have trailing zeros.
{
int nonzeros = 0;
int smallest_nonzero = 1 << 30;
size_t nonzeros = 0;
uint32_t smallest_nonzero = 1 << 30;
for (i = 0; i < length; ++i) {
if (counts[i] != 0) {
++nonzeros;
@ -260,7 +247,7 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) {
// A small histogram will model it well.
return 1;
}
int zeros = length - nonzeros;
size_t zeros = length - nonzeros;
if (smallest_nonzero < 4) {
if (zeros < 6) {
for (i = 1; i < length - 1; ++i) {
@ -284,41 +271,40 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) {
// Let's not spoil any of the existing good rle codes.
// Mark any seq of 0's that is longer than 5 as a good_for_rle.
// Mark any seq of non-0's that is longer than 7 as a good_for_rle.
int symbol = counts[0];
int stride = 0;
for (i = 0; i < length + 1; ++i) {
uint32_t symbol = counts[0];
size_t step = 0;
for (i = 0; i <= length; ++i) {
if (i == length || counts[i] != symbol) {
if ((symbol == 0 && stride >= 5) ||
(symbol != 0 && stride >= 7)) {
int k;
for (k = 0; k < stride; ++k) {
if ((symbol == 0 && step >= 5) ||
(symbol != 0 && step >= 7)) {
size_t k;
for (k = 0; k < step; ++k) {
good_for_rle[i - k - 1] = 1;
}
}
stride = 1;
step = 1;
if (i != length) {
symbol = counts[i];
}
} else {
++stride;
++step;
}
}
}
// 3) Let's replace those population counts that lead to more rle codes.
// Math here is in 24.8 fixed point representation.
const int streak_limit = 1240;
stride = 0;
limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
sum = 0;
for (i = 0; i < length + 1; ++i) {
for (i = 0; i <= length; ++i) {
if (i == length || good_for_rle[i] ||
(i != 0 && good_for_rle[i - 1]) ||
abs(256 * counts[i] - limit) >= streak_limit) {
(256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) {
if (stride >= 4 || (stride >= 3 && sum == 0)) {
int k;
size_t k;
// The stride must end, collapse what we have, if we have enough (4).
int count = (sum + stride / 2) / stride;
if (count < 1) {
size_t count = (sum + stride / 2) / stride;
if (count == 0) {
count = 1;
}
if (sum == 0) {
@ -328,7 +314,7 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) {
for (k = 0; k < stride; ++k) {
// We don't want to change the value at counts[i],
// which already belongs to the next stride. Hence the - 1.
counts[i - k - 1] = count;
counts[i - k - 1] = static_cast<uint32_t>(count);
}
}
stride = 0;
@ -358,17 +344,17 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) {
return 1;
}
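A note on the rewritten threshold test above: with unsigned arithmetic, abs(256 * counts[i] - limit) >= streak_limit is expressed without abs() by shifting the window, since |x - limit| < streak_limit is equivalent to x - limit + streak_limit landing in [0, 2 * streak_limit); when x is far below limit, the unsigned expression wraps around to a huge value and still correctly fails the range check.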
static void DecideOverRleUse(const uint8_t* depth, const int length,
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
bool *use_rle_for_non_zero,
bool *use_rle_for_zero) {
int total_reps_zero = 0;
int total_reps_non_zero = 0;
int count_reps_zero = 0;
int count_reps_non_zero = 0;
for (int i = 0; i < length;) {
const int value = depth[i];
int reps = 1;
for (int k = i + 1; k < length && depth[k] == value; ++k) {
size_t total_reps_zero = 0;
size_t total_reps_non_zero = 0;
size_t count_reps_zero = 1;
size_t count_reps_non_zero = 1;
for (size_t i = 0; i < length;) {
const uint8_t value = depth[i];
size_t reps = 1;
for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
++reps;
}
if (reps >= 3 && value == 0) {
@ -381,21 +367,19 @@ static void DecideOverRleUse(const uint8_t* depth, const int length,
}
i += reps;
}
total_reps_non_zero -= count_reps_non_zero * 2;
total_reps_zero -= count_reps_zero * 2;
*use_rle_for_non_zero = total_reps_non_zero > 2;
*use_rle_for_zero = total_reps_zero > 2;
*use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
*use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
}
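The counter changes above preserve the old decision rule in unsigned arithmetic: previously the code computed total_reps -= count_reps * 2 and tested total_reps > 2, i.e. total > 2 * count + 2. Seeding each counter at 1 and testing total_reps > count_reps * 2 checks the same inequality, total > 2 * old_count + 2, while sidestepping a subtraction that with size_t would silently rely on total >= 2 * count never being violated.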
void WriteHuffmanTree(const uint8_t* depth,
uint32_t length,
size_t length,
std::vector<uint8_t> *tree,
std::vector<uint8_t> *extra_bits_data) {
uint8_t previous_value = 8;
// Throw away trailing zeros.
uint32_t new_length = length;
for (uint32_t i = 0; i < length; ++i) {
size_t new_length = length;
for (size_t i = 0; i < length; ++i) {
if (depth[length - i - 1] == 0) {
--new_length;
} else {
@ -414,12 +398,12 @@ void WriteHuffmanTree(const uint8_t* depth,
}
// Actual rle coding.
for (uint32_t i = 0; i < new_length;) {
for (size_t i = 0; i < new_length;) {
const uint8_t value = depth[i];
int reps = 1;
size_t reps = 1;
if ((value != 0 && use_rle_for_non_zero) ||
(value == 0 && use_rle_for_zero)) {
for (uint32_t k = i + 1; k < new_length && depth[k] == value; ++k) {
for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
++reps;
}
}
@ -453,13 +437,15 @@ uint16_t ReverseBits(int num_bits, uint16_t bits) {
} // namespace
void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits) {
void ConvertBitDepthsToSymbols(const uint8_t *depth,
size_t len,
uint16_t *bits) {
// In Brotli, all bit depths are [1..15]
// 0 bit depth means that the symbol does not exist.
const int kMaxBits = 16; // 0..15 are values for bits
uint16_t bl_count[kMaxBits] = { 0 };
{
for (int i = 0; i < len; ++i) {
for (size_t i = 0; i < len; ++i) {
++bl_count[depth[i]];
}
bl_count[0] = 0;
@ -473,7 +459,7 @@ void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits) {
next_code[bits] = static_cast<uint16_t>(code);
}
}
for (int i = 0; i < len; ++i) {
for (size_t i = 0; i < len; ++i) {
if (depth[i]) {
bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
}
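To illustrate the canonical-code construction: for depths {1, 2, 3, 3} the bl_count histogram is {0, 1, 1, 2}, so next_code becomes 0b0 for length 1, 0b10 for length 2 and 0b110 for length 3, assigning the four symbols the codes 0, 10, 110, 111. ReverseBits then flips each code within its own length (10 -> 01, 110 -> 011, 111 -> 111) because the bit stream is written least-significant-bit first, yielding bits[] = {0, 1, 3, 7}.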

View File

@ -17,6 +17,26 @@
namespace brotli {
// A node of a Huffman tree.
struct HuffmanTree {
HuffmanTree(uint32_t count, int16_t left, int16_t right)
: total_count_(count),
index_left_(left),
index_right_or_value_(right) {
}
uint32_t total_count_;
int16_t index_left_;
int16_t index_right_or_value_;
};
// Sort the root nodes, least popular first.
inline bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) {
return v0.total_count_ < v1.total_count_;
}
void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
uint8_t *depth, uint8_t level);
// This function will create a Huffman tree.
//
// The (data,length) contains the population counts.
@ -26,8 +46,8 @@ namespace brotli {
// the symbol.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const int *data,
const int length,
void CreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
uint8_t *depth);
@ -37,18 +57,20 @@ void CreateHuffmanTree(const int *data,
//
// length contains the size of the histogram.
// counts contains the population counts.
int OptimizeHuffmanCountsForRle(int length, int* counts);
bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts);
// Write a Huffman tree from bit depths into the bitstream representation
// of a Huffman tree. The generated Huffman tree is to be compressed once
// more using a Huffman tree
void WriteHuffmanTree(const uint8_t* depth,
uint32_t num,
size_t num,
std::vector<uint8_t> *tree,
std::vector<uint8_t> *extra_bits_data);
// Get the actual bit values for a tree of bit depths.
void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits);
void ConvertBitDepthsToSymbols(const uint8_t *depth,
size_t len,
uint16_t *bits);
template<int kSize>
struct EntropyCode {

View File

@ -16,49 +16,16 @@
namespace brotli {
// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
inline int Log2Floor(uint32_t n) {
#if defined(__clang__) || \
(defined(__GNUC__) && \
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4))
return n == 0 ? -1 : 31 ^ __builtin_clz(n);
#else
if (n == 0)
return -1;
int log = 0;
uint32_t value = n;
for (int i = 4; i >= 0; --i) {
int shift = (1 << i);
uint32_t x = value >> shift;
if (x != 0) {
value = x;
log += shift;
}
}
assert(value == 1);
return log;
#endif
}
static inline int Log2FloorNonZero(uint32_t n) {
static inline uint32_t Log2FloorNonZero(size_t n) {
#ifdef __GNUC__
return 31 ^ __builtin_clz(n);
return 31u ^ static_cast<uint32_t>(__builtin_clz(static_cast<uint32_t>(n)));
#else
unsigned int result = 0;
uint32_t result = 0;
while (n >>= 1) result++;
return result;
#endif
}
// Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0.
inline int Log2Ceiling(uint32_t n) {
int floor = Log2Floor(n);
if (n == (n &~ (n - 1))) // zero or a power of two
return floor;
else
return floor + 1;
}
// A lookup table for small values of log2(int) to be used in entropy
// computation.
//
@ -153,8 +120,8 @@ static const float kLog2Table[] = {
};
// Faster logarithm for small integers, with the property of log2(0) == 0.
static inline double FastLog2(int v) {
if (v < (int)(sizeof(kLog2Table) / sizeof(kLog2Table[0]))) {
static inline double FastLog2(size_t v) {
if (v < sizeof(kLog2Table) / sizeof(kLog2Table[0])) {
return kLog2Table[v];
}
#if defined(_MSC_VER) && _MSC_VER <= 1600

View File

@ -18,10 +18,10 @@ namespace brotli {
// Separate implementation for little-endian 64-bit targets, for speed.
#if defined(__GNUC__) && defined(_LP64) && defined(IS_LITTLE_ENDIAN)
static inline int FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
int matched = 0;
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
size_t matched = 0;
size_t limit2 = (limit >> 3) + 1; // + 1 is for pre-decrement in while
while (PREDICT_TRUE(--limit2)) {
if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
@ -31,7 +31,7 @@ static inline int FindMatchLengthWithLimit(const uint8_t* s1,
} else {
uint64_t x =
BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
int matching_bits = __builtin_ctzll(x);
size_t matching_bits = static_cast<size_t>(__builtin_ctzll(x));
matched += matching_bits >> 3;
return matched;
}
@ -48,10 +48,10 @@ static inline int FindMatchLengthWithLimit(const uint8_t* s1,
return matched;
}
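The 64-bit fast path is worth a concrete trace: comparing "abcdefgh" against "abcXefgh" on a little-endian target, the XOR of the two 8-byte loads has its lowest set bit inside byte 3, so __builtin_ctzll(x) returns a value in [24, 31] and matching_bits >> 3 adds exactly the 3 bytes that matched before the mismatch.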
#else
static inline int FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
int matched = 0;
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
size_t matched = 0;
const uint8_t* s2_limit = s2 + limit;
const uint8_t* s2_ptr = s2;
// Find out how long the match is. We loop over the data 32 bits at a

View File

@ -10,12 +10,10 @@
#ifndef BROTLI_ENC_HASH_H_
#define BROTLI_ENC_HASH_H_
#include <string.h>
#include <sys/types.h>
#include <algorithm>
#include <cstdlib>
#include <memory>
#include <string>
#include <cstring>
#include <vector>
#include "./dictionary_hash.h"
#include "./fast_log.h"
@ -28,15 +26,17 @@
namespace brotli {
static const int kDistanceCacheIndex[] = {
static const uint32_t kDistanceCacheIndex[] = {
0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
};
static const int kDistanceCacheOffset[] = {
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
};
static const int kCutoffTransformsCount = 10;
static const int kCutoffTransforms[] = {0, 12, 27, 23, 42, 63, 56, 48, 59, 64};
static const uint32_t kCutoffTransformsCount = 10;
static const uint8_t kCutoffTransforms[] = {
0, 12, 27, 23, 42, 63, 56, 48, 59, 64
};
// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
@ -68,41 +68,47 @@ inline uint32_t Hash(const uint8_t *data) {
// This function is used to sometimes discard a longer backward reference
// when it is only slightly longer and the bit cost of encoding it exceeds
// the bits saved on the literals it replaces.
inline double BackwardReferenceScore(int copy_length,
int backward_reference_offset) {
return 5.4 * copy_length - 1.20 * Log2Floor(backward_reference_offset);
//
// backward_reference_offset MUST be positive.
inline double BackwardReferenceScore(size_t copy_length,
size_t backward_reference_offset) {
return 5.4 * static_cast<double>(copy_length) -
1.20 * Log2FloorNonZero(backward_reference_offset);
}
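For intuition on the weights: a 6-byte copy at distance 64 scores 5.4 * 6 - 1.2 * 6 = 25.2, while a 5-byte copy at distance 2 scores 5.4 * 5 - 1.2 * 1 = 25.8, so the heuristic prefers the shorter but much closer match -- exactly the discard behavior the comment describes. The switch to Log2FloorNonZero is safe because a zero offset is now excluded by contract.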
inline double BackwardReferenceScoreUsingLastDistance(int copy_length,
int distance_short_code) {
inline double BackwardReferenceScoreUsingLastDistance(size_t copy_length,
size_t distance_short_code) {
static const double kDistanceShortCodeBitCost[16] = {
-0.6, 0.95, 1.17, 1.27,
0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
1.05, 1.05, 1.15, 1.15, 1.25, 1.25
};
return 5.4 * copy_length - kDistanceShortCodeBitCost[distance_short_code];
return 5.4 * static_cast<double>(copy_length) -
kDistanceShortCodeBitCost[distance_short_code];
}
struct BackwardMatch {
BackwardMatch() : distance(0), length_and_code(0) {}
BackwardMatch(int dist, int len)
: distance(dist), length_and_code((len << 5)) {}
BackwardMatch(size_t dist, size_t len)
: distance(static_cast<uint32_t>(dist))
, length_and_code(static_cast<uint32_t>(len << 5)) {}
BackwardMatch(int dist, int len, int len_code)
: distance(dist),
length_and_code((len << 5) | (len == len_code ? 0 : len_code)) {}
BackwardMatch(size_t dist, size_t len, size_t len_code)
: distance(static_cast<uint32_t>(dist))
, length_and_code(static_cast<uint32_t>(
(len << 5) | (len == len_code ? 0 : len_code))) {}
int length() const {
size_t length() const {
return length_and_code >> 5;
}
int length_code() const {
int code = length_and_code & 31;
size_t length_code() const {
size_t code = length_and_code & 31;
return code ? code : length();
}
int distance;
int length_and_code;
uint32_t distance;
uint32_t length_and_code;
};
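A quick check of the packing: BackwardMatch(100, 10, 12) stores length_and_code = (10 << 5) | 12 = 332, so length() recovers 332 >> 5 = 10 and length_code() recovers 332 & 31 = 12; when len == len_code the low five bits are zero and length_code() falls back to length(). The five-bit field assumes len_code < 32.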
// A (forgetful) hash table to the data seen by the compressor, to
@ -146,27 +152,27 @@ class HashLongestMatchQuickly {
inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,
const size_t ring_buffer_mask,
const int* __restrict distance_cache,
const uint32_t cur_ix,
const int max_length,
const uint32_t max_backward,
int * __restrict best_len_out,
int * __restrict best_len_code_out,
int * __restrict best_distance_out,
const size_t cur_ix,
const size_t max_length,
const size_t max_backward,
size_t * __restrict best_len_out,
size_t * __restrict best_len_code_out,
size_t * __restrict best_distance_out,
double* __restrict best_score_out) {
const int best_len_in = *best_len_out;
const size_t best_len_in = *best_len_out;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
int compare_char = ring_buffer[cur_ix_masked + best_len_in];
double best_score = *best_score_out;
int best_len = best_len_in;
int cached_backward = distance_cache[0];
uint32_t prev_ix = cur_ix - cached_backward;
size_t best_len = best_len_in;
size_t cached_backward = static_cast<size_t>(distance_cache[0]);
size_t prev_ix = cur_ix - cached_backward;
bool match_found = false;
if (prev_ix < cur_ix) {
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
if (compare_char == ring_buffer[prev_ix + best_len]) {
int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
best_len = len;
@ -187,7 +193,7 @@ class HashLongestMatchQuickly {
if (kBucketSweep == 1) {
// Only one to look for, don't bother to prepare for a loop.
prev_ix = buckets_[key];
uint32_t backward = cur_ix - prev_ix;
size_t backward = cur_ix - prev_ix;
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
if (compare_char != ring_buffer[prev_ix + best_len_in]) {
return false;
@ -195,9 +201,9 @@ class HashLongestMatchQuickly {
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
return false;
}
const int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
*best_len_out = len;
*best_len_code_out = len;
@ -209,7 +215,7 @@ class HashLongestMatchQuickly {
uint32_t *bucket = buckets_ + key;
prev_ix = *bucket++;
for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
const uint32_t backward = cur_ix - prev_ix;
const size_t backward = cur_ix - prev_ix;
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
if (compare_char != ring_buffer[prev_ix + best_len]) {
continue;
@ -217,10 +223,9 @@ class HashLongestMatchQuickly {
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
continue;
}
const int len =
FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
const double score = BackwardReferenceScore(len, backward);
if (best_score < score) {
@ -242,19 +247,20 @@ class HashLongestMatchQuickly {
const uint32_t dict_key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1;
const uint16_t v = kStaticDictionaryHash[dict_key];
if (v > 0) {
const int len = v & 31;
const int dist = v >> 5;
const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
const uint32_t len = v & 31;
const uint32_t dist = v >> 5;
const size_t offset =
kBrotliDictionaryOffsetsByLength[len] + len * dist;
if (len <= max_length) {
const int matchlen =
const size_t matchlen =
FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
&kBrotliDictionary[offset], len);
if (matchlen > len - kCutoffTransformsCount && matchlen > 0) {
const int transform_id = kCutoffTransforms[len - matchlen];
const int word_id =
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
const size_t transform_id = kCutoffTransforms[len - matchlen];
const size_t word_id =
transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
dist;
const int backward = max_backward + word_id + 1;
const size_t backward = max_backward + word_id + 1;
const double score = BackwardReferenceScore(matchlen, backward);
if (best_score < score) {
++num_dict_matches_;
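The rearranged dictionary condition above (matchlen + kCutoffTransformsCount > len instead of matchlen > len - kCutoffTransformsCount) is the unsigned-safe form: now that len is unsigned, len - kCutoffTransformsCount would wrap to a huge value whenever len < 10 and accept bogus matches, so the subtraction moves to the other side as an addition; behavior is unchanged for the lengths that previously compared as ints. The same rewrite appears in HashLongestMatch below.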
@ -295,7 +301,7 @@ class HashLongestMatchQuickly {
};
// The maximum length for which the zopflification uses distinct distances.
static const int kMaxZopfliLen = 325;
static const uint16_t kMaxZopfliLen = 325;
// A (forgetful) hash table to the data seen by the compressor, to
// help create backward references to previous data.
@ -339,41 +345,42 @@ class HashLongestMatch {
bool FindLongestMatch(const uint8_t * __restrict data,
const size_t ring_buffer_mask,
const int* __restrict distance_cache,
const uint32_t cur_ix,
const int max_length,
const uint32_t max_backward,
int * __restrict best_len_out,
int * __restrict best_len_code_out,
int * __restrict best_distance_out,
const size_t cur_ix,
const size_t max_length,
const size_t max_backward,
size_t * __restrict best_len_out,
size_t * __restrict best_len_code_out,
size_t * __restrict best_distance_out,
double * __restrict best_score_out) {
*best_len_code_out = 0;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
bool match_found = false;
// Don't accept a short copy from far away.
double best_score = *best_score_out;
int best_len = *best_len_out;
size_t best_len = *best_len_out;
*best_len_out = 0;
// Try last distance first.
for (int i = 0; i < kNumLastDistancesToCheck; ++i) {
const int idx = kDistanceCacheIndex[i];
const int backward = distance_cache[idx] + kDistanceCacheOffset[i];
uint32_t prev_ix = cur_ix - backward;
for (size_t i = 0; i < kNumLastDistancesToCheck; ++i) {
const size_t idx = kDistanceCacheIndex[i];
const size_t backward =
static_cast<size_t>(distance_cache[idx] + kDistanceCacheOffset[i]);
size_t prev_ix = static_cast<size_t>(cur_ix - backward);
if (prev_ix >= cur_ix) {
continue;
}
if (PREDICT_FALSE(backward > (int)max_backward)) {
if (PREDICT_FALSE(backward > max_backward)) {
continue;
}
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const int len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
&data[cur_ix_masked],
max_length);
if (len >= 3 || (len == 2 && i < 2)) {
// Comparing for >= 2 does not change the semantics, but just saves a
// few unnecessary binary logarithms in the backward reference score,
@ -392,22 +399,23 @@ class HashLongestMatch {
}
const uint32_t key = HashBytes(&data[cur_ix_masked]);
const uint32_t * __restrict const bucket = &buckets_[key][0];
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (int i = num_[key] - 1; i >= down; --i) {
uint32_t prev_ix = bucket[i & kBlockMask];
const uint32_t backward = cur_ix - prev_ix;
const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (size_t i = num_[key]; i > down;) {
--i;
size_t prev_ix = bucket[i & kBlockMask];
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
break;
}
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const int len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
&data[cur_ix_masked],
max_length);
if (len >= 4) {
// Comparing for >= 3 does not change the semantics, but just saves a
// few unnecessary binary logarithms in the backward reference
@ -425,24 +433,25 @@ class HashLongestMatch {
}
}
if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
uint32_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1;
size_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1;
for (int k = 0; k < 2; ++k, ++dict_key) {
++num_dict_lookups_;
const uint16_t v = kStaticDictionaryHash[dict_key];
if (v > 0) {
const int len = v & 31;
const int dist = v >> 5;
const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
const size_t len = v & 31;
const size_t dist = v >> 5;
const size_t offset =
kBrotliDictionaryOffsetsByLength[len] + len * dist;
if (len <= max_length) {
const int matchlen =
const size_t matchlen =
FindMatchLengthWithLimit(&data[cur_ix_masked],
&kBrotliDictionary[offset], len);
if (matchlen > len - kCutoffTransformsCount && matchlen > 0) {
const int transform_id = kCutoffTransforms[len - matchlen];
const int word_id =
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
const size_t transform_id = kCutoffTransforms[len - matchlen];
const size_t word_id =
transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
dist;
const int backward = max_backward + word_id + 1;
const size_t backward = max_backward + word_id + 1;
double score = BackwardReferenceScore(matchlen, backward);
if (best_score < score) {
++num_dict_matches_;
@ -471,19 +480,18 @@ class HashLongestMatch {
// longest match.
//
// Requires that at least kMaxZopfliLen space is available in matches.
void FindAllMatches(const uint8_t* data,
const size_t ring_buffer_mask,
const uint32_t cur_ix,
const int max_length,
const uint32_t max_backward,
int* num_matches,
BackwardMatch* matches) const {
size_t FindAllMatches(const uint8_t* data,
const size_t ring_buffer_mask,
const size_t cur_ix,
const size_t max_length,
const size_t max_backward,
BackwardMatch* matches) const {
BackwardMatch* const orig_matches = matches;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
int best_len = 1;
int stop = static_cast<int>(cur_ix) - 64;
if (stop < 0) { stop = 0; }
for (int i = cur_ix - 1; i > stop && best_len <= 2; --i) {
size_t best_len = 1;
size_t stop = cur_ix - 64;
if (cur_ix < 64) { stop = 0; }
for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
size_t prev_ix = i;
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
@ -494,33 +502,7 @@ class HashLongestMatch {
data[cur_ix_masked + 1] != data[prev_ix + 1]) {
continue;
}
const int len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len > best_len) {
best_len = len;
if (len > kMaxZopfliLen) {
matches = orig_matches;
}
*matches++ = BackwardMatch(static_cast<int>(backward), len);
}
}
const uint32_t key = HashBytes(&data[cur_ix_masked]);
const uint32_t * __restrict const bucket = &buckets_[key][0];
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (int i = num_[key] - 1; i >= down; --i) {
uint32_t prev_ix = bucket[i & kBlockMask];
const uint32_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
break;
}
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const int len =
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len > best_len) {
@ -531,20 +513,48 @@ class HashLongestMatch {
*matches++ = BackwardMatch(backward, len);
}
}
std::vector<int> dict_matches(kMaxDictionaryMatchLen + 1, kInvalidMatch);
int minlen = std::max<int>(4, best_len + 1);
const uint32_t key = HashBytes(&data[cur_ix_masked]);
const uint32_t * __restrict const bucket = &buckets_[key][0];
const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (size_t i = num_[key]; i > down;) {
--i;
size_t prev_ix = bucket[i & kBlockMask];
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len > best_len) {
best_len = len;
if (len > kMaxZopfliLen) {
matches = orig_matches;
}
*matches++ = BackwardMatch(backward, len);
}
}
std::vector<uint32_t> dict_matches(kMaxDictionaryMatchLen + 1,
kInvalidMatch);
size_t minlen = std::max<size_t>(4, best_len + 1);
if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
&dict_matches[0])) {
int maxlen = std::min<int>(kMaxDictionaryMatchLen, max_length);
for (int l = minlen; l <= maxlen; ++l) {
int dict_id = dict_matches[l];
size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
for (size_t l = minlen; l <= maxlen; ++l) {
uint32_t dict_id = dict_matches[l];
if (dict_id < kInvalidMatch) {
*matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
dict_id & 31);
}
}
}
*num_matches += static_cast<int>(matches - orig_matches);
return static_cast<size_t>(matches - orig_matches);
}
enum { kHashLength = 4 };

View File

@ -28,7 +28,7 @@ void BuildHistograms(
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const std::vector<int>& context_modes,
const std::vector<ContextType>& context_modes,
std::vector<HistogramLiteral>* literal_histograms,
std::vector<HistogramCommand>* insert_and_copy_histograms,
std::vector<HistogramDistance>* copy_dist_histograms) {
@ -41,9 +41,9 @@ void BuildHistograms(
insert_and_copy_it.Next();
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
cmd.cmd_prefix_);
for (int j = 0; j < cmd.insert_len_; ++j) {
for (size_t j = cmd.insert_len_; j != 0; --j) {
literal_it.Next();
int context = (literal_it.type_ << kLiteralContextBits) +
size_t context = (literal_it.type_ << kLiteralContextBits) +
Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
(*literal_histograms)[context].Add(ringbuffer[pos & mask]);
prev_byte2 = prev_byte;
@ -56,7 +56,7 @@ void BuildHistograms(
prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
dist_it.Next();
int context = (dist_it.type_ << kDistanceContextBits) +
size_t context = (dist_it.type_ << kDistanceContextBits) +
cmd.DistanceContext();
(*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
}

View File

@ -9,10 +9,10 @@
#ifndef BROTLI_ENC_HISTOGRAM_H_
#define BROTLI_ENC_HISTOGRAM_H_
#include <string.h>
#include <cstring>
#include <limits>
#include <vector>
#include <utility>
#include "./context.h"
#include "./command.h"
#include "./fast_log.h"
#include "./prefix.h"
@ -33,29 +33,29 @@ struct Histogram {
total_count_ = 0;
bit_cost_ = std::numeric_limits<double>::infinity();
}
void Add(int val) {
void Add(size_t val) {
++data_[val];
++total_count_;
}
void Remove(int val) {
void Remove(size_t val) {
--data_[val];
--total_count_;
}
template<typename DataType>
void Add(const DataType *p, size_t n) {
total_count_ += static_cast<int>(n);
total_count_ += n;
n += 1;
while(--n) ++data_[*p++];
}
void AddHistogram(const Histogram& v) {
total_count_ += v.total_count_;
for (int i = 0; i < kDataSize; ++i) {
for (size_t i = 0; i < kDataSize; ++i) {
data_[i] += v.data_[i];
}
}
int data_[kDataSize];
int total_count_;
uint32_t data_[kDataSize];
size_t total_count_;
double bit_cost_;
};
@ -70,8 +70,8 @@ typedef Histogram<272> HistogramContextMap;
// Block type histogram, 256 block types + 2 special symbols.
typedef Histogram<258> HistogramBlockType;
static const int kLiteralContextBits = 6;
static const int kDistanceContextBits = 2;
static const size_t kLiteralContextBits = 6;
static const size_t kDistanceContextBits = 2;
void BuildHistograms(
const Command* cmds,
@ -84,7 +84,7 @@ void BuildHistograms(
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const std::vector<int>& context_modes,
const std::vector<ContextType>& context_modes,
std::vector<HistogramLiteral>* literal_histograms,
std::vector<HistogramCommand>* insert_and_copy_histograms,
std::vector<HistogramDistance>* copy_dist_histograms);

View File

@ -17,29 +17,29 @@
namespace brotli {
static int UTF8Position(int last, int c, int clamp) {
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
if (c < 128) {
return 0; // Next one is the 'Byte 1' again.
} else if (c >= 192) {
return std::min(1, clamp); // Next one is the 'Byte 2' of utf-8 encoding.
} else if (c >= 192) { // Next one is the 'Byte 2' of utf-8 encoding.
return std::min<size_t>(1, clamp);
} else {
// Let's decide over the last byte if this ends the sequence.
if (last < 0xe0) {
return 0; // Completed two or three byte coding.
} else {
return std::min(2, clamp); // Next one is the 'Byte 3' of utf-8 encoding.
} else { // Next one is the 'Byte 3' of utf-8 encoding.
return std::min<size_t>(2, clamp);
}
}
}
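Tracing UTF8Position on the three-byte sequence 0xE2 0x82 0xAC (U+20AC): the lead byte 0xE2 is >= 192, so the next position is min(1, clamp); the continuation byte 0x82 with last = 0xE2 >= 0xE0 yields min(2, clamp); and the final byte 0xAC with last = 0x82 < 0xE0 returns 0, closing the sequence. With clamp = 1 the second and third states collapse into the cheaper two-state model that, per the comment below, tends to compress better in practice.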
static int DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
const uint8_t *data) {
int counts[3] = { 0 };
int max_utf8 = 1; // should be 2, but 1 compresses better.
int last_c = 0;
int utf8_pos = 0;
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
const uint8_t *data) {
size_t counts[3] = { 0 };
size_t max_utf8 = 1; // should be 2, but 1 compresses better.
size_t last_c = 0;
size_t utf8_pos = 0;
for (size_t i = 0; i < len; ++i) {
int c = data[(pos + i) & mask];
size_t c = data[(pos + i) & mask];
utf8_pos = UTF8Position(last_c, c, 2);
++counts[utf8_pos];
last_c = c;
@ -58,17 +58,17 @@ void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
// max_utf8 is 0 (normal ascii single byte modeling),
// 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
const int max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
int histogram[3][256] = { { 0 } };
int window_half = 495;
int in_window = std::min(window_half, static_cast<int>(len));
int in_window_utf8[3] = { 0 };
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
size_t histogram[3][256] = { { 0 } };
size_t window_half = 495;
size_t in_window = std::min(window_half, len);
size_t in_window_utf8[3] = { 0 };
// Bootstrap histograms.
int last_c = 0;
int utf8_pos = 0;
for (int i = 0; i < in_window; ++i) {
int c = data[(pos + i) & mask];
size_t last_c = 0;
size_t utf8_pos = 0;
for (size_t i = 0; i < in_window; ++i) {
size_t c = data[(pos + i) & mask];
++histogram[utf8_pos][c];
++in_window_utf8[utf8_pos];
utf8_pos = UTF8Position(last_c, c, max_utf8);
@ -76,30 +76,30 @@ void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
}
// Compute bit costs with sliding window.
for (int i = 0; i < static_cast<int>(len); ++i) {
if (i - window_half >= 0) {
for (size_t i = 0; i < len; ++i) {
if (i >= window_half) {
// Remove a byte in the past.
int c = (i - window_half - 1) < 0 ?
size_t c = i < window_half + 1 ?
0 : data[(pos + i - window_half - 1) & mask];
int last_c = (i - window_half - 2) < 0 ?
size_t last_c = i < window_half + 2 ?
0 : data[(pos + i - window_half - 2) & mask];
int utf8_pos2 = UTF8Position(last_c, c, max_utf8);
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
--histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
--in_window_utf8[utf8_pos2];
}
if (i + window_half < static_cast<int>(len)) {
if (i + window_half < len) {
// Add a byte in the future.
int c = data[(pos + i + window_half - 1) & mask];
int last_c = data[(pos + i + window_half - 2) & mask];
int utf8_pos2 = UTF8Position(last_c, c, max_utf8);
size_t c = data[(pos + i + window_half - 1) & mask];
size_t last_c = data[(pos + i + window_half - 2) & mask];
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
++in_window_utf8[utf8_pos2];
}
int c = i < 1 ? 0 : data[(pos + i - 1) & mask];
int last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
int utf8_pos = UTF8Position(last_c, c, max_utf8);
size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
size_t masked_pos = (pos + i) & mask;
int histo = histogram[utf8_pos][data[masked_pos]];
size_t histo = histogram[utf8_pos][data[masked_pos]];
if (histo == 0) {
histo = 1;
}
@ -114,7 +114,7 @@ void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
// rapidly in the beginning of the file, perhaps because the beginning
// of the data is a statistical "anomaly".
if (i < 2000) {
lit_cost += 0.7 - ((2000 - i) / 2000.0 * 0.35);
lit_cost += 0.7 - (static_cast<double>(2000 - i) / 2000.0 * 0.35);
}
cost[i] = static_cast<float>(lit_cost);
}
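For the record, the early-file surcharge above adds 0.7 - (2000 - i) / 2000 * 0.35 bits per literal: 0.35 bits at i = 0, rising to just under 0.7 bits at i = 1999, and nothing from i = 2000 on.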
@ -126,28 +126,28 @@ void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
return;
}
int histogram[256] = { 0 };
int window_half = 2000;
int in_window = std::min(window_half, static_cast<int>(len));
size_t histogram[256] = { 0 };
size_t window_half = 2000;
size_t in_window = std::min(window_half, len);
// Bootstrap histogram.
for (int i = 0; i < in_window; ++i) {
for (size_t i = 0; i < in_window; ++i) {
++histogram[data[(pos + i) & mask]];
}
// Compute bit costs with sliding window.
for (int i = 0; i < static_cast<int>(len); ++i) {
if (i - window_half >= 0) {
for (size_t i = 0; i < len; ++i) {
if (i >= window_half) {
// Remove a byte in the past.
--histogram[data[(pos + i - window_half) & mask]];
--in_window;
}
if (i + window_half < static_cast<int>(len)) {
if (i + window_half < len) {
// Add a byte in the future.
++histogram[data[(pos + i + window_half) & mask]];
++in_window;
}
int histo = histogram[data[(pos + i) & mask]];
size_t histo = histogram[data[(pos + i) & mask]];
if (histo == 0) {
histo = 1;
}

View File

@ -23,7 +23,7 @@ void BuildMetaBlock(const uint8_t* ringbuffer,
uint8_t prev_byte2,
const Command* cmds,
size_t num_commands,
int literal_context_mode,
ContextType literal_context_mode,
MetaBlockSplit* mb) {
SplitBlock(cmds, num_commands,
ringbuffer, pos, mask,
@ -31,12 +31,12 @@ void BuildMetaBlock(const uint8_t* ringbuffer,
&mb->command_split,
&mb->distance_split);
std::vector<int> literal_context_modes(mb->literal_split.num_types,
literal_context_mode);
std::vector<ContextType> literal_context_modes(mb->literal_split.num_types,
literal_context_mode);
int num_literal_contexts =
size_t num_literal_contexts =
mb->literal_split.num_types << kLiteralContextBits;
int num_distance_contexts =
size_t num_distance_contexts =
mb->distance_split.num_types << kDistanceContextBits;
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
mb->command_histograms.resize(mb->command_split.num_types);
@ -58,17 +58,15 @@ void BuildMetaBlock(const uint8_t* ringbuffer,
// Histogram ids need to fit in one byte.
static const size_t kMaxNumberOfHistograms = 256;
mb->literal_histograms = literal_histograms;
ClusterHistograms(literal_histograms,
1 << kLiteralContextBits,
1u << kLiteralContextBits,
mb->literal_split.num_types,
kMaxNumberOfHistograms,
&mb->literal_histograms,
&mb->literal_context_map);
mb->distance_histograms = distance_histograms;
ClusterHistograms(distance_histograms,
1 << kDistanceContextBits,
1u << kDistanceContextBits,
mb->distance_split.num_types,
kMaxNumberOfHistograms,
&mb->distance_histograms,
@ -79,10 +77,10 @@ void BuildMetaBlock(const uint8_t* ringbuffer,
template<typename HistogramType>
class BlockSplitter {
public:
BlockSplitter(int alphabet_size,
int min_block_size,
BlockSplitter(size_t alphabet_size,
size_t min_block_size,
double split_threshold,
int num_symbols,
size_t num_symbols,
BlockSplit* split,
std::vector<HistogramType>* histograms)
: alphabet_size_(alphabet_size),
@ -95,10 +93,10 @@ class BlockSplitter {
block_size_(0),
curr_histogram_ix_(0),
merge_last_count_(0) {
int max_num_blocks = num_symbols / min_block_size + 1;
size_t max_num_blocks = num_symbols / min_block_size + 1;
// We have to allocate one more histogram than the maximum number of block
// types for the current histogram when the meta-block is too big.
int max_num_types = std::min(max_num_blocks, kMaxBlockTypes + 1);
size_t max_num_types = std::min<size_t>(max_num_blocks, kMaxBlockTypes + 1);
split_->lengths.resize(max_num_blocks);
split_->types.resize(max_num_blocks);
histograms_->resize(max_num_types);
@ -107,7 +105,7 @@ class BlockSplitter {
// Adds the next symbol to the current histogram. When the current histogram
// reaches the target size, decides on merging the block.
void AddSymbol(int symbol) {
void AddSymbol(size_t symbol) {
(*histograms_)[curr_histogram_ix_].Add(symbol);
++block_size_;
if (block_size_ == target_block_size_) {
@ -125,7 +123,7 @@ class BlockSplitter {
}
if (num_blocks_ == 0) {
// Create first block.
split_->lengths[0] = block_size_;
split_->lengths[0] = static_cast<uint32_t>(block_size_);
split_->types[0] = 0;
last_entropy_[0] =
BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_);
@ -140,8 +138,8 @@ class BlockSplitter {
HistogramType combined_histo[2];
double combined_entropy[2];
double diff[2];
for (int j = 0; j < 2; ++j) {
int last_histogram_ix = last_histogram_ix_[j];
for (size_t j = 0; j < 2; ++j) {
size_t last_histogram_ix = last_histogram_ix_[j];
combined_histo[j] = (*histograms_)[curr_histogram_ix_];
combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]);
combined_entropy[j] = BitsEntropy(
@ -153,10 +151,10 @@ class BlockSplitter {
diff[0] > split_threshold_ &&
diff[1] > split_threshold_) {
// Create new block.
split_->lengths[num_blocks_] = block_size_;
split_->types[num_blocks_] = split_->num_types;
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
last_histogram_ix_[1] = last_histogram_ix_[0];
last_histogram_ix_[0] = split_->num_types;
last_histogram_ix_[0] = static_cast<uint8_t>(split_->num_types);
last_entropy_[1] = last_entropy_[0];
last_entropy_[0] = entropy;
++num_blocks_;
@ -167,7 +165,7 @@ class BlockSplitter {
target_block_size_ = min_block_size_;
} else if (diff[1] < diff[0] - 20.0) {
// Combine this block with second last block.
split_->lengths[num_blocks_] = block_size_;
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
(*histograms_)[last_histogram_ix_[0]] = combined_histo[1];
@ -180,7 +178,7 @@ class BlockSplitter {
target_block_size_ = min_block_size_;
} else {
// Combine this block with last block.
split_->lengths[num_blocks_ - 1] += block_size_;
split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
(*histograms_)[last_histogram_ix_[0]] = combined_histo[0];
last_entropy_[0] = combined_entropy[0];
if (split_->num_types == 1) {
@ -201,35 +199,35 @@ class BlockSplitter {
}
private:
static const int kMaxBlockTypes = 256;
static const uint16_t kMaxBlockTypes = 256;
// Alphabet size of a particular block category.
const int alphabet_size_;
const size_t alphabet_size_;
// We collect at least this many symbols for each block.
const int min_block_size_;
const size_t min_block_size_;
// We merge histograms A and B if
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
// where A is the current histogram and B is the histogram of the last or the
// second last block type.
const double split_threshold_;
int num_blocks_;
size_t num_blocks_;
BlockSplit* split_; // not owned
std::vector<HistogramType>* histograms_; // not owned
// The number of symbols that we want to collect before deciding on whether
// or not to merge the block with a previous one or emit a new block.
int target_block_size_;
size_t target_block_size_;
// The number of symbols in the current histogram.
int block_size_;
size_t block_size_;
// Offset of the current histogram.
int curr_histogram_ix_;
size_t curr_histogram_ix_;
// Offset of the histograms of the previous two block types.
int last_histogram_ix_[2];
size_t last_histogram_ix_[2];
// Entropy of the previous two block types.
double last_entropy_[2];
// The number of times we merged the current block with the last one.
int merge_last_count_;
size_t merge_last_count_;
};
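To make the split rule concrete: suppose the current command histogram alone has a BitsEntropy of 1000 bits, the last block type's stored entropy is 4000 bits, and their combined histogram costs 5600 bits. Then diff[0] = 5600 - 1000 - 4000 = 600 > 500 (the command splitter's threshold below), and if the second-last type fares no better, a new block type is created; otherwise the block is merged, preferring the second-last type only when it is at least 20 bits cheaper than the last.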
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
@ -238,7 +236,7 @@ void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb) {
int num_literals = 0;
size_t num_literals = 0;
for (size_t i = 0; i < n_commands; ++i) {
num_literals += commands[i].insert_len_;
}
@ -247,16 +245,16 @@ void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
256, 512, 400.0, num_literals,
&mb->literal_split, &mb->literal_histograms);
BlockSplitter<HistogramCommand> cmd_blocks(
kNumCommandPrefixes, 1024, 500.0, static_cast<int>(n_commands),
kNumCommandPrefixes, 1024, 500.0, n_commands,
&mb->command_split, &mb->command_histograms);
BlockSplitter<HistogramDistance> dist_blocks(
64, 512, 100.0, static_cast<int>(n_commands),
64, 512, 100.0, n_commands,
&mb->distance_split, &mb->distance_histograms);
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
for (int j = 0; j < cmd.insert_len_; ++j) {
for (size_t j = cmd.insert_len_; j != 0; --j) {
lit_blocks.AddSymbol(ringbuffer[pos & mask]);
++pos;
}
@ -276,11 +274,11 @@ void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
template<typename HistogramType>
class ContextBlockSplitter {
public:
ContextBlockSplitter(int alphabet_size,
int num_contexts,
int min_block_size,
ContextBlockSplitter(size_t alphabet_size,
size_t num_contexts,
size_t min_block_size,
double split_threshold,
int num_symbols,
size_t num_symbols,
BlockSplit* split,
std::vector<HistogramType>* histograms)
: alphabet_size_(alphabet_size),
@ -296,10 +294,10 @@ class ContextBlockSplitter {
curr_histogram_ix_(0),
last_entropy_(2 * num_contexts),
merge_last_count_(0) {
int max_num_blocks = num_symbols / min_block_size + 1;
size_t max_num_blocks = num_symbols / min_block_size + 1;
// We have to allocate one more histogram than the maximum number of block
// types for the current histogram when the meta-block is too big.
int max_num_types = std::min(max_num_blocks, max_block_types_ + 1);
size_t max_num_types = std::min(max_num_blocks, max_block_types_ + 1);
split_->lengths.resize(max_num_blocks);
split_->types.resize(max_num_blocks);
histograms_->resize(max_num_types * num_contexts);
@ -308,7 +306,7 @@ class ContextBlockSplitter {
// Adds the next symbol to the current block type and context. When the
// current block reaches the target size, decides on merging the block.
void AddSymbol(int symbol, int context) {
void AddSymbol(size_t symbol, size_t context) {
(*histograms_)[curr_histogram_ix_ + context].Add(symbol);
++block_size_;
if (block_size_ == target_block_size_) {
@ -326,9 +324,9 @@ class ContextBlockSplitter {
}
if (num_blocks_ == 0) {
// Create first block.
split_->lengths[0] = block_size_;
split_->lengths[0] = static_cast<uint32_t>(block_size_);
split_->types[0] = 0;
for (int i = 0; i < num_contexts_; ++i) {
for (size_t i = 0; i < num_contexts_; ++i) {
last_entropy_[i] =
BitsEntropy(&(*histograms_)[i].data_[0], alphabet_size_);
last_entropy_[num_contexts_ + i] = last_entropy_[i];
@ -346,13 +344,13 @@ class ContextBlockSplitter {
std::vector<HistogramType> combined_histo(2 * num_contexts_);
std::vector<double> combined_entropy(2 * num_contexts_);
double diff[2] = { 0.0 };
for (int i = 0; i < num_contexts_; ++i) {
int curr_histo_ix = curr_histogram_ix_ + i;
for (size_t i = 0; i < num_contexts_; ++i) {
size_t curr_histo_ix = curr_histogram_ix_ + i;
entropy[i] = BitsEntropy(&(*histograms_)[curr_histo_ix].data_[0],
alphabet_size_);
for (int j = 0; j < 2; ++j) {
int jx = j * num_contexts_ + i;
int last_histogram_ix = last_histogram_ix_[j] + i;
for (size_t j = 0; j < 2; ++j) {
size_t jx = j * num_contexts_ + i;
size_t last_histogram_ix = last_histogram_ix_[j] + i;
combined_histo[jx] = (*histograms_)[curr_histo_ix];
combined_histo[jx].AddHistogram((*histograms_)[last_histogram_ix]);
combined_entropy[jx] = BitsEntropy(
@ -365,11 +363,11 @@ class ContextBlockSplitter {
diff[0] > split_threshold_ &&
diff[1] > split_threshold_) {
// Create new block.
split_->lengths[num_blocks_] = block_size_;
split_->types[num_blocks_] = split_->num_types;
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
last_histogram_ix_[1] = last_histogram_ix_[0];
last_histogram_ix_[0] = split_->num_types * num_contexts_;
for (int i = 0; i < num_contexts_; ++i) {
for (size_t i = 0; i < num_contexts_; ++i) {
last_entropy_[num_contexts_ + i] = last_entropy_[i];
last_entropy_[i] = entropy[i];
}
@ -381,10 +379,10 @@ class ContextBlockSplitter {
target_block_size_ = min_block_size_;
} else if (diff[1] < diff[0] - 20.0) {
// Combine this block with second last block.
split_->lengths[num_blocks_] = block_size_;
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
for (int i = 0; i < num_contexts_; ++i) {
for (size_t i = 0; i < num_contexts_; ++i) {
(*histograms_)[last_histogram_ix_[0] + i] =
combined_histo[num_contexts_ + i];
last_entropy_[num_contexts_ + i] = last_entropy_[i];
@ -397,8 +395,8 @@ class ContextBlockSplitter {
target_block_size_ = min_block_size_;
} else {
// Combine this block with last block.
split_->lengths[num_blocks_ - 1] += block_size_;
for (int i = 0; i < num_contexts_; ++i) {
split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
for (size_t i = 0; i < num_contexts_; ++i) {
(*histograms_)[last_histogram_ix_[0] + i] = combined_histo[i];
last_entropy_[i] = combined_entropy[i];
if (split_->num_types == 1) {
@ -423,34 +421,34 @@ class ContextBlockSplitter {
static const int kMaxBlockTypes = 256;
// Alphabet size of a particular block category.
const int alphabet_size_;
const int num_contexts_;
const int max_block_types_;
const size_t alphabet_size_;
const size_t num_contexts_;
const size_t max_block_types_;
// We collect at least this many symbols for each block.
const int min_block_size_;
const size_t min_block_size_;
// We merge histograms A and B if
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
// where A is the current histogram and B is the histogram of the last or the
// second last block type.
const double split_threshold_;
int num_blocks_;
size_t num_blocks_;
BlockSplit* split_; // not owned
std::vector<HistogramType>* histograms_; // not owned
// The number of symbols that we want to collect before deciding on whether
// or not to merge the block with a previous one or emit a new block.
int target_block_size_;
size_t target_block_size_;
// The number of symbols in the current histogram.
int block_size_;
size_t block_size_;
// Offset of the current histogram.
int curr_histogram_ix_;
size_t curr_histogram_ix_;
// Offset of the histograms of the previous two block types.
int last_histogram_ix_[2];
size_t last_histogram_ix_[2];
// Entropy of the previous two block types.
std::vector<double> last_entropy_;
// The number of times we merged the current block with the last one.
int merge_last_count_;
size_t merge_last_count_;
};
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
@ -458,13 +456,13 @@ void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
int literal_context_mode,
int num_contexts,
const int* static_context_map,
ContextType literal_context_mode,
size_t num_contexts,
const uint32_t* static_context_map,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb) {
int num_literals = 0;
size_t num_literals = 0;
for (size_t i = 0; i < n_commands; ++i) {
num_literals += commands[i].insert_len_;
}
@ -473,17 +471,17 @@ void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
256, num_contexts, 512, 400.0, num_literals,
&mb->literal_split, &mb->literal_histograms);
BlockSplitter<HistogramCommand> cmd_blocks(
kNumCommandPrefixes, 1024, 500.0, static_cast<int>(n_commands),
kNumCommandPrefixes, 1024, 500.0, n_commands,
&mb->command_split, &mb->command_histograms);
BlockSplitter<HistogramDistance> dist_blocks(
64, 512, 100.0, static_cast<int>(n_commands),
64, 512, 100.0, n_commands,
&mb->distance_split, &mb->distance_histograms);
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
for (int j = 0; j < cmd.insert_len_; ++j) {
int context = Context(prev_byte, prev_byte2, literal_context_mode);
for (size_t j = cmd.insert_len_; j != 0; --j) {
size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
uint8_t literal = ringbuffer[pos & mask];
lit_blocks.AddSymbol(literal, static_context_map[context]);
prev_byte2 = prev_byte;
@ -506,16 +504,16 @@ void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
mb->literal_context_map.resize(
mb->literal_split.num_types << kLiteralContextBits);
for (int i = 0; i < mb->literal_split.num_types; ++i) {
for (int j = 0; j < (1 << kLiteralContextBits); ++j) {
for (size_t i = 0; i < mb->literal_split.num_types; ++i) {
for (size_t j = 0; j < (1u << kLiteralContextBits); ++j) {
mb->literal_context_map[(i << kLiteralContextBits) + j] =
i * num_contexts + static_context_map[j];
static_cast<uint32_t>(i * num_contexts) + static_context_map[j];
}
}
}
void OptimizeHistograms(int num_direct_distance_codes,
int distance_postfix_bits,
void OptimizeHistograms(size_t num_direct_distance_codes,
size_t distance_postfix_bits,
MetaBlockSplit* mb) {
for (size_t i = 0; i < mb->literal_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0]);
@ -524,9 +522,9 @@ void OptimizeHistograms(int num_direct_distance_codes,
OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
&mb->command_histograms[i].data_[0]);
}
int num_distance_codes =
size_t num_distance_codes =
kNumDistanceShortCodes + num_direct_distance_codes +
(48 << distance_postfix_bits);
(48u << distance_postfix_bits);
for (size_t i = 0; i < mb->distance_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(num_distance_codes,
&mb->distance_histograms[i].data_[0]);

View File

@ -20,17 +20,17 @@ namespace brotli {
struct BlockSplit {
BlockSplit() : num_types(0) {}
int num_types;
std::vector<int> types;
std::vector<int> lengths;
size_t num_types;
std::vector<uint8_t> types;
std::vector<uint32_t> lengths;
};
struct MetaBlockSplit {
BlockSplit literal_split;
BlockSplit command_split;
BlockSplit distance_split;
std::vector<int> literal_context_map;
std::vector<int> distance_context_map;
std::vector<uint32_t> literal_context_map;
std::vector<uint32_t> distance_context_map;
std::vector<HistogramLiteral> literal_histograms;
std::vector<HistogramCommand> command_histograms;
std::vector<HistogramDistance> distance_histograms;
@ -44,7 +44,7 @@ void BuildMetaBlock(const uint8_t* ringbuffer,
uint8_t prev_byte2,
const Command* cmds,
size_t num_commands,
int literal_context_mode,
ContextType literal_context_mode,
MetaBlockSplit* mb);
// Uses a fast greedy block splitter that tries to merge current block with the
@ -64,15 +64,15 @@ void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
int literal_context_mode,
int num_contexts,
const int* static_context_map,
ContextType literal_context_mode,
size_t num_contexts,
const uint32_t* static_context_map,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb);
void OptimizeHistograms(int num_direct_distance_codes,
int distance_postfix_bits,
void OptimizeHistograms(size_t num_direct_distance_codes,
size_t distance_postfix_bits,
MetaBlockSplit* mb);
} // namespace brotli

View File

@ -22,10 +22,9 @@
/* Let's try and follow the Linux convention */
#define __BYTE_ORDER BYTE_ORDER
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#define __BIG_ENDIAN BIG_ENDIAN
#endif
// define the macros IS_LITTLE_ENDIAN or IS_BIG_ENDIAN
// define the macro IS_LITTLE_ENDIAN
// using the above endian definitions from endian.h if
// endian.h was included
#ifdef __BYTE_ORDER
@ -33,19 +32,17 @@
#define IS_LITTLE_ENDIAN
#endif
#if __BYTE_ORDER == __BIG_ENDIAN
#define IS_BIG_ENDIAN
#endif
#else
#if defined(__LITTLE_ENDIAN__)
#define IS_LITTLE_ENDIAN
#elif defined(__BIG_ENDIAN__)
#define IS_BIG_ENDIAN
#endif
#endif // __BYTE_ORDER
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define IS_LITTLE_ENDIAN
#endif
// Enable little-endian optimization for x64 architecture on Windows.
#if (defined(_WIN32) || defined(_WIN64)) && defined(_M_X64)
#define IS_LITTLE_ENDIAN
@ -69,8 +66,8 @@
// On some platforms, like ARM, the copy functions can be more efficient
// than a load and a store.
#if defined(ARCH_PIII) || defined(ARCH_ATHLON) || \
defined(ARCH_K8) || defined(_ARCH_PPC)
#if defined(ARCH_PIII) || \
defined(ARCH_ATHLON) || defined(ARCH_K8) || defined(_ARCH_PPC)
// x86 and x86-64 can perform unaligned loads/stores directly;
// modern PowerPC hardware can also do unaligned integer loads and stores;
@ -142,10 +139,4 @@ inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
#endif
#ifdef BROTLI_ENCODE_DEBUG
#define BROTLI_DCHECK(x) assert(x)
#else
#define BROTLI_DCHECK(x)
#endif
#endif // BROTLI_ENC_PORT_H_
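A hypothetical illustration (not part of the patch) of what the IS_LITTLE_ENDIAN macro above enables: on little-endian targets the first mismatching byte of two 8-byte blocks falls out of one XOR plus a trailing-zero count, the trick behind fast match-length scanning. Load64, FirstDiffByte and the GCC/Clang intrinsic __builtin_ctzll are assumptions of this sketch:

#include <cstddef>
#include <cstdint>
#include <cstring>

static inline uint64_t Load64(const void* p) {
  uint64_t v;
  memcpy(&v, p, sizeof v);  // unaligned-safe load
  return v;
}

// Index of the first differing byte in [0, 8), or 8 if all equal.
static inline size_t FirstDiffByte(const uint8_t* a, const uint8_t* b) {
  uint64_t x = Load64(a) ^ Load64(b);
  if (x == 0) return 8;
#if defined(IS_LITTLE_ENDIAN)
  // The lowest-addressed byte is least significant, so the first
  // mismatch is the lowest set bit, divided by 8.
  return static_cast<size_t>(__builtin_ctzll(x)) >> 3;
#else
  size_t i = 0;
  while (a[i] == b[i]) ++i;  // byte-wise fallback
  return i;
#endif
}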

View File

@ -15,18 +15,18 @@
namespace brotli {
static const int kNumInsertLenPrefixes = 24;
static const int kNumCopyLenPrefixes = 24;
static const int kNumCommandPrefixes = 704;
static const int kNumBlockLenPrefixes = 26;
static const int kNumDistanceShortCodes = 16;
static const int kNumDistancePrefixes = 520;
static const uint32_t kNumInsertLenPrefixes = 24;
static const uint32_t kNumCopyLenPrefixes = 24;
static const uint32_t kNumCommandPrefixes = 704;
static const uint32_t kNumBlockLenPrefixes = 26;
static const uint32_t kNumDistanceShortCodes = 16;
static const uint32_t kNumDistancePrefixes = 520;
// Represents the range of values belonging to a prefix code:
// [offset, offset + 2^nbits)
struct PrefixCodeRange {
int offset;
int nbits;
uint32_t offset;
uint32_t nbits;
};
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
@ -39,8 +39,8 @@ static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
{8433, 13}, {16625, 24}
};
inline void GetBlockLengthPrefixCode(int len,
int* code, int* n_extra, int* extra) {
inline void GetBlockLengthPrefixCode(uint32_t len, uint32_t* code,
uint32_t* n_extra, uint32_t* extra) {
*code = 0;
while (*code < 25 && len >= kBlockLengthPrefixCode[*code + 1].offset) {
++(*code);
@ -49,9 +49,9 @@ inline void GetBlockLengthPrefixCode(int len,
*extra = len - kBlockLengthPrefixCode[*code].offset;
}
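A worked instance of the new unsigned signature, using only the two table rows visible above ({8433, 13} is code 24, {16625, 24} is code 25, so code 24 covers [8433, 16624]):

uint32_t code, n_extra, extra;
GetBlockLengthPrefixCode(10000, &code, &n_extra, &extra);
// The scan stops at code == 24 because 10000 < 16625;
// n_extra == 13, extra == 10000 - 8433 == 1567.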
inline void PrefixEncodeCopyDistance(int distance_code,
int num_direct_codes,
int postfix_bits,
inline void PrefixEncodeCopyDistance(size_t distance_code,
size_t num_direct_codes,
size_t postfix_bits,
uint16_t* code,
uint32_t* extra_bits) {
if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
@ -59,18 +59,19 @@ inline void PrefixEncodeCopyDistance(int distance_code,
*extra_bits = 0;
return;
}
distance_code -= kNumDistanceShortCodes + num_direct_codes;
distance_code += (1 << (postfix_bits + 2));
int bucket = Log2Floor(distance_code) - 1;
int postfix_mask = (1 << postfix_bits) - 1;
int postfix = distance_code & postfix_mask;
int prefix = (distance_code >> bucket) & 1;
int offset = (2 + prefix) << bucket;
int nbits = bucket - postfix_bits;
distance_code -= kNumDistanceShortCodes + num_direct_codes; /* >= 0 */
distance_code += (1 << (postfix_bits + 2)); /* > 0 */
size_t bucket = Log2FloorNonZero(distance_code) - 1;
size_t postfix_mask = (1 << postfix_bits) - 1;
size_t postfix = distance_code & postfix_mask;
size_t prefix = (distance_code >> bucket) & 1;
size_t offset = (2 + prefix) << bucket;
size_t nbits = bucket - postfix_bits;
*code = static_cast<uint16_t>(
(kNumDistanceShortCodes + num_direct_codes +
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
*extra_bits = (nbits << 24) | ((distance_code - offset) >> postfix_bits);
*extra_bits = static_cast<uint32_t>(
(nbits << 24) | ((distance_code - offset) >> postfix_bits));
}
} // namespace brotli
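To see the rebasing arithmetic at work, a traced call with num_direct_codes = 0 and postfix_bits = 0 on the smallest non-short input (the intermediate names match the locals above):

uint16_t code;
uint32_t extra_bits;
PrefixEncodeCopyDistance(16, 0, 0, &code, &extra_bits);
// distance_code: 16 - 16 == 0, then += (1 << 2), giving 4  (> 0)
// bucket  = Log2FloorNonZero(4) - 1 = 1
// postfix = 0, prefix = (4 >> 1) & 1 = 0
// offset  = (2 + 0) << 1 = 4, nbits = 1 - 0 = 1
// => code == 16, extra_bits == (1 << 24) | 0  (one extra bit, value 0)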

View File

@ -16,26 +16,34 @@
namespace brotli {
// A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
// data in a circular manner: writing a byte writes it to
// `position() % (1 << window_bits)'. For convenience, the RingBuffer array
// contains another copy of the first `1 << tail_bits' bytes:
// buffer_[i] == buffer_[i + (1 << window_bits)] if i < (1 << tail_bits).
// data in a circular manner: writing a byte writes it to:
// `position() % (1 << window_bits)'.
// For convenience, the RingBuffer array contains another copy of the
// first `1 << tail_bits' bytes:
// buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
// and another copy of the last two bytes:
// buffer_[-1] == buffer_[(1 << window_bits) - 1] and
// buffer_[-2] == buffer_[(1 << window_bits) - 2].
class RingBuffer {
public:
RingBuffer(int window_bits, int tail_bits)
: size_((size_t(1) << window_bits)),
mask_((size_t(1) << window_bits) - 1),
tail_size_(size_t(1) << tail_bits),
: size_(1u << window_bits),
mask_((1u << window_bits) - 1),
tail_size_(1u << tail_bits),
pos_(0) {
static const int kSlackForEightByteHashingEverywhere = 7;
static const size_t kSlackForEightByteHashingEverywhere = 7;
const size_t buflen = size_ + tail_size_;
buffer_ = new uint8_t[buflen + kSlackForEightByteHashingEverywhere];
for (int i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
data_ = new uint8_t[2 + buflen + kSlackForEightByteHashingEverywhere];
buffer_ = data_ + 2;
for (size_t i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
buffer_[buflen + i] = 0;
}
// Initialize the last two bytes and their copy to zero.
buffer_[-2] = buffer_[size_ - 2] = 0;
buffer_[-1] = buffer_[size_ - 1] = 0;
}
~RingBuffer() {
delete [] buffer_;
delete [] data_;
}
// Push bytes into the ring buffer.
@ -56,7 +64,12 @@ class RingBuffer {
memcpy(&buffer_[0], bytes + (size_ - masked_pos),
n - (size_ - masked_pos));
}
pos_ += n;
buffer_[-2] = buffer_[size_ - 2];
buffer_[-1] = buffer_[size_ - 1];
pos_ += static_cast<uint32_t>(n);
if (pos_ > (1u << 30)) { /* Wrap, but preserve not-a-first-lap feature. */
pos_ = (pos_ & ((1u << 30) - 1)) | (1u << 30);
}
}
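The wrap above cannot disturb the physical write position: mask_ covers at most the low 25 bits for Brotli's window sizes, far below bit 30, and keeping bit 30 set means position() never again looks like a first lap. A small numeric check of that invariant:

uint32_t pos = (1u << 31) + 5;  // pos_ grown past 2^31
uint32_t wrapped = (pos & ((1u << 30) - 1)) | (1u << 30);
// wrapped == (1u << 30) + 5: the low bits (hence pos & mask_) are
// unchanged, and bit 30 stays set, so wrapped is still large and nonzero.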
void Reset() {
@ -64,10 +77,10 @@ class RingBuffer {
}
// Logical cursor position in the ring buffer.
size_t position() const { return pos_; }
uint32_t position() const { return pos_; }
// Bit mask for getting the physical position for a logical position.
size_t mask() const { return mask_; }
uint32_t mask() const { return mask_; }
uint8_t *start() { return &buffer_[0]; }
const uint8_t *start() const { return &buffer_[0]; }
@ -83,14 +96,16 @@ class RingBuffer {
}
// Size of the ringbuffer is (1 << window_bits) + tail_size_.
const size_t size_;
const size_t mask_;
const size_t tail_size_;
const uint32_t size_;
const uint32_t mask_;
const uint32_t tail_size_;
// Position to write in the ring buffer.
size_t pos_;
// The actual ring buffer containing the data and the copy of the beginning
// as a tail.
uint32_t pos_;
// The actual ring buffer containing the copy of the last two bytes, the data,
// and the copy of the beginning as a tail.
uint8_t *data_;
// The start of the ringbuffer.
uint8_t *buffer_;
};
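A minimal usage sketch for the interface above (Write is the push method from the elided hunk; the size parameters are arbitrary):

RingBuffer rb(/* window_bits= */ 16, /* tail_bits= */ 6);
const uint8_t bytes[5] = { 'h', 'e', 'l', 'l', 'o' };
rb.Write(bytes, 5);
uint8_t h = rb.start()[(rb.position() - 5) & rb.mask()];  // 'h'
// Invariants kept by the constructor and Write, with size == 1 << 16:
//   start()[-1] == start()[size - 1], start()[-2] == start()[size - 2]
//   start()[i + size] == start()[i] for i < (1 << tail_bits)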

View File

@ -22,19 +22,24 @@ inline uint32_t Hash(const uint8_t *data) {
return h >> (32 - kDictNumBits);
}
inline void AddMatch(int distance, int len, int len_code, int* matches) {
matches[len] = std::min(matches[len], (distance << 5) + len_code);
inline void AddMatch(size_t distance, size_t len, size_t len_code,
uint32_t* matches) {
uint32_t match = static_cast<uint32_t>((distance << 5) + len_code);
matches[len] = std::min(matches[len], match);
}
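The packed layout is unchanged by the type switch: bits [5, 32) hold the (transform-adjusted) dictionary word id and bits [0, 5) the base word length, which fits because dictionary words are at most 24 bytes long. Decoding, the inverse of AddMatch:

uint32_t packed = matches[len];
if (packed != kInvalidMatch) {
  size_t word_id  = packed >> 5;  // word id, including the transform offset
  size_t len_code = packed & 31;  // base dictionary word length (<= 24)
}
// std::min in AddMatch therefore keeps the lowest word id seen for a
// given match length.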
inline int DictMatchLength(const uint8_t* data, int id, int len, int maxlen) {
const int offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
inline size_t DictMatchLength(const uint8_t* data,
size_t id,
size_t len,
size_t maxlen) {
const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,
std::min(len, maxlen));
}
inline bool IsMatch(DictWord w, const uint8_t* data, int max_length) {
inline bool IsMatch(DictWord w, const uint8_t* data, size_t max_length) {
if (w.len > max_length) return false;
const int offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;
const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;
const uint8_t* dict = &kBrotliDictionary[offset];
if (w.transform == 0) {
// Match against base dictionary word.
@ -44,12 +49,12 @@ inline bool IsMatch(DictWord w, const uint8_t* data, int max_length) {
// Note that there are only ASCII uppercase words in the lookup table.
return (dict[0] >= 'a' && dict[0] <= 'z' &&
(dict[0] ^ 32) == data[0] &&
FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1) ==
w.len - 1);
FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
w.len - 1u);
} else {
// Match against uppercase all transform.
// Note that there are only ASCII uppercase words in the lookup table.
for (int i = 0; i < w.len; ++i) {
for (size_t i = 0; i < w.len; ++i) {
if (dict[i] >= 'a' && dict[i] <= 'z') {
if ((dict[i] ^ 32) != data[i]) return false;
} else {
@ -61,22 +66,22 @@ inline bool IsMatch(DictWord w, const uint8_t* data, int max_length) {
}
bool FindAllStaticDictionaryMatches(const uint8_t* data,
int min_length,
int max_length,
int* matches) {
size_t min_length,
size_t max_length,
uint32_t* matches) {
bool found_match = false;
uint32_t key = Hash(data);
uint32_t bucket = kStaticDictionaryBuckets[key];
size_t key = Hash(data);
size_t bucket = kStaticDictionaryBuckets[key];
if (bucket != 0) {
int num = bucket & 0xff;
int offset = bucket >> 8;
for (int i = 0; i < num; ++i) {
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const int l = w.len;
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
const int id = w.idx;
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0) {
const int matchlen = DictMatchLength(data, id, l, max_length);
const size_t matchlen = DictMatchLength(data, id, l, max_length);
// Transform "" + kIdentity + ""
if (matchlen == l) {
AddMatch(id, l, l, matches);
@ -93,9 +98,10 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
found_match = true;
}
// Transform "" + kOmitLastN + "" (N = 2 .. 9)
int minlen = std::max<int>(min_length, l - 9);
int maxlen = std::min<int>(matchlen, l - 2);
for (int len = minlen; len <= maxlen; ++len) {
size_t minlen = min_length;
if (l > 9) minlen = std::max(minlen, l - 9);
size_t maxlen = std::min(matchlen, l - 2);
for (size_t len = minlen; len <= maxlen; ++len) {
AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
found_match = true;
}
@ -250,8 +256,9 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
}
}
} else {
// Set t=0 for kUppercaseFirst and t=1 for kUppercaseAll transform.
const int t = w.transform - 10;
// Set t=false for kUppercaseFirst and
// t=true otherwise (kUppercaseAll) transform.
const bool t = w.transform != kUppercaseFirst;
if (!IsMatch(w, data, max_length)) {
continue;
}
@ -299,13 +306,13 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
bool is_space = (data[0] == ' ');
key = Hash(&data[1]);
bucket = kStaticDictionaryBuckets[key];
int num = bucket & 0xff;
int offset = bucket >> 8;
for (int i = 0; i < num; ++i) {
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const int l = w.len;
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
const int id = w.idx;
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0) {
if (!IsMatch(w, &data[1], max_length - 1)) {
continue;
@ -342,8 +349,9 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
}
}
} else if (is_space) {
// Set t=0 for kUppercaseFirst and t=1 for kUppercaseAll transform.
const int t = w.transform - 10;
// Set t=false for kUppercaseFirst and
// t=true otherwise (kUppercaseAll) transform.
const bool t = w.transform != kUppercaseFirst;
if (!IsMatch(w, &data[1], max_length - 1)) {
continue;
}
@ -358,7 +366,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
if (s[0] == ' ') {
AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches);
} else if (s[0] == ',') {
if (t == 0) {
if (!t) {
AddMatch(id + 109 * n, l + 2, l, matches);
}
if (s[1] == ' ') {
@ -386,19 +394,19 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
(data[0] == 0xc2 && data[1] == 0xa0)) {
key = Hash(&data[2]);
bucket = kStaticDictionaryBuckets[key];
int num = bucket & 0xff;
int offset = bucket >> 8;
for (int i = 0; i < num; ++i) {
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const int l = w.len;
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
const int id = w.idx;
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
if (data[0] == 0xc2) {
AddMatch(id + 102 * n, l + 2, l, matches);
found_match = true;
} else if (l + 2 < max_length && data[l + 2] == ' ') {
int t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
AddMatch(id + t * n, l + 3, l, matches);
found_match = true;
}
@ -414,13 +422,13 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
data[3] == 'm' && data[4] == '/')) {
key = Hash(&data[5]);
bucket = kStaticDictionaryBuckets[key];
int num = bucket & 0xff;
int offset = bucket >> 8;
for (int i = 0; i < num; ++i) {
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const int l = w.len;
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
const int id = w.idx;
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
found_match = true;

View File

@ -13,8 +13,8 @@
namespace brotli {
static const int kMaxDictionaryMatchLen = 37;
static const int kInvalidMatch = 0xfffffff;
static const size_t kMaxDictionaryMatchLen = 37;
static const uint32_t kInvalidMatch = 0xfffffff;
// Matches data against static dictionary words, and for each length l
// for which a match is found, updates matches[l] to be the minimum possible
@ -23,9 +23,9 @@ static const int kInvalidMatch = 0xfffffff;
// matches array is at least kMaxDictionaryMatchLen + 1 long
// all elements are initialized to kInvalidMatch
bool FindAllStaticDictionaryMatches(const uint8_t* data,
int min_length,
int max_length,
int* matches);
size_t min_length,
size_t max_length,
uint32_t* matches);
} // namespace brotli
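A minimal calling sketch for the declaration above, following the stated requirements (the length-4 lower bound and the unpacking of matches[] entries are assumptions based on how the encoder uses this matcher, not shown in this header):

uint32_t matches[kMaxDictionaryMatchLen + 1];
for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
  matches[i] = kInvalidMatch;  // required initialization
}
if (FindAllStaticDictionaryMatches(data, 4, max_length, matches)) {
  for (size_t l = 4; l <= kMaxDictionaryMatchLen; ++l) {
    if (matches[l] == kInvalidMatch) continue;
    uint32_t word_id  = matches[l] >> 5;  // transformed word id
    uint32_t len_code = matches[l] & 31;  // base word length
    // ... turn (word_id, len_code, l) into a copy command ...
  }
}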

View File

@ -172,15 +172,11 @@ static const Transform kTransforms[] = {
static const size_t kNumTransforms =
sizeof(kTransforms) / sizeof(kTransforms[0]);
static const int kOmitFirstNTransforms[10] = {
0, 3, 11, 26, 34, 39, 40, 55, 0, 54
};
static const int kOmitLastNTransforms[10] = {
static const size_t kOmitLastNTransforms[10] = {
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
};
static int ToUpperCase(uint8_t *p, int len) {
static size_t ToUpperCase(uint8_t *p, size_t len) {
if (len == 1 || p[0] < 0xc0) {
if (p[0] >= 'a' && p[0] <= 'z') {
p[0] ^= 32;
@ -198,41 +194,50 @@ static int ToUpperCase(uint8_t *p, int len) {
return 3;
}
inline std::string ApplyTransform(
const Transform& t, const uint8_t* word, int len) {
std::string ret(t.prefix);
if (t.word_transform <= kOmitLast9) {
len -= t.word_transform;
inline std::string TransformWord(
WordTransformType transform_type, const uint8_t* word, size_t len) {
if (transform_type <= kOmitLast9) {
if (len <= transform_type) {
return std::string();
}
return std::string(word, word + len - transform_type);
}
if (len > 0) {
if (t.word_transform >= kOmitFirst1) {
const int skip = t.word_transform - (kOmitFirst1 - 1);
if (len > skip) {
ret += std::string(word + skip, word + len);
}
} else {
ret += std::string(word, word + len);
uint8_t *uppercase = reinterpret_cast<uint8_t*>(&ret[ret.size() - len]);
if (t.word_transform == kUppercaseFirst) {
ToUpperCase(uppercase, len);
} else if (t.word_transform == kUppercaseAll) {
while (len > 0) {
int step = ToUpperCase(uppercase, len);
uppercase += step;
len -= step;
}
}
if (transform_type >= kOmitFirst1) {
const size_t skip = transform_type - (kOmitFirst1 - 1);
if (len <= skip) {
return std::string();
}
return std::string(word + skip, word + len);
}
std::string ret = std::string(word, word + len);
uint8_t *uppercase = reinterpret_cast<uint8_t*>(&ret[0]);
if (transform_type == kUppercaseFirst) {
ToUpperCase(uppercase, len);
} else if (transform_type == kUppercaseAll) {
size_t position = 0;
while (position < len) {
size_t step = ToUpperCase(uppercase, len - position);
uppercase += step;
position += step;
}
}
ret += std::string(t.suffix);
return ret;
}
inline std::string GetTransformedDictionaryWord(int len_code, int word_id) {
int num_words = 1 << kBrotliDictionarySizeBitsByLength[len_code];
int offset = kBrotliDictionaryOffsetsByLength[len_code];
int t = word_id / num_words;
int word_idx = word_id % num_words;
inline std::string ApplyTransform(
const Transform& t, const uint8_t* word, size_t len) {
return std::string(t.prefix) +
TransformWord(t.word_transform, word, len) + std::string(t.suffix);
}
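Worked instances of the refactored helpers (the enum arithmetic above implies kOmitLastN == N and the kOmitFirstN values counting up from kOmitFirst1; the sample transform triple is hypothetical):

// word = "hello", len = 5:
//   TransformWord(kOmitLast2,      word, 5) == "hel"
//   TransformWord(kOmitFirst1,     word, 5) == "ello"
//   TransformWord(kUppercaseFirst, word, 5) == "Hello"
//   TransformWord(kUppercaseAll,   word, 5) == "HELLO"
// With t = { prefix " ", kUppercaseFirst, suffix "." }:
//   ApplyTransform(t, word, 5) == " Hello."
// The explicit len <= transform_type / len <= skip guards matter now that
// len is size_t: the word shrinks to empty instead of wrapping around.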
inline std::string GetTransformedDictionaryWord(size_t len_code,
size_t word_id) {
size_t num_words = 1u << kBrotliDictionarySizeBitsByLength[len_code];
size_t offset = kBrotliDictionaryOffsetsByLength[len_code];
size_t t = word_id / num_words;
size_t word_idx = word_id % num_words;
offset += len_code * word_idx;
const uint8_t* word = &kBrotliDictionary[offset];
return ApplyTransform(kTransforms[t], word, len_code);

View File

@ -14,7 +14,7 @@ namespace brotli {
namespace {
int ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
// ASCII
if ((input[0] & 0x80) == 0) {
*symbol = input[0];
@ -72,7 +72,8 @@ bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
size_t i = 0;
while (i < length) {
int symbol;
int bytes_read = ParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);
size_t bytes_read = ParseAsUTF8(
&symbol, &data[(pos + i) & mask], length - i);
i += bytes_read;
if (symbol < 0x110000) size_utf8 += bytes_read;
}
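For context, the counting loop above feeds a byte-ratio test. A standalone sketch of the heuristic on a contiguous buffer (the 0.75 cutoff is an assumption here; the library's constant may differ):

bool LooksMostlyUTF8(const uint8_t* data, size_t length) {
  size_t size_utf8 = 0;
  size_t i = 0;
  while (i < length) {
    int symbol;
    size_t bytes_read = ParseAsUTF8(&symbol, &data[i], length - i);
    i += bytes_read;
    if (symbol < 0x110000) size_utf8 += bytes_read;  // valid code point
  }
  return static_cast<double>(size_utf8) >
         0.75 * static_cast<double>(length);
}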

View File

@ -34,9 +34,9 @@ namespace brotli {
//
// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
// and locate the rest in BYTE+1, BYTE+2, etc.
inline void WriteBits(int n_bits,
inline void WriteBits(size_t n_bits,
uint64_t bits,
int * __restrict pos,
size_t * __restrict pos,
uint8_t * __restrict array) {
#ifdef BIT_WRITER_DEBUG
printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos);
@ -57,11 +57,11 @@ inline void WriteBits(int n_bits,
#else
// implicit & 0xff is assumed for uint8_t arithmetic
uint8_t *array_pos = &array[*pos >> 3];
const int bits_reserved_in_first_byte = (*pos & 7);
const size_t bits_reserved_in_first_byte = (*pos & 7);
bits <<= bits_reserved_in_first_byte;
*array_pos++ |= static_cast<uint8_t>(bits);
for (int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte;
bits_left_to_write >= 1;
for (size_t bits_left_to_write = n_bits + bits_reserved_in_first_byte;
bits_left_to_write >= 9;
bits_left_to_write -= 8) {
bits >>= 8;
*array_pos++ = static_cast<uint8_t>(bits);
@ -71,7 +71,7 @@ inline void WriteBits(int n_bits,
#endif
}
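The rewritten loop bound (start at n_bits + bits_reserved_in_first_byte, run while >= 9) is the unsigned-safe form of the old n_bits - 8 + bits_reserved_in_first_byte >= 1, which would wrap for small n_bits now that the counters are size_t. A worked instance of the layout described in the header comment:

size_t pos = 0;
uint8_t storage[16] = { 0 };  // bytes being written into must start as zero
WriteBits(5, 7, &pos, storage);
// storage[0] == 0x07 (bits 0..4), pos == 5
WriteBits(4, 0xF, &pos, storage);
// three bits land in storage[0] (now 0xE7), one in storage[1] (0x01);
// pos == 9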
inline void WriteBitsPrepareStorage(int pos, uint8_t *array) {
inline void WriteBitsPrepareStorage(size_t pos, uint8_t *array) {
#ifdef BIT_WRITER_DEBUG
printf("WriteBitsPrepareStorage %10d\n", pos);
#endif