mirror of
https://github.com/google/brotli.git
synced 2025-01-01 04:40:08 +00:00
New version of the backward reference search code.
The new interface of the backward reference search function makes it possible to use it in a streaming manner. The advanced cost model and the static dictionary can each be turned on or off via template parameters. The distance short codes are now computed as part of the backward reference search. Added a faster version of the Hasher.
This commit is contained in:
parent
f580616386
commit
b4f39bf540
@ -23,173 +23,230 @@
|
|||||||
|
|
||||||
namespace brotli {
|
namespace brotli {
|
||||||
|
|
||||||
template<typename Hasher>
|
template<typename Hasher, bool kUseCostModel, bool kUseDictionary>
|
||||||
void CreateBackwardReferences(size_t num_bytes,
|
void CreateBackwardReferences(size_t num_bytes,
|
||||||
size_t position,
|
size_t position,
|
||||||
const uint8_t* ringbuffer,
|
const uint8_t* ringbuffer,
|
||||||
const float* literal_cost,
|
|
||||||
size_t ringbuffer_mask,
|
size_t ringbuffer_mask,
|
||||||
|
const float* literal_cost,
|
||||||
|
size_t literal_cost_mask,
|
||||||
const size_t max_backward_limit,
|
const size_t max_backward_limit,
|
||||||
|
const double base_min_score,
|
||||||
|
const int quality,
|
||||||
Hasher* hasher,
|
Hasher* hasher,
|
||||||
std::vector<Command>* commands) {
|
int* dist_cache,
|
||||||
// Length heuristic that seems to help probably by better selection
|
int* last_insert_len,
|
||||||
// of lazy matches of similar lengths.
|
Command* commands,
|
||||||
int insert_length = 0;
|
int* num_commands) {
|
||||||
|
if (num_bytes >= 3 && position >= 3) {
|
||||||
|
// Prepare the hashes for three last bytes of the last write.
|
||||||
|
// These could not be calculated before, since they require knowledge
|
||||||
|
// of both the previous and the current block.
|
||||||
|
hasher->Store(&ringbuffer[(position - 3) & ringbuffer_mask],
|
||||||
|
position - 3);
|
||||||
|
hasher->Store(&ringbuffer[(position - 2) & ringbuffer_mask],
|
||||||
|
position - 2);
|
||||||
|
hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask],
|
||||||
|
position - 1);
|
||||||
|
}
|
||||||
|
const Command * const orig_commands = commands;
|
||||||
|
int insert_length = *last_insert_len;
|
||||||
size_t i = position & ringbuffer_mask;
|
size_t i = position & ringbuffer_mask;
|
||||||
const int i_diff = position - i;
|
const int i_diff = position - i;
|
||||||
const size_t i_end = i + num_bytes;
|
const size_t i_end = i + num_bytes;
|
||||||
|
|
||||||
const int random_heuristics_window_size = 512;
|
// For speed up heuristics for random data.
|
||||||
|
const int random_heuristics_window_size = quality < 9 ? 64 : 512;
|
||||||
int apply_random_heuristics = i + random_heuristics_window_size;
|
int apply_random_heuristics = i + random_heuristics_window_size;
|
||||||
|
|
||||||
double average_cost = 0.0;
|
double average_cost = 5.4;
|
||||||
for (int k = position; k < position + num_bytes; ++k) {
|
if (kUseCostModel) {
|
||||||
average_cost += literal_cost[k & ringbuffer_mask];
|
average_cost = 0.0;
|
||||||
|
for (int k = position; k < position + num_bytes; ++k) {
|
||||||
|
average_cost += literal_cost[k & literal_cost_mask];
|
||||||
|
}
|
||||||
|
average_cost /= num_bytes;
|
||||||
}
|
}
|
||||||
average_cost /= num_bytes;
|
|
||||||
hasher->set_average_cost(average_cost);
|
|
||||||
|
|
||||||
// M1 match is for considering two repeated copies, if moving
|
// M1 match is for considering two repeated copies, if moving
|
||||||
// one literal from the previous copy to the current one allows the
|
// one literal from the previous copy to the current one allows the
|
||||||
// current copy to be more efficient (because of the way the static dictionary
|
// current copy to be more efficient (because of the way the static dictionary
|
||||||
// codes words). M1 matching improves text compression density by ~0.15 %.
|
// codes words). M1 matching improves text compression density by ~0.15 %.
|
||||||
bool match_found_M1 = false;
|
bool match_found_M1 = false;
|
||||||
size_t best_len_M1 = 0;
|
int best_len_M1 = 0;
|
||||||
size_t best_len_code_M1 = 0;
|
int best_len_code_M1 = 0;
|
||||||
size_t best_dist_M1 = 0;
|
int best_dist_M1 = 0;
|
||||||
double best_score_M1 = 0;
|
double best_score_M1 = 0;
|
||||||
while (i + 2 < i_end) {
|
while (i + 3 < i_end) {
|
||||||
size_t best_len = 0;
|
int max_length = i_end - i;
|
||||||
size_t best_len_code = 0;
|
|
||||||
size_t best_dist = 0;
|
|
||||||
double best_score = 0;
|
|
||||||
size_t max_distance = std::min(i + i_diff, max_backward_limit);
|
size_t max_distance = std::min(i + i_diff, max_backward_limit);
|
||||||
bool in_dictionary;
|
double min_score = base_min_score;
|
||||||
hasher->set_insert_length(insert_length);
|
if (kUseCostModel && insert_length < 8) {
|
||||||
|
double cost_diff[8] =
|
||||||
|
{ 0.1, 0.038, 0.019, 0.013, 0.001, 0.001, 0.001, 0.001 };
|
||||||
|
min_score += cost_diff[insert_length];
|
||||||
|
}
|
||||||
|
int best_len = 0;
|
||||||
|
int best_len_code = 0;
|
||||||
|
int best_dist = 0;
|
||||||
|
double best_score = min_score;
|
||||||
bool match_found = hasher->FindLongestMatch(
|
bool match_found = hasher->FindLongestMatch(
|
||||||
ringbuffer, literal_cost, ringbuffer_mask,
|
ringbuffer, ringbuffer_mask,
|
||||||
i + i_diff, i_end - i, max_distance,
|
literal_cost, literal_cost_mask, average_cost,
|
||||||
&best_len, &best_len_code, &best_dist, &best_score,
|
dist_cache, i + i_diff, max_length, max_distance,
|
||||||
&in_dictionary);
|
&best_len, &best_len_code, &best_dist, &best_score);
|
||||||
bool best_in_dictionary = in_dictionary;
|
|
||||||
if (match_found) {
|
if (match_found) {
|
||||||
if (match_found_M1 && best_score_M1 > best_score) {
|
if (kUseDictionary && match_found_M1 && best_score_M1 > best_score) {
|
||||||
// Two copies after each other. Take the last literal from the
|
// Two copies after each other. Take the last literal from the
|
||||||
// last copy, and use it as the first of this one.
|
// last copy, and use it as the first of this one.
|
||||||
(commands->rbegin())->copy_length_ -= 1;
|
Command prev_cmd = commands[-1];
|
||||||
(commands->rbegin())->copy_length_code_ -= 1;
|
commands[-1] = Command(prev_cmd.insert_len_,
|
||||||
|
prev_cmd.copy_len_ - 1,
|
||||||
|
prev_cmd.copy_len_ - 1,
|
||||||
|
prev_cmd.DistanceCode());
|
||||||
hasher->Store(ringbuffer + i, i + i_diff);
|
hasher->Store(ringbuffer + i, i + i_diff);
|
||||||
--i;
|
--i;
|
||||||
best_len = best_len_M1;
|
best_len = best_len_M1;
|
||||||
best_len_code = best_len_code_M1;
|
best_len_code = best_len_code_M1;
|
||||||
best_dist = best_dist_M1;
|
best_dist = best_dist_M1;
|
||||||
best_score = best_score_M1;
|
best_score = best_score_M1;
|
||||||
// in_dictionary doesn't need to be correct, but it is the only
|
|
||||||
// reason why M1 matching should be beneficial here. Setting it here
|
|
||||||
// will only disable further M1 matching against this copy.
|
|
||||||
best_in_dictionary = true;
|
|
||||||
in_dictionary = true;
|
|
||||||
} else {
|
} else {
|
||||||
// Found a match. Let's look for something even better ahead.
|
// Found a match. Let's look for something even better ahead.
|
||||||
int delayed_backward_references_in_row = 0;
|
int delayed_backward_references_in_row = 0;
|
||||||
while (i + 4 < i_end &&
|
for (;;) {
|
||||||
delayed_backward_references_in_row < 4) {
|
--max_length;
|
||||||
size_t best_len_2 = 0;
|
int best_len_2 = quality < 4 ? std::min(best_len - 1, max_length) : 0;
|
||||||
size_t best_len_code_2 = 0;
|
int best_len_code_2 = 0;
|
||||||
size_t best_dist_2 = 0;
|
int best_dist_2 = 0;
|
||||||
double best_score_2 = 0;
|
double best_score_2 = min_score;
|
||||||
max_distance = std::min(i + i_diff + 1, max_backward_limit);
|
max_distance = std::min(i + i_diff + 1, max_backward_limit);
|
||||||
hasher->Store(ringbuffer + i, i + i_diff);
|
hasher->Store(ringbuffer + i, i + i_diff);
|
||||||
match_found = hasher->FindLongestMatch(
|
match_found = hasher->FindLongestMatch(
|
||||||
ringbuffer, literal_cost, ringbuffer_mask,
|
ringbuffer, ringbuffer_mask,
|
||||||
i + i_diff + 1, i_end - i - 1, max_distance,
|
literal_cost, literal_cost_mask, average_cost,
|
||||||
&best_len_2, &best_len_code_2, &best_dist_2, &best_score_2,
|
dist_cache, i + i_diff + 1, max_length, max_distance,
|
||||||
&in_dictionary);
|
&best_len_2, &best_len_code_2, &best_dist_2, &best_score_2);
|
||||||
double cost_diff_lazy = 0;
|
double cost_diff_lazy = 7.0;
|
||||||
if (best_len >= 4) {
|
if (kUseCostModel) {
|
||||||
cost_diff_lazy +=
|
cost_diff_lazy = 0.0;
|
||||||
literal_cost[(i + 4) & ringbuffer_mask] - average_cost;
|
if (best_len >= 4) {
|
||||||
}
|
cost_diff_lazy +=
|
||||||
{
|
literal_cost[(i + 4) & literal_cost_mask] - average_cost;
|
||||||
const int tail_length = best_len_2 - best_len + 1;
|
|
||||||
for (int k = 0; k < tail_length; ++k) {
|
|
||||||
cost_diff_lazy -=
|
|
||||||
literal_cost[(i + best_len + k) & ringbuffer_mask] -
|
|
||||||
average_cost;
|
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
const int tail_length = best_len_2 - best_len + 1;
|
||||||
|
for (int k = 0; k < tail_length; ++k) {
|
||||||
|
cost_diff_lazy -=
|
||||||
|
literal_cost[(i + best_len + k) & literal_cost_mask] -
|
||||||
|
average_cost;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If we are not inserting any symbols, inserting one is more
|
||||||
|
// expensive than if we were inserting symbols anyways.
|
||||||
|
if (insert_length < 1) {
|
||||||
|
cost_diff_lazy += 0.97;
|
||||||
|
}
|
||||||
|
// Add bias to slightly avoid lazy matching.
|
||||||
|
cost_diff_lazy += 2.0 + delayed_backward_references_in_row * 0.2;
|
||||||
|
cost_diff_lazy += 0.04 * literal_cost[i & literal_cost_mask];
|
||||||
}
|
}
|
||||||
// If we are not inserting any symbols, inserting one is more
|
|
||||||
// expensive than if we were inserting symbols anyways.
|
|
||||||
if (insert_length < 1) {
|
|
||||||
cost_diff_lazy += 0.97;
|
|
||||||
}
|
|
||||||
// Add bias to slightly avoid lazy matching.
|
|
||||||
cost_diff_lazy += 2.0 + delayed_backward_references_in_row * 0.2;
|
|
||||||
cost_diff_lazy += 0.04 * literal_cost[i & ringbuffer_mask];
|
|
||||||
|
|
||||||
if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
|
if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
|
||||||
// Ok, let's just write one byte for now and start a match from the
|
// Ok, let's just write one byte for now and start a match from the
|
||||||
// next byte.
|
// next byte.
|
||||||
|
++i;
|
||||||
++insert_length;
|
++insert_length;
|
||||||
++delayed_backward_references_in_row;
|
|
||||||
best_len = best_len_2;
|
best_len = best_len_2;
|
||||||
best_len_code = best_len_code_2;
|
best_len_code = best_len_code_2;
|
||||||
best_dist = best_dist_2;
|
best_dist = best_dist_2;
|
||||||
best_score = best_score_2;
|
best_score = best_score_2;
|
||||||
best_in_dictionary = in_dictionary;
|
if (++delayed_backward_references_in_row < 4) {
|
||||||
i++;
|
continue;
|
||||||
} else {
|
}
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
apply_random_heuristics =
|
apply_random_heuristics =
|
||||||
i + 2 * best_len + random_heuristics_window_size;
|
i + 2 * best_len + random_heuristics_window_size;
|
||||||
Command cmd;
|
max_distance = std::min(i + i_diff, max_backward_limit);
|
||||||
cmd.insert_length_ = insert_length;
|
int distance_code = best_dist + 16;
|
||||||
cmd.copy_length_ = best_len;
|
if (best_dist <= max_distance) {
|
||||||
cmd.copy_length_code_ = best_len_code;
|
if (best_dist == dist_cache[0]) {
|
||||||
cmd.copy_distance_ = best_dist;
|
distance_code = 1;
|
||||||
commands->push_back(cmd);
|
} else if (best_dist == dist_cache[1]) {
|
||||||
insert_length = 0;
|
distance_code = 2;
|
||||||
++i;
|
} else if (best_dist == dist_cache[2]) {
|
||||||
if (best_dist <= std::min(i + i_diff, max_backward_limit)) {
|
distance_code = 3;
|
||||||
hasher->set_last_distance(best_dist);
|
} else if (best_dist == dist_cache[3]) {
|
||||||
|
distance_code = 4;
|
||||||
|
} else if (quality > 1 && best_dist >= 6) {
|
||||||
|
for (int k = 4; k < kNumDistanceShortCodes; ++k) {
|
||||||
|
int idx = kDistanceCacheIndex[k];
|
||||||
|
int candidate = dist_cache[idx] + kDistanceCacheOffset[k];
|
||||||
|
static const int kLimits[16] = { 0, 0, 0, 0,
|
||||||
|
6, 6, 11, 11,
|
||||||
|
11, 11, 11, 11,
|
||||||
|
12, 12, 12, 12 };
|
||||||
|
if (best_dist == candidate && best_dist >= kLimits[k]) {
|
||||||
|
distance_code = k + 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (distance_code > 1) {
|
||||||
|
dist_cache[3] = dist_cache[2];
|
||||||
|
dist_cache[2] = dist_cache[1];
|
||||||
|
dist_cache[1] = dist_cache[0];
|
||||||
|
dist_cache[0] = best_dist;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Command cmd(insert_length, best_len, best_len_code, distance_code);
|
||||||
// Copy all copied literals to the hasher, except the last one.
|
*commands++ = cmd;
|
||||||
// We cannot store the last one yet, otherwise we couldn't find
|
insert_length = 0;
|
||||||
// the possible M1 match.
|
if (kUseDictionary) {
|
||||||
for (int j = 1; j < best_len - 1; ++j) {
|
++i;
|
||||||
if (i + 2 < i_end) {
|
// Copy all copied literals to the hasher, except the last one.
|
||||||
|
// We cannot store the last one yet, otherwise we couldn't find
|
||||||
|
// the possible M1 match.
|
||||||
|
for (int j = 1; j < best_len - 1; ++j) {
|
||||||
|
if (i + 3 < i_end) {
|
||||||
|
hasher->Store(ringbuffer + i, i + i_diff);
|
||||||
|
}
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
// Prepare M1 match.
|
||||||
|
if (hasher->HasStaticDictionary() &&
|
||||||
|
best_len >= 4 && i + 20 < i_end && best_dist <= max_distance) {
|
||||||
|
max_distance = std::min(i + i_diff, max_backward_limit);
|
||||||
|
best_score_M1 = min_score;
|
||||||
|
match_found_M1 = hasher->FindLongestMatch(
|
||||||
|
ringbuffer, ringbuffer_mask,
|
||||||
|
literal_cost, literal_cost_mask, average_cost,
|
||||||
|
dist_cache, i + i_diff, i_end - i, max_distance,
|
||||||
|
&best_len_M1, &best_len_code_M1, &best_dist_M1, &best_score_M1);
|
||||||
|
} else {
|
||||||
|
match_found_M1 = false;
|
||||||
|
}
|
||||||
|
if (kUseCostModel) {
|
||||||
|
// This byte is just moved from the previous copy to the current,
|
||||||
|
// that is no gain.
|
||||||
|
best_score_M1 -= literal_cost[i & literal_cost_mask];
|
||||||
|
// Adjust for losing the opportunity for lazy matching.
|
||||||
|
best_score_M1 -= 3.75;
|
||||||
|
}
|
||||||
|
// Store the last one of the match.
|
||||||
|
if (i + 3 < i_end) {
|
||||||
hasher->Store(ringbuffer + i, i + i_diff);
|
hasher->Store(ringbuffer + i, i + i_diff);
|
||||||
}
|
}
|
||||||
++i;
|
++i;
|
||||||
}
|
|
||||||
// Prepare M1 match.
|
|
||||||
if (hasher->HasStaticDictionary() &&
|
|
||||||
best_len >= 4 && i + 20 < i_end && !best_in_dictionary) {
|
|
||||||
max_distance = std::min(i + i_diff, max_backward_limit);
|
|
||||||
match_found_M1 = hasher->FindLongestMatch(
|
|
||||||
ringbuffer, literal_cost, ringbuffer_mask,
|
|
||||||
i + i_diff, i_end - i, max_distance,
|
|
||||||
&best_len_M1, &best_len_code_M1, &best_dist_M1, &best_score_M1,
|
|
||||||
&in_dictionary);
|
|
||||||
} else {
|
} else {
|
||||||
match_found_M1 = false;
|
// Put the hash keys into the table, if there are enough
|
||||||
in_dictionary = false;
|
// bytes left.
|
||||||
|
for (int j = 1; j < best_len; ++j) {
|
||||||
|
hasher->Store(&ringbuffer[i + j], i + i_diff + j);
|
||||||
|
}
|
||||||
|
i += best_len;
|
||||||
}
|
}
|
||||||
// This byte is just moved from the previous copy to the current,
|
|
||||||
// that is no gain.
|
|
||||||
best_score_M1 -= literal_cost[i & ringbuffer_mask];
|
|
||||||
// Adjust for losing the opportunity for lazy matching.
|
|
||||||
best_score_M1 -= 3.75;
|
|
||||||
|
|
||||||
// Store the last one of the match.
|
|
||||||
if (i + 2 < i_end) {
|
|
||||||
hasher->Store(ringbuffer + i, i + i_diff);
|
|
||||||
}
|
|
||||||
++i;
|
|
||||||
} else {
|
} else {
|
||||||
match_found_M1 = false;
|
match_found_M1 = false;
|
||||||
++insert_length;
|
++insert_length;
|
||||||
@ -214,7 +271,7 @@ void CreateBackwardReferences(size_t num_bytes,
|
|||||||
insert_length += 4;
|
insert_length += 4;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int i_jump = std::min(i + 8, i_end - 2);
|
int i_jump = std::min(i + 8, i_end - 3);
|
||||||
for (; i < i_jump; i += 2) {
|
for (; i < i_jump; i += 2) {
|
||||||
hasher->Store(ringbuffer + i, i + i_diff);
|
hasher->Store(ringbuffer + i, i + i_diff);
|
||||||
insert_length += 2;
|
insert_length += 2;
|
||||||
@ -224,44 +281,92 @@ void CreateBackwardReferences(size_t num_bytes,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
insert_length += (i_end - i);
|
insert_length += (i_end - i);
|
||||||
|
*last_insert_len = insert_length;
|
||||||
if (insert_length > 0) {
|
*num_commands += (commands - orig_commands);
|
||||||
Command cmd;
|
|
||||||
cmd.insert_length_ = insert_length;
|
|
||||||
cmd.copy_length_ = 0;
|
|
||||||
cmd.copy_distance_ = 0;
|
|
||||||
commands->push_back(cmd);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void CreateBackwardReferences(size_t num_bytes,
|
void CreateBackwardReferences(size_t num_bytes,
|
||||||
size_t position,
|
size_t position,
|
||||||
const uint8_t* ringbuffer,
|
const uint8_t* ringbuffer,
|
||||||
const float* literal_cost,
|
|
||||||
size_t ringbuffer_mask,
|
size_t ringbuffer_mask,
|
||||||
|
const float* literal_cost,
|
||||||
|
size_t literal_cost_mask,
|
||||||
const size_t max_backward_limit,
|
const size_t max_backward_limit,
|
||||||
|
const double base_min_score,
|
||||||
|
const int quality,
|
||||||
Hashers* hashers,
|
Hashers* hashers,
|
||||||
Hashers::Type hash_type,
|
int hash_type,
|
||||||
std::vector<Command>* commands) {
|
int* dist_cache,
|
||||||
|
int* last_insert_len,
|
||||||
|
Command* commands,
|
||||||
|
int* num_commands) {
|
||||||
switch (hash_type) {
|
switch (hash_type) {
|
||||||
case Hashers::HASH_15_8_4:
|
case 1:
|
||||||
CreateBackwardReferences(
|
CreateBackwardReferences<Hashers::H1, false, false>(
|
||||||
num_bytes, position, ringbuffer, literal_cost,
|
num_bytes, position, ringbuffer, ringbuffer_mask,
|
||||||
ringbuffer_mask, max_backward_limit,
|
literal_cost, literal_cost_mask, max_backward_limit, base_min_score,
|
||||||
hashers->hash_15_8_4.get(),
|
quality, hashers->hash_h1.get(), dist_cache, last_insert_len,
|
||||||
commands);
|
commands, num_commands);
|
||||||
break;
|
break;
|
||||||
case Hashers::HASH_15_8_2:
|
case 2:
|
||||||
CreateBackwardReferences(
|
CreateBackwardReferences<Hashers::H2, false, false>(
|
||||||
num_bytes, position, ringbuffer, literal_cost,
|
num_bytes, position, ringbuffer, ringbuffer_mask,
|
||||||
ringbuffer_mask, max_backward_limit,
|
literal_cost, literal_cost_mask, max_backward_limit, base_min_score,
|
||||||
hashers->hash_15_8_2.get(),
|
quality, hashers->hash_h2.get(), dist_cache, last_insert_len,
|
||||||
commands);
|
commands, num_commands);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
CreateBackwardReferences<Hashers::H3, false, false>(
|
||||||
|
num_bytes, position, ringbuffer, ringbuffer_mask,
|
||||||
|
literal_cost, literal_cost_mask, max_backward_limit, base_min_score,
|
||||||
|
quality, hashers->hash_h3.get(), dist_cache, last_insert_len,
|
||||||
|
commands, num_commands);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
CreateBackwardReferences<Hashers::H4, false, false>(
|
||||||
|
num_bytes, position, ringbuffer, ringbuffer_mask,
|
||||||
|
literal_cost, literal_cost_mask, max_backward_limit, base_min_score,
|
||||||
|
quality, hashers->hash_h4.get(), dist_cache, last_insert_len,
|
||||||
|
commands, num_commands);
|
||||||
|
break;
|
||||||
|
case 5:
|
||||||
|
CreateBackwardReferences<Hashers::H5, false, false>(
|
||||||
|
num_bytes, position, ringbuffer, ringbuffer_mask,
|
||||||
|
literal_cost, literal_cost_mask, max_backward_limit, base_min_score,
|
||||||
|
quality, hashers->hash_h5.get(), dist_cache, last_insert_len,
|
||||||
|
commands, num_commands);
|
||||||
|
break;
|
||||||
|
case 6:
|
||||||
|
CreateBackwardReferences<Hashers::H6, false, false>(
|
||||||
|
num_bytes, position, ringbuffer, ringbuffer_mask,
|
||||||
|
literal_cost, literal_cost_mask, max_backward_limit, base_min_score,
|
||||||
|
quality, hashers->hash_h6.get(), dist_cache, last_insert_len,
|
||||||
|
commands, num_commands);
|
||||||
|
break;
|
||||||
|
case 7:
|
||||||
|
CreateBackwardReferences<Hashers::H7, true, false>(
|
||||||
|
num_bytes, position, ringbuffer, ringbuffer_mask,
|
||||||
|
literal_cost, literal_cost_mask, max_backward_limit, base_min_score,
|
||||||
|
quality, hashers->hash_h7.get(), dist_cache, last_insert_len,
|
||||||
|
commands, num_commands);
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
CreateBackwardReferences<Hashers::H8, true, true>(
|
||||||
|
num_bytes, position, ringbuffer, ringbuffer_mask,
|
||||||
|
literal_cost, literal_cost_mask, max_backward_limit, base_min_score,
|
||||||
|
quality, hashers->hash_h8.get(), dist_cache, last_insert_len,
|
||||||
|
commands, num_commands);
|
||||||
|
break;
|
||||||
|
case 9:
|
||||||
|
CreateBackwardReferences<Hashers::H9, true, false>(
|
||||||
|
num_bytes, position, ringbuffer, ringbuffer_mask,
|
||||||
|
literal_cost, literal_cost_mask, max_backward_limit, base_min_score,
|
||||||
|
quality, hashers->hash_h9.get(), dist_cache, last_insert_len,
|
||||||
|
commands, num_commands);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
} // namespace brotli
|
} // namespace brotli
|
||||||
|
@ -28,12 +28,18 @@ namespace brotli {
|
|||||||
void CreateBackwardReferences(size_t num_bytes,
|
void CreateBackwardReferences(size_t num_bytes,
|
||||||
size_t position,
|
size_t position,
|
||||||
const uint8_t* ringbuffer,
|
const uint8_t* ringbuffer,
|
||||||
const float* literal_cost,
|
|
||||||
size_t ringbuffer_mask,
|
size_t ringbuffer_mask,
|
||||||
|
const float* literal_cost,
|
||||||
|
size_t literal_cost_mask,
|
||||||
const size_t max_backward_limit,
|
const size_t max_backward_limit,
|
||||||
|
const double base_min_score,
|
||||||
|
const int quality,
|
||||||
Hashers* hashers,
|
Hashers* hashers,
|
||||||
Hashers::Type hash_type,
|
int hash_type,
|
||||||
std::vector<Command>* commands);
|
int* dist_cache,
|
||||||
|
int* last_insert_len,
|
||||||
|
Command* commands,
|
||||||
|
int* num_commands);
|
||||||
|
|
||||||
} // namespace brotli
|
} // namespace brotli
|
||||||
|
|
||||||
|
@ -51,7 +51,7 @@ void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
|
|||||||
// Count how many we have.
|
// Count how many we have.
|
||||||
size_t total_length = 0;
|
size_t total_length = 0;
|
||||||
for (int i = 0; i < cmds.size(); ++i) {
|
for (int i = 0; i < cmds.size(); ++i) {
|
||||||
total_length += cmds[i].insert_length_;
|
total_length += cmds[i].insert_len_;
|
||||||
}
|
}
|
||||||
if (total_length == 0) {
|
if (total_length == 0) {
|
||||||
return;
|
return;
|
||||||
@ -64,9 +64,9 @@ void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
|
|||||||
size_t pos = 0;
|
size_t pos = 0;
|
||||||
size_t from_pos = 0;
|
size_t from_pos = 0;
|
||||||
for (int i = 0; i < cmds.size() && pos < total_length; ++i) {
|
for (int i = 0; i < cmds.size() && pos < total_length; ++i) {
|
||||||
memcpy(&(*literals)[pos], data + from_pos, cmds[i].insert_length_);
|
memcpy(&(*literals)[pos], data + from_pos, cmds[i].insert_len_);
|
||||||
pos += cmds[i].insert_length_;
|
pos += cmds[i].insert_len_;
|
||||||
from_pos += cmds[i].insert_length_ + cmds[i].copy_length_;
|
from_pos += cmds[i].insert_len_ + cmds[i].copy_len_;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -75,9 +75,9 @@ void CopyCommandsToByteArray(const std::vector<Command>& cmds,
|
|||||||
std::vector<uint8_t>* distance_prefixes) {
|
std::vector<uint8_t>* distance_prefixes) {
|
||||||
for (int i = 0; i < cmds.size(); ++i) {
|
for (int i = 0; i < cmds.size(); ++i) {
|
||||||
const Command& cmd = cmds[i];
|
const Command& cmd = cmds[i];
|
||||||
insert_and_copy_codes->push_back(cmd.command_prefix_);
|
insert_and_copy_codes->push_back(cmd.cmd_prefix_);
|
||||||
if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
|
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
|
||||||
distance_prefixes->push_back(cmd.distance_prefix_);
|
distance_prefixes->push_back(cmd.dist_prefix_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -376,7 +376,7 @@ void SplitBlockByTotalLength(const std::vector<Command>& all_commands,
|
|||||||
std::vector<Command> cur_block;
|
std::vector<Command> cur_block;
|
||||||
for (int i = 0; i < all_commands.size(); ++i) {
|
for (int i = 0; i < all_commands.size(); ++i) {
|
||||||
const Command& cmd = all_commands[i];
|
const Command& cmd = all_commands[i];
|
||||||
int cmd_length = cmd.insert_length_ + cmd.copy_length_;
|
int cmd_length = cmd.insert_len_ + cmd.copy_len_;
|
||||||
if (total_length > length_limit) {
|
if (total_length > length_limit) {
|
||||||
blocks->push_back(cur_block);
|
blocks->push_back(cur_block);
|
||||||
cur_block.clear();
|
cur_block.clear();
|
||||||
|
142
enc/command.h
142
enc/command.h
@ -18,31 +18,131 @@
|
|||||||
#define BROTLI_ENC_COMMAND_H_
|
#define BROTLI_ENC_COMMAND_H_
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include "./fast_log.h"
|
||||||
|
|
||||||
namespace brotli {
|
namespace brotli {
|
||||||
|
|
||||||
// Command holds a sequence of literals and a backward reference copy.
|
static inline void GetDistCode(int distance_code,
|
||||||
class Command {
|
uint16_t* code, uint32_t* extra) {
|
||||||
public:
|
distance_code -= 1;
|
||||||
// distance_code_ is initialized to 17 because it refers to the distance
|
if (distance_code < 16) {
|
||||||
// code of a backward distance of 1, this way the last insert-only command
|
*code = distance_code;
|
||||||
// won't use the last-distance short code, and accordingly distance_prefix_ is
|
*extra = 0;
|
||||||
// set to 16
|
} else {
|
||||||
Command() : insert_length_(0), copy_length_(0), copy_length_code_(0),
|
distance_code -= 12;
|
||||||
copy_distance_(0), distance_code_(17),
|
int numextra = Log2FloorNonZero(distance_code) - 1;
|
||||||
distance_prefix_(16), command_prefix_(0),
|
int prefix = distance_code >> numextra;
|
||||||
distance_extra_bits_(0), distance_extra_bits_value_(0) {}
|
*code = 12 + 2 * numextra + prefix;
|
||||||
|
*extra = (numextra << 24) | (distance_code - (prefix << numextra));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t insert_length_;
|
static int insbase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66,
|
||||||
uint32_t copy_length_;
|
98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
|
||||||
uint32_t copy_length_code_;
|
static int insextra[] = { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5,
|
||||||
uint32_t copy_distance_;
|
5, 6, 7, 8, 9, 10, 12, 14, 24 };
|
||||||
// Values <= 16 are short codes, values > 16 are distances shifted by 16.
|
static int copybase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38,
|
||||||
uint32_t distance_code_;
|
54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
|
||||||
uint16_t distance_prefix_;
|
static int copyextra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4,
|
||||||
uint16_t command_prefix_;
|
4, 5, 5, 6, 7, 8, 9, 10, 24 };
|
||||||
int distance_extra_bits_;
|
|
||||||
uint32_t distance_extra_bits_value_;
|
static inline int GetInsertLengthCode(int insertlen) {
|
||||||
|
if (insertlen < 6) {
|
||||||
|
return insertlen;
|
||||||
|
} else if (insertlen < 130) {
|
||||||
|
insertlen -= 2;
|
||||||
|
int nbits = Log2FloorNonZero(insertlen) - 1;
|
||||||
|
return (nbits << 1) + (insertlen >> nbits) + 2;
|
||||||
|
} else if (insertlen < 2114) {
|
||||||
|
return Log2FloorNonZero(insertlen - 66) + 10;
|
||||||
|
} else if (insertlen < 6210) {
|
||||||
|
return 21;
|
||||||
|
} else if (insertlen < 22594) {
|
||||||
|
return 22;
|
||||||
|
} else {
|
||||||
|
return 23;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int GetCopyLengthCode(int copylen) {
|
||||||
|
if (copylen < 10) {
|
||||||
|
return copylen - 2;
|
||||||
|
} else if (copylen < 134) {
|
||||||
|
copylen -= 6;
|
||||||
|
int nbits = Log2FloorNonZero(copylen) - 1;
|
||||||
|
return (nbits << 1) + (copylen >> nbits) + 4;
|
||||||
|
} else if (copylen < 2118) {
|
||||||
|
return Log2FloorNonZero(copylen - 70) + 12;
|
||||||
|
} else {
|
||||||
|
return 23;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int CombineLengthCodes(
|
||||||
|
int inscode, int copycode, int distancecode) {
|
||||||
|
int bits64 = (copycode & 0x7u) | ((inscode & 0x7u) << 3);
|
||||||
|
if (distancecode == 0 && inscode < 8 && copycode < 16) {
|
||||||
|
return (copycode < 8) ? bits64 : (bits64 | 64);
|
||||||
|
} else {
|
||||||
|
// "To convert an insert-and-copy length code to an insert length code and
|
||||||
|
// a copy length code, the following table can be used"
|
||||||
|
static const int cells[9] = { 2, 3, 6, 4, 5, 8, 7, 9, 10 };
|
||||||
|
return (cells[(copycode >> 3) + 3 * (inscode >> 3)] << 6) | bits64;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void GetLengthCode(int insertlen, int copylen, int distancecode,
|
||||||
|
uint16_t* code, uint64_t* extra) {
|
||||||
|
int inscode = GetInsertLengthCode(insertlen);
|
||||||
|
int copycode = GetCopyLengthCode(copylen);
|
||||||
|
uint64_t insnumextra = insextra[inscode];
|
||||||
|
uint64_t numextra = insnumextra + copyextra[copycode];
|
||||||
|
uint64_t insextraval = insertlen - insbase[inscode];
|
||||||
|
uint64_t copyextraval = copylen - copybase[copycode];
|
||||||
|
*code = CombineLengthCodes(inscode, copycode, distancecode);
|
||||||
|
*extra = (numextra << 48) | (copyextraval << insnumextra) | insextraval;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Command {
|
||||||
|
Command() {}
|
||||||
|
|
||||||
|
Command(int insertlen, int copylen, int copylen_code, int distance_code)
|
||||||
|
: insert_len_(insertlen), copy_len_(copylen) {
|
||||||
|
GetDistCode(distance_code, &dist_prefix_, &dist_extra_);
|
||||||
|
GetLengthCode(insertlen, copylen_code, dist_prefix_,
|
||||||
|
&cmd_prefix_, &cmd_extra_);
|
||||||
|
}
|
||||||
|
|
||||||
|
Command(int insertlen)
|
||||||
|
: insert_len_(insertlen), copy_len_(0), dist_prefix_(16), dist_extra_(0) {
|
||||||
|
GetLengthCode(insertlen, 4, dist_prefix_, &cmd_prefix_, &cmd_extra_);
|
||||||
|
}
|
||||||
|
|
||||||
|
int DistanceCode() const {
|
||||||
|
if (dist_prefix_ < 16) {
|
||||||
|
return dist_prefix_ + 1;
|
||||||
|
}
|
||||||
|
int nbits = dist_extra_ >> 24;
|
||||||
|
int extra = dist_extra_ & 0xffffff;
|
||||||
|
int prefix = dist_prefix_ - 12 - 2 * nbits;
|
||||||
|
return (prefix << nbits) + extra + 13;
|
||||||
|
}
|
||||||
|
|
||||||
|
int DistanceContext() const {
|
||||||
|
int r = cmd_prefix_ >> 6;
|
||||||
|
int c = cmd_prefix_ & 7;
|
||||||
|
if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) {
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
int insert_len_;
|
||||||
|
int copy_len_;
|
||||||
|
uint16_t cmd_prefix_;
|
||||||
|
uint16_t dist_prefix_;
|
||||||
|
uint64_t cmd_extra_;
|
||||||
|
uint32_t dist_extra_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace brotli
|
} // namespace brotli
|
||||||
|
156
enc/encode.cc
156
enc/encode.cc
@ -185,105 +185,41 @@ void BuildAndStoreEntropyCodes(
|
|||||||
void EncodeCommand(const Command& cmd,
|
void EncodeCommand(const Command& cmd,
|
||||||
const EntropyCodeCommand& entropy,
|
const EntropyCodeCommand& entropy,
|
||||||
int* storage_ix, uint8_t* storage) {
|
int* storage_ix, uint8_t* storage) {
|
||||||
int code = cmd.command_prefix_;
|
int code = cmd.cmd_prefix_;
|
||||||
WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
|
WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
|
||||||
if (code >= 128) {
|
int nextra = cmd.cmd_extra_ >> 48;
|
||||||
code -= 128;
|
uint64_t extra = cmd.cmd_extra_ & 0xffffffffffffULL;
|
||||||
}
|
if (nextra > 0) {
|
||||||
int insert_extra_bits = InsertLengthExtraBits(code);
|
WriteBits(nextra, extra, storage_ix, storage);
|
||||||
uint64_t insert_extra_bits_val =
|
|
||||||
cmd.insert_length_ - InsertLengthOffset(code);
|
|
||||||
int copy_extra_bits = CopyLengthExtraBits(code);
|
|
||||||
uint64_t copy_extra_bits_val = cmd.copy_length_code_ - CopyLengthOffset(code);
|
|
||||||
if (insert_extra_bits > 0) {
|
|
||||||
WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage);
|
|
||||||
}
|
|
||||||
if (copy_extra_bits > 0) {
|
|
||||||
WriteBits(copy_extra_bits, copy_extra_bits_val, storage_ix, storage);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
|
void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
|
||||||
int* storage_ix, uint8_t* storage) {
|
int* storage_ix, uint8_t* storage) {
|
||||||
int code = cmd.distance_prefix_;
|
int code = cmd.dist_prefix_;
|
||||||
int extra_bits = cmd.distance_extra_bits_;
|
int extra_bits = cmd.dist_extra_ >> 24;
|
||||||
uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
|
uint64_t extra_bits_val = cmd.dist_extra_ & 0xffffff;
|
||||||
WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
|
WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
|
||||||
if (extra_bits > 0) {
|
if (extra_bits > 0) {
|
||||||
WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
|
WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ComputeDistanceShortCodes(std::vector<Command>* cmds,
|
void RecomputeDistancePrefixes(std::vector<Command>* cmds,
|
||||||
size_t pos,
|
int num_direct_distance_codes,
|
||||||
const size_t max_backward,
|
int distance_postfix_bits) {
|
||||||
int* dist_ringbuffer,
|
if (num_direct_distance_codes == 0 &&
|
||||||
size_t* ringbuffer_idx) {
|
distance_postfix_bits == 0) {
|
||||||
static const int kIndexOffset[16] = {
|
return;
|
||||||
3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
|
|
||||||
};
|
|
||||||
static const int kValueOffset[16] = {
|
|
||||||
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
|
|
||||||
};
|
|
||||||
for (int i = 0; i < cmds->size(); ++i) {
|
|
||||||
pos += (*cmds)[i].insert_length_;
|
|
||||||
size_t max_distance = std::min(pos, max_backward);
|
|
||||||
int cur_dist = (*cmds)[i].copy_distance_;
|
|
||||||
int dist_code = cur_dist + 16;
|
|
||||||
if (cur_dist <= max_distance) {
|
|
||||||
if (cur_dist == 0) break;
|
|
||||||
int limits[16] = { 0, 0, 0, 0,
|
|
||||||
6, 6, 11, 11,
|
|
||||||
11, 11, 11, 11,
|
|
||||||
12, 12, 12, 12 };
|
|
||||||
for (int k = 0; k < 16; ++k) {
|
|
||||||
// Only accept more popular choices.
|
|
||||||
if (cur_dist < limits[k]) {
|
|
||||||
// Typically unpopular ranges, don't replace a short distance
|
|
||||||
// with them.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
int comp = (dist_ringbuffer[(*ringbuffer_idx + kIndexOffset[k]) & 3] +
|
|
||||||
kValueOffset[k]);
|
|
||||||
if (cur_dist == comp) {
|
|
||||||
dist_code = k + 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (dist_code > 1) {
|
|
||||||
dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
|
|
||||||
++(*ringbuffer_idx);
|
|
||||||
}
|
|
||||||
pos += (*cmds)[i].copy_length_;
|
|
||||||
} else {
|
|
||||||
int word_idx = cur_dist - max_distance - 1;
|
|
||||||
const std::string word =
|
|
||||||
GetTransformedDictionaryWord((*cmds)[i].copy_length_code_, word_idx);
|
|
||||||
pos += word.size();
|
|
||||||
}
|
|
||||||
(*cmds)[i].distance_code_ = dist_code;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
void ComputeCommandPrefixes(std::vector<Command>* cmds,
|
|
||||||
int num_direct_distance_codes,
|
|
||||||
int distance_postfix_bits) {
|
|
||||||
for (int i = 0; i < cmds->size(); ++i) {
|
for (int i = 0; i < cmds->size(); ++i) {
|
||||||
Command* cmd = &(*cmds)[i];
|
Command* cmd = &(*cmds)[i];
|
||||||
cmd->command_prefix_ = CommandPrefix(cmd->insert_length_,
|
if (cmd->copy_len_ > 0 && cmd->cmd_prefix_ >= 128) {
|
||||||
cmd->copy_length_code_);
|
PrefixEncodeCopyDistance(cmd->DistanceCode(),
|
||||||
if (cmd->copy_length_code_ > 0) {
|
|
||||||
PrefixEncodeCopyDistance(cmd->distance_code_,
|
|
||||||
num_direct_distance_codes,
|
num_direct_distance_codes,
|
||||||
distance_postfix_bits,
|
distance_postfix_bits,
|
||||||
&cmd->distance_prefix_,
|
&cmd->dist_prefix_,
|
||||||
&cmd->distance_extra_bits_,
|
&cmd->dist_extra_);
|
||||||
&cmd->distance_extra_bits_value_);
|
|
||||||
}
|
|
||||||
if (cmd->command_prefix_ < 128 && cmd->distance_prefix_ == 0) {
|
|
||||||
cmd->distance_prefix_ = 0xffff;
|
|
||||||
} else {
|
|
||||||
cmd->command_prefix_ += 128;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -478,9 +414,9 @@ void BuildMetaBlock(const EncodingParams& params,
|
|||||||
if (cmds.empty()) {
|
if (cmds.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ComputeCommandPrefixes(&mb->cmds,
|
RecomputeDistancePrefixes(&mb->cmds,
|
||||||
mb->params.num_direct_distance_codes,
|
mb->params.num_direct_distance_codes,
|
||||||
mb->params.distance_postfix_bits);
|
mb->params.distance_postfix_bits);
|
||||||
SplitBlock(mb->cmds,
|
SplitBlock(mb->cmds,
|
||||||
&ringbuffer[pos & mask],
|
&ringbuffer[pos & mask],
|
||||||
&mb->literal_split,
|
&mb->literal_split,
|
||||||
@ -534,7 +470,7 @@ size_t MetaBlockLength(const std::vector<Command>& cmds) {
|
|||||||
size_t length = 0;
|
size_t length = 0;
|
||||||
for (int i = 0; i < cmds.size(); ++i) {
|
for (int i = 0; i < cmds.size(); ++i) {
|
||||||
const Command& cmd = cmds[i];
|
const Command& cmd = cmds[i];
|
||||||
length += cmd.insert_length_ + cmd.copy_length_;
|
length += cmd.insert_len_ + cmd.copy_len_;
|
||||||
}
|
}
|
||||||
return length;
|
return length;
|
||||||
}
|
}
|
||||||
@ -604,7 +540,7 @@ void StoreMetaBlock(const MetaBlock& mb,
|
|||||||
const Command& cmd = mb.cmds[i];
|
const Command& cmd = mb.cmds[i];
|
||||||
MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
|
MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
|
||||||
EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
|
EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
|
||||||
for (int j = 0; j < cmd.insert_length_; ++j) {
|
for (int j = 0; j < cmd.insert_len_; ++j) {
|
||||||
MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
|
MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
|
||||||
int histogram_idx = literal_it.type_;
|
int histogram_idx = literal_it.type_;
|
||||||
uint8_t prev_byte = *pos > 0 ? ringbuffer[(*pos - 1) & mask] : 0;
|
uint8_t prev_byte = *pos > 0 ? ringbuffer[(*pos - 1) & mask] : 0;
|
||||||
@ -619,15 +555,14 @@ void StoreMetaBlock(const MetaBlock& mb,
|
|||||||
storage_ix, storage);
|
storage_ix, storage);
|
||||||
++(*pos);
|
++(*pos);
|
||||||
}
|
}
|
||||||
if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
|
if (*pos < end_pos && cmd.cmd_prefix_ >= 128) {
|
||||||
MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
|
MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
|
||||||
int context = (distance_it.type_ << 2) +
|
int context = (distance_it.type_ << 2) + cmd.DistanceContext();
|
||||||
((cmd.copy_length_code_ > 4) ? 3 : cmd.copy_length_code_ - 2);
|
|
||||||
int histogram_index = mb.distance_context_map[context];
|
int histogram_index = mb.distance_context_map[context];
|
||||||
EncodeCopyDistance(cmd, distance_codes[histogram_index],
|
EncodeCopyDistance(cmd, distance_codes[histogram_index],
|
||||||
storage_ix, storage);
|
storage_ix, storage);
|
||||||
}
|
}
|
||||||
*pos += cmd.copy_length_;
|
*pos += cmd.copy_len_;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -635,20 +570,19 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
|
|||||||
: params_(params),
|
: params_(params),
|
||||||
window_bits_(kWindowBits),
|
window_bits_(kWindowBits),
|
||||||
hashers_(new Hashers()),
|
hashers_(new Hashers()),
|
||||||
dist_ringbuffer_idx_(0),
|
|
||||||
input_pos_(0),
|
input_pos_(0),
|
||||||
ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
|
ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
|
||||||
literal_cost_(1 << kRingBufferBits),
|
literal_cost_(1 << kRingBufferBits),
|
||||||
storage_ix_(0),
|
storage_ix_(0),
|
||||||
storage_(new uint8_t[2 << kMetaBlockSizeBits]) {
|
storage_(new uint8_t[2 << kMetaBlockSizeBits]) {
|
||||||
dist_ringbuffer_[0] = 16;
|
dist_cache_[0] = 4;
|
||||||
dist_ringbuffer_[1] = 15;
|
dist_cache_[1] = 11;
|
||||||
dist_ringbuffer_[2] = 11;
|
dist_cache_[2] = 15;
|
||||||
dist_ringbuffer_[3] = 4;
|
dist_cache_[3] = 16;
|
||||||
storage_[0] = 0;
|
storage_[0] = 0;
|
||||||
switch (params.mode) {
|
switch (params.mode) {
|
||||||
case BrotliParams::MODE_TEXT: hash_type_ = Hashers::HASH_15_8_4; break;
|
case BrotliParams::MODE_TEXT: hash_type_ = 8; break;
|
||||||
case BrotliParams::MODE_FONT: hash_type_ = Hashers::HASH_15_8_2; break;
|
case BrotliParams::MODE_FONT: hash_type_ = 9; break;
|
||||||
default: break;
|
default: break;
|
||||||
}
|
}
|
||||||
hashers_->Init(hash_type_);
|
hashers_->Init(hash_type_);
|
||||||
@ -701,7 +635,7 @@ void BrotliCompressor::WriteMetaBlock(const size_t input_size,
|
|||||||
uint8_t* encoded_buffer) {
|
uint8_t* encoded_buffer) {
|
||||||
static const double kMinUTF8Ratio = 0.75;
|
static const double kMinUTF8Ratio = 0.75;
|
||||||
bool utf8_mode = false;
|
bool utf8_mode = false;
|
||||||
std::vector<Command> commands;
|
std::vector<Command> commands((input_size + 1) >> 1);
|
||||||
if (input_size > 0) {
|
if (input_size > 0) {
|
||||||
ringbuffer_.Write(input_buffer, input_size);
|
ringbuffer_.Write(input_buffer, input_size);
|
||||||
utf8_mode = IsMostlyUTF8(
|
utf8_mode = IsMostlyUTF8(
|
||||||
@ -716,17 +650,26 @@ void BrotliCompressor::WriteMetaBlock(const size_t input_size,
|
|||||||
kRingBufferMask, kRingBufferMask,
|
kRingBufferMask, kRingBufferMask,
|
||||||
ringbuffer_.start(), &literal_cost_[0]);
|
ringbuffer_.start(), &literal_cost_[0]);
|
||||||
}
|
}
|
||||||
|
int last_insert_len = 0;
|
||||||
|
int num_commands = 0;
|
||||||
|
double base_min_score = 8.115;
|
||||||
CreateBackwardReferences(
|
CreateBackwardReferences(
|
||||||
input_size, input_pos_,
|
input_size, input_pos_,
|
||||||
ringbuffer_.start(),
|
ringbuffer_.start(), kRingBufferMask,
|
||||||
&literal_cost_[0],
|
&literal_cost_[0], kRingBufferMask,
|
||||||
kRingBufferMask, kMaxBackwardDistance,
|
kMaxBackwardDistance,
|
||||||
|
base_min_score,
|
||||||
|
9, // quality
|
||||||
hashers_.get(),
|
hashers_.get(),
|
||||||
hash_type_,
|
hash_type_,
|
||||||
&commands);
|
dist_cache_,
|
||||||
ComputeDistanceShortCodes(&commands, input_pos_, kMaxBackwardDistance,
|
&last_insert_len,
|
||||||
dist_ringbuffer_,
|
&commands[0],
|
||||||
&dist_ringbuffer_idx_);
|
&num_commands);
|
||||||
|
commands.resize(num_commands);
|
||||||
|
if (last_insert_len > 0) {
|
||||||
|
commands.push_back(Command(last_insert_len));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
EncodingParams params;
|
EncodingParams params;
|
||||||
params.num_direct_distance_codes =
|
params.num_direct_distance_codes =
|
||||||
@ -807,7 +750,6 @@ int BrotliCompressBuffer(BrotliParams params,
|
|||||||
*encoded_size += output_size;
|
*encoded_size += output_size;
|
||||||
max_output_size -= output_size;
|
max_output_size -= output_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,12 +66,11 @@ class BrotliCompressor {
|
|||||||
BrotliParams params_;
|
BrotliParams params_;
|
||||||
int window_bits_;
|
int window_bits_;
|
||||||
std::unique_ptr<Hashers> hashers_;
|
std::unique_ptr<Hashers> hashers_;
|
||||||
Hashers::Type hash_type_;
|
int hash_type_;
|
||||||
int dist_ringbuffer_[4];
|
|
||||||
size_t dist_ringbuffer_idx_;
|
|
||||||
size_t input_pos_;
|
size_t input_pos_;
|
||||||
RingBuffer ringbuffer_;
|
RingBuffer ringbuffer_;
|
||||||
std::vector<float> literal_cost_;
|
std::vector<float> literal_cost_;
|
||||||
|
int dist_cache_[4];
|
||||||
int storage_ix_;
|
int storage_ix_;
|
||||||
uint8_t* storage_;
|
uint8_t* storage_;
|
||||||
static StaticDictionary *static_dictionary_;
|
static StaticDictionary *static_dictionary_;
|
||||||
|
@ -46,6 +46,16 @@ inline int Log2Floor(uint32_t n) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns floor(log2(n)) for n > 0.
//
// The caller must not pass 0: the GCC/Clang path uses __builtin_clz, whose
// result is undefined for a zero argument (the portable fallback would
// return 0 in that case, so the two paths do not agree on n == 0).
static inline int Log2FloorNonZero(uint32_t n) {
#ifdef __GNUC__
  // For a non-zero 32-bit value, clz is in [0, 31], so 31 ^ clz == 31 - clz,
  // i.e. the index of the highest set bit.
  return 31 ^ __builtin_clz(n);
#else
  // Count how many times n can be halved before it reaches zero.
  int result = 0;
  while (n >>= 1) result++;
  return result;
#endif
}
|
||||||
|
|
||||||
// Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0.
|
// Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0.
|
||||||
inline int Log2Ceiling(uint32_t n) {
|
inline int Log2Ceiling(uint32_t n) {
|
||||||
int floor = Log2Floor(n);
|
int floor = Log2Floor(n);
|
||||||
|
418
enc/hash.h
418
enc/hash.h
@ -31,10 +31,18 @@
|
|||||||
#include "./fast_log.h"
|
#include "./fast_log.h"
|
||||||
#include "./find_match_length.h"
|
#include "./find_match_length.h"
|
||||||
#include "./port.h"
|
#include "./port.h"
|
||||||
|
#include "./prefix.h"
|
||||||
#include "./static_dict.h"
|
#include "./static_dict.h"
|
||||||
|
|
||||||
namespace brotli {
|
namespace brotli {
|
||||||
|
|
||||||
|
// Lookup tables for the 16 candidate distances derived from the distance
// cache. Candidate i is
//   distance_cache[kDistanceCacheIndex[i]] + kDistanceCacheOffset[i]
// so entries 0..3 are the four cached distances themselves, entries 4..9
// are cache entry 0 adjusted by -1, +1, -2, +2, -3, +3, and entries 10..15
// are cache entry 1 adjusted by the same offsets.
static const int kDistanceCacheIndex[] = {
  0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
};
static const int kDistanceCacheOffset[] = {
  0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
};
|
||||||
|
|
||||||
// kHashMul32 multiplier has these properties:
|
// kHashMul32 multiplier has these properties:
|
||||||
// * The multiplier must be odd. Otherwise we may lose the highest bit.
|
// * The multiplier must be odd. Otherwise we may lose the highest bit.
|
||||||
// * No long streaks of 1s or 0s.
|
// * No long streaks of 1s or 0s.
|
||||||
@ -75,59 +83,194 @@ inline uint32_t Hash(const uint8_t *data) {
|
|||||||
// when it is not much longer and the bit cost for encoding it is more
|
// when it is not much longer and the bit cost for encoding it is more
|
||||||
// than the saved literals.
|
// than the saved literals.
|
||||||
inline double BackwardReferenceScore(double average_cost,
|
inline double BackwardReferenceScore(double average_cost,
|
||||||
double start_cost4,
|
|
||||||
double start_cost3,
|
|
||||||
double start_cost2,
|
|
||||||
int copy_length,
|
int copy_length,
|
||||||
int backward_reference_offset) {
|
int backward_reference_offset) {
|
||||||
double retval = 0;
|
return (copy_length * average_cost -
|
||||||
switch (copy_length) {
|
1.20 * Log2Floor(backward_reference_offset));
|
||||||
case 2: retval = start_cost2; break;
|
|
||||||
case 3: retval = start_cost3; break;
|
|
||||||
default: retval = start_cost4 + (copy_length - 4) * average_cost; break;
|
|
||||||
}
|
|
||||||
retval -= 1.20 * Log2Floor(backward_reference_offset);
|
|
||||||
return retval;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline double BackwardReferenceScoreUsingLastDistance(double average_cost,
|
inline double BackwardReferenceScoreUsingLastDistance(double average_cost,
|
||||||
double start_cost4,
|
|
||||||
double start_cost3,
|
|
||||||
double start_cost2,
|
|
||||||
int copy_length,
|
int copy_length,
|
||||||
int distance_short_code) {
|
int distance_short_code) {
|
||||||
double retval = 0;
|
|
||||||
switch (copy_length) {
|
|
||||||
case 2: retval = start_cost2; break;
|
|
||||||
case 3: retval = start_cost3; break;
|
|
||||||
default: retval = start_cost4 + (copy_length - 4) * average_cost; break;
|
|
||||||
}
|
|
||||||
static const double kDistanceShortCodeBitCost[16] = {
|
static const double kDistanceShortCodeBitCost[16] = {
|
||||||
-0.6, 0.95, 1.17, 1.27,
|
-0.6, 0.95, 1.17, 1.27,
|
||||||
0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
|
0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
|
||||||
1.05, 1.05, 1.15, 1.15, 1.25, 1.25
|
1.05, 1.05, 1.15, 1.15, 1.25, 1.25
|
||||||
};
|
};
|
||||||
retval -= kDistanceShortCodeBitCost[distance_short_code];
|
return (average_cost * copy_length
|
||||||
return retval;
|
- kDistanceShortCodeBitCost[distance_short_code]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A (forgetful) hash table to the data seen by the compressor, to
|
||||||
|
// help create backward references to previous data.
|
||||||
|
//
|
||||||
|
// This is a hash map of fixed size (kBucketSize). Starting from the
|
||||||
|
// given index, kBucketSweep buckets are used to store values of a key.
|
||||||
|
template <int kBucketBits, int kBucketSweep>
|
||||||
|
class HashLongestMatchQuickly {
|
||||||
|
public:
|
||||||
|
HashLongestMatchQuickly() {
|
||||||
|
Reset();
|
||||||
|
}
|
||||||
|
void Reset() {
|
||||||
|
// It is not strictly necessary to fill this buffer here, but
|
||||||
|
// not filling will make the results of the compression stochastic
|
||||||
|
// (but correct). This is because random data would cause the
|
||||||
|
// system to find accidentally good backward references here and there.
|
||||||
|
std::fill(&buckets_[0],
|
||||||
|
&buckets_[sizeof(buckets_) / sizeof(buckets_[0])],
|
||||||
|
0);
|
||||||
|
}
|
||||||
|
// Look at 4 bytes at data.
|
||||||
|
// Compute a hash from these, and store the value somewhere within
|
||||||
|
// [ix .. ix+3].
|
||||||
|
inline void Store(const uint8_t *data, const int ix) {
|
||||||
|
const uint32_t key = Hash<kBucketBits, 4>(data);
|
||||||
|
// Wiggle the value with the bucket sweep range.
|
||||||
|
const uint32_t off = (static_cast<uint32_t>(ix) >> 3) % kBucketSweep;
|
||||||
|
buckets_[key + off] = ix;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store hashes for a range of data.
|
||||||
|
void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
|
||||||
|
for (int p = 0; p < len; ++p) {
|
||||||
|
Store(&data[p & mask], startix + p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HasStaticDictionary() const { return false; }
|
||||||
|
|
||||||
|
// Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
|
||||||
|
// up to the length of max_length.
|
||||||
|
//
|
||||||
|
// Does not look for matches longer than max_length.
|
||||||
|
// Does not look for matches further away than max_backward.
|
||||||
|
// Writes the best found match length into best_len_out.
|
||||||
|
// Writes the index (&data[index]) of the start of the best match into
|
||||||
|
// best_distance_out.
|
||||||
|
inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,
|
||||||
|
const size_t ring_buffer_mask,
|
||||||
|
const float* __restrict literal_cost,
|
||||||
|
const size_t literal_cost_mask,
|
||||||
|
const double average_cost,
|
||||||
|
const int* __restrict distance_cache,
|
||||||
|
const uint32_t cur_ix,
|
||||||
|
const uint32_t max_length,
|
||||||
|
const uint32_t max_backward,
|
||||||
|
int * __restrict best_len_out,
|
||||||
|
int * __restrict best_len_code_out,
|
||||||
|
int * __restrict best_distance_out,
|
||||||
|
double* __restrict best_score_out) {
|
||||||
|
const int best_len_in = *best_len_out;
|
||||||
|
const int cur_ix_masked = cur_ix & ring_buffer_mask;
|
||||||
|
int compare_char = ring_buffer[cur_ix_masked + best_len_in];
|
||||||
|
double best_score = *best_score_out;
|
||||||
|
int best_len = best_len_in;
|
||||||
|
int backward = distance_cache[0];
|
||||||
|
size_t prev_ix = cur_ix - backward;
|
||||||
|
bool match_found = false;
|
||||||
|
if (prev_ix < cur_ix) {
|
||||||
|
prev_ix &= ring_buffer_mask;
|
||||||
|
if (compare_char == ring_buffer[prev_ix + best_len]) {
|
||||||
|
int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
|
||||||
|
&ring_buffer[cur_ix_masked],
|
||||||
|
max_length);
|
||||||
|
if (len >= 4) {
|
||||||
|
best_score = BackwardReferenceScoreUsingLastDistance(average_cost,
|
||||||
|
len, 0);
|
||||||
|
best_len = len;
|
||||||
|
*best_len_out = len;
|
||||||
|
*best_len_code_out = len;
|
||||||
|
*best_distance_out = backward;
|
||||||
|
*best_score_out = best_score;
|
||||||
|
compare_char = ring_buffer[cur_ix_masked + best_len];
|
||||||
|
if (kBucketSweep == 1) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
match_found = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const uint32_t key = Hash<kBucketBits, 4>(&ring_buffer[cur_ix_masked]);
|
||||||
|
if (kBucketSweep == 1) {
|
||||||
|
// Only one to look for, don't bother to prepare for a loop.
|
||||||
|
prev_ix = buckets_[key];
|
||||||
|
backward = cur_ix - prev_ix;
|
||||||
|
prev_ix &= ring_buffer_mask;
|
||||||
|
if (compare_char != ring_buffer[prev_ix + best_len_in]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
|
||||||
|
&ring_buffer[cur_ix_masked],
|
||||||
|
max_length);
|
||||||
|
if (len >= 4) {
|
||||||
|
*best_len_out = len;
|
||||||
|
*best_len_code_out = len;
|
||||||
|
*best_distance_out = backward;
|
||||||
|
*best_score_out = BackwardReferenceScore(average_cost, len, backward);
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
uint32_t *bucket = buckets_ + key;
|
||||||
|
prev_ix = *bucket++;
|
||||||
|
for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
|
||||||
|
const int backward = cur_ix - prev_ix;
|
||||||
|
prev_ix &= ring_buffer_mask;
|
||||||
|
if (compare_char != ring_buffer[prev_ix + best_len]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const int len =
|
||||||
|
FindMatchLengthWithLimit(&ring_buffer[prev_ix],
|
||||||
|
&ring_buffer[cur_ix_masked],
|
||||||
|
max_length);
|
||||||
|
if (len >= 4) {
|
||||||
|
const double score = BackwardReferenceScore(average_cost,
|
||||||
|
len, backward);
|
||||||
|
if (best_score < score) {
|
||||||
|
best_score = score;
|
||||||
|
best_len = len;
|
||||||
|
*best_len_out = best_len;
|
||||||
|
*best_len_code_out = best_len;
|
||||||
|
*best_distance_out = backward;
|
||||||
|
*best_score_out = score;
|
||||||
|
compare_char = ring_buffer[cur_ix_masked + best_len];
|
||||||
|
match_found = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return match_found;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static const uint32_t kBucketSize = 1 << kBucketBits;
|
||||||
|
uint32_t buckets_[kBucketSize + kBucketSweep];
|
||||||
|
};
|
||||||
|
|
||||||
// A (forgetful) hash table to the data seen by the compressor, to
|
// A (forgetful) hash table to the data seen by the compressor, to
|
||||||
// help create backward references to previous data.
|
// help create backward references to previous data.
|
||||||
//
|
//
|
||||||
// This is a hash map of fixed size (kBucketSize) to a ring buffer of
|
// This is a hash map of fixed size (kBucketSize) to a ring buffer of
|
||||||
// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
|
// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
|
||||||
// index positions of the given hash key in the compressed data.
|
// index positions of the given hash key in the compressed data.
|
||||||
template <int kBucketBits, int kBlockBits, int kMinLength>
|
template <int kBucketBits,
|
||||||
|
int kBlockBits,
|
||||||
|
int kMinLength,
|
||||||
|
int kNumLastDistancesToCheck,
|
||||||
|
bool kUseCostModel,
|
||||||
|
bool kUseDictionary>
|
||||||
class HashLongestMatch {
|
class HashLongestMatch {
|
||||||
public:
|
public:
|
||||||
HashLongestMatch()
|
HashLongestMatch() : static_dict_(NULL) {
|
||||||
: last_distance1_(4),
|
|
||||||
last_distance2_(11),
|
|
||||||
last_distance3_(15),
|
|
||||||
last_distance4_(16),
|
|
||||||
insert_length_(0),
|
|
||||||
average_cost_(5.4),
|
|
||||||
static_dict_(NULL) {
|
|
||||||
Reset();
|
Reset();
|
||||||
}
|
}
|
||||||
void Reset() {
|
void Reset() {
|
||||||
@ -166,73 +309,58 @@ class HashLongestMatch {
|
|||||||
// into best_distance_out.
|
// into best_distance_out.
|
||||||
// Write the score of the best match into best_score_out.
|
// Write the score of the best match into best_score_out.
|
||||||
bool FindLongestMatch(const uint8_t * __restrict data,
|
bool FindLongestMatch(const uint8_t * __restrict data,
|
||||||
const float * __restrict literal_cost,
|
|
||||||
const size_t ring_buffer_mask,
|
const size_t ring_buffer_mask,
|
||||||
|
const float * __restrict literal_cost,
|
||||||
|
const size_t literal_cost_mask,
|
||||||
|
const double average_cost,
|
||||||
|
const int* __restrict distance_cache,
|
||||||
const uint32_t cur_ix,
|
const uint32_t cur_ix,
|
||||||
uint32_t max_length,
|
uint32_t max_length,
|
||||||
const uint32_t max_backward,
|
const uint32_t max_backward,
|
||||||
size_t * __restrict best_len_out,
|
int * __restrict best_len_out,
|
||||||
size_t * __restrict best_len_code_out,
|
int * __restrict best_len_code_out,
|
||||||
size_t * __restrict best_distance_out,
|
int * __restrict best_distance_out,
|
||||||
double * __restrict best_score_out,
|
double * __restrict best_score_out) {
|
||||||
bool * __restrict in_dictionary) {
|
|
||||||
*in_dictionary = true;
|
|
||||||
*best_len_code_out = 0;
|
*best_len_code_out = 0;
|
||||||
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
||||||
const double start_cost4 = literal_cost == NULL ? 20 :
|
double start_cost_diff4 = 0.0;
|
||||||
literal_cost[cur_ix_masked] +
|
double start_cost_diff3 = 0.0;
|
||||||
literal_cost[(cur_ix + 1) & ring_buffer_mask] +
|
double start_cost_diff2 = 0.0;
|
||||||
literal_cost[(cur_ix + 2) & ring_buffer_mask] +
|
if (kUseCostModel) {
|
||||||
literal_cost[(cur_ix + 3) & ring_buffer_mask];
|
start_cost_diff4 = literal_cost == NULL ? 0 :
|
||||||
const double start_cost3 = literal_cost == NULL ? 15 :
|
literal_cost[cur_ix & literal_cost_mask] +
|
||||||
literal_cost[cur_ix_masked] +
|
literal_cost[(cur_ix + 1) & literal_cost_mask] +
|
||||||
literal_cost[(cur_ix + 1) & ring_buffer_mask] +
|
literal_cost[(cur_ix + 2) & literal_cost_mask] +
|
||||||
literal_cost[(cur_ix + 2) & ring_buffer_mask] + 0.3;
|
literal_cost[(cur_ix + 3) & literal_cost_mask] -
|
||||||
double start_cost2 = literal_cost == NULL ? 10 :
|
4 * average_cost;
|
||||||
literal_cost[cur_ix_masked] +
|
start_cost_diff3 = literal_cost == NULL ? 0 :
|
||||||
literal_cost[(cur_ix + 1) & ring_buffer_mask] + 1.2;
|
literal_cost[cur_ix & literal_cost_mask] +
|
||||||
|
literal_cost[(cur_ix + 1) & literal_cost_mask] +
|
||||||
|
literal_cost[(cur_ix + 2) & literal_cost_mask] -
|
||||||
|
3 * average_cost + 0.3;
|
||||||
|
start_cost_diff2 = literal_cost == NULL ? 0 :
|
||||||
|
literal_cost[cur_ix & literal_cost_mask] +
|
||||||
|
literal_cost[(cur_ix + 1) & literal_cost_mask] -
|
||||||
|
2 * average_cost + 1.2;
|
||||||
|
}
|
||||||
bool match_found = false;
|
bool match_found = false;
|
||||||
// Don't accept a short copy from far away.
|
// Don't accept a short copy from far away.
|
||||||
double best_score = 8.115;
|
double best_score = *best_score_out;
|
||||||
if (insert_length_ < 8) {
|
int best_len = *best_len_out;
|
||||||
double cost_diff[8] =
|
|
||||||
{ 0.1, 0.038, 0.019, 0.013, 0.001, 0.001, 0.001, 0.001 };
|
|
||||||
best_score += cost_diff[insert_length_];
|
|
||||||
}
|
|
||||||
size_t best_len = *best_len_out;
|
|
||||||
*best_len_out = 0;
|
*best_len_out = 0;
|
||||||
size_t best_ix = 1;
|
|
||||||
// Try last distance first.
|
// Try last distance first.
|
||||||
for (int i = 0; i < 16; ++i) {
|
for (int i = 0; i < kNumLastDistancesToCheck; ++i) {
|
||||||
size_t prev_ix = cur_ix;
|
const int idx = kDistanceCacheIndex[i];
|
||||||
switch(i) {
|
const int backward = distance_cache[idx] + kDistanceCacheOffset[i];
|
||||||
case 0: prev_ix -= last_distance1_; break;
|
size_t prev_ix = cur_ix - backward;
|
||||||
case 1: prev_ix -= last_distance2_; break;
|
|
||||||
case 2: prev_ix -= last_distance3_; break;
|
|
||||||
case 3: prev_ix -= last_distance4_; break;
|
|
||||||
|
|
||||||
case 4: prev_ix -= last_distance1_ - 1; break;
|
|
||||||
case 5: prev_ix -= last_distance1_ + 1; break;
|
|
||||||
case 6: prev_ix -= last_distance1_ - 2; break;
|
|
||||||
case 7: prev_ix -= last_distance1_ + 2; break;
|
|
||||||
case 8: prev_ix -= last_distance1_ - 3; break;
|
|
||||||
case 9: prev_ix -= last_distance1_ + 3; break;
|
|
||||||
|
|
||||||
case 10: prev_ix -= last_distance2_ - 1; break;
|
|
||||||
case 11: prev_ix -= last_distance2_ + 1; break;
|
|
||||||
case 12: prev_ix -= last_distance2_ - 2; break;
|
|
||||||
case 13: prev_ix -= last_distance2_ + 2; break;
|
|
||||||
case 14: prev_ix -= last_distance2_ - 3; break;
|
|
||||||
case 15: prev_ix -= last_distance2_ + 3; break;
|
|
||||||
}
|
|
||||||
if (prev_ix >= cur_ix) {
|
if (prev_ix >= cur_ix) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const size_t backward = cur_ix - prev_ix;
|
|
||||||
if (PREDICT_FALSE(backward > max_backward)) {
|
if (PREDICT_FALSE(backward > max_backward)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
prev_ix &= ring_buffer_mask;
|
prev_ix &= ring_buffer_mask;
|
||||||
|
|
||||||
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
||||||
prev_ix + best_len > ring_buffer_mask ||
|
prev_ix + best_len > ring_buffer_mask ||
|
||||||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
||||||
@ -246,29 +374,30 @@ class HashLongestMatch {
|
|||||||
// Comparing for >= 2 does not change the semantics, but just saves for
|
// Comparing for >= 2 does not change the semantics, but just saves for
|
||||||
// a few unnecessary binary logarithms in backward reference score,
|
// a few unnecessary binary logarithms in backward reference score,
|
||||||
// since we are not interested in such short matches.
|
// since we are not interested in such short matches.
|
||||||
const double score = BackwardReferenceScoreUsingLastDistance(
|
double score = BackwardReferenceScoreUsingLastDistance(
|
||||||
average_cost_,
|
average_cost, len, i);
|
||||||
start_cost4,
|
if (kUseCostModel) {
|
||||||
start_cost3,
|
switch (len) {
|
||||||
start_cost2,
|
case 2: score += start_cost_diff2; break;
|
||||||
len, i);
|
case 3: score += start_cost_diff3; break;
|
||||||
|
default: score += start_cost_diff4;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (best_score < score) {
|
if (best_score < score) {
|
||||||
best_score = score;
|
best_score = score;
|
||||||
best_len = len;
|
best_len = len;
|
||||||
best_ix = backward;
|
|
||||||
*best_len_out = best_len;
|
*best_len_out = best_len;
|
||||||
*best_len_code_out = best_len;
|
*best_len_code_out = best_len;
|
||||||
*best_distance_out = best_ix;
|
*best_distance_out = backward;
|
||||||
*best_score_out = best_score;
|
*best_score_out = best_score;
|
||||||
match_found = true;
|
match_found = true;
|
||||||
*in_dictionary = backward > max_backward;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (kMinLength == 2) {
|
if (kMinLength == 2) {
|
||||||
int stop = int(cur_ix) - 64;
|
int stop = int(cur_ix) - 64;
|
||||||
if (stop < 0) { stop = 0; }
|
if (stop < 0) { stop = 0; }
|
||||||
start_cost2 -= 1.0;
|
start_cost_diff2 -= 1.0;
|
||||||
for (int i = cur_ix - 1; i > stop; --i) {
|
for (int i = cur_ix - 1; i > stop; --i) {
|
||||||
size_t prev_ix = i;
|
size_t prev_ix = i;
|
||||||
const size_t backward = cur_ix - prev_ix;
|
const size_t backward = cur_ix - prev_ix;
|
||||||
@ -281,15 +410,15 @@ class HashLongestMatch {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
int len = 2;
|
int len = 2;
|
||||||
const double score = start_cost2 - 2.3 * Log2Floor(backward);
|
const double score =
|
||||||
|
average_cost * 2 - 2.3 * Log2Floor(backward) + start_cost_diff2;
|
||||||
|
|
||||||
if (best_score < score) {
|
if (best_score < score) {
|
||||||
best_score = score;
|
best_score = score;
|
||||||
best_len = len;
|
best_len = len;
|
||||||
best_ix = backward;
|
|
||||||
*best_len_out = best_len;
|
*best_len_out = best_len;
|
||||||
*best_len_code_out = best_len;
|
*best_len_code_out = best_len;
|
||||||
*best_distance_out = best_ix;
|
*best_distance_out = backward;
|
||||||
match_found = true;
|
match_found = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -317,26 +446,24 @@ class HashLongestMatch {
|
|||||||
// Comparing for >= 3 does not change the semantics, but just saves
|
// Comparing for >= 3 does not change the semantics, but just saves
|
||||||
// for a few unnecessary binary logarithms in backward reference
|
// for a few unnecessary binary logarithms in backward reference
|
||||||
// score, since we are not interested in such short matches.
|
// score, since we are not interested in such short matches.
|
||||||
const double score = BackwardReferenceScore(average_cost_,
|
double score = BackwardReferenceScore(average_cost,
|
||||||
start_cost4,
|
len, backward);
|
||||||
start_cost3,
|
if (kUseCostModel) {
|
||||||
start_cost2,
|
score += (len >= 4) ? start_cost_diff4 : start_cost_diff3;
|
||||||
len, backward);
|
}
|
||||||
if (best_score < score) {
|
if (best_score < score) {
|
||||||
best_score = score;
|
best_score = score;
|
||||||
best_len = len;
|
best_len = len;
|
||||||
best_ix = backward;
|
|
||||||
*best_len_out = best_len;
|
*best_len_out = best_len;
|
||||||
*best_len_code_out = best_len;
|
*best_len_code_out = best_len;
|
||||||
*best_distance_out = best_ix;
|
*best_distance_out = backward;
|
||||||
*best_score_out = best_score;
|
*best_score_out = best_score;
|
||||||
match_found = true;
|
match_found = true;
|
||||||
*in_dictionary = false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (static_dict_ != NULL) {
|
if (kUseDictionary && static_dict_ != NULL) {
|
||||||
// We decide based on first 4 bytes how many bytes to test for.
|
// We decide based on first 4 bytes how many bytes to test for.
|
||||||
int prefix = BROTLI_UNALIGNED_LOAD32(&data[cur_ix_masked]);
|
int prefix = BROTLI_UNALIGNED_LOAD32(&data[cur_ix_masked]);
|
||||||
int maxlen = static_dict_->GetLength(prefix);
|
int maxlen = static_dict_->GetLength(prefix);
|
||||||
@ -347,21 +474,17 @@ class HashLongestMatch {
|
|||||||
int word_id;
|
int word_id;
|
||||||
if (static_dict_->Get(snippet, &copy_len_code, &word_id)) {
|
if (static_dict_->Get(snippet, &copy_len_code, &word_id)) {
|
||||||
const size_t backward = max_backward + word_id + 1;
|
const size_t backward = max_backward + word_id + 1;
|
||||||
const double score = BackwardReferenceScore(average_cost_,
|
const double score = (BackwardReferenceScore(average_cost,
|
||||||
start_cost4,
|
len, backward) +
|
||||||
start_cost3,
|
start_cost_diff4);
|
||||||
start_cost2,
|
|
||||||
len, backward);
|
|
||||||
if (best_score < score) {
|
if (best_score < score) {
|
||||||
best_score = score;
|
best_score = score;
|
||||||
best_len = len;
|
best_len = len;
|
||||||
best_ix = backward;
|
|
||||||
*best_len_out = best_len;
|
*best_len_out = best_len;
|
||||||
*best_len_code_out = copy_len_code;
|
*best_len_code_out = copy_len_code;
|
||||||
*best_distance_out = best_ix;
|
*best_distance_out = backward;
|
||||||
*best_score_out = best_score;
|
*best_score_out = best_score;
|
||||||
match_found = true;
|
match_found = true;
|
||||||
*in_dictionary = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -369,21 +492,6 @@ class HashLongestMatch {
|
|||||||
return match_found;
|
return match_found;
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_last_distance(int v) {
|
|
||||||
if (last_distance1_ != v) {
|
|
||||||
last_distance4_ = last_distance3_;
|
|
||||||
last_distance3_ = last_distance2_;
|
|
||||||
last_distance2_ = last_distance1_;
|
|
||||||
last_distance1_ = v;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int last_distance() const { return last_distance1_; }
|
|
||||||
|
|
||||||
void set_insert_length(int v) { insert_length_ = v; }
|
|
||||||
|
|
||||||
void set_average_cost(double v) { average_cost_ = v; }
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Number of hash buckets.
|
// Number of hash buckets.
|
||||||
static const uint32_t kBucketSize = 1 << kBucketBits;
|
static const uint32_t kBucketSize = 1 << kBucketBits;
|
||||||
@ -401,46 +509,48 @@ class HashLongestMatch {
|
|||||||
// Buckets containing kBlockSize of backward references.
|
// Buckets containing kBlockSize of backward references.
|
||||||
int buckets_[kBucketSize][kBlockSize];
|
int buckets_[kBucketSize][kBlockSize];
|
||||||
|
|
||||||
int last_distance1_;
|
|
||||||
int last_distance2_;
|
|
||||||
int last_distance3_;
|
|
||||||
int last_distance4_;
|
|
||||||
|
|
||||||
// Cost adjustment for how many literals we are planning to insert
|
|
||||||
// anyway.
|
|
||||||
int insert_length_;
|
|
||||||
|
|
||||||
double average_cost_;
|
|
||||||
|
|
||||||
const StaticDictionary *static_dict_;
|
const StaticDictionary *static_dict_;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Hashers {
|
struct Hashers {
|
||||||
enum Type {
|
typedef HashLongestMatchQuickly<16, 1> H1;
|
||||||
HASH_15_8_4 = 0,
|
typedef HashLongestMatchQuickly<17, 4> H2;
|
||||||
HASH_15_8_2 = 1,
|
typedef HashLongestMatch<14, 4, 4, 4, false, false> H3;
|
||||||
};
|
typedef HashLongestMatch<14, 5, 4, 4, false, false> H4;
|
||||||
|
typedef HashLongestMatch<15, 6, 4, 10, false, false> H5;
|
||||||
|
typedef HashLongestMatch<15, 7, 4, 10, false, false> H6;
|
||||||
|
typedef HashLongestMatch<15, 8, 4, 16, true, false> H7;
|
||||||
|
typedef HashLongestMatch<15, 8, 4, 16, true, true> H8;
|
||||||
|
typedef HashLongestMatch<15, 8, 2, 16, true, false> H9;
|
||||||
|
|
||||||
void Init(Type type) {
|
void Init(int type) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case HASH_15_8_4:
|
case 1: hash_h1.reset(new H1); break;
|
||||||
hash_15_8_4.reset(new HashLongestMatch<15, 8, 4>());
|
case 2: hash_h2.reset(new H2); break;
|
||||||
break;
|
case 3: hash_h3.reset(new H3); break;
|
||||||
case HASH_15_8_2:
|
case 4: hash_h4.reset(new H4); break;
|
||||||
hash_15_8_2.reset(new HashLongestMatch<15, 8, 2>());
|
case 5: hash_h5.reset(new H5); break;
|
||||||
break;
|
case 6: hash_h6.reset(new H6); break;
|
||||||
default:
|
case 7: hash_h7.reset(new H7); break;
|
||||||
break;
|
case 8: hash_h8.reset(new H8); break;
|
||||||
|
case 9: hash_h9.reset(new H9); break;
|
||||||
|
default: break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetStaticDictionary(const StaticDictionary *dict) {
|
void SetStaticDictionary(const StaticDictionary *dict) {
|
||||||
if (hash_15_8_4.get() != NULL) hash_15_8_4->SetStaticDictionary(dict);
|
if (hash_h8.get() != NULL) hash_h8->SetStaticDictionary(dict);
|
||||||
if (hash_15_8_2.get() != NULL) hash_15_8_2->SetStaticDictionary(dict);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<HashLongestMatch<15, 8, 4> > hash_15_8_4;
|
std::unique_ptr<H1> hash_h1;
|
||||||
std::unique_ptr<HashLongestMatch<15, 8, 2> > hash_15_8_2;
|
std::unique_ptr<H2> hash_h2;
|
||||||
|
std::unique_ptr<H3> hash_h3;
|
||||||
|
std::unique_ptr<H4> hash_h4;
|
||||||
|
std::unique_ptr<H5> hash_h5;
|
||||||
|
std::unique_ptr<H6> hash_h6;
|
||||||
|
std::unique_ptr<H7> hash_h7;
|
||||||
|
std::unique_ptr<H8> hash_h8;
|
||||||
|
std::unique_ptr<H9> hash_h9;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace brotli
|
} // namespace brotli
|
||||||
|
@ -45,8 +45,8 @@ void BuildHistograms(
|
|||||||
const Command &cmd = cmds[i];
|
const Command &cmd = cmds[i];
|
||||||
insert_and_copy_it.Next();
|
insert_and_copy_it.Next();
|
||||||
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
|
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
|
||||||
cmd.command_prefix_);
|
cmd.cmd_prefix_);
|
||||||
for (int j = 0; j < cmd.insert_length_; ++j) {
|
for (int j = 0; j < cmd.insert_len_; ++j) {
|
||||||
literal_it.Next();
|
literal_it.Next();
|
||||||
uint8_t prev_byte = pos > 0 ? ringbuffer[(pos - 1) & mask] : 0;
|
uint8_t prev_byte = pos > 0 ? ringbuffer[(pos - 1) & mask] : 0;
|
||||||
uint8_t prev_byte2 = pos > 1 ? ringbuffer[(pos - 2) & mask] : 0;
|
uint8_t prev_byte2 = pos > 1 ? ringbuffer[(pos - 2) & mask] : 0;
|
||||||
@ -55,12 +55,12 @@ void BuildHistograms(
|
|||||||
(*literal_histograms)[context].Add(ringbuffer[pos & mask]);
|
(*literal_histograms)[context].Add(ringbuffer[pos & mask]);
|
||||||
++pos;
|
++pos;
|
||||||
}
|
}
|
||||||
pos += cmd.copy_length_;
|
pos += cmd.copy_len_;
|
||||||
if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
|
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
|
||||||
dist_it.Next();
|
dist_it.Next();
|
||||||
int context = (dist_it.type_ << kDistanceContextBits) +
|
int context = (dist_it.type_ << kDistanceContextBits) +
|
||||||
((cmd.copy_length_code_ > 4) ? 3 : cmd.copy_length_code_ - 2);
|
cmd.DistanceContext();
|
||||||
(*copy_dist_histograms)[context].Add(cmd.distance_prefix_);
|
(*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -77,7 +77,7 @@ void BuildLiteralHistogramsForBlockType(
|
|||||||
BlockSplitIterator literal_it(literal_split);
|
BlockSplitIterator literal_it(literal_split);
|
||||||
for (int i = 0; i < cmds.size(); ++i) {
|
for (int i = 0; i < cmds.size(); ++i) {
|
||||||
const Command &cmd = cmds[i];
|
const Command &cmd = cmds[i];
|
||||||
for (int j = 0; j < cmd.insert_length_; ++j) {
|
for (int j = 0; j < cmd.insert_len_; ++j) {
|
||||||
literal_it.Next();
|
literal_it.Next();
|
||||||
if (literal_it.type_ == block_type) {
|
if (literal_it.type_ == block_type) {
|
||||||
uint8_t prev_byte = pos > 0 ? ringbuffer[(pos - 1) & mask] : 0;
|
uint8_t prev_byte = pos > 0 ? ringbuffer[(pos - 1) & mask] : 0;
|
||||||
@ -87,7 +87,7 @@ void BuildLiteralHistogramsForBlockType(
|
|||||||
}
|
}
|
||||||
++pos;
|
++pos;
|
||||||
}
|
}
|
||||||
pos += cmd.copy_length_;
|
pos += cmd.copy_len_;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
131
enc/prefix.cc
131
enc/prefix.cc
@ -1,131 +0,0 @@
|
|||||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
|
||||||
//
|
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
// you may not use this file except in compliance with the License.
|
|
||||||
// You may obtain a copy of the License at
|
|
||||||
//
|
|
||||||
// http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
//
|
|
||||||
// Unless required by applicable law or agreed to in writing, software
|
|
||||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
// See the License for the specific language governing permissions and
|
|
||||||
// limitations under the License.
|
|
||||||
//
|
|
||||||
// Functions for encoding of integers into prefix codes the amount of extra
|
|
||||||
// bits, and the actual values of the extra bits.
|
|
||||||
|
|
||||||
#include "./prefix.h"
|
|
||||||
|
|
||||||
#include "./fast_log.h"
|
|
||||||
|
|
||||||
namespace brotli {
|
|
||||||
|
|
||||||
static const PrefixCodeRange kInsertLengthPrefixCode[kNumInsertLenPrefixes] = {
|
|
||||||
{ 0, 0}, { 1, 0}, { 2, 0}, { 3, 0},
|
|
||||||
{ 4, 0}, { 5, 0}, { 6, 1}, { 8, 1},
|
|
||||||
{ 10, 2}, { 14, 2}, { 18, 3}, { 26, 3},
|
|
||||||
{ 34, 4}, { 50, 4}, { 66, 5}, { 98, 5},
|
|
||||||
{ 130, 6}, { 194, 7}, { 322, 8}, { 578, 9},
|
|
||||||
{1090, 10}, {2114, 12}, {6210, 14}, {22594, 24},
|
|
||||||
};
|
|
||||||
|
|
||||||
static const PrefixCodeRange kCopyLengthPrefixCode[kNumCopyLenPrefixes] = {
|
|
||||||
{ 2, 0}, { 3, 0}, { 4, 0}, { 5, 0},
|
|
||||||
{ 6, 0}, { 7, 0}, { 8, 0}, { 9, 0},
|
|
||||||
{ 10, 1}, { 12, 1}, { 14, 2}, { 18, 2},
|
|
||||||
{ 22, 3}, { 30, 3}, { 38, 4}, { 54, 4},
|
|
||||||
{ 70, 5}, { 102, 5}, { 134, 6}, { 198, 7},
|
|
||||||
{326, 8}, { 582, 9}, {1094, 10}, {2118, 24},
|
|
||||||
};
|
|
||||||
|
|
||||||
static const int kInsertAndCopyRangeLut[9] = {
|
|
||||||
0, 1, 4, 2, 3, 6, 5, 7, 8,
|
|
||||||
};
|
|
||||||
|
|
||||||
static const int kInsertRangeLut[9] = {
|
|
||||||
0, 0, 1, 1, 0, 2, 1, 2, 2,
|
|
||||||
};
|
|
||||||
|
|
||||||
static const int kCopyRangeLut[9] = {
|
|
||||||
0, 1, 0, 1, 2, 0, 2, 1, 2,
|
|
||||||
};
|
|
||||||
|
|
||||||
int InsertLengthPrefix(int length) {
|
|
||||||
for (int i = 0; i < kNumInsertLenPrefixes; ++i) {
|
|
||||||
const PrefixCodeRange& range = kInsertLengthPrefixCode[i];
|
|
||||||
if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int CopyLengthPrefix(int length) {
|
|
||||||
for (int i = 0; i < kNumCopyLenPrefixes; ++i) {
|
|
||||||
const PrefixCodeRange& range = kCopyLengthPrefixCode[i];
|
|
||||||
if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int CommandPrefix(int insert_length, int copy_length) {
|
|
||||||
if (copy_length == 0) {
|
|
||||||
copy_length = 4;
|
|
||||||
}
|
|
||||||
int insert_prefix = InsertLengthPrefix(insert_length);
|
|
||||||
int copy_prefix = CopyLengthPrefix(copy_length);
|
|
||||||
int range_idx = 3 * (insert_prefix >> 3) + (copy_prefix >> 3);
|
|
||||||
return ((kInsertAndCopyRangeLut[range_idx] << 6) +
|
|
||||||
((insert_prefix & 7) << 3) + (copy_prefix & 7));
|
|
||||||
}
|
|
||||||
|
|
||||||
int InsertLengthExtraBits(int code) {
|
|
||||||
int insert_code = (kInsertRangeLut[code >> 6] << 3) + ((code >> 3) & 7);
|
|
||||||
return kInsertLengthPrefixCode[insert_code].nbits;
|
|
||||||
}
|
|
||||||
|
|
||||||
int InsertLengthOffset(int code) {
|
|
||||||
int insert_code = (kInsertRangeLut[code >> 6] << 3) + ((code >> 3) & 7);
|
|
||||||
return kInsertLengthPrefixCode[insert_code].offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
int CopyLengthExtraBits(int code) {
|
|
||||||
int copy_code = (kCopyRangeLut[code >> 6] << 3) + (code & 7);
|
|
||||||
return kCopyLengthPrefixCode[copy_code].nbits;
|
|
||||||
}
|
|
||||||
|
|
||||||
int CopyLengthOffset(int code) {
|
|
||||||
int copy_code = (kCopyRangeLut[code >> 6] << 3) + (code & 7);
|
|
||||||
return kCopyLengthPrefixCode[copy_code].offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
void PrefixEncodeCopyDistance(int distance_code,
|
|
||||||
int num_direct_codes,
|
|
||||||
int postfix_bits,
|
|
||||||
uint16_t* code,
|
|
||||||
int* nbits,
|
|
||||||
uint32_t* extra_bits) {
|
|
||||||
distance_code -= 1;
|
|
||||||
if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
|
|
||||||
*code = distance_code;
|
|
||||||
*nbits = 0;
|
|
||||||
*extra_bits = 0;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
distance_code -= kNumDistanceShortCodes + num_direct_codes;
|
|
||||||
distance_code += (1 << (postfix_bits + 2));
|
|
||||||
int bucket = Log2Floor(distance_code) - 1;
|
|
||||||
int postfix_mask = (1 << postfix_bits) - 1;
|
|
||||||
int postfix = distance_code & postfix_mask;
|
|
||||||
int prefix = (distance_code >> bucket) & 1;
|
|
||||||
int offset = (2 + prefix) << bucket;
|
|
||||||
*nbits = bucket - postfix_bits;
|
|
||||||
*code = kNumDistanceShortCodes + num_direct_codes +
|
|
||||||
((2 * (*nbits - 1) + prefix) << postfix_bits) + postfix;
|
|
||||||
*extra_bits = (distance_code - offset) >> postfix_bits;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace brotli
|
|
38
enc/prefix.h
38
enc/prefix.h
@ -19,6 +19,7 @@
|
|||||||
#define BROTLI_ENC_PREFIX_H_
|
#define BROTLI_ENC_PREFIX_H_
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include "./fast_log.h"
|
||||||
|
|
||||||
namespace brotli {
|
namespace brotli {
|
||||||
|
|
||||||
@ -36,19 +37,6 @@ struct PrefixCodeRange {
|
|||||||
int nbits;
|
int nbits;
|
||||||
};
|
};
|
||||||
|
|
||||||
int CommandPrefix(int insert_length, int copy_length);
|
|
||||||
int InsertLengthExtraBits(int prefix);
|
|
||||||
int InsertLengthOffset(int prefix);
|
|
||||||
int CopyLengthExtraBits(int prefix);
|
|
||||||
int CopyLengthOffset(int prefix);
|
|
||||||
|
|
||||||
void PrefixEncodeCopyDistance(int distance_code,
|
|
||||||
int num_direct_codes,
|
|
||||||
int shift_bits,
|
|
||||||
uint16_t* prefix,
|
|
||||||
int* nbits,
|
|
||||||
uint32_t* extra_bits);
|
|
||||||
|
|
||||||
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
|
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
|
||||||
{ 1, 2}, { 5, 2}, { 9, 2}, { 13, 2},
|
{ 1, 2}, { 5, 2}, { 9, 2}, { 13, 2},
|
||||||
{ 17, 3}, { 25, 3}, { 33, 3}, { 41, 3},
|
{ 17, 3}, { 25, 3}, { 33, 3}, { 41, 3},
|
||||||
@ -69,6 +57,30 @@ inline void GetBlockLengthPrefixCode(int len,
|
|||||||
*extra = len - kBlockLengthPrefixCode[*code].offset;
|
*extra = len - kBlockLengthPrefixCode[*code].offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void PrefixEncodeCopyDistance(int distance_code,
|
||||||
|
int num_direct_codes,
|
||||||
|
int postfix_bits,
|
||||||
|
uint16_t* code,
|
||||||
|
uint32_t* extra_bits) {
|
||||||
|
distance_code -= 1;
|
||||||
|
if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
|
||||||
|
*code = distance_code;
|
||||||
|
*extra_bits = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
distance_code -= kNumDistanceShortCodes + num_direct_codes;
|
||||||
|
distance_code += (1 << (postfix_bits + 2));
|
||||||
|
int bucket = Log2Floor(distance_code) - 1;
|
||||||
|
int postfix_mask = (1 << postfix_bits) - 1;
|
||||||
|
int postfix = distance_code & postfix_mask;
|
||||||
|
int prefix = (distance_code >> bucket) & 1;
|
||||||
|
int offset = (2 + prefix) << bucket;
|
||||||
|
int nbits = bucket - postfix_bits;
|
||||||
|
*code = kNumDistanceShortCodes + num_direct_codes +
|
||||||
|
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix;
|
||||||
|
*extra_bits = (nbits << 24) | ((distance_code - offset) >> postfix_bits);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace brotli
|
} // namespace brotli
|
||||||
|
|
||||||
#endif // BROTLI_ENC_PREFIX_H_
|
#endif // BROTLI_ENC_PREFIX_H_
|
||||||
|
@ -26,10 +26,10 @@ class RingBuffer {
|
|||||||
public:
|
public:
|
||||||
RingBuffer(int window_bits, int tail_bits)
|
RingBuffer(int window_bits, int tail_bits)
|
||||||
: window_bits_(window_bits), tail_bits_(tail_bits), pos_(0) {
|
: window_bits_(window_bits), tail_bits_(tail_bits), pos_(0) {
|
||||||
static const int kSlackForThreeByteHashingEverywhere = 2;
|
static const int kSlackForFourByteHashingEverywhere = 3;
|
||||||
const int buflen = (1 << window_bits_) + (1 << tail_bits_);
|
const int buflen = (1 << window_bits_) + (1 << tail_bits_);
|
||||||
buffer_ = new uint8_t[buflen + kSlackForThreeByteHashingEverywhere];
|
buffer_ = new uint8_t[buflen + kSlackForFourByteHashingEverywhere];
|
||||||
for (int i = 0; i < kSlackForThreeByteHashingEverywhere; ++i) {
|
for (int i = 0; i < kSlackForFourByteHashingEverywhere; ++i) {
|
||||||
buffer_[buflen + i] = 0;
|
buffer_[buflen + i] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -54,6 +54,7 @@ inline void WriteBits(int n_bits,
|
|||||||
#ifdef BIT_WRITER_DEBUG
|
#ifdef BIT_WRITER_DEBUG
|
||||||
printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos);
|
printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos);
|
||||||
#endif
|
#endif
|
||||||
|
assert(bits < 1ULL << n_bits);
|
||||||
#ifdef IS_LITTLE_ENDIAN
|
#ifdef IS_LITTLE_ENDIAN
|
||||||
// This branch of the code can write up to 56 bits at a time,
|
// This branch of the code can write up to 56 bits at a time,
|
||||||
// 7 bits are lost by being perhaps already in *p and at least
|
// 7 bits are lost by being perhaps already in *p and at least
|
||||||
|
Loading…
Reference in New Issue
Block a user