mirror of
https://github.com/google/brotli.git
synced 2025-01-01 04:40:08 +00:00
Merge pull request #97 from szabadka/master
Faster encoding for low quality settings.
This commit is contained in:
commit
fa2c6df4f5
@ -118,7 +118,7 @@ void CreateBackwardReferences(size_t num_bytes,
|
||||
int delayed_backward_references_in_row = 0;
|
||||
for (;;) {
|
||||
--max_length;
|
||||
int best_len_2 = quality < 4 ? std::min(best_len - 1, max_length) : 0;
|
||||
int best_len_2 = quality < 5 ? std::min(best_len - 1, max_length) : 0;
|
||||
int best_len_code_2 = 0;
|
||||
int best_dist_2 = 0;
|
||||
double best_score_2 = min_score;
|
||||
@ -182,7 +182,7 @@ void CreateBackwardReferences(size_t num_bytes,
|
||||
distance_code = 3;
|
||||
} else if (best_dist == dist_cache[3]) {
|
||||
distance_code = 4;
|
||||
} else if (quality > 1 && best_dist >= 6) {
|
||||
} else if (quality > 3 && best_dist >= 6) {
|
||||
for (int k = 4; k < kNumDistanceShortCodes; ++k) {
|
||||
int idx = kDistanceCacheIndex[k];
|
||||
int candidate = dist_cache[idx] + kDistanceCacheOffset[k];
|
||||
@ -356,19 +356,33 @@ void CreateBackwardReferences(size_t num_bytes,
|
||||
commands, num_commands, num_literals);
|
||||
break;
|
||||
case 8:
|
||||
CreateBackwardReferences<Hashers::H8, true, true>(
|
||||
CreateBackwardReferences<Hashers::H8, false, false>(
|
||||
num_bytes, position, ringbuffer, ringbuffer_mask,
|
||||
literal_cost, literal_cost_mask, max_backward_limit, base_min_score,
|
||||
quality, hashers->hash_h8.get(), dist_cache, last_insert_len,
|
||||
commands, num_commands, num_literals);
|
||||
break;
|
||||
case 9:
|
||||
CreateBackwardReferences<Hashers::H9, true, false>(
|
||||
CreateBackwardReferences<Hashers::H9, false, false>(
|
||||
num_bytes, position, ringbuffer, ringbuffer_mask,
|
||||
literal_cost, literal_cost_mask, max_backward_limit, base_min_score,
|
||||
quality, hashers->hash_h9.get(), dist_cache, last_insert_len,
|
||||
commands, num_commands, num_literals);
|
||||
break;
|
||||
case 10:
|
||||
CreateBackwardReferences<Hashers::H11Text, true, true>(
|
||||
num_bytes, position, ringbuffer, ringbuffer_mask,
|
||||
literal_cost, literal_cost_mask, max_backward_limit, base_min_score,
|
||||
quality, hashers->hash_h11_text.get(), dist_cache, last_insert_len,
|
||||
commands, num_commands, num_literals);
|
||||
break;
|
||||
case 11:
|
||||
CreateBackwardReferences<Hashers::H11Font, true, false>(
|
||||
num_bytes, position, ringbuffer, ringbuffer_mask,
|
||||
literal_cost, literal_cost_mask, max_backward_limit, base_min_score,
|
||||
quality, hashers->hash_h11_font.get(), dist_cache, last_insert_len,
|
||||
commands, num_commands, num_literals);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -38,6 +38,9 @@
|
||||
namespace brotli {
|
||||
|
||||
static const double kMinUTF8Ratio = 0.75;
|
||||
// Quality threshold for the non-trivial meta-block path. Below this value the
// constructor defaults lgblock to 14 instead of 16 and WriteMetaBlockInternal
// stores the block with StoreMetaBlockTrivial; at or above it WriteBrotliData
// may keep delaying output regardless of the kMaxNumDelayedSymbols cap.
static const int kMinQualityForBlockSplit = 4;
|
||||
// Quality threshold for literal context modeling. Below this value
// DecideOverLiteralContextModeling returns immediately without touching its
// output parameters.
static const int kMinQualityForContextModeling = 5;
|
||||
// Quality threshold at or above which WriteMetaBlockInternal calls
// OptimizeHistograms on the meta-block.
static const int kMinQualityForOptimizeHistograms = 4;
|
||||
|
||||
int ParseAsUTF8(int* symbol, const uint8_t* input, int size) {
|
||||
// ASCII
|
||||
@ -149,7 +152,7 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
|
||||
params_.lgwin = kMaxWindowBits;
|
||||
}
|
||||
if (params_.lgblock == 0) {
|
||||
params_.lgblock = params_.quality == 1 ? 14 : 16;
|
||||
params_.lgblock = params_.quality < kMinQualityForBlockSplit ? 14 : 16;
|
||||
if (params_.quality >= 9 && params_.lgwin > params_.lgblock) {
|
||||
params_.lgblock = std::min(21, params_.lgwin);
|
||||
}
|
||||
@ -199,19 +202,10 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
|
||||
dist_cache_[3] = 16;
|
||||
|
||||
// Initialize hashers.
|
||||
switch (params_.quality) {
|
||||
case 0:
|
||||
case 1: hash_type_ = 1; break;
|
||||
case 2:
|
||||
case 3: hash_type_ = 2; break;
|
||||
case 4: hash_type_ = 3; break;
|
||||
case 5:
|
||||
case 6: hash_type_ = 4; break;
|
||||
case 7: hash_type_ = 5; break;
|
||||
case 8: hash_type_ = 6; break;
|
||||
case 9: hash_type_ = 7; break;
|
||||
default: // quality > 9
|
||||
hash_type_ = (params_.mode == BrotliParams::MODE_TEXT) ? 8 : 9;
|
||||
if (params_.quality <= 9) {
|
||||
hash_type_ = params_.quality;
|
||||
} else {
|
||||
hash_type_ = (params_.mode == BrotliParams::MODE_TEXT) ? 10 : 11;
|
||||
}
|
||||
hashers_->Init(hash_type_);
|
||||
if (params_.mode == BrotliParams::MODE_TEXT &&
|
||||
@ -330,7 +324,7 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
|
||||
// literals and commands.
|
||||
static const int kMaxNumDelayedSymbols = 0x2fff;
|
||||
if (!is_last && !force_flush &&
|
||||
(params_.quality > 1 ||
|
||||
(params_.quality >= kMinQualityForBlockSplit ||
|
||||
(num_literals_ + num_commands_ < kMaxNumDelayedSymbols)) &&
|
||||
num_commands_ + (input_block_size() >> 1) < cmd_buffer_size_ &&
|
||||
input_pos_ + input_block_size() <= last_flush_pos_ + mask + 1) {
|
||||
@ -359,7 +353,7 @@ void DecideOverLiteralContextModeling(const uint8_t* input,
|
||||
int* literal_context_mode,
|
||||
int* num_literal_contexts,
|
||||
const int** literal_context_map) {
|
||||
if (quality <= 3 || length < 64) {
|
||||
if (quality < kMinQualityForContextModeling || length < 64) {
|
||||
return;
|
||||
}
|
||||
// Simple heuristics to guess if the data is UTF8 or not. The goal is to
|
||||
@ -453,7 +447,7 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
||||
num_direct_distance_codes,
|
||||
distance_postfix_bits);
|
||||
}
|
||||
if (params_.quality == 1) {
|
||||
if (params_.quality < kMinQualityForBlockSplit) {
|
||||
if (!StoreMetaBlockTrivial(data, last_flush_pos_, bytes, mask, is_last,
|
||||
commands_.get(), num_commands_,
|
||||
&storage_ix,
|
||||
@ -492,7 +486,7 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
||||
params_.enable_context_modeling,
|
||||
&mb);
|
||||
}
|
||||
if (params_.quality >= 3) {
|
||||
if (params_.quality >= kMinQualityForOptimizeHistograms) {
|
||||
OptimizeHistograms(num_direct_distance_codes,
|
||||
distance_postfix_bits,
|
||||
&mb);
|
||||
|
97
enc/hash.h
97
enc/hash.h
@ -107,7 +107,7 @@ inline double BackwardReferenceScoreUsingLastDistance(double average_cost,
|
||||
//
|
||||
// This is a hash map of fixed size (kBucketSize). Starting from the
|
||||
// given index, kBucketSweep buckets are used to store values of a key.
|
||||
template <int kBucketBits, int kBucketSweep>
|
||||
template <int kBucketBits, int kBucketSweep, bool kUseDictionary>
|
||||
class HashLongestMatchQuickly {
|
||||
public:
|
||||
HashLongestMatchQuickly() {
|
||||
@ -216,8 +216,6 @@ class HashLongestMatchQuickly {
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = BackwardReferenceScore(average_cost, len, backward);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
uint32_t *bucket = buckets_ + key;
|
||||
@ -250,38 +248,39 @@ class HashLongestMatchQuickly {
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
|
||||
++num_dict_lookups_;
|
||||
const uint32_t key = Hash<14, 4>(&ring_buffer[cur_ix_masked]) << 1;
|
||||
const uint16_t v = kStaticDictionaryHash[key];
|
||||
if (v > 0) {
|
||||
const int len = v & 31;
|
||||
const int dist = v >> 5;
|
||||
const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
|
||||
if (len <= max_length) {
|
||||
const int matchlen =
|
||||
FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
|
||||
&kBrotliDictionary[offset], len);
|
||||
if (matchlen == len) {
|
||||
const size_t backward = max_backward + dist + 1;
|
||||
const double score = BackwardReferenceScore(average_cost,
|
||||
len, backward);
|
||||
if (best_score < score) {
|
||||
++num_dict_matches_;
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
*best_len_out = best_len;
|
||||
*best_len_code_out = best_len;
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = best_score;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (kUseDictionary && !match_found &&
|
||||
num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
|
||||
++num_dict_lookups_;
|
||||
const uint32_t key = Hash<14, 4>(&ring_buffer[cur_ix_masked]) << 1;
|
||||
const uint16_t v = kStaticDictionaryHash[key];
|
||||
if (v > 0) {
|
||||
const int len = v & 31;
|
||||
const int dist = v >> 5;
|
||||
const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
|
||||
if (len <= max_length) {
|
||||
const int matchlen =
|
||||
FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
|
||||
&kBrotliDictionary[offset], len);
|
||||
if (matchlen == len) {
|
||||
const size_t backward = max_backward + dist + 1;
|
||||
const double score = BackwardReferenceScore(average_cost,
|
||||
len, backward);
|
||||
if (best_score < score) {
|
||||
++num_dict_matches_;
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
*best_len_out = best_len;
|
||||
*best_len_code_out = best_len;
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = best_score;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return match_found;
|
||||
}
|
||||
return match_found;
|
||||
}
|
||||
|
||||
private:
|
||||
@ -363,19 +362,19 @@ class HashLongestMatch {
|
||||
double start_cost_diff4 = 0.0;
|
||||
double start_cost_diff3 = 0.0;
|
||||
double start_cost_diff2 = 0.0;
|
||||
if (kUseCostModel) {
|
||||
start_cost_diff4 = literal_cost == NULL ? 0 :
|
||||
if (kUseCostModel && literal_cost != NULL) {
|
||||
start_cost_diff4 =
|
||||
literal_cost[cur_ix & literal_cost_mask] +
|
||||
literal_cost[(cur_ix + 1) & literal_cost_mask] +
|
||||
literal_cost[(cur_ix + 2) & literal_cost_mask] +
|
||||
literal_cost[(cur_ix + 3) & literal_cost_mask] -
|
||||
4 * average_cost;
|
||||
start_cost_diff3 = literal_cost == NULL ? 0 :
|
||||
start_cost_diff3 =
|
||||
literal_cost[cur_ix & literal_cost_mask] +
|
||||
literal_cost[(cur_ix + 1) & literal_cost_mask] +
|
||||
literal_cost[(cur_ix + 2) & literal_cost_mask] -
|
||||
3 * average_cost + 0.3;
|
||||
start_cost_diff2 = literal_cost == NULL ? 0 :
|
||||
start_cost_diff2 =
|
||||
literal_cost[cur_ix & literal_cost_mask] +
|
||||
literal_cost[(cur_ix + 1) & literal_cost_mask] -
|
||||
2 * average_cost + 1.2;
|
||||
@ -457,6 +456,7 @@ class HashLongestMatch {
|
||||
*best_len_code_out = best_len;
|
||||
*best_distance_out = backward;
|
||||
match_found = true;
|
||||
break; // The score can never get better since backward increases.
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -589,15 +589,20 @@ class HashLongestMatch {
|
||||
};
|
||||
|
||||
struct Hashers {
|
||||
typedef HashLongestMatchQuickly<16, 1> H1;
|
||||
typedef HashLongestMatchQuickly<17, 4> H2;
|
||||
typedef HashLongestMatch<14, 4, 4, 4, false, false> H3;
|
||||
typedef HashLongestMatch<14, 5, 4, 4, false, false> H4;
|
||||
typedef HashLongestMatch<15, 6, 4, 10, false, false> H5;
|
||||
typedef HashLongestMatch<15, 7, 4, 10, false, false> H6;
|
||||
typedef HashLongestMatch<15, 8, 4, 16, false, false> H7;
|
||||
typedef HashLongestMatch<15, 8, 4, 16, true, true> H8;
|
||||
typedef HashLongestMatch<15, 8, 2, 16, true, false> H9;
|
||||
// For kBucketSweep == 1, enabling the dictionary lookup makes compression
|
||||
// a little faster (0.5% - 1%) and it compresses 0.15% better on small text
|
||||
// and html inputs.
|
||||
typedef HashLongestMatchQuickly<16, 1, true> H1;
|
||||
typedef HashLongestMatchQuickly<16, 2, false> H2;
|
||||
typedef HashLongestMatchQuickly<16, 4, false> H3;
|
||||
typedef HashLongestMatchQuickly<17, 4, true> H4;
|
||||
typedef HashLongestMatch<14, 4, 4, 4, false, false> H5;
|
||||
typedef HashLongestMatch<14, 5, 4, 4, false, false> H6;
|
||||
typedef HashLongestMatch<15, 6, 4, 10, false, false> H7;
|
||||
typedef HashLongestMatch<15, 7, 4, 10, false, false> H8;
|
||||
typedef HashLongestMatch<15, 8, 4, 16, false, false> H9;
|
||||
typedef HashLongestMatch<15, 8, 4, 16, true, true> H11Text;
|
||||
typedef HashLongestMatch<15, 8, 2, 16, true, false> H11Font;
|
||||
|
||||
void Init(int type) {
|
||||
switch (type) {
|
||||
@ -610,12 +615,14 @@ struct Hashers {
|
||||
case 7: hash_h7.reset(new H7); break;
|
||||
case 8: hash_h8.reset(new H8); break;
|
||||
case 9: hash_h9.reset(new H9); break;
|
||||
case 10: hash_h11_text.reset(new H11Text); break;
|
||||
case 11: hash_h11_font.reset(new H11Font); break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
void SetStaticDictionary(const StaticDictionary *dict) {
|
||||
if (hash_h8.get() != NULL) hash_h8->SetStaticDictionary(dict);
|
||||
if (hash_h11_text.get() != NULL) hash_h11_text->SetStaticDictionary(dict);
|
||||
}
|
||||
|
||||
std::unique_ptr<H1> hash_h1;
|
||||
@ -627,6 +634,8 @@ struct Hashers {
|
||||
std::unique_ptr<H7> hash_h7;
|
||||
std::unique_ptr<H8> hash_h8;
|
||||
std::unique_ptr<H9> hash_h9;
|
||||
std::unique_ptr<H11Text> hash_h11_text;
|
||||
std::unique_ptr<H11Font> hash_h11_font;
|
||||
};
|
||||
|
||||
} // namespace brotli
|
||||
|
Loading…
Reference in New Issue
Block a user