update brotli encoder with latest improvements

This commit is contained in:
Lode Vandevenne 2015-08-28 16:09:23 +02:00
parent db71549ac5
commit 6511d6b016
10 changed files with 152 additions and 65 deletions

View File

@ -112,7 +112,7 @@ class ZopfliCostModel {
uint64_t copynumextra = copyextra[copycode];
uint16_t dist_symbol;
uint32_t distextra;
GetDistCode(dist_code, &dist_symbol, &distextra);
PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
uint32_t distnumextra = distextra >> 24;
double result = insnumextra + copynumextra + distnumextra;
@ -517,7 +517,7 @@ void CreateBackwardReferences(size_t num_bytes,
// Minimum score to accept a backward reference.
const int kMinScore = 4.0;
while (i + 3 < i_end) {
while (i + Hasher::kHashTypeLength - 1 < i_end) {
int max_length = i_end - i;
size_t max_distance = std::min(i + i_diff, max_backward_limit);
int best_len = 0;

View File

@ -17,6 +17,7 @@
#ifndef BROTLI_ENC_BIT_COST_H_
#define BROTLI_ENC_BIT_COST_H_
#include <stdint.h>
#include "./entropy_encode.h"
@ -24,7 +25,8 @@
namespace brotli {
static inline double BitsEntropy(const int *population, int size) {
static inline double ShannonEntropy(const int *population, int size,
int *total) {
int sum = 0;
double retval = 0;
const int *population_end = population + size;
@ -42,6 +44,13 @@ static inline double BitsEntropy(const int *population, int size) {
retval -= p * FastLog2(p);
}
if (sum) retval += sum * FastLog2(sum);
*total = sum;
return retval;
}
static inline double BitsEntropy(const int *population, int size) {
int sum;
double retval = ShannonEntropy(population, size, &sum);
if (retval < sum) {
// At least one bit per literal is needed.
retval = sum;
@ -49,6 +58,7 @@ static inline double BitsEntropy(const int *population, int size) {
return retval;
}
template<int kSize>
double PopulationCost(const Histogram<kSize>& histogram) {
if (histogram.total_count_ == 0) {

View File

@ -731,7 +731,7 @@ bool StoreMetaBlock(const uint8_t* input,
int distnumextra = cmd.dist_extra_ >> 24;
int distextra = cmd.dist_extra_ & 0xffffff;
if (mb.distance_context_map.empty()) {
distance_enc.StoreSymbol(dist_code, storage_ix, storage);
distance_enc.StoreSymbol(dist_code, storage_ix, storage);
} else {
int context = cmd.DistanceContext();
distance_enc.StoreSymbolWithContext<kDistanceContextBits>(

View File

@ -213,6 +213,7 @@ void HistogramRemap(const HistogramType* in, int in_size,
symbols[i] = best_out;
}
// Recompute each out based on raw and symbols.
for (std::set<int>::const_iterator k = all_symbols.begin();
k != all_symbols.end(); ++k) {
@ -283,6 +284,7 @@ void ClusterHistograms(const std::vector<HistogramType>& in,
HistogramReindex(out, histogram_symbols);
}
} // namespace brotli
#endif // BROTLI_ENC_CLUSTER_H_

View File

@ -19,23 +19,10 @@
#include <stdint.h>
#include "./fast_log.h"
#include "./prefix.h"
namespace brotli {
// Converts a backward-distance code into its prefix symbol (*code) and its
// packed extra bits (*extra). The extra-bit count is stored in the top byte
// of *extra and the extra-bit value in the low 24 bits.
static inline void GetDistCode(int distance_code,
                               uint16_t* code, uint32_t* extra) {
  if (distance_code < 16) {
    // Short codes (references to the distance cache) map to themselves and
    // carry no extra bits.
    *code = distance_code;
    *extra = 0;
    return;
  }
  const int offset = distance_code - 12;
  const int nbits = Log2FloorNonZero(offset) - 1;
  // Two symbols share each extra-bit count; 'bucket' (2 or 3) picks one.
  const int bucket = offset >> nbits;
  *code = 12 + 2 * nbits + bucket;
  *extra = (nbits << 24) | (offset - (bucket << nbits));
}
static int insbase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66,
98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
static int insextra[] = { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5,
@ -108,7 +95,10 @@ struct Command {
// distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1.
Command(int insertlen, int copylen, int copylen_code, int distance_code)
: insert_len_(insertlen), copy_len_(copylen) {
GetDistCode(distance_code, &dist_prefix_, &dist_extra_);
// The distance prefix and extra bits are stored in this Command as if
// npostfix and ndirect were 0, they are only recomputed later after the
// clustering if needed.
PrefixEncodeCopyDistance(distance_code, 0, 0, &dist_prefix_, &dist_extra_);
GetLengthCode(insertlen, copylen_code, dist_prefix_,
&cmd_prefix_, &cmd_extra_);
}

View File

@ -108,8 +108,7 @@ void RecomputeDistancePrefixes(Command* cmds,
size_t num_commands,
int num_direct_distance_codes,
int distance_postfix_bits) {
if (num_direct_distance_codes == 0 &&
distance_postfix_bits == 0) {
if (num_direct_distance_codes == 0 && distance_postfix_bits == 0) {
return;
}
for (int i = 0; i < num_commands; ++i) {
@ -187,9 +186,12 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
} else if (params_.lgwin == 17) {
last_byte_ = 1;
last_byte_bits_ = 7;
} else {
} else if (params_.lgwin > 17) {
last_byte_ = ((params_.lgwin - 17) << 1) | 1;
last_byte_bits_ = 4;
} else {
last_byte_ = ((params_.lgwin - 8) << 4) | 1;
last_byte_bits_ = 7;
}
// Initialize distance cache.
@ -340,6 +342,71 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
return WriteMetaBlockInternal(is_last, utf8_mode, out_size, output);
}
// Decides the literal context map based on how well the UTF-8 prefix of the
// previous byte predicts the next byte. Prediction ability is measured as
// Shannon entropy. Plain Shannon entropy is needed here instead of
// 'BitsEntropy', because the prefix will be encoded within the remaining
// 6 bits of the following byte, whereas BitsEntropy would assume the symbol
// is stored on its own with Huffman coding.
void ChooseContextMap(int quality,
                      int* bigram_histo,
                      int* num_literal_contexts,
                      const int** literal_context_map) {
  // Three-model map: separates continuation bytes from the two kinds of
  // ASCII/lead contexts.
  static const int kStaticContextMapContinuation[64] = {
    1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  // Two-model map: continuation bytes vs. everything else.
  static const int kStaticContextMapSimpleUTF8[64] = {
    0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  // Collapse the 3x3 bigram histogram into the candidate 1- and 2-context
  // clusterings.
  int one_ctx_histo[3] = { 0 };
  int two_ctx_histo[6] = { 0 };
  int total = 0;
  for (int i = 0; i < 9; ++i) {
    const int count = bigram_histo[i];
    total += count;
    one_ctx_histo[i % 3] += count;
    two_ctx_histo[i >= 6 ? i - 6 : i] += count;
  }
  int unused;
  double entropy1 = ShannonEntropy(one_ctx_histo, 3, &unused);
  double entropy2 = (ShannonEntropy(two_ctx_histo, 3, &unused) +
                     ShannonEntropy(two_ctx_histo + 3, 3, &unused));
  double entropy3 = 0;
  for (int k = 0; k < 3; ++k) {
    entropy3 += ShannonEntropy(bigram_histo + 3 * k, 3, &unused);
  }
  // Normalize to bits per symbol.
  entropy1 *= (1.0 / total);
  entropy2 *= (1.0 / total);
  entropy3 *= (1.0 / total);
  if (quality < 7) {
    // 3 context models is a bit slower; inflate its cost so it is never
    // chosen at the lower qualities.
    entropy3 = entropy1 * 10;
  }
  // If expected savings by symbol are less than 0.2 bits, skip the context
  // modeling -- in exchange for faster decoding speed.
  if (entropy1 - entropy2 < 0.2 &&
      entropy1 - entropy3 < 0.2) {
    *num_literal_contexts = 1;
  } else if (entropy2 - entropy3 < 0.02) {
    *num_literal_contexts = 2;
    *literal_context_map = kStaticContextMapSimpleUTF8;
  } else {
    *num_literal_contexts = 3;
    *literal_context_map = kStaticContextMapContinuation;
  }
}
void DecideOverLiteralContextModeling(const uint8_t* input,
size_t start_pos,
size_t length,
@ -351,40 +418,24 @@ void DecideOverLiteralContextModeling(const uint8_t* input,
if (quality < kMinQualityForContextModeling || length < 64) {
return;
}
// Simple heuristics to guess if the data is UTF8 or not. The goal is to
// recognize non-UTF8 data quickly by searching for the following obvious
// violations: a continuation byte following an ASCII byte or an ASCII or
// lead byte following a lead byte. If we find such violation we decide that
// the data is not UTF8. To make the analysis of UTF8 data faster we only
// examine 64 byte long strides at every 4kB intervals, if there are no
// violations found, we assume the whole data is UTF8.
// Gather bigram data of the UTF8 byte prefixes. To make the analysis of
// UTF8 data faster we only examine 64 byte long strides at every 4kB
// intervals.
const size_t end_pos = start_pos + length;
int bigram_prefix_histo[9] = { 0 };
for (; start_pos + 64 < end_pos; start_pos += 4096) {
static const int lut[4] = { 0, 0, 1, 2 };
const size_t stride_end_pos = start_pos + 64;
uint8_t prev = input[start_pos & mask];
int prev = lut[input[start_pos & mask] >> 6] * 3;
for (size_t pos = start_pos + 1; pos < stride_end_pos; ++pos) {
const uint8_t literal = input[pos & mask];
if ((prev < 128 && (literal & 0xc0) == 0x80) ||
(prev >= 192 && (literal & 0xc0) != 0x80)) {
return;
}
prev = literal;
++bigram_prefix_histo[prev + lut[literal >> 6]];
prev = lut[literal >> 6] * 3;
}
}
*literal_context_mode = CONTEXT_UTF8;
// If the data is UTF8, this static context map distinguishes between ASCII
// or lead bytes and continuation bytes: the UTF8 context value based on the
// last two bytes is 2 or 3 if and only if the next byte is a continuation
// byte (see table in context.h).
static const int kStaticContextMap[64] = {
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
static const int kNumLiteralContexts = 2;
*num_literal_contexts = kNumLiteralContexts;
*literal_context_map = kStaticContextMap;
ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
literal_context_map);
}
bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,

View File

@ -30,7 +30,7 @@
namespace brotli {
static const int kMaxWindowBits = 24;
static const int kMinWindowBits = 16;
static const int kMinWindowBits = 10;
static const int kMinInputBlockBits = 16;
static const int kMaxInputBlockBits = 24;
@ -59,7 +59,7 @@ struct BrotliParams {
// Controls the compression-speed vs compression-density tradeoffs. The higher
// the quality, the slower the compression. Range is 0 to 11.
int quality;
// Base 2 logarithm of the sliding window size. Range is 16 to 24.
// Base 2 logarithm of the sliding window size. Range is 10 to 24.
int lgwin;
// Base 2 logarithm of the maximum input block size. Range is 16 to 24.
// If set to 0, the value will be set based on the quality.

View File

@ -130,9 +130,7 @@ class HashLongestMatchQuickly {
// not filling will make the results of the compression stochastic
// (but correct). This is because random data would cause the
// system to find accidentally good backward references here and there.
std::fill(&buckets_[0],
&buckets_[sizeof(buckets_) / sizeof(buckets_[0])],
0);
memset(&buckets_[0], 0, sizeof(buckets_));
num_dict_lookups_ = 0;
num_dict_matches_ = 0;
}
@ -140,7 +138,7 @@ class HashLongestMatchQuickly {
// Compute a hash from these, and store the value somewhere within
// [ix .. ix+3].
inline void Store(const uint8_t *data, const int ix) {
const uint32_t key = Hash<kBucketBits>(data);
const uint32_t key = HashBytes(data);
// Wiggle the value with the bucket sweep range.
const uint32_t off = (static_cast<uint32_t>(ix) >> 3) % kBucketSweep;
buckets_[key + off] = ix;
@ -201,7 +199,7 @@ class HashLongestMatchQuickly {
}
}
}
const uint32_t key = Hash<kBucketBits>(&ring_buffer[cur_ix_masked]);
const uint32_t key = HashBytes(&ring_buffer[cur_ix_masked]);
if (kBucketSweep == 1) {
// Only one to look for, don't bother to prepare for a loop.
prev_ix = buckets_[key];
@ -291,6 +289,21 @@ class HashLongestMatchQuickly {
return match_found;
}
enum { kHashLength = 5 };
enum { kHashTypeLength = 8 };
// HashBytes is the function that chooses the bucket to place
// the address in. The HashLongestMatch and HashLongestMatchQuickly
// classes have separate, different implementations of hashing.
static uint32_t HashBytes(const uint8_t *data) {
// Computing a hash based on 5 bytes works much better for
// qualities 1 and 3, where the next hash value is likely to replace
// the one stored here. (NOTE(review): the original comment was cut off
// mid-sentence at this point; completion above is a best guess -- confirm.)
static const uint32_t kHashMul32 = 0x1e35a7bd;
// Shifting the 64-bit load left by 24 drops the top 3 bytes, so only the
// low 5 bytes of data contribute to the product (kHashLength == 5).
uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;
// The higher bits contain more mixture from the multiplication,
// so we take our results from there.
return h >> (64 - kBucketBits);
}
private:
static const uint32_t kBucketSize = 1 << kBucketBits;
uint32_t buckets_[kBucketSize + kBucketSweep];
@ -317,7 +330,7 @@ class HashLongestMatch {
}
void Reset() {
std::fill(&num_[0], &num_[sizeof(num_) / sizeof(num_[0])], 0);
memset(&num_[0], 0, sizeof(num_));
num_dict_lookups_ = 0;
num_dict_matches_ = 0;
}
@ -325,7 +338,7 @@ class HashLongestMatch {
// Look at 3 bytes at data.
// Compute a hash from these, and store the value of ix at that position.
inline void Store(const uint8_t *data, const int ix) {
const uint32_t key = Hash<kBucketBits>(data);
const uint32_t key = HashBytes(data);
const int minor_ix = num_[key] & kBlockMask;
buckets_[key][minor_ix] = ix;
++num_[key];
@ -401,7 +414,7 @@ class HashLongestMatch {
}
}
}
const uint32_t key = Hash<kBucketBits>(&data[cur_ix_masked]);
const uint32_t key = HashBytes(&data[cur_ix_masked]);
const int * __restrict const bucket = &buckets_[key][0];
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (int i = num_[key] - 1; i >= down; --i) {
@ -518,7 +531,7 @@ class HashLongestMatch {
*matches++ = BackwardMatch(backward, len);
}
}
const uint32_t key = Hash<kBucketBits>(&data[cur_ix_masked]);
const uint32_t key = HashBytes(&data[cur_ix_masked]);
const int * __restrict const bucket = &buckets_[key][0];
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (int i = num_[key] - 1; i >= down; --i) {
@ -562,6 +575,27 @@ class HashLongestMatch {
*num_matches += matches - orig_matches;
}
enum { kHashLength = 4 };
enum { kHashTypeLength = 4 };
// Maps 4 bytes of input to the bucket index that the address is stored
// under. The HashLongestMatch and HashLongestMatchQuickly classes each
// carry their own, different implementation of hashing.
static uint32_t HashBytes(const uint8_t *data) {
  // The multiplier was tuned heuristically against compression benchmarks
  // and has these properties:
  // * It is odd -- an even multiplier would lose the highest bit.
  // * No long streaks of 1s or 0s.
  // * It is not unfortunate (see the unittest) for the English language.
  // * There is no effort to make it prime; oddity is enough for this use.
  static const uint32_t kHashMul32 = 0x1e35a7bd;
  const uint32_t product = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
  // Keep the top bits of the product: the multiplication mixes them most.
  return product >> (32 - kBucketBits);
}
private:
// Number of hash buckets.
static const uint32_t kBucketSize = 1 << kBucketBits;
@ -614,7 +648,7 @@ struct Hashers {
template<typename Hasher>
void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
for (size_t i = 0; i + 3 < size; i++) {
for (size_t i = 0; i + Hasher::kHashTypeLength - 1 < size; i++) {
hasher->Store(dict, i);
}
}

View File

@ -36,10 +36,10 @@ class RingBuffer {
mask_((1 << window_bits) - 1),
tail_size_(1 << tail_bits),
pos_(0) {
static const int kSlackForFourByteHashingEverywhere = 3;
static const int kSlackForEightByteHashingEverywhere = 7;
const int buflen = (1 << window_bits_) + tail_size_;
buffer_ = new uint8_t[buflen + kSlackForFourByteHashingEverywhere];
for (int i = 0; i < kSlackForFourByteHashingEverywhere; ++i) {
buffer_ = new uint8_t[buflen + kSlackForEightByteHashingEverywhere];
for (int i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
buffer_[buflen + i] = 0;
}
}

View File

@ -377,7 +377,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
// Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0"
if ((data[1] == ' ' &&
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
(data[0] == '\xc2' && data[1] == '\xa0')) {
(data[0] == 0xc2 && data[1] == 0xa0)) {
key = Hash(&data[2]);
bucket = kStaticDictionaryBuckets[key];
int num = bucket & 0xff;
@ -388,7 +388,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
const int id = w.idx;
if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
if (data[0] == '\xc2') {
if (data[0] == 0xc2) {
AddMatch(id + 102 * n, l + 2, l, matches);
found_match = true;
} else if (l + 2 < max_length && data[l + 2] == ' ') {