Fix more conversion warnings.

Zoltan Szabadka 2016-01-07 16:27:49 +01:00
parent 580db01252
commit 8844b7f0d7
32 changed files with 1471 additions and 1429 deletions

enc/backward_references.cc

@@ -31,22 +31,22 @@ class ZopfliCostModel {
size_t ringbuffer_mask,
const Command* commands,
size_t num_commands,
int last_insert_len) {
std::vector<int> histogram_literal(256, 0);
std::vector<int> histogram_cmd(kNumCommandPrefixes, 0);
std::vector<int> histogram_dist(kNumDistancePrefixes, 0);
size_t last_insert_len) {
std::vector<uint32_t> histogram_literal(256, 0);
std::vector<uint32_t> histogram_cmd(kNumCommandPrefixes, 0);
std::vector<uint32_t> histogram_dist(kNumDistancePrefixes, 0);
size_t pos = position - last_insert_len;
for (size_t i = 0; i < num_commands; i++) {
int inslength = commands[i].insert_len_;
int copylength = commands[i].copy_len_;
int distcode = commands[i].dist_prefix_;
int cmdcode = commands[i].cmd_prefix_;
size_t inslength = commands[i].insert_len_;
size_t copylength = commands[i].copy_len_;
size_t distcode = commands[i].dist_prefix_;
size_t cmdcode = commands[i].cmd_prefix_;
histogram_cmd[cmdcode]++;
if (cmdcode >= 128) histogram_dist[distcode]++;
for (int j = 0; j < inslength; j++) {
for (size_t j = 0; j < inslength; j++) {
histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
}
@@ -58,7 +58,7 @@ class ZopfliCostModel {
Set(histogram_cmd, &cost_cmd_);
Set(histogram_dist, &cost_dist_);
for (int i = 0; i < kNumCommandPrefixes; ++i) {
for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
min_cost_cmd_ = std::min(min_cost_cmd_, cost_cmd_[i]);
}
@@ -84,17 +84,17 @@ class ZopfliCostModel {
}
cost_cmd_.resize(kNumCommandPrefixes);
cost_dist_.resize(kNumDistancePrefixes);
for (int i = 0; i < kNumCommandPrefixes; ++i) {
for (uint32_t i = 0; i < kNumCommandPrefixes; ++i) {
cost_cmd_[i] = FastLog2(11 + i);
}
for (int i = 0; i < kNumDistancePrefixes; ++i) {
for (uint32_t i = 0; i < kNumDistancePrefixes; ++i) {
cost_dist_[i] = FastLog2(20 + i);
}
min_cost_cmd_ = FastLog2(11);
}
double GetCommandCost(
int dist_code, int length_code, int insert_length) const {
size_t dist_code, size_t length_code, size_t insert_length) const {
uint16_t inscode = GetInsertLengthCode(insert_length);
uint16_t copycode = GetCopyLengthCode(length_code);
uint16_t cmdcode = CombineLengthCodes(inscode, copycode, dist_code == 0);
@@ -103,7 +103,8 @@ class ZopfliCostModel {
PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
uint32_t distnumextra = distextra >> 24;
double result = insextra[inscode] + copyextra[copycode] + distnumextra;
double result = static_cast<double>(
kInsExtra[inscode] + kCopyExtra[copycode] + distnumextra);
result += cost_cmd_[cmdcode];
if (cmdcode >= 128) result += cost_dist_[dist_symbol];
return result;
@@ -118,9 +119,9 @@ class ZopfliCostModel {
}
private:
void Set(const std::vector<int>& histogram, std::vector<double>* cost) {
void Set(const std::vector<uint32_t>& histogram, std::vector<double>* cost) {
cost->resize(histogram.size());
int sum = 0;
size_t sum = 0;
for (size_t i = 0; i < histogram.size(); i++) {
sum += histogram[i];
}
@@ -146,40 +147,41 @@ class ZopfliCostModel {
double min_cost_cmd_;
};
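The uint32_t histograms above feed Set(), whose body is not shown in this hunk. A minimal stand-alone sketch of the usual histogram-to-bit-cost conversion, assuming cost = -log2(P(symbol)) and using std::log2 in place of the codebase's FastLog2 (HistogramToBitCosts and the default cost for unseen symbols are illustrative, not the patched code):

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// Requires a non-empty histogram with at least one nonzero count.
std::vector<double> HistogramToBitCosts(const std::vector<uint32_t>& histogram) {
  size_t sum = 0;
  for (size_t i = 0; i < histogram.size(); ++i) sum += histogram[i];
  const double log2sum = std::log2(static_cast<double>(sum));
  std::vector<double> cost(histogram.size());
  for (size_t i = 0; i < histogram.size(); ++i) {
    // cost[i] = log2(sum) - log2(count) = -log2(count / sum).
    cost[i] = histogram[i] == 0
        ? log2sum + 2.0  // unseen symbols: pessimistic default
        : log2sum - std::log2(static_cast<double>(histogram[i]));
  }
  return cost;
}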
inline void SetDistanceCache(int distance,
int distance_code,
int max_distance,
inline void SetDistanceCache(size_t distance,
size_t distance_code,
size_t max_distance,
const int* dist_cache,
int* result_dist_cache) {
if (distance <= max_distance && distance_code > 0) {
result_dist_cache[0] = distance;
result_dist_cache[0] = static_cast<int>(distance);
memcpy(&result_dist_cache[1], dist_cache, 3 * sizeof(dist_cache[0]));
} else {
memcpy(result_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
}
}
inline int ComputeDistanceCode(int distance,
int max_distance,
int quality,
const int* dist_cache) {
inline size_t ComputeDistanceCode(size_t distance,
size_t max_distance,
int quality,
const int* dist_cache) {
if (distance <= max_distance) {
if (distance == dist_cache[0]) {
if (distance == static_cast<size_t>(dist_cache[0])) {
return 0;
} else if (distance == dist_cache[1]) {
} else if (distance == static_cast<size_t>(dist_cache[1])) {
return 1;
} else if (distance == dist_cache[2]) {
} else if (distance == static_cast<size_t>(dist_cache[2])) {
return 2;
} else if (distance == dist_cache[3]) {
} else if (distance == static_cast<size_t>(dist_cache[3])) {
return 3;
} else if (quality > 3 && distance >= 6) {
for (int k = 4; k < kNumDistanceShortCodes; ++k) {
int idx = kDistanceCacheIndex[k];
int candidate = dist_cache[idx] + kDistanceCacheOffset[k];
static const int kLimits[16] = { 0, 0, 0, 0,
6, 6, 11, 11,
11, 11, 11, 11,
12, 12, 12, 12 };
for (size_t k = 4; k < kNumDistanceShortCodes; ++k) {
size_t idx = kDistanceCacheIndex[k];
size_t candidate =
static_cast<size_t>(dist_cache[idx] + kDistanceCacheOffset[k]);
static const size_t kLimits[16] = { 0, 0, 0, 0,
6, 6, 11, 11,
11, 11, 11, 11,
12, 12, 12, 12 };
if (distance == candidate && distance >= kLimits[k]) {
return k;
}
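Worked example of the short-code path above, assuming the usual brotli kDistanceCacheIndex/kDistanceCacheOffset tables (values quoted here as an assumption): if dist_cache[0] == 100 and the new match distance is 101, codes 0..3 miss, but k == 5 hits because index 0 and offset +1 give candidate = 101; since 101 >= kLimits[5] == 6, the function returns 5 instead of falling through to a full distance code.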
@@ -198,30 +200,30 @@ struct ZopfliNode {
cost(kInfinity) {}
// best length to get up to this byte (not including this byte itself)
int length;
uint32_t length;
// distance associated with the length
int distance;
int distance_code;
uint32_t distance;
uint32_t distance_code;
int distance_cache[4];
// length code associated with the length - usually the same as length,
// except in case of length-changing dictionary transformation.
int length_code;
uint32_t length_code;
// number of literal inserts before this copy
int insert_length;
uint32_t insert_length;
// smallest cost to get to this byte from the beginning, as found so far
double cost;
};
inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
int len, int len_code, int dist, int dist_code,
int max_dist, const int* dist_cache,
double cost) {
size_t len, size_t len_code, size_t dist,
size_t dist_code, size_t max_dist,
const int* dist_cache, double cost) {
ZopfliNode& next = nodes[pos + len];
next.length = len;
next.length_code = len_code;
next.distance = dist;
next.distance_code = dist_code;
next.insert_length = static_cast<int>(pos - start_pos);
next.length = static_cast<uint32_t>(len);
next.length_code = static_cast<uint32_t>(len_code);
next.distance = static_cast<uint32_t>(dist);
next.distance_code = static_cast<uint32_t>(dist_code);
next.insert_length = static_cast<uint32_t>(pos - start_pos);
next.cost = cost;
SetDistanceCache(dist, dist_code, max_dist, dist_cache,
&next.distance_cache[0]);
@@ -231,7 +233,7 @@ inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
class StartPosQueue {
public:
explicit StartPosQueue(int bits)
: mask_((1 << bits) - 1), q_(1 << bits), idx_(0) {}
: mask_((1u << bits) - 1), q_(1 << bits), idx_(0) {}
void Clear() {
idx_ = 0;
@@ -244,43 +246,47 @@ class StartPosQueue {
// have a copy of at least length 2.
return;
}
q_[idx_ & mask_] = std::make_pair(pos, costdiff);
// Restore the sorted order.
for (int i = idx_; i > 0 && i > idx_ - mask_; --i) {
if (q_[i & mask_].second > q_[(i - 1) & mask_].second) {
std::swap(q_[i & mask_], q_[(i - 1) & mask_]);
}
}
size_t offset = -idx_ & mask_;
++idx_;
size_t len = size();
q_[offset] = std::make_pair(pos, costdiff);
/* Restore the sorted order. In the list of |len| items at most |len - 1|
adjacent element comparisons / swaps are required. */
for (size_t i = 1; i < len; ++i) {
if (q_[offset & mask_].second > q_[(offset + 1) & mask_].second) {
std::swap(q_[offset & mask_], q_[(offset + 1) & mask_]);
}
++offset;
}
}
int size() const { return std::min(idx_, mask_ + 1); }
size_t size() const { return std::min(idx_, mask_ + 1); }
size_t GetStartPos(int k) const {
return q_[(idx_ - k - 1) & mask_].first;
size_t GetStartPos(size_t k) const {
return q_[(k + 1 - idx_) & mask_].first;
}
private:
const int mask_;
const size_t mask_;
std::vector<std::pair<size_t, double> > q_;
int idx_;
size_t idx_;
};
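A stand-alone model of the rewritten Push() may make the ring-buffer arithmetic easier to follow: the newest element lands at slot -idx_ & mask_, and one pass of adjacent compares restores sorted order because the remaining elements were already sorted. TinyPosQueue is a hypothetical name for this sketch, not part of the patch:

#include <cstddef>
#include <utility>
#include <vector>

class TinyPosQueue {
 public:
  explicit TinyPosQueue(int bits)
      : mask_((size_t(1) << bits) - 1), q_(size_t(1) << bits), idx_(0) {}
  void Push(size_t pos, double costdiff) {
    size_t offset = -idx_ & mask_;  // slot of the newest element
    ++idx_;
    size_t len = size();
    q_[offset] = std::make_pair(pos, costdiff);
    // One insertion-sort pass: at most len - 1 adjacent swaps.
    for (size_t i = 1; i < len; ++i) {
      if (q_[offset & mask_].second > q_[(offset + 1) & mask_].second) {
        std::swap(q_[offset & mask_], q_[(offset + 1) & mask_]);
      }
      ++offset;
    }
  }
  size_t size() const { return idx_ < mask_ + 1 ? idx_ : mask_ + 1; }
  size_t GetStartPos(size_t k) const {
    return q_[(k + 1 - idx_) & mask_].first;  // same indexing as the patch
  }
 private:
  const size_t mask_;
  std::vector<std::pair<size_t, double> > q_;
  size_t idx_;
};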
// Returns the minimum possible copy length that can improve the cost of any
// future position.
int ComputeMinimumCopyLength(const StartPosQueue& queue,
const std::vector<ZopfliNode>& nodes,
const ZopfliCostModel& model,
size_t pos,
double min_cost_cmd) {
size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
const std::vector<ZopfliNode>& nodes,
const ZopfliCostModel& model,
size_t pos,
double min_cost_cmd) {
// Compute the minimum possible cost of reaching any future position.
const size_t start0 = queue.GetStartPos(0);
double min_cost = (nodes[start0].cost +
model.GetLiteralCosts(start0, pos) +
min_cost_cmd);
int len = 2;
int next_len_bucket = 4;
int next_len_offset = 10;
size_t len = 2;
size_t next_len_bucket = 4;
size_t next_len_offset = 10;
while (pos + len < nodes.size() && nodes[pos + len].cost <= min_cost) {
// We already reached (pos + len) with no more cost than the minimum
// possible cost of reaching anything from this pos, so there is no point in
@@ -303,13 +309,13 @@ void ZopfliIterate(size_t num_bytes,
size_t ringbuffer_mask,
const size_t max_backward_limit,
const ZopfliCostModel& model,
const std::vector<int>& num_matches,
const std::vector<uint32_t>& num_matches,
const std::vector<BackwardMatch>& matches,
int* dist_cache,
int* last_insert_len,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
int* num_literals) {
size_t* num_literals) {
const Command * const orig_commands = commands;
std::vector<ZopfliNode> nodes(num_bytes + 1);
@@ -324,17 +330,17 @@ void ZopfliIterate(size_t num_bytes,
for (size_t i = 0; i + 3 < num_bytes; i++) {
size_t cur_ix = position + i;
size_t cur_ix_masked = cur_ix & ringbuffer_mask;
int max_distance = static_cast<int>(std::min(cur_ix, max_backward_limit));
int max_length = static_cast<int>(num_bytes - i);
size_t max_distance = std::min(cur_ix, max_backward_limit);
size_t max_length = num_bytes - i;
queue.Push(i, nodes[i].cost - model.GetLiteralCosts(0, i));
const int min_len = ComputeMinimumCopyLength(queue, nodes, model,
i, min_cost_cmd);
const size_t min_len = ComputeMinimumCopyLength(queue, nodes, model,
i, min_cost_cmd);
// Go over the command starting positions in order of increasing cost
// difference.
for (int k = 0; k < 5 && k < queue.size(); ++k) {
for (size_t k = 0; k < 5 && k < queue.size(); ++k) {
const size_t start = queue.GetStartPos(k);
const double start_costdiff =
nodes[start].cost - model.GetLiteralCosts(0, start);
@@ -342,10 +348,11 @@ void ZopfliIterate(size_t num_bytes,
// Look for last distance matches using the distance cache from this
// starting position.
int best_len = min_len - 1;
for (int j = 0; j < kNumDistanceShortCodes; ++j) {
const int idx = kDistanceCacheIndex[j];
const int backward = dist_cache2[idx] + kDistanceCacheOffset[j];
size_t best_len = min_len - 1;
for (size_t j = 0; j < kNumDistanceShortCodes; ++j) {
const size_t idx = kDistanceCacheIndex[j];
const size_t backward =
static_cast<size_t>(dist_cache2[idx] + kDistanceCacheOffset[j]);
size_t prev_ix = cur_ix - backward;
if (prev_ix >= cur_ix) {
continue;
@@ -361,12 +368,12 @@ void ZopfliIterate(size_t num_bytes,
ringbuffer[prev_ix + best_len]) {
continue;
}
const int len =
const size_t len =
FindMatchLengthWithLimit(&ringbuffer[prev_ix],
&ringbuffer[cur_ix_masked],
max_length);
for (int l = best_len + 1; l <= len; ++l) {
const int inslen = static_cast<int>(i - start);
for (size_t l = best_len + 1; l <= len; ++l) {
const size_t inslen = i - start;
double cmd_cost = model.GetCommandCost(j, l, inslen);
double cost = start_costdiff + cmd_cost + model.GetLiteralCosts(0, i);
if (cost < nodes[i + l].cost) {
@@ -383,24 +390,24 @@ void ZopfliIterate(size_t num_bytes,
if (k >= 2) continue;
// Loop through all possible copy lengths at this position.
int len = min_len;
for (int j = 0; j < num_matches[i]; ++j) {
size_t len = min_len;
for (size_t j = 0; j < num_matches[i]; ++j) {
BackwardMatch match = matches[cur_match_pos + j];
int dist = match.distance;
size_t dist = match.distance;
bool is_dictionary_match = dist > max_distance;
// We already tried all possible last distance matches, so we can use
// normal distance code here.
int dist_code = dist + 15;
size_t dist_code = dist + 15;
// Try all copy lengths up until the maximum copy length corresponding
// to this distance. If the distance refers to the static dictionary, or
// the maximum length is long enough, try only one maximum length.
int max_len = match.length();
size_t max_len = match.length();
if (len < max_len && (is_dictionary_match || max_len > kMaxZopfliLen)) {
len = max_len;
}
for (; len <= max_len; ++len) {
int len_code = is_dictionary_match ? match.length_code() : len;
const int inslen = static_cast<int>(i - start);
size_t len_code = is_dictionary_match ? match.length_code() : len;
const size_t inslen = i - start;
double cmd_cost = model.GetCommandCost(dist_code, len_code, inslen);
double cost = start_costdiff + cmd_cost + model.GetLiteralCosts(0, i);
if (cost < nodes[i + len].cost) {
@@ -422,16 +429,16 @@ void ZopfliIterate(size_t num_bytes,
}
}
std::vector<int> backwards;
std::vector<uint32_t> backwards;
size_t index = num_bytes;
while (nodes[index].cost == kInfinity) --index;
while (index > 0) {
int len = nodes[index].length + nodes[index].insert_length;
backwards.push_back(len);
while (index != 0) {
size_t len = nodes[index].length + nodes[index].insert_length;
backwards.push_back(static_cast<uint32_t>(len));
index -= len;
}
std::vector<int> path;
std::vector<uint32_t> path;
for (size_t i = backwards.size(); i > 0; i--) {
path.push_back(backwards[i - 1]);
}
@@ -439,19 +446,18 @@ void ZopfliIterate(size_t num_bytes,
size_t pos = 0;
for (size_t i = 0; i < path.size(); i++) {
const ZopfliNode& next = nodes[pos + path[i]];
int copy_length = next.length;
int insert_length = next.insert_length;
size_t copy_length = next.length;
size_t insert_length = next.insert_length;
pos += insert_length;
if (i == 0) {
insert_length += *last_insert_len;
*last_insert_len = 0;
}
int distance = next.distance;
int len_code = next.length_code;
int max_distance =
static_cast<int>(std::min(position + pos, max_backward_limit));
size_t distance = next.distance;
size_t len_code = next.length_code;
size_t max_distance = std::min(position + pos, max_backward_limit);
bool is_dictionary = (distance > max_distance);
int dist_code = next.distance_code;
size_t dist_code = next.distance_code;
Command cmd(insert_length, copy_length, len_code, dist_code);
*commands++ = cmd;
@@ -460,30 +466,31 @@ void ZopfliIterate(size_t num_bytes,
dist_cache[3] = dist_cache[2];
dist_cache[2] = dist_cache[1];
dist_cache[1] = dist_cache[0];
dist_cache[0] = distance;
dist_cache[0] = static_cast<int>(distance);
}
*num_literals += insert_length;
insert_length = 0;
pos += copy_length;
}
*last_insert_len += static_cast<int>(num_bytes - pos);
*num_commands += (commands - orig_commands);
*last_insert_len += num_bytes - pos;
*num_commands += static_cast<size_t>(commands - orig_commands);
}
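The backtracking step in the function above can be read in isolation: every reachable node records how many bytes its best incoming command spans (insert_length plus copy length), so hop sizes walked from the end describe the command chain in reverse. A hypothetical stand-alone version (BacktrackPath and hop_len are illustrative names, not from the patch):

#include <cstddef>
#include <cstdint>
#include <vector>

// hop_len[i] = insert_length + copy length of the best command ending at i;
// end = last byte position reached with finite cost.
std::vector<uint32_t> BacktrackPath(const std::vector<uint32_t>& hop_len,
                                    size_t end) {
  std::vector<uint32_t> backwards;
  for (size_t index = end; index != 0; index -= hop_len[index]) {
    backwards.push_back(hop_len[index]);
  }
  // The loop walks end -> 0, so reverse to get the forward path.
  return std::vector<uint32_t>(backwards.rbegin(), backwards.rend());
}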
template<typename Hasher>
void CreateBackwardReferences(size_t num_bytes,
size_t position,
bool is_last,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
const int quality,
Hasher* hasher,
int* dist_cache,
int* last_insert_len,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
int* num_literals) {
size_t* num_literals) {
if (num_bytes >= 3 && position >= 3) {
// Prepare the hashes for three last bytes of the last write.
// These could not be calculated before, since they require knowledge
@@ -496,7 +503,7 @@ void CreateBackwardReferences(size_t num_bytes,
static_cast<uint32_t>(position - 1));
}
const Command * const orig_commands = commands;
int insert_length = *last_insert_len;
size_t insert_length = *last_insert_len;
size_t i = position & ringbuffer_mask;
const size_t i_diff = position - i;
const size_t i_end = i + num_bytes;
@@ -509,12 +516,11 @@ void CreateBackwardReferences(size_t num_bytes,
const double kMinScore = 4.0;
while (i + Hasher::kHashTypeLength - 1 < i_end) {
int max_length = static_cast<int>(i_end - i);
int max_distance =
static_cast<int>(std::min(i + i_diff, max_backward_limit));
int best_len = 0;
int best_len_code = 0;
int best_dist = 0;
size_t max_length = i_end - i;
size_t max_distance = std::min(i + i_diff, max_backward_limit);
size_t best_len = 0;
size_t best_len_code = 0;
size_t best_dist = 0;
double best_score = kMinScore;
bool match_found = hasher->FindLongestMatch(
ringbuffer, ringbuffer_mask,
@@ -525,12 +531,12 @@ void CreateBackwardReferences(size_t num_bytes,
int delayed_backward_references_in_row = 0;
for (;;) {
--max_length;
int best_len_2 = quality < 5 ? std::min(best_len - 1, max_length) : 0;
int best_len_code_2 = 0;
int best_dist_2 = 0;
size_t best_len_2 =
quality < 5 ? std::min(best_len - 1, max_length) : 0;
size_t best_len_code_2 = 0;
size_t best_dist_2 = 0;
double best_score_2 = kMinScore;
max_distance =
static_cast<int>(std::min(i + i_diff + 1, max_backward_limit));
max_distance = std::min(i + i_diff + 1, max_backward_limit);
hasher->Store(ringbuffer + i, static_cast<uint32_t>(i + i_diff));
match_found = hasher->FindLongestMatch(
ringbuffer, ringbuffer_mask,
@@ -555,15 +561,15 @@ void CreateBackwardReferences(size_t num_bytes,
}
apply_random_heuristics =
i + 2 * best_len + random_heuristics_window_size;
max_distance = static_cast<int>(std::min(i + i_diff, max_backward_limit));
max_distance = std::min(i + i_diff, max_backward_limit);
// The first 16 codes are special shortcodes, and the minimum offset is 1.
int distance_code =
size_t distance_code =
ComputeDistanceCode(best_dist, max_distance, quality, dist_cache);
if (best_dist <= max_distance && distance_code > 0) {
dist_cache[3] = dist_cache[2];
dist_cache[2] = dist_cache[1];
dist_cache[1] = dist_cache[0];
dist_cache[0] = best_dist;
dist_cache[0] = static_cast<int>(best_dist);
}
Command cmd(insert_length, best_len, best_len_code, distance_code);
*commands++ = cmd;
@@ -571,7 +577,7 @@ void CreateBackwardReferences(size_t num_bytes,
insert_length = 0;
// Put the hash keys into the table, if there are enough
// bytes left.
for (int j = 1; j < best_len; ++j) {
for (size_t j = 1; j < best_len; ++j) {
hasher->Store(&ringbuffer[i + j],
static_cast<uint32_t>(i + i_diff + j));
}
@@ -608,13 +614,14 @@ void CreateBackwardReferences(size_t num_bytes,
}
}
}
insert_length += static_cast<int>(i_end - i);
insert_length += i_end - i;
*last_insert_len = insert_length;
*num_commands += commands - orig_commands;
*num_commands += static_cast<size_t>(commands - orig_commands);
}
void CreateBackwardReferences(size_t num_bytes,
size_t position,
bool is_last,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
@@ -622,10 +629,10 @@ void CreateBackwardReferences(size_t num_bytes,
Hashers* hashers,
int hash_type,
int* dist_cache,
int* last_insert_len,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
int* num_literals) {
size_t* num_literals) {
bool zopflify = quality > 9;
if (zopflify) {
Hashers::H9* hasher = hashers->hash_h9;
@@ -640,28 +647,27 @@ void CreateBackwardReferences(size_t num_bytes,
hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask],
static_cast<uint32_t>(position - 1));
}
std::vector<int> num_matches(num_bytes);
std::vector<uint32_t> num_matches(num_bytes);
std::vector<BackwardMatch> matches(3 * num_bytes);
size_t cur_match_pos = 0;
for (size_t i = 0; i + 3 < num_bytes; ++i) {
int max_distance =
static_cast<int>(std::min(position + i, max_backward_limit));
int max_length = static_cast<int>(num_bytes - i);
size_t max_distance = std::min(position + i, max_backward_limit);
size_t max_length = num_bytes - i;
// Ensure that we have at least kMaxZopfliLen free slots.
if (matches.size() < cur_match_pos + kMaxZopfliLen) {
matches.resize(cur_match_pos + kMaxZopfliLen);
}
hasher->FindAllMatches(
ringbuffer, ringbuffer_mask,
static_cast<uint32_t>(position + i), max_length, max_distance,
&num_matches[i], &matches[cur_match_pos]);
size_t num_found_matches = hasher->FindAllMatches(
ringbuffer, ringbuffer_mask, position + i, max_length, max_distance,
&matches[cur_match_pos]);
num_matches[i] = static_cast<uint32_t>(num_found_matches);
hasher->Store(&ringbuffer[(position + i) & ringbuffer_mask],
static_cast<uint32_t>(position + i));
cur_match_pos += num_matches[i];
if (num_matches[i] == 1) {
const int match_len = matches[cur_match_pos - 1].length();
cur_match_pos += num_found_matches;
if (num_found_matches == 1) {
const size_t match_len = matches[cur_match_pos - 1].length();
if (match_len > kMaxZopfliLen) {
for (int j = 1; j < match_len; ++j) {
for (size_t j = 1; j < match_len; ++j) {
++i;
hasher->Store(&ringbuffer[(position + i) & ringbuffer_mask],
static_cast<uint32_t>(position + i));
@@ -670,14 +676,14 @@ void CreateBackwardReferences(size_t num_bytes,
}
}
}
int orig_num_literals = *num_literals;
int orig_last_insert_len = *last_insert_len;
size_t orig_num_literals = *num_literals;
size_t orig_last_insert_len = *last_insert_len;
int orig_dist_cache[4] = {
dist_cache[0], dist_cache[1], dist_cache[2], dist_cache[3]
};
size_t orig_num_commands = *num_commands;
static const int kIterations = 2;
for (int i = 0; i < kIterations; i++) {
static const size_t kIterations = 2;
for (size_t i = 0; i < kIterations; i++) {
ZopfliCostModel model;
if (i == 0) {
model.SetFromLiteralCosts(num_bytes, position,
@@ -702,57 +708,58 @@
switch (hash_type) {
case 1:
CreateBackwardReferences<Hashers::H1>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h1, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h1, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 2:
CreateBackwardReferences<Hashers::H2>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h2, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h2, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 3:
CreateBackwardReferences<Hashers::H3>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h3, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h3, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 4:
CreateBackwardReferences<Hashers::H4>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h4, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h4, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 5:
CreateBackwardReferences<Hashers::H5>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h5, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h5, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 6:
CreateBackwardReferences<Hashers::H6>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h6, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h6, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 7:
CreateBackwardReferences<Hashers::H7>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h7, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h7, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 8:
CreateBackwardReferences<Hashers::H8>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h8, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h8, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
case 9:
CreateBackwardReferences<Hashers::H9>(
num_bytes, position, ringbuffer, ringbuffer_mask, max_backward_limit,
quality, hashers->hash_h9, dist_cache, last_insert_len,
commands, num_commands, num_literals);
num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
max_backward_limit, quality, hashers->hash_h9, dist_cache,
last_insert_len, commands, num_commands, num_literals);
break;
default:
break;

enc/backward_references.h

@@ -9,8 +9,6 @@
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
#include <vector>
#include "./hash.h"
#include "./command.h"
#include "./types.h"
@@ -23,6 +21,7 @@ namespace brotli {
// by this call.
void CreateBackwardReferences(size_t num_bytes,
size_t position,
bool is_last,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
@@ -30,10 +29,10 @@ void CreateBackwardReferences(size_t num_bytes,
Hashers* hashers,
int hash_type,
int* dist_cache,
int* last_insert_len,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
int* num_literals);
size_t* num_literals);
} // namespace brotli

enc/bit_cost.h

@@ -17,35 +17,35 @@
namespace brotli {
static inline double ShannonEntropy(const int *population, int size,
int *total) {
int sum = 0;
static inline double ShannonEntropy(const uint32_t *population, size_t size,
size_t *total) {
size_t sum = 0;
double retval = 0;
const int *population_end = population + size;
int p;
const uint32_t *population_end = population + size;
size_t p;
if (size & 1) {
goto odd_number_of_elements_left;
}
while (population < population_end) {
p = *population++;
sum += p;
retval -= p * FastLog2(p);
retval -= static_cast<double>(p) * FastLog2(p);
odd_number_of_elements_left:
p = *population++;
sum += p;
retval -= p * FastLog2(p);
retval -= static_cast<double>(p) * FastLog2(p);
}
if (sum) retval += sum * FastLog2(sum);
if (sum) retval += static_cast<double>(sum) * FastLog2(sum);
*total = sum;
return retval;
}
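As a quick check of the formula (retval accumulates -sum of p*log2(p) over the population and then adds sum*log2(sum)): for population {3, 1}, sum = 4 and the entropy is 4*log2(4) - 3*log2(3) - 1*log2(1), roughly 8 - 4.75 = 3.25 bits for the whole population, about 0.81 bits per symbol.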
static inline double BitsEntropy(const int *population, int size) {
int sum;
static inline double BitsEntropy(const uint32_t *population, size_t size) {
size_t sum;
double retval = ShannonEntropy(population, size, &sum);
if (retval < sum) {
// At least one bit per literal is needed.
retval = sum;
retval = static_cast<double>(sum);
}
return retval;
}
@@ -66,7 +66,7 @@ double PopulationCost(const Histogram<kSize>& histogram) {
return 12;
}
if (count == 2) {
return 20 + histogram.total_count_;
return static_cast<double>(20 + histogram.total_count_);
}
double bits = 0;
uint8_t depth_array[kSize] = { 0 };
@@ -82,16 +82,16 @@ double PopulationCost(const Histogram<kSize>& histogram) {
// In this loop we compute the entropy of the histogram and simultaneously
// build a simplified histogram of the code length codes where we use the
// zero repeat code 17, but we don't use the non-zero repeat code 16.
int max_depth = 1;
int depth_histo[kCodeLengthCodes] = { 0 };
size_t max_depth = 1;
uint32_t depth_histo[kCodeLengthCodes] = { 0 };
const double log2total = FastLog2(histogram.total_count_);
for (int i = 0; i < kSize;) {
for (size_t i = 0; i < kSize;) {
if (histogram.data_[i] > 0) {
// Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
// = log2(total_count) - log2(count(symbol))
double log2p = log2total - FastLog2(histogram.data_[i]);
// Approximate the bit depth by round(-log2(P(symbol)))
int depth = static_cast<int>(log2p + 0.5);
size_t depth = static_cast<size_t>(log2p + 0.5);
bits += histogram.data_[i] * log2p;
if (depth > 15) {
depth = 15;
@@ -104,8 +104,8 @@ double PopulationCost(const Histogram<kSize>& histogram) {
} else {
// Compute the run length of zeros and add the appropriate number of 0 and
// 17 code length codes to the code length code histogram.
int reps = 1;
for (int k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
uint32_t reps = 1;
for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
++reps;
}
i += reps;
@@ -128,7 +128,7 @@ double PopulationCost(const Histogram<kSize>& histogram) {
}
}
// Add the estimated encoding cost of the code length code histogram.
bits += 18 + 2 * max_depth;
bits += static_cast<double>(18 + 2 * max_depth);
// Add the entropy of the code length code histogram.
bits += BitsEntropy(depth_histo, kCodeLengthCodes);
return bits;

enc/block_splitter.cc

@@ -10,11 +10,9 @@
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <cstring>
#include <map>
#include "./cluster.h"
@@ -24,19 +22,19 @@
namespace brotli {
static const int kMaxLiteralHistograms = 100;
static const int kMaxCommandHistograms = 50;
static const size_t kMaxLiteralHistograms = 100;
static const size_t kMaxCommandHistograms = 50;
static const double kLiteralBlockSwitchCost = 28.1;
static const double kCommandBlockSwitchCost = 13.5;
static const double kDistanceBlockSwitchCost = 14.6;
static const int kLiteralStrideLength = 70;
static const int kCommandStrideLength = 40;
static const int kSymbolsPerLiteralHistogram = 544;
static const int kSymbolsPerCommandHistogram = 530;
static const int kSymbolsPerDistanceHistogram = 544;
static const int kMinLengthForBlockSplitting = 128;
static const int kIterMulForRefining = 2;
static const int kMinItersForRefining = 100;
static const size_t kLiteralStrideLength = 70;
static const size_t kCommandStrideLength = 40;
static const size_t kSymbolsPerLiteralHistogram = 544;
static const size_t kSymbolsPerCommandHistogram = 530;
static const size_t kSymbolsPerDistanceHistogram = 544;
static const size_t kMinLengthForBlockSplitting = 128;
static const size_t kIterMulForRefining = 2;
static const size_t kMinItersForRefining = 100;
void CopyLiteralsToByteArray(const Command* cmds,
const size_t num_commands,
@@ -99,17 +97,17 @@ inline static unsigned int MyRand(unsigned int* seed) {
template<typename HistogramType, typename DataType>
void InitialEntropyCodes(const DataType* data, size_t length,
int literals_per_histogram,
int max_histograms,
size_t literals_per_histogram,
size_t max_histograms,
size_t stride,
std::vector<HistogramType>* vec) {
int total_histograms = static_cast<int>(length) / literals_per_histogram + 1;
size_t total_histograms = length / literals_per_histogram + 1;
if (total_histograms > max_histograms) {
total_histograms = max_histograms;
}
unsigned int seed = 7;
size_t block_length = length / total_histograms;
for (int i = 0; i < total_histograms; ++i) {
for (size_t i = 0; i < total_histograms; ++i) {
size_t pos = length * i / total_histograms;
if (i != 0) {
pos += MyRand(&seed) % block_length;
@@ -155,8 +153,8 @@ void RefineEntropyCodes(const DataType* data, size_t length,
}
}
inline static double BitCost(int count) {
return count == 0 ? -2 : FastLog2(count);
inline static double BitCost(size_t count) {
return count == 0 ? -2.0 : FastLog2(count);
}
template<typename DataType, int kSize>
@@ -170,15 +168,16 @@ void FindBlocks(const DataType* data, const size_t length,
}
return;
}
int vecsize = static_cast<int>(vec.size());
size_t vecsize = vec.size();
assert(vecsize <= 256);
double* insert_cost = new double[kSize * vecsize];
memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * vecsize);
for (int j = 0; j < vecsize; ++j) {
insert_cost[j] = FastLog2(vec[j].total_count_);
for (size_t j = 0; j < vecsize; ++j) {
insert_cost[j] = FastLog2(static_cast<uint32_t>(vec[j].total_count_));
}
for (int i = kSize - 1; i >= 0; --i) {
for (int j = 0; j < vecsize; ++j) {
for (size_t i = kSize; i != 0;) {
--i;
for (size_t j = 0; j < vecsize; ++j) {
insert_cost[i * vecsize + j] = insert_cost[j] - BitCost(vec[j].data_[i]);
}
}
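The precomputed table reads as insert_cost[i * vecsize + j] = log2(total_count_j) - log2(count_j[i]), the approximate cost in bits of coding symbol i with entropy code j. For example, a code whose total_count is 1024 and in which symbol i occurs 32 times charges 10 - 5 = 5 bits for that symbol; a zero count is charged log2(total) + 2 bits via BitCost's -2 fallback.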
@@ -194,9 +193,9 @@ void FindBlocks(const DataType* data, const size_t length,
// position, we need to switch here.
for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
size_t ix = byte_ix * vecsize;
int insert_cost_ix = data[byte_ix] * vecsize;
size_t insert_cost_ix = data[byte_ix] * vecsize;
double min_cost = 1e99;
for (int k = 0; k < vecsize; ++k) {
for (size_t k = 0; k < vecsize; ++k) {
// We are coding the symbol in data[byte_ix] with entropy code k.
cost[k] += insert_cost[insert_cost_ix + k];
if (cost[k] < min_cost) {
@@ -207,9 +206,9 @@ void FindBlocks(const DataType* data, const size_t length,
double block_switch_cost = block_switch_bitcost;
// More blocks for the beginning.
if (byte_ix < 2000) {
block_switch_cost *= 0.77 + 0.07 * byte_ix / 2000;
block_switch_cost *= 0.77 + 0.07 * static_cast<double>(byte_ix) / 2000;
}
for (int k = 0; k < vecsize; ++k) {
for (size_t k = 0; k < vecsize; ++k) {
cost[k] -= min_cost;
if (cost[k] >= block_switch_cost) {
cost[k] = block_switch_cost;
@@ -234,9 +233,9 @@ void FindBlocks(const DataType* data, const size_t length,
delete[] switch_signal;
}
int RemapBlockIds(uint8_t* block_ids, const size_t length) {
size_t RemapBlockIds(uint8_t* block_ids, const size_t length) {
std::map<uint8_t, uint8_t> new_id;
int next_id = 0;
size_t next_id = 0;
for (size_t i = 0; i < length; ++i) {
if (new_id.find(block_ids[i]) == new_id.end()) {
new_id[block_ids[i]] = static_cast<uint8_t>(next_id);
@@ -253,7 +252,7 @@ template<typename HistogramType, typename DataType>
void BuildBlockHistograms(const DataType* data, const size_t length,
uint8_t* block_ids,
std::vector<HistogramType>* histograms) {
int num_types = RemapBlockIds(block_ids, length);
size_t num_types = RemapBlockIds(block_ids, length);
assert(num_types <= 256);
histograms->clear();
histograms->resize(num_types);
@@ -266,8 +265,8 @@ template<typename HistogramType, typename DataType>
void ClusterBlocks(const DataType* data, const size_t length,
uint8_t* block_ids) {
std::vector<HistogramType> histograms;
std::vector<int> block_index(length);
int cur_idx = 0;
std::vector<uint32_t> block_index(length);
uint32_t cur_idx = 0;
HistogramType cur_histogram;
for (size_t i = 0; i < length; ++i) {
bool block_boundary = (i + 1 == length || block_ids[i] != block_ids[i + 1]);
@@ -280,10 +279,10 @@ void ClusterBlocks(const DataType* data, const size_t length,
}
}
std::vector<HistogramType> clustered_histograms;
std::vector<int> histogram_symbols;
std::vector<uint32_t> histogram_symbols;
// Block ids need to fit in one byte.
static const size_t kMaxNumberOfBlockTypes = 256;
ClusterHistograms(histograms, 1, static_cast<int>(histograms.size()),
ClusterHistograms(histograms, 1, histograms.size(),
kMaxNumberOfBlockTypes,
&clustered_histograms,
&histogram_symbols);
@@ -293,30 +292,30 @@ void ClusterBlocks(const DataType* data, const size_t length,
}
void BuildBlockSplit(const std::vector<uint8_t>& block_ids, BlockSplit* split) {
int cur_id = block_ids[0];
int cur_length = 1;
split->num_types = -1;
uint8_t cur_id = block_ids[0];
uint8_t max_type = cur_id;
uint32_t cur_length = 1;
for (size_t i = 1; i < block_ids.size(); ++i) {
if (block_ids[i] != cur_id) {
uint8_t next_id = block_ids[i];
if (next_id != cur_id) {
split->types.push_back(cur_id);
split->lengths.push_back(cur_length);
split->num_types = std::max(split->num_types, cur_id);
cur_id = block_ids[i];
max_type = std::max(max_type, next_id);
cur_id = next_id;
cur_length = 0;
}
++cur_length;
}
split->types.push_back(cur_id);
split->lengths.push_back(cur_length);
split->num_types = std::max(split->num_types, cur_id);
++split->num_types;
split->num_types = static_cast<size_t>(max_type) + 1;
}
template<typename HistogramType, typename DataType>
void SplitByteVector(const std::vector<DataType>& data,
const int literals_per_histogram,
const int max_histograms,
const int sampling_stride_length,
const size_t literals_per_histogram,
const size_t max_histograms,
const size_t sampling_stride_length,
const double block_switch_cost,
BlockSplit* split) {
if (data.empty()) {
@@ -325,7 +324,7 @@ void SplitByteVector(const std::vector<DataType>& data,
} else if (data.size() < kMinLengthForBlockSplitting) {
split->num_types = 1;
split->types.push_back(0);
split->lengths.push_back(static_cast<int>(data.size()));
split->lengths.push_back(static_cast<uint32_t>(data.size()));
return;
}
std::vector<HistogramType> histograms;
@@ -340,7 +339,7 @@ void SplitByteVector(const std::vector<DataType>& data,
&histograms);
// Find a good path through literals with the good entropy codes.
std::vector<uint8_t> block_ids(data.size());
for (int i = 0; i < 10; ++i) {
for (size_t i = 0; i < 10; ++i) {
FindBlocks(&data[0], data.size(),
block_switch_cost,
histograms,
@@ -387,27 +386,4 @@ void SplitBlock(const Command* cmds,
dist_split);
}
void SplitBlockByTotalLength(const Command* all_commands,
const size_t num_commands,
int input_size,
int target_length,
std::vector<std::vector<Command> >* blocks) {
int num_blocks = input_size / target_length + 1;
int length_limit = input_size / num_blocks + 1;
int total_length = 0;
std::vector<Command> cur_block;
for (size_t i = 0; i < num_commands; ++i) {
const Command& cmd = all_commands[i];
int cmd_length = cmd.insert_len_ + cmd.copy_len_;
if (total_length > length_limit) {
blocks->push_back(cur_block);
cur_block.clear();
total_length = 0;
}
cur_block.push_back(cmd);
total_length += cmd_length;
}
blocks->push_back(cur_block);
}
} // namespace brotli

enc/block_splitter.h

@@ -9,9 +9,7 @@
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
#define BROTLI_ENC_BLOCK_SPLITTER_H_
#include <string.h>
#include <vector>
#include <utility>
#include "./command.h"
#include "./metablock.h"
@@ -37,9 +35,9 @@ struct BlockSplitIterator {
}
const BlockSplit& split_;
int idx_;
int type_;
int length_;
size_t idx_;
size_t type_;
size_t length_;
};
void CopyLiteralsToByteArray(const Command* cmds,
@@ -58,12 +56,6 @@ void SplitBlock(const Command* cmds,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split);
void SplitBlockByTotalLength(const Command* all_commands,
const size_t num_commands,
int input_size,
int target_length,
std::vector<std::vector<Command> >* blocks);
} // namespace brotli
#endif // BROTLI_ENC_BLOCK_SPLITTER_H_

enc/brotli_bit_stream.cc

@@ -11,6 +11,7 @@
#include "./brotli_bit_stream.h"
#include <algorithm>
#include <cstring>
#include <limits>
#include <vector>
@@ -22,59 +23,53 @@
#include "./write_bits.h"
namespace brotli {
// returns false if fail
namespace {
// nibblesbits represents the 2 bits to encode MNIBBLES (0-3)
bool EncodeMlen(size_t length, int* bits, int* numbits, int* nibblesbits) {
if (length > (1 << 24)) {
return false;
}
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void EncodeMlen(size_t length, uint64_t* bits,
size_t* numbits, uint64_t* nibblesbits) {
assert(length > 0);
assert(length <= (1 << 24));
length--; // MLEN - 1 is encoded
int lg = length == 0 ? 1 : Log2Floor(static_cast<uint32_t>(length)) + 1;
size_t lg = length == 0 ? 1 : Log2FloorNonZero(
static_cast<uint32_t>(length)) + 1;
assert(lg <= 24);
int mnibbles = (lg < 16 ? 16 : (lg + 3)) / 4;
size_t mnibbles = (lg < 16 ? 16 : (lg + 3)) / 4;
*nibblesbits = mnibbles - 4;
*numbits = mnibbles * 4;
*bits = static_cast<int>(length);
return true;
*bits = length;
}
void StoreVarLenUint8(int n, int* storage_ix, uint8_t* storage) {
} // namespace
void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) {
if (n == 0) {
WriteBits(1, 0, storage_ix, storage);
} else {
WriteBits(1, 1, storage_ix, storage);
int nbits = Log2Floor(n);
size_t nbits = Log2FloorNonZero(n);
WriteBits(3, nbits, storage_ix, storage);
WriteBits(nbits, n - (1 << nbits), storage_ix, storage);
}
}
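For example, StoreVarLenUint8(5, ...) writes a 1 bit (nonzero), then nbits = Log2FloorNonZero(5) = 2 in three bits, then 5 - 4 = 1 in two bits, six bits in total; n = 0 costs a single 0 bit.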
bool StoreCompressedMetaBlockHeader(bool final_block,
void StoreCompressedMetaBlockHeader(bool final_block,
size_t length,
int* storage_ix,
size_t* storage_ix,
uint8_t* storage) {
// Write ISLAST bit.
WriteBits(1, final_block, storage_ix, storage);
// Write ISEMPTY bit.
if (final_block) {
WriteBits(1, length == 0, storage_ix, storage);
if (length == 0) {
return true;
}
}
if (length == 0) {
// Only the last meta-block can be empty.
return false;
}
int lenbits;
int nlenbits;
int nibblesbits;
if (!EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits)) {
return false;
WriteBits(1, 0, storage_ix, storage);
}
uint64_t lenbits;
size_t nlenbits;
uint64_t nibblesbits;
EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
WriteBits(2, nibblesbits, storage_ix, storage);
WriteBits(nlenbits, lenbits, storage_ix, storage);
@@ -82,31 +77,27 @@ bool StoreCompressedMetaBlockHeader(bool final_block,
// Write ISUNCOMPRESSED bit.
WriteBits(1, 0, storage_ix, storage);
}
return true;
}
bool StoreUncompressedMetaBlockHeader(size_t length,
int* storage_ix,
void StoreUncompressedMetaBlockHeader(size_t length,
size_t* storage_ix,
uint8_t* storage) {
// Write ISLAST bit. Uncompressed block cannot be the last one, so set to 0.
WriteBits(1, 0, storage_ix, storage);
int lenbits;
int nlenbits;
int nibblesbits;
if (!EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits)) {
return false;
}
uint64_t lenbits;
size_t nlenbits;
uint64_t nibblesbits;
EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
WriteBits(2, nibblesbits, storage_ix, storage);
WriteBits(nlenbits, lenbits, storage_ix, storage);
// Write ISUNCOMPRESSED bit.
WriteBits(1, 1, storage_ix, storage);
return true;
}
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
const int num_codes,
const uint8_t *code_length_bitdepth,
int *storage_ix,
size_t *storage_ix,
uint8_t *storage) {
static const uint8_t kStorageOrder[kCodeLengthCodes] = {
1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15
@@ -129,7 +120,7 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
};
// Throw away trailing zeros:
int codes_to_store = kCodeLengthCodes;
size_t codes_to_store = kCodeLengthCodes;
if (num_codes > 1) {
for (; codes_to_store > 0; --codes_to_store) {
if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
@ -137,7 +128,7 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
}
}
}
int skip_some = 0; // skips none.
size_t skip_some = 0; // skips none.
if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
code_length_bitdepth[kStorageOrder[1]] == 0) {
skip_some = 2; // skips two.
@@ -146,8 +137,8 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
}
}
WriteBits(2, skip_some, storage_ix, storage);
for (int i = skip_some; i < codes_to_store; ++i) {
uint8_t l = code_length_bitdepth[kStorageOrder[i]];
for (size_t i = skip_some; i < codes_to_store; ++i) {
size_t l = code_length_bitdepth[kStorageOrder[i]];
WriteBits(kHuffmanBitLengthHuffmanCodeBitLengths[l],
kHuffmanBitLengthHuffmanCodeSymbols[l], storage_ix, storage);
}
@@ -158,10 +149,10 @@ void StoreHuffmanTreeToBitMask(
const std::vector<uint8_t> &huffman_tree_extra_bits,
const uint8_t *code_length_bitdepth,
const std::vector<uint16_t> &code_length_bitdepth_symbols,
int * __restrict storage_ix,
size_t * __restrict storage_ix,
uint8_t * __restrict storage) {
for (size_t i = 0; i < huffman_tree.size(); ++i) {
int ix = huffman_tree[i];
size_t ix = huffman_tree[i];
WriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix],
storage_ix, storage);
// Extra bits
@@ -177,17 +168,17 @@ void StoreHuffmanTreeToBitMask(
}
void StoreSimpleHuffmanTree(const uint8_t* depths,
int symbols[4],
int num_symbols,
int max_bits,
int *storage_ix, uint8_t *storage) {
size_t symbols[4],
size_t num_symbols,
size_t max_bits,
size_t *storage_ix, uint8_t *storage) {
// value of 1 indicates a simple Huffman code
WriteBits(2, 1, storage_ix, storage);
WriteBits(2, num_symbols - 1, storage_ix, storage); // NSYM - 1
// Sort
for (int i = 0; i < num_symbols; i++) {
for (int j = i + 1; j < num_symbols; j++) {
for (size_t i = 0; i < num_symbols; i++) {
for (size_t j = i + 1; j < num_symbols; j++) {
if (depths[symbols[j]] < depths[symbols[i]]) {
std::swap(symbols[j], symbols[i]);
}
@@ -213,8 +204,8 @@ void StoreSimpleHuffmanTree(const uint8_t* depths,
// num = alphabet size
// depths = symbol depths
void StoreHuffmanTree(const uint8_t* depths, int num,
int *storage_ix, uint8_t *storage) {
void StoreHuffmanTree(const uint8_t* depths, size_t num,
size_t *storage_ix, uint8_t *storage) {
// Write the Huffman tree into the brotli-representation.
std::vector<uint8_t> huffman_tree;
std::vector<uint8_t> huffman_tree_extra_bits;
@ -224,7 +215,7 @@ void StoreHuffmanTree(const uint8_t* depths, int num,
WriteHuffmanTree(depths, num, &huffman_tree, &huffman_tree_extra_bits);
// Calculate the statistics of the Huffman tree in brotli-representation.
int huffman_tree_histogram[kCodeLengthCodes] = { 0 };
uint32_t huffman_tree_histogram[kCodeLengthCodes] = { 0 };
for (size_t i = 0; i < huffman_tree.size(); ++i) {
++huffman_tree_histogram[huffman_tree[i]];
}
@@ -270,15 +261,15 @@ void StoreHuffmanTree(const uint8_t* depths, int num,
}
void BuildAndStoreHuffmanTree(const int *histogram,
const int length,
void BuildAndStoreHuffmanTree(const uint32_t *histogram,
const size_t length,
uint8_t* depth,
uint16_t* bits,
int* storage_ix,
size_t* storage_ix,
uint8_t* storage) {
int count = 0;
int s4[4] = { 0 };
for (int i = 0; i < length; i++) {
size_t count = 0;
size_t s4[4] = { 0 };
for (size_t i = 0; i < length; i++) {
if (histogram[i]) {
if (count < 4) {
s4[count] = i;
@ -289,8 +280,8 @@ void BuildAndStoreHuffmanTree(const int *histogram,
}
}
int max_bits_counter = length - 1;
int max_bits = 0;
size_t max_bits_counter = length - 1;
size_t max_bits = 0;
while (max_bits_counter) {
max_bits_counter >>= 1;
++max_bits;
@@ -312,30 +303,32 @@ void BuildAndStoreHuffmanTree(const int *histogram,
}
}
int IndexOf(const std::vector<int>& v, int value) {
for (int i = 0; i < static_cast<int>(v.size()); ++i) {
size_t IndexOf(const std::vector<uint32_t>& v, uint32_t value) {
size_t i = 0;
for (; i < v.size(); ++i) {
if (v[i] == value) return i;
}
return -1;
return i;
}
void MoveToFront(std::vector<int>* v, int index) {
int value = (*v)[index];
for (int i = index; i > 0; --i) {
void MoveToFront(std::vector<uint32_t>* v, size_t index) {
uint32_t value = (*v)[index];
for (size_t i = index; i != 0; --i) {
(*v)[i] = (*v)[i - 1];
}
(*v)[0] = value;
}
std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
std::vector<uint32_t> MoveToFrontTransform(const std::vector<uint32_t>& v) {
if (v.empty()) return v;
std::vector<int> mtf(*std::max_element(v.begin(), v.end()) + 1);
for (int i = 0; i < static_cast<int>(mtf.size()); ++i) mtf[i] = i;
std::vector<int> result(v.size());
uint32_t max_value = *std::max_element(v.begin(), v.end());
std::vector<uint32_t> mtf(max_value + 1);
for (uint32_t i = 0; i <= max_value; ++i) mtf[i] = i;
std::vector<uint32_t> result(v.size());
for (size_t i = 0; i < v.size(); ++i) {
int index = IndexOf(mtf, v[i]);
assert(index >= 0);
result[i] = index;
size_t index = IndexOf(mtf, v[i]);
assert(index < mtf.size());
result[i] = static_cast<uint32_t>(index);
MoveToFront(&mtf, index);
}
return result;
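A short trace of the transform: for input {1, 1, 0, 2} the front list starts as [0, 1, 2], so the outputs are 1 (then 1 moves to front), 0 (1 is already in front), 1 (0 is now second), and 2, i.e. {1, 0, 1, 2}.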
@@ -347,61 +340,62 @@ std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
// initial value of *max_run_length_prefix. The prefix code of run length L is
// simply Log2Floor(L) and the number of extra bits is the same as the prefix
// code.
void RunLengthCodeZeros(const std::vector<int>& v_in,
int* max_run_length_prefix,
std::vector<int>* v_out,
std::vector<int>* extra_bits) {
int max_reps = 0;
void RunLengthCodeZeros(const std::vector<uint32_t>& v_in,
uint32_t* max_run_length_prefix,
std::vector<uint32_t>* v_out,
std::vector<uint32_t>* extra_bits) {
uint32_t max_reps = 0;
for (size_t i = 0; i < v_in.size();) {
for (; i < v_in.size() && v_in[i] != 0; ++i) ;
int reps = 0;
uint32_t reps = 0;
for (; i < v_in.size() && v_in[i] == 0; ++i) {
++reps;
}
max_reps = std::max(reps, max_reps);
}
int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
*max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
uint32_t max_prefix = max_reps > 0 ? Log2FloorNonZero(max_reps) : 0;
max_prefix = std::min(max_prefix, *max_run_length_prefix);
*max_run_length_prefix = max_prefix;
for (size_t i = 0; i < v_in.size();) {
if (v_in[i] != 0) {
v_out->push_back(v_in[i] + *max_run_length_prefix);
extra_bits->push_back(0);
++i;
} else {
int reps = 1;
uint32_t reps = 1;
for (size_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
++reps;
}
i += reps;
while (reps) {
if (reps < (2 << *max_run_length_prefix)) {
int run_length_prefix = Log2Floor(reps);
while (reps != 0) {
if (reps < (2u << max_prefix)) {
uint32_t run_length_prefix = Log2FloorNonZero(reps);
v_out->push_back(run_length_prefix);
extra_bits->push_back(reps - (1 << run_length_prefix));
extra_bits->push_back(reps - (1u << run_length_prefix));
break;
} else {
v_out->push_back(*max_run_length_prefix);
extra_bits->push_back((1 << *max_run_length_prefix) - 1);
reps -= (2 << *max_run_length_prefix) - 1;
v_out->push_back(max_prefix);
extra_bits->push_back((1u << max_prefix) - 1u);
reps -= (2u << max_prefix) - 1u;
}
}
}
}
}
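Worked example with max_run_length_prefix = 6: a run of 7 zeros satisfies reps < (2 << 6) = 128, so it is coded as prefix Log2FloorNonZero(7) = 2 with extra bits 7 - 4 = 3, while nonzero values v are emitted shifted to v + 6 to make room for the run-length prefix symbols.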
void EncodeContextMap(const std::vector<int>& context_map,
int num_clusters,
int* storage_ix, uint8_t* storage) {
void EncodeContextMap(const std::vector<uint32_t>& context_map,
size_t num_clusters,
size_t* storage_ix, uint8_t* storage) {
StoreVarLenUint8(num_clusters - 1, storage_ix, storage);
if (num_clusters == 1) {
return;
}
std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
std::vector<int> rle_symbols;
std::vector<int> extra_bits;
int max_run_length_prefix = 6;
std::vector<uint32_t> transformed_symbols = MoveToFrontTransform(context_map);
std::vector<uint32_t> rle_symbols;
std::vector<uint32_t> extra_bits;
uint32_t max_run_length_prefix = 6;
RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
&rle_symbols, &extra_bits);
HistogramContextMap symbol_histogram;
@@ -432,32 +426,32 @@ void EncodeContextMap(const std::vector<int>& context_map,
}
void StoreBlockSwitch(const BlockSplitCode& code,
const int block_ix,
int* storage_ix,
const size_t block_ix,
size_t* storage_ix,
uint8_t* storage) {
if (block_ix > 0) {
int typecode = code.type_code[block_ix];
size_t typecode = code.type_code[block_ix];
WriteBits(code.type_depths[typecode], code.type_bits[typecode],
storage_ix, storage);
}
int lencode = code.length_prefix[block_ix];
size_t lencode = code.length_prefix[block_ix];
WriteBits(code.length_depths[lencode], code.length_bits[lencode],
storage_ix, storage);
WriteBits(code.length_nextra[block_ix], code.length_extra[block_ix],
storage_ix, storage);
}
void BuildAndStoreBlockSplitCode(const std::vector<int>& types,
const std::vector<int>& lengths,
const int num_types,
void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
const std::vector<uint32_t>& lengths,
const size_t num_types,
BlockSplitCode* code,
int* storage_ix,
size_t* storage_ix,
uint8_t* storage) {
const int num_blocks = static_cast<int>(types.size());
std::vector<int> type_histo(num_types + 2);
std::vector<int> length_histo(26);
int last_type = 1;
int second_last_type = 0;
const size_t num_blocks = types.size();
std::vector<uint32_t> type_histo(num_types + 2);
std::vector<uint32_t> length_histo(26);
size_t last_type = 1;
size_t second_last_type = 0;
code->type_code.resize(num_blocks);
code->length_prefix.resize(num_blocks);
code->length_nextra.resize(num_blocks);
@@ -466,15 +460,15 @@ void BuildAndStoreBlockSplitCode(const std::vector<int>& types,
code->type_bits.resize(num_types + 2);
code->length_depths.resize(26);
code->length_bits.resize(26);
for (int i = 0; i < num_blocks; ++i) {
int type = types[i];
int type_code = (type == last_type + 1 ? 1 :
for (size_t i = 0; i < num_blocks; ++i) {
size_t type = types[i];
size_t type_code = (type == last_type + 1 ? 1 :
type == second_last_type ? 0 :
type + 2);
second_last_type = last_type;
last_type = type;
code->type_code[i] = type_code;
if (i > 0) ++type_histo[type_code];
code->type_code[i] = static_cast<uint32_t>(type_code);
if (i != 0) ++type_histo[type_code];
GetBlockLengthPrefixCode(lengths[i],
&code->length_prefix[i],
&code->length_nextra[i],
@@ -493,31 +487,31 @@ void BuildAndStoreBlockSplitCode(const std::vector<int>& types,
}
}
void StoreTrivialContextMap(int num_types,
int context_bits,
int* storage_ix,
void StoreTrivialContextMap(size_t num_types,
size_t context_bits,
size_t* storage_ix,
uint8_t* storage) {
StoreVarLenUint8(num_types - 1, storage_ix, storage);
if (num_types > 1) {
int repeat_code = context_bits - 1;
int repeat_bits = (1 << repeat_code) - 1;
int alphabet_size = num_types + repeat_code;
std::vector<int> histogram(alphabet_size);
size_t repeat_code = context_bits - 1;
uint64_t repeat_bits = (1 << repeat_code) - 1;
size_t alphabet_size = num_types + repeat_code;
std::vector<uint32_t> histogram(alphabet_size);
std::vector<uint8_t> depths(alphabet_size);
std::vector<uint16_t> bits(alphabet_size);
// Write RLEMAX.
WriteBits(1, 1, storage_ix, storage);
WriteBits(4, repeat_code - 1, storage_ix, storage);
histogram[repeat_code] = num_types;
histogram[repeat_code] = static_cast<uint32_t>(num_types);
histogram[0] = 1;
for (int i = context_bits; i < alphabet_size; ++i) {
for (size_t i = context_bits; i < alphabet_size; ++i) {
histogram[i] = 1;
}
BuildAndStoreHuffmanTree(&histogram[0], alphabet_size,
&depths[0], &bits[0],
storage_ix, storage);
for (int i = 0; i < num_types; ++i) {
int code = (i == 0 ? 0 : i + context_bits - 1);
for (size_t i = 0; i < num_types; ++i) {
size_t code = (i == 0 ? 0 : i + context_bits - 1);
WriteBits(depths[code], bits[code], storage_ix, storage);
WriteBits(depths[repeat_code], bits[repeat_code], storage_ix, storage);
WriteBits(repeat_code, repeat_bits, storage_ix, storage);
@@ -530,10 +524,10 @@ void StoreTrivialContextMap(int num_types,
// Manages the encoding of one block category (literal, command or distance).
class BlockEncoder {
public:
BlockEncoder(int alphabet_size,
int num_block_types,
const std::vector<int>& block_types,
const std::vector<int>& block_lengths)
BlockEncoder(size_t alphabet_size,
size_t num_block_types,
const std::vector<uint8_t>& block_types,
const std::vector<uint32_t>& block_lengths)
: alphabet_size_(alphabet_size),
num_block_types_(num_block_types),
block_types_(block_types),
@@ -544,7 +538,8 @@ class BlockEncoder {
// Creates entropy codes of block lengths and block types and stores them
// to the bit stream.
void BuildAndStoreBlockSwitchEntropyCodes(int* storage_ix, uint8_t* storage) {
void BuildAndStoreBlockSwitchEntropyCodes(size_t* storage_ix,
uint8_t* storage) {
BuildAndStoreBlockSplitCode(
block_types_, block_lengths_, num_block_types_,
&block_split_code_, storage_ix, storage);
@@ -555,7 +550,7 @@ class BlockEncoder {
template<int kSize>
void BuildAndStoreEntropyCodes(
const std::vector<Histogram<kSize> >& histograms,
int* storage_ix, uint8_t* storage) {
size_t* storage_ix, uint8_t* storage) {
depths_.resize(histograms.size() * alphabet_size_);
bits_.resize(histograms.size() * alphabet_size_);
for (size_t i = 0; i < histograms.size(); ++i) {
@@ -568,7 +563,7 @@ class BlockEncoder {
// Stores the next symbol with the entropy code of the current block type.
// Updates the block type and block length at block boundaries.
void StoreSymbol(int symbol, int* storage_ix, uint8_t* storage) {
void StoreSymbol(size_t symbol, size_t* storage_ix, uint8_t* storage) {
if (block_len_ == 0) {
++block_ix_;
block_len_ = block_lengths_[block_ix_];
@ -576,7 +571,7 @@ class BlockEncoder {
StoreBlockSwitch(block_split_code_, block_ix_, storage_ix, storage);
}
--block_len_;
int ix = entropy_ix_ + symbol;
size_t ix = entropy_ix_ + symbol;
WriteBits(depths_[ix], bits_[ix], storage_ix, storage);
}
@@ -584,67 +579,60 @@ class BlockEncoder {
// context value.
// Updates the block type and block length at block boundaries.
template<int kContextBits>
void StoreSymbolWithContext(int symbol, int context,
const std::vector<int>& context_map,
int* storage_ix, uint8_t* storage) {
void StoreSymbolWithContext(size_t symbol, size_t context,
const std::vector<uint32_t>& context_map,
size_t* storage_ix, uint8_t* storage) {
if (block_len_ == 0) {
++block_ix_;
block_len_ = block_lengths_[block_ix_];
entropy_ix_ = block_types_[block_ix_] << kContextBits;
size_t block_type = block_types_[block_ix_];
entropy_ix_ = block_type << kContextBits;
StoreBlockSwitch(block_split_code_, block_ix_, storage_ix, storage);
}
--block_len_;
int histo_ix = context_map[entropy_ix_ + context];
int ix = histo_ix * alphabet_size_ + symbol;
size_t histo_ix = context_map[entropy_ix_ + context];
size_t ix = histo_ix * alphabet_size_ + symbol;
WriteBits(depths_[ix], bits_[ix], storage_ix, storage);
}
private:
const int alphabet_size_;
const int num_block_types_;
const std::vector<int>& block_types_;
const std::vector<int>& block_lengths_;
const size_t alphabet_size_;
const size_t num_block_types_;
const std::vector<uint8_t>& block_types_;
const std::vector<uint32_t>& block_lengths_;
BlockSplitCode block_split_code_;
int block_ix_;
int block_len_;
int entropy_ix_;
size_t block_ix_;
size_t block_len_;
size_t entropy_ix_;
std::vector<uint8_t> depths_;
std::vector<uint16_t> bits_;
};
void JumpToByteBoundary(int* storage_ix, uint8_t* storage) {
*storage_ix = (*storage_ix + 7) & ~7;
void JumpToByteBoundary(size_t* storage_ix, uint8_t* storage) {
*storage_ix = (*storage_ix + 7u) & ~7u;
storage[*storage_ix >> 3] = 0;
}
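
To sanity-check the byte-alignment arithmetic in isolation, here is a minimal standalone sketch; the helper name and test values are illustrative, and the mask is widened to size_t so the check holds for arbitrary bit positions:

#include <cassert>
#include <cstddef>

// Round a bit position up to the next multiple of 8, as JumpToByteBoundary
// does above.
static size_t RoundUpToByteBoundary(size_t storage_ix) {
  return (storage_ix + 7) & ~static_cast<size_t>(7);
}

int main() {
  assert(RoundUpToByteBoundary(0) == 0);    // already aligned
  assert(RoundUpToByteBoundary(13) == 16);  // mid-byte position rounds up
  assert(RoundUpToByteBoundary(16) == 16);  // byte boundaries are fixed points
  return 0;
}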
bool StoreMetaBlock(const uint8_t* input,
void StoreMetaBlock(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
bool is_last,
int num_direct_distance_codes,
int distance_postfix_bits,
int literal_context_mode,
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits,
ContextType literal_context_mode,
const brotli::Command *commands,
size_t n_commands,
const MetaBlockSplit& mb,
int *storage_ix,
size_t *storage_ix,
uint8_t *storage) {
if (!StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage)) {
return false;
}
StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
if (length == 0) {
// Only the last meta-block can be empty, so jump to next byte.
JumpToByteBoundary(storage_ix, storage);
return true;
}
int num_distance_codes =
size_t num_distance_codes =
kNumDistanceShortCodes + num_direct_distance_codes +
(48 << distance_postfix_bits);
(48u << distance_postfix_bits);
BlockEncoder literal_enc(256,
mb.literal_split.num_types,
@ -666,11 +654,11 @@ bool StoreMetaBlock(const uint8_t* input,
WriteBits(2, distance_postfix_bits, storage_ix, storage);
WriteBits(4, num_direct_distance_codes >> distance_postfix_bits,
storage_ix, storage);
for (int i = 0; i < mb.literal_split.num_types; ++i) {
for (size_t i = 0; i < mb.literal_split.num_types; ++i) {
WriteBits(2, literal_context_mode, storage_ix, storage);
}
int num_literal_histograms = static_cast<int>(mb.literal_histograms.size());
size_t num_literal_histograms = mb.literal_histograms.size();
if (mb.literal_context_map.empty()) {
StoreTrivialContextMap(num_literal_histograms, kLiteralContextBits,
storage_ix, storage);
@ -679,7 +667,7 @@ bool StoreMetaBlock(const uint8_t* input,
storage_ix, storage);
}
int num_dist_histograms = static_cast<int>(mb.distance_histograms.size());
size_t num_dist_histograms = mb.distance_histograms.size();
if (mb.distance_context_map.empty()) {
StoreTrivialContextMap(num_dist_histograms, kDistanceContextBits,
storage_ix, storage);
@ -698,20 +686,19 @@ bool StoreMetaBlock(const uint8_t* input,
size_t pos = start_pos;
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
int cmd_code = cmd.cmd_prefix_;
int lennumextra = static_cast<int>(cmd.cmd_extra_ >> 48);
size_t cmd_code = cmd.cmd_prefix_;
uint32_t lennumextra = static_cast<uint32_t>(cmd.cmd_extra_ >> 48);
uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffUL;
command_enc.StoreSymbol(cmd_code, storage_ix, storage);
WriteBits(lennumextra, lenextra, storage_ix, storage);
if (mb.literal_context_map.empty()) {
for (int j = 0; j < cmd.insert_len_; j++) {
for (size_t j = cmd.insert_len_; j != 0; --j) {
literal_enc.StoreSymbol(input[pos & mask], storage_ix, storage);
++pos;
}
} else {
for (int j = 0; j < cmd.insert_len_; ++j) {
int context = Context(prev_byte, prev_byte2,
literal_context_mode);
for (size_t j = cmd.insert_len_; j != 0; --j) {
size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
uint8_t literal = input[pos & mask];
literal_enc.StoreSymbolWithContext<kLiteralContextBits>(
literal, context, mb.literal_context_map, storage_ix, storage);
@ -725,13 +712,13 @@ bool StoreMetaBlock(const uint8_t* input,
prev_byte2 = input[(pos - 2) & mask];
prev_byte = input[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
int dist_code = cmd.dist_prefix_;
int distnumextra = cmd.dist_extra_ >> 24;
int distextra = cmd.dist_extra_ & 0xffffff;
size_t dist_code = cmd.dist_prefix_;
uint32_t distnumextra = cmd.dist_extra_ >> 24;
uint64_t distextra = cmd.dist_extra_ & 0xffffff;
if (mb.distance_context_map.empty()) {
distance_enc.StoreSymbol(dist_code, storage_ix, storage);
} else {
int context = cmd.DistanceContext();
size_t context = cmd.DistanceContext();
distance_enc.StoreSymbolWithContext<kDistanceContextBits>(
dist_code, context, mb.distance_context_map, storage_ix, storage);
}
@ -742,45 +729,86 @@ bool StoreMetaBlock(const uint8_t* input,
if (is_last) {
JumpToByteBoundary(storage_ix, storage);
}
return true;
}
bool StoreMetaBlockTrivial(const uint8_t* input,
void BuildHistograms(const uint8_t* input,
size_t start_pos,
size_t mask,
const brotli::Command *commands,
size_t n_commands,
HistogramLiteral* lit_histo,
HistogramCommand* cmd_histo,
HistogramDistance* dist_histo) {
size_t pos = start_pos;
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
cmd_histo->Add(cmd.cmd_prefix_);
for (size_t j = cmd.insert_len_; j != 0; --j) {
lit_histo->Add(input[pos & mask]);
++pos;
}
pos += cmd.copy_len_;
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
dist_histo->Add(cmd.dist_prefix_);
}
}
}
void StoreDataWithHuffmanCodes(const uint8_t* input,
size_t start_pos,
size_t mask,
const brotli::Command *commands,
size_t n_commands,
const uint8_t* lit_depth,
const uint16_t* lit_bits,
const uint8_t* cmd_depth,
const uint16_t* cmd_bits,
const uint8_t* dist_depth,
const uint16_t* dist_bits,
size_t* storage_ix,
uint8_t* storage) {
size_t pos = start_pos;
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
const size_t cmd_code = cmd.cmd_prefix_;
const uint32_t lennumextra = static_cast<uint32_t>(cmd.cmd_extra_ >> 48);
const uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffUL;
WriteBits(cmd_depth[cmd_code], cmd_bits[cmd_code], storage_ix, storage);
WriteBits(lennumextra, lenextra, storage_ix, storage);
for (size_t j = cmd.insert_len_; j != 0; --j) {
const uint8_t literal = input[pos & mask];
WriteBits(lit_depth[literal], lit_bits[literal], storage_ix, storage);
++pos;
}
pos += cmd.copy_len_;
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
const size_t dist_code = cmd.dist_prefix_;
const uint32_t distnumextra = cmd.dist_extra_ >> 24;
const uint32_t distextra = cmd.dist_extra_ & 0xffffff;
WriteBits(dist_depth[dist_code], dist_bits[dist_code],
storage_ix, storage);
WriteBits(distnumextra, distextra, storage_ix, storage);
}
}
}
void StoreMetaBlockTrivial(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
bool is_last,
const brotli::Command *commands,
size_t n_commands,
int *storage_ix,
size_t *storage_ix,
uint8_t *storage) {
if (!StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage)) {
return false;
}
if (length == 0) {
// Only the last meta-block can be empty, so jump to next byte.
JumpToByteBoundary(storage_ix, storage);
return true;
}
StoreCompressedMetaBlockHeader(is_last, length, storage_ix, storage);
HistogramLiteral lit_histo;
HistogramCommand cmd_histo;
HistogramDistance dist_histo;
size_t pos = start_pos;
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
cmd_histo.Add(cmd.cmd_prefix_);
for (int j = 0; j < cmd.insert_len_; ++j) {
lit_histo.Add(input[pos & mask]);
++pos;
}
pos += cmd.copy_len_;
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
dist_histo.Add(cmd.dist_prefix_);
}
}
BuildHistograms(input, start_pos, mask, commands, n_commands,
&lit_histo, &cmd_histo, &dist_histo);
WriteBits(13, 0, storage_ix, storage);
@ -800,59 +828,37 @@ bool StoreMetaBlockTrivial(const uint8_t* input,
BuildAndStoreHuffmanTree(&dist_histo.data_[0], 64,
&dist_depth[0], &dist_bits[0],
storage_ix, storage);
pos = start_pos;
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
const int cmd_code = cmd.cmd_prefix_;
const int lennumextra = static_cast<int>(cmd.cmd_extra_ >> 48);
const uint64_t lenextra = cmd.cmd_extra_ & 0xffffffffffffUL;
WriteBits(cmd_depth[cmd_code], cmd_bits[cmd_code], storage_ix, storage);
WriteBits(lennumextra, lenextra, storage_ix, storage);
for (int j = 0; j < cmd.insert_len_; j++) {
const uint8_t literal = input[pos & mask];
WriteBits(lit_depth[literal], lit_bits[literal], storage_ix, storage);
++pos;
}
pos += cmd.copy_len_;
if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
const int dist_code = cmd.dist_prefix_;
const int distnumextra = cmd.dist_extra_ >> 24;
const int distextra = cmd.dist_extra_ & 0xffffff;
WriteBits(dist_depth[dist_code], dist_bits[dist_code],
storage_ix, storage);
WriteBits(distnumextra, distextra, storage_ix, storage);
}
}
StoreDataWithHuffmanCodes(input, start_pos, mask, commands,
n_commands, &lit_depth[0], &lit_bits[0],
&cmd_depth[0], &cmd_bits[0],
&dist_depth[0], &dist_bits[0],
storage_ix, storage);
if (is_last) {
JumpToByteBoundary(storage_ix, storage);
}
return true;
}
// This is for storing uncompressed blocks (simple raw storage of
// bytes-as-bytes).
bool StoreUncompressedMetaBlock(bool final_block,
void StoreUncompressedMetaBlock(bool final_block,
const uint8_t * __restrict input,
size_t position, size_t mask,
size_t len,
int * __restrict storage_ix,
size_t * __restrict storage_ix,
uint8_t * __restrict storage) {
if (!brotli::StoreUncompressedMetaBlockHeader(len, storage_ix, storage)) {
return false;
}
StoreUncompressedMetaBlockHeader(len, storage_ix, storage);
JumpToByteBoundary(storage_ix, storage);
size_t masked_pos = position & mask;
if (masked_pos + len > mask + 1) {
size_t len1 = mask + 1 - masked_pos;
memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len1);
*storage_ix += static_cast<int>(len1 << 3);
*storage_ix += len1 << 3;
len -= len1;
masked_pos = 0;
}
memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len);
*storage_ix += static_cast<int>(len << 3);
*storage_ix += len << 3;
// We need to clear the next 4 bytes to remain compatible with WriteBits.
@ -865,10 +871,9 @@ bool StoreUncompressedMetaBlock(bool final_block,
brotli::WriteBits(1, 1, storage_ix, storage); // isempty
JumpToByteBoundary(storage_ix, storage);
}
return true;
}
void StoreSyncMetaBlock(int * __restrict storage_ix,
void StoreSyncMetaBlock(size_t * __restrict storage_ix,
uint8_t * __restrict storage) {
// Empty metadata meta-block bit pattern:
// 1 bit: is_last (0)

View File

@ -27,53 +27,60 @@ namespace brotli {
// position for the current storage.
// Stores a number between 0 and 255.
void StoreVarLenUint8(int n, int* storage_ix, uint8_t* storage);
void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage);
// Stores the compressed meta-block header.
bool StoreCompressedMetaBlockHeader(bool final_block,
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreCompressedMetaBlockHeader(bool final_block,
size_t length,
int* storage_ix,
size_t* storage_ix,
uint8_t* storage);
// Stores the uncompressed meta-block header.
bool StoreUncompressedMetaBlockHeader(size_t length,
int* storage_ix,
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreUncompressedMetaBlockHeader(size_t length,
size_t* storage_ix,
uint8_t* storage);
// Stores a context map where the histogram type is always the block type.
void StoreTrivialContextMap(int num_types,
int context_bits,
int* storage_ix,
void StoreTrivialContextMap(size_t num_types,
size_t context_bits,
size_t* storage_ix,
uint8_t* storage);
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
const int num_codes,
const uint8_t *code_length_bitdepth,
int *storage_ix,
size_t *storage_ix,
uint8_t *storage);
void StoreHuffmanTree(const uint8_t* depths, size_t num,
size_t *storage_ix, uint8_t *storage);
// Builds a Huffman tree from histogram[0:length] into depth[0:length] and
// bits[0:length] and stores the encoded tree to the bit stream.
void BuildAndStoreHuffmanTree(const int *histogram,
const int length,
void BuildAndStoreHuffmanTree(const uint32_t *histogram,
const size_t length,
uint8_t* depth,
uint16_t* bits,
int* storage_ix,
size_t* storage_ix,
uint8_t* storage);
// Encodes the given context map to the bit stream. The number of different
// histogram ids is given by num_clusters.
void EncodeContextMap(const std::vector<int>& context_map,
int num_clusters,
int* storage_ix, uint8_t* storage);
void EncodeContextMap(const std::vector<uint32_t>& context_map,
size_t num_clusters,
size_t* storage_ix, uint8_t* storage);
// Data structure that stores everything that is needed to encode each block
// switch command.
struct BlockSplitCode {
std::vector<int> type_code;
std::vector<int> length_prefix;
std::vector<int> length_nextra;
std::vector<int> length_extra;
std::vector<uint32_t> type_code;
std::vector<uint32_t> length_prefix;
std::vector<uint32_t> length_nextra;
std::vector<uint32_t> length_extra;
std::vector<uint8_t> type_depths;
std::vector<uint16_t> type_bits;
std::vector<uint8_t> length_depths;
@ -82,58 +89,64 @@ struct BlockSplitCode {
// Builds a BlockSplitCode data structure from the block split given by the
// vector of block types and block lengths and stores it to the bit stream.
void BuildAndStoreBlockSplitCode(const std::vector<int>& types,
const std::vector<int>& lengths,
const int num_types,
void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
const std::vector<uint32_t>& lengths,
const size_t num_types,
BlockSplitCode* code,
int* storage_ix,
size_t* storage_ix,
uint8_t* storage);
// Stores the block switch command with index block_ix to the bit stream.
void StoreBlockSwitch(const BlockSplitCode& code,
const int block_ix,
int* storage_ix,
const size_t block_ix,
size_t* storage_ix,
uint8_t* storage);
bool StoreMetaBlock(const uint8_t* input,
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreMetaBlock(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
bool final_block,
int num_direct_distance_codes,
int distance_postfix_bits,
int literal_context_mode,
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits,
ContextType literal_context_mode,
const brotli::Command *commands,
size_t n_commands,
const MetaBlockSplit& mb,
int *storage_ix,
size_t *storage_ix,
uint8_t *storage);
// Stores the meta-block without doing any block splitting, just collects
// one histogram per block category and uses that for entropy coding.
bool StoreMetaBlockTrivial(const uint8_t* input,
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreMetaBlockTrivial(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
bool is_last,
const brotli::Command *commands,
size_t n_commands,
int *storage_ix,
size_t *storage_ix,
uint8_t *storage);
// This is for storing uncompressed blocks (simple raw storage of
// bytes-as-bytes).
bool StoreUncompressedMetaBlock(bool final_block,
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreUncompressedMetaBlock(bool final_block,
const uint8_t* input,
size_t position, size_t mask,
size_t len,
int* storage_ix,
size_t* storage_ix,
uint8_t* storage);
// Stores an empty metadata meta-block and syncs to a byte boundary.
void StoreSyncMetaBlock(int* storage_ix, uint8_t* storage);
void StoreSyncMetaBlock(size_t* storage_ix, uint8_t* storage);
} // namespace brotli

View File

@ -10,11 +10,8 @@
#define BROTLI_ENC_CLUSTER_H_
#include <math.h>
#include <stdio.h>
#include <algorithm>
#include <complex>
#include <map>
#include <set>
#include <utility>
#include <vector>
@ -28,41 +25,39 @@
namespace brotli {
struct HistogramPair {
int idx1;
int idx2;
bool valid;
uint32_t idx1;
uint32_t idx2;
double cost_combo;
double cost_diff;
};
struct HistogramPairComparator {
bool operator()(const HistogramPair& p1, const HistogramPair& p2) const {
if (p1.cost_diff != p2.cost_diff) {
return p1.cost_diff > p2.cost_diff;
}
return abs(p1.idx1 - p1.idx2) > abs(p2.idx1 - p2.idx2);
inline bool operator<(const HistogramPair& p1, const HistogramPair& p2) {
if (p1.cost_diff != p2.cost_diff) {
return p1.cost_diff > p2.cost_diff;
}
};
return (p1.idx2 - p1.idx1) > (p2.idx2 - p2.idx1);
}
// Returns entropy reduction of the context map when we combine two clusters.
inline double ClusterCostDiff(int size_a, int size_b) {
int size_c = size_a + size_b;
return size_a * FastLog2(size_a) + size_b * FastLog2(size_b) -
size_c * FastLog2(size_c);
inline double ClusterCostDiff(size_t size_a, size_t size_b) {
size_t size_c = size_a + size_b;
return static_cast<double>(size_a) * FastLog2(size_a) +
static_cast<double>(size_b) * FastLog2(size_b) -
static_cast<double>(size_c) * FastLog2(size_c);
}
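
The diff is always negative for non-empty clusters, because (a+b)*log2(a+b) exceeds a*log2(a) + b*log2(b); a standalone check, with std::log2 standing in for the encoder's table-assisted FastLog2 (a substitution made only for this sketch):

#include <cassert>
#include <cmath>
#include <cstddef>

// Same formula as ClusterCostDiff, with std::log2 in place of FastLog2.
static double ClusterCostDiffExact(size_t size_a, size_t size_b) {
  size_t size_c = size_a + size_b;
  return static_cast<double>(size_a) * std::log2(static_cast<double>(size_a)) +
         static_cast<double>(size_b) * std::log2(static_cast<double>(size_b)) -
         static_cast<double>(size_c) * std::log2(static_cast<double>(size_c));
}

int main() {
  // Merging two singletons: 0 + 0 - 2*log2(2) = -2 bits.
  assert(ClusterCostDiffExact(1, 1) == -2.0);
  // Two clusters of size 2: 2*1 + 2*1 - 4*2 = -4 bits.
  assert(ClusterCostDiffExact(2, 2) == -4.0);
  return 0;
}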
// Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
// it is below a threshold, stores the pair (idx1, idx2) in the *pairs heap.
// it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue.
template<typename HistogramType>
void CompareAndPushToHeap(const HistogramType* out,
const int* cluster_size,
int idx1, int idx2,
std::vector<HistogramPair>* pairs) {
void CompareAndPushToQueue(const HistogramType* out,
const uint32_t* cluster_size,
uint32_t idx1, uint32_t idx2,
std::vector<HistogramPair>* pairs) {
if (idx1 == idx2) {
return;
}
if (idx2 < idx1) {
int t = idx2;
uint32_t t = idx2;
idx2 = idx1;
idx1 = t;
}
@ -70,7 +65,6 @@ void CompareAndPushToHeap(const HistogramType* out,
HistogramPair p;
p.idx1 = idx1;
p.idx2 = idx2;
p.valid = true;
p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
p.cost_diff -= out[idx1].bit_cost_;
p.cost_diff -= out[idx2].bit_cost_;
@ -94,37 +88,38 @@ void CompareAndPushToHeap(const HistogramType* out,
}
if (store_pair) {
p.cost_diff += p.cost_combo;
pairs->push_back(p);
std::push_heap(pairs->begin(), pairs->end(), HistogramPairComparator());
if (!pairs->empty() && (pairs->front() < p)) {
// Replace the top of the queue if needed.
pairs->push_back(pairs->front());
pairs->front() = p;
} else {
pairs->push_back(p);
}
}
}
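
With the heap gone, the vector's only ordering guarantee is that its front element is the best pair pushed so far (operator< above orders by descending cost_diff, so `pairs->front() < p` means the current front is worse than p). A minimal model of that discipline over plain doubles, where smaller means better; the helper is illustrative, not the commit's code:

#include <cassert>
#include <vector>

// Push x, keeping v->front() equal to the smallest value pushed so far;
// the rest of the vector stays unordered, mirroring the queue above.
static void PushKeepBestAtFront(std::vector<double>* v, double x) {
  if (!v->empty() && v->front() > x) {
    v->push_back(v->front());  // demote the old front to the tail
    v->front() = x;            // the new value becomes the front
  } else {
    v->push_back(x);
  }
}

int main() {
  std::vector<double> v;
  PushKeepBestAtFront(&v, 3.0);
  PushKeepBestAtFront(&v, 1.0);
  PushKeepBestAtFront(&v, 2.0);
  assert(v.front() == 1.0 && v.size() == 3);  // best candidate stays in front
  return 0;
}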
template<typename HistogramType>
void HistogramCombine(HistogramType* out,
int* cluster_size,
int* symbols,
int symbols_size,
uint32_t* cluster_size,
uint32_t* symbols,
size_t symbols_size,
size_t max_clusters) {
double cost_diff_threshold = 0.0;
size_t min_cluster_size = 1;
std::set<int> all_symbols;
std::vector<int> clusters;
for (int i = 0; i < symbols_size; ++i) {
if (all_symbols.find(symbols[i]) == all_symbols.end()) {
all_symbols.insert(symbols[i]);
if (!clusters.empty()) {
BROTLI_DCHECK(clusters.back() < symbols[i]);
}
clusters.push_back(symbols[i]);
}
}
// Uniquify the list of symbols.
std::vector<uint32_t> clusters(symbols, symbols + symbols_size);
std::sort(clusters.begin(), clusters.end());
std::vector<uint32_t>::iterator last =
std::unique(clusters.begin(), clusters.end());
clusters.resize(static_cast<size_t>(last - clusters.begin()));
// We maintain a heap of histogram pairs, ordered by the bit cost reduction.
std::vector<HistogramPair> pairs;
for (size_t idx1 = 0; idx1 < clusters.size(); ++idx1) {
for (size_t idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) {
CompareAndPushToHeap(out, cluster_size, clusters[idx1], clusters[idx2],
&pairs);
CompareAndPushToQueue(out, cluster_size, clusters[idx1], clusters[idx2],
&pairs);
}
}
@ -135,38 +130,48 @@ void HistogramCombine(HistogramType* out,
continue;
}
// Take the best pair from the top of heap.
int best_idx1 = pairs[0].idx1;
int best_idx2 = pairs[0].idx2;
uint32_t best_idx1 = pairs[0].idx1;
uint32_t best_idx2 = pairs[0].idx2;
out[best_idx1].AddHistogram(out[best_idx2]);
out[best_idx1].bit_cost_ = pairs[0].cost_combo;
cluster_size[best_idx1] += cluster_size[best_idx2];
for (int i = 0; i < symbols_size; ++i) {
for (size_t i = 0; i < symbols_size; ++i) {
if (symbols[i] == best_idx2) {
symbols[i] = best_idx1;
}
}
for (size_t i = 0; i + 1 < clusters.size(); ++i) {
if (clusters[i] >= best_idx2) {
clusters[i] = clusters[i + 1];
for (std::vector<uint32_t>::iterator cluster = clusters.begin();
cluster != clusters.end(); ++cluster) {
if (*cluster >= best_idx2) {
clusters.erase(cluster);
break;
}
}
clusters.pop_back();
// Invalidate pairs intersecting the just combined best pair.
// Remove pairs intersecting the just combined best pair.
size_t copy_to_idx = 0;
for (size_t i = 0; i < pairs.size(); ++i) {
HistogramPair& p = pairs[i];
if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
p.idx1 == best_idx2 || p.idx2 == best_idx2) {
p.valid = false;
// Remove invalid pair from the queue.
continue;
}
if (pairs.front() < p) {
// Replace the top of the queue if needed.
HistogramPair front = pairs.front();
pairs.front() = p;
pairs[copy_to_idx] = front;
} else {
pairs[copy_to_idx] = p;
}
++copy_to_idx;
}
// Pop invalid pairs from the top of the heap.
while (!pairs.empty() && !pairs[0].valid) {
std::pop_heap(pairs.begin(), pairs.end(), HistogramPairComparator());
pairs.pop_back();
}
pairs.resize(copy_to_idx);
// Push new pairs formed with the combined histogram to the heap.
for (size_t i = 0; i < clusters.size(); ++i) {
CompareAndPushToHeap(out, cluster_size, best_idx1, clusters[i], &pairs);
CompareAndPushToQueue(out, cluster_size, best_idx1, clusters[i], &pairs);
}
}
}
@ -189,16 +194,19 @@ double HistogramBitCostDistance(const HistogramType& histogram,
// Find the best 'out' histogram for each of the 'in' histograms.
// Note: we assume that out[]->bit_cost_ is already up-to-date.
template<typename HistogramType>
void HistogramRemap(const HistogramType* in, int in_size,
HistogramType* out, int* symbols) {
std::set<int> all_symbols;
for (int i = 0; i < in_size; ++i) {
all_symbols.insert(symbols[i]);
}
for (int i = 0; i < in_size; ++i) {
int best_out = i == 0 ? symbols[0] : symbols[i - 1];
void HistogramRemap(const HistogramType* in, size_t in_size,
HistogramType* out, uint32_t* symbols) {
// Uniquify the list of symbols.
std::vector<uint32_t> all_symbols(symbols, symbols + in_size);
std::sort(all_symbols.begin(), all_symbols.end());
std::vector<uint32_t>::iterator last =
std::unique(all_symbols.begin(), all_symbols.end());
all_symbols.resize(static_cast<size_t>(last - all_symbols.begin()));
for (size_t i = 0; i < in_size; ++i) {
uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
for (std::set<int>::const_iterator k = all_symbols.begin();
for (std::vector<uint32_t>::const_iterator k = all_symbols.begin();
k != all_symbols.end(); ++k) {
const double cur_bits = HistogramBitCostDistance(in[i], out[*k]);
if (cur_bits < best_bits) {
@ -211,11 +219,11 @@ void HistogramRemap(const HistogramType* in, int in_size,
// Recompute each out based on raw and symbols.
for (std::set<int>::const_iterator k = all_symbols.begin();
for (std::vector<uint32_t>::const_iterator k = all_symbols.begin();
k != all_symbols.end(); ++k) {
out[*k].Clear();
}
for (int i = 0; i < in_size; ++i) {
for (size_t i = 0; i < in_size; ++i) {
out[symbols[i]].AddHistogram(in[i]);
}
}
@ -224,10 +232,10 @@ void HistogramRemap(const HistogramType* in, int in_size,
// increasing order.
template<typename HistogramType>
void HistogramReindex(std::vector<HistogramType>* out,
std::vector<int>* symbols) {
std::vector<uint32_t>* symbols) {
std::vector<HistogramType> tmp(*out);
std::map<int, int> new_index;
int next_index = 0;
std::map<uint32_t, uint32_t> new_index;
uint32_t next_index = 0;
for (size_t i = 0; i < symbols->size(); ++i) {
if (new_index.find((*symbols)[i]) == new_index.end()) {
new_index[(*symbols)[i]] = next_index;
@ -246,25 +254,25 @@ void HistogramReindex(std::vector<HistogramType>* out,
// indicate which of the 'out' histograms is the best approximation.
template<typename HistogramType>
void ClusterHistograms(const std::vector<HistogramType>& in,
int num_contexts, int num_blocks,
size_t num_contexts, size_t num_blocks,
size_t max_histograms,
std::vector<HistogramType>* out,
std::vector<int>* histogram_symbols) {
const int in_size = num_contexts * num_blocks;
BROTLI_DCHECK(in_size == in.size());
std::vector<int> cluster_size(in_size, 1);
std::vector<uint32_t>* histogram_symbols) {
const size_t in_size = num_contexts * num_blocks;
assert(in_size == in.size());
std::vector<uint32_t> cluster_size(in_size, 1);
out->resize(in_size);
histogram_symbols->resize(in_size);
for (int i = 0; i < in_size; ++i) {
for (size_t i = 0; i < in_size; ++i) {
(*out)[i] = in[i];
(*out)[i].bit_cost_ = PopulationCost(in[i]);
(*histogram_symbols)[i] = i;
(*histogram_symbols)[i] = static_cast<uint32_t>(i);
}
const int max_input_histograms = 64;
for (int i = 0; i < in_size; i += max_input_histograms) {
int num_to_combine = std::min(in_size - i, max_input_histograms);
const size_t max_input_histograms = 64;
for (size_t i = 0; i < in_size; i += max_input_histograms) {
size_t num_to_combine = std::min(in_size - i, max_input_histograms);
HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[i], num_to_combine,
max_histograms);

View File

@ -15,21 +15,21 @@
namespace brotli {
static int insbase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66,
98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
static int insextra[] = { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5,
5, 6, 7, 8, 9, 10, 12, 14, 24 };
static int copybase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38,
54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
static int copyextra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4,
4, 5, 5, 6, 7, 8, 9, 10, 24 };
static uint32_t kInsBase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50,
66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
static uint32_t kInsExtra[] = { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
5, 5, 6, 7, 8, 9, 10, 12, 14, 24 };
static uint32_t kCopyBase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30,
38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
static uint32_t kCopyExtra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 7, 8, 9, 10, 24 };
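
The renamed tables keep the tiling property the codes rely on: each code covers [base, base + 2^extra), and consecutive ranges meet exactly. A standalone check for the insert-length pair (copied from above); the copy-length tables satisfy the same identity:

#include <cassert>
#include <cstddef>
#include <cstdint>

// Copies of kInsBase / kInsExtra from above.
static const uint32_t kInsBase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26,
    34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
static const uint32_t kInsExtra[] = { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
    4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24 };

int main() {
  // Code i covers insert lengths [kInsBase[i], kInsBase[i] + 2^kInsExtra[i]),
  // so consecutive ranges must tile with no gaps or overlaps.
  for (size_t i = 0; i + 1 < 24; ++i) {
    assert(kInsBase[i] + (1u << kInsExtra[i]) == kInsBase[i + 1]);
  }
  return 0;
}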
static inline uint16_t GetInsertLengthCode(int insertlen) {
static inline uint16_t GetInsertLengthCode(size_t insertlen) {
if (insertlen < 6) {
return static_cast<uint16_t>(insertlen);
} else if (insertlen < 130) {
insertlen -= 2;
int nbits = Log2FloorNonZero(insertlen) - 1;
uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
return static_cast<uint16_t>((nbits << 1) + (insertlen >> nbits) + 2);
} else if (insertlen < 2114) {
return static_cast<uint16_t>(Log2FloorNonZero(insertlen - 66) + 10);
@ -42,12 +42,12 @@ static inline uint16_t GetInsertLengthCode(int insertlen) {
}
}
static inline uint16_t GetCopyLengthCode(int copylen) {
static inline uint16_t GetCopyLengthCode(size_t copylen) {
if (copylen < 10) {
return static_cast<uint16_t>(copylen - 2);
} else if (copylen < 134) {
copylen -= 6;
int nbits = Log2FloorNonZero(copylen) - 1;
uint32_t nbits = Log2FloorNonZero(copylen) - 1u;
return static_cast<uint16_t>((nbits << 1) + (copylen >> nbits) + 4);
} else if (copylen < 2118) {
return static_cast<uint16_t>(Log2FloorNonZero(copylen - 70) + 12);
@ -71,23 +71,25 @@ static inline uint16_t CombineLengthCodes(
}
}
static inline void GetLengthCode(int insertlen, int copylen,
static inline void GetLengthCode(size_t insertlen, size_t copylen,
bool use_last_distance,
uint16_t* code, uint64_t* extra) {
uint16_t inscode = GetInsertLengthCode(insertlen);
uint16_t copycode = GetCopyLengthCode(copylen);
uint64_t insnumextra = insextra[inscode];
uint64_t numextra = insnumextra + copyextra[copycode];
uint64_t insextraval = insertlen - insbase[inscode];
uint64_t copyextraval = copylen - copybase[copycode];
uint64_t insnumextra = kInsExtra[inscode];
uint64_t numextra = insnumextra + kCopyExtra[copycode];
uint64_t insextraval = insertlen - kInsBase[inscode];
uint64_t copyextraval = copylen - kCopyBase[copycode];
*code = CombineLengthCodes(inscode, copycode, use_last_distance);
*extra = (numextra << 48) | (copyextraval << insnumextra) | insextraval;
}
struct Command {
// distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1.
Command(int insertlen, int copylen, int copylen_code, int distance_code)
: insert_len_(insertlen), copy_len_(copylen) {
Command(size_t insertlen, size_t copylen, size_t copylen_code,
size_t distance_code)
: insert_len_(static_cast<uint32_t>(insertlen))
, copy_len_(static_cast<uint32_t>(copylen)) {
// The distance prefix and extra bits are stored in this Command as if
// npostfix and ndirect were 0, they are only recomputed later after the
// clustering if needed.
@ -96,32 +98,33 @@ struct Command {
&cmd_prefix_, &cmd_extra_);
}
Command(int insertlen)
: insert_len_(insertlen), copy_len_(0), dist_prefix_(16), dist_extra_(0) {
explicit Command(size_t insertlen)
: insert_len_(static_cast<uint32_t>(insertlen))
, copy_len_(0), dist_prefix_(16), dist_extra_(0) {
GetLengthCode(insertlen, 4, dist_prefix_ == 0, &cmd_prefix_, &cmd_extra_);
}
int DistanceCode() const {
uint32_t DistanceCode() const {
if (dist_prefix_ < 16) {
return dist_prefix_;
}
int nbits = dist_extra_ >> 24;
int extra = dist_extra_ & 0xffffff;
int prefix = dist_prefix_ - 12 - 2 * nbits;
uint32_t nbits = dist_extra_ >> 24;
uint32_t extra = dist_extra_ & 0xffffff;
uint32_t prefix = dist_prefix_ - 12 - 2 * nbits;
return (prefix << nbits) + extra + 12;
}
int DistanceContext() const {
int r = cmd_prefix_ >> 6;
int c = cmd_prefix_ & 7;
uint32_t DistanceContext() const {
uint32_t r = cmd_prefix_ >> 6;
uint32_t c = cmd_prefix_ & 7;
if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) {
return c;
}
return 3;
}
int insert_len_;
int copy_len_;
uint32_t insert_len_;
uint32_t copy_len_;
uint16_t cmd_prefix_;
uint16_t dist_prefix_;
uint64_t cmd_extra_;

View File

@ -157,7 +157,7 @@ enum ContextType {
CONTEXT_SIGNED = 3
};
static inline uint8_t Context(uint8_t p1, uint8_t p2, int mode) {
static inline uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
switch (mode) {
case CONTEXT_LSB6:
return p1 & 0x3f;

View File

@ -19,13 +19,13 @@ extern "C" {
extern const uint8_t kBrotliDictionary[122784];
static const int kBrotliDictionaryOffsetsByLength[] = {
static const uint32_t kBrotliDictionaryOffsetsByLength[] = {
0, 0, 0, 0, 0, 4096, 9216, 21504, 35840, 44032,
53248, 63488, 74752, 87040, 93696, 100864, 104704, 106752, 108928, 113536,
115968, 118528, 119872, 121280, 122016,
};
static const int kBrotliDictionarySizeBitsByLength[] = {
static const uint8_t kBrotliDictionarySizeBitsByLength[] = {
0, 0, 0, 0, 10, 10, 11, 11, 10, 10,
10, 10, 10, 9, 9, 8, 7, 7, 8, 7,
7, 6, 6, 5, 5,

View File

@ -9,6 +9,7 @@
#include "./encode.h"
#include <algorithm>
#include <cstring>
#include <limits>
#include "./backward_references.h"
@ -38,8 +39,8 @@ static const int kMaxNumDelayedSymbols = 0x2fff;
void RecomputeDistancePrefixes(Command* cmds,
size_t num_commands,
int num_direct_distance_codes,
int distance_postfix_bits) {
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits) {
if (num_direct_distance_codes == 0 && distance_postfix_bits == 0) {
return;
}
@ -55,6 +56,16 @@ void RecomputeDistancePrefixes(Command* cmds,
}
}
/* Wraps 64-bit input position to 32-bit ringbuffer position preserving
"not-a-first-lap" feature. */
uint32_t WrapPosition(uint64_t position) {
uint32_t result = static_cast<uint32_t>(position);
if (position > (1u << 30)) {
result = (result & ((1u << 30) - 1)) | (1u << 30);
}
return result;
}
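
The wrapping behaviour is easy to exercise on its own: first-lap positions survive the cast unchanged, and anything past the first lap is folded into [2^30, 2^31), so bit 30 keeps recording that a lap happened. A standalone copy with a few hand-picked positions:

#include <cassert>
#include <cstdint>

// Copy of WrapPosition above.
static uint32_t WrapPosition(uint64_t position) {
  uint32_t result = static_cast<uint32_t>(position);
  if (position > (1u << 30)) {
    result = (result & ((1u << 30) - 1)) | (1u << 30);
  }
  return result;
}

int main() {
  assert(WrapPosition(12345) == 12345);          // first lap: unchanged
  assert(WrapPosition(1u << 30) == (1u << 30));  // boundary is preserved
  // A 64-bit position far past the ringbuffer still keeps bit 30 set.
  assert(WrapPosition((uint64_t(1) << 32) + 5) == ((1u << 30) | 5));
  return 0;
}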
uint8_t* BrotliCompressor::GetBrotliStorage(size_t size) {
if (storage_size_ < size) {
delete[] storage_;
@ -64,6 +75,22 @@ uint8_t* BrotliCompressor::GetBrotliStorage(size_t size) {
return storage_;
}
void EncodeWindowBits(int lgwin, uint8_t* last_byte, uint8_t* last_byte_bits) {
if (lgwin == 16) {
*last_byte = 0;
*last_byte_bits = 1;
} else if (lgwin == 17) {
*last_byte = 1;
*last_byte_bits = 7;
} else if (lgwin > 17) {
*last_byte = static_cast<uint8_t>(((lgwin - 17) << 1) | 1);
*last_byte_bits = 4;
} else {
*last_byte = static_cast<uint8_t>(((lgwin - 8) << 4) | 1);
*last_byte_bits = 7;
}
}
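
The factored-out header packing can likewise be checked standalone; the sketch copies EncodeWindowBits verbatim and verifies two representative window sizes (the expected constants are worked out by hand):

#include <cassert>
#include <cstdint>

// Copy of EncodeWindowBits above, for a standalone check of the
// stream-header packing.
static void EncodeWindowBits(int lgwin, uint8_t* last_byte,
                             uint8_t* last_byte_bits) {
  if (lgwin == 16) {
    *last_byte = 0;
    *last_byte_bits = 1;
  } else if (lgwin == 17) {
    *last_byte = 1;
    *last_byte_bits = 7;
  } else if (lgwin > 17) {
    *last_byte = static_cast<uint8_t>(((lgwin - 17) << 1) | 1);
    *last_byte_bits = 4;
  } else {
    *last_byte = static_cast<uint8_t>(((lgwin - 8) << 4) | 1);
    *last_byte_bits = 7;
  }
}

int main() {
  uint8_t byte, bits;
  EncodeWindowBits(22, &byte, &bits);  // a common 4 MiB window
  assert(byte == 0x0B && bits == 4);   // ((22-17)<<1)|1 = 0b1011
  EncodeWindowBits(16, &byte, &bits);  // smallest window: the 1-bit form
  assert(byte == 0 && bits == 1);
  return 0;
}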
BrotliCompressor::BrotliCompressor(BrotliParams params)
: params_(params),
hashers_(new Hashers()),
@ -109,19 +136,7 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
cmd_alloc_size_ = 0;
// Initialize last byte with stream header.
if (params_.lgwin == 16) {
last_byte_ = 0;
last_byte_bits_ = 1;
} else if (params_.lgwin == 17) {
last_byte_ = 1;
last_byte_bits_ = 7;
} else if (params_.lgwin > 17) {
last_byte_ = static_cast<uint8_t>(((params_.lgwin - 17) << 1) | 1);
last_byte_bits_ = 4;
} else {
last_byte_ = static_cast<uint8_t>(((params_.lgwin - 8) << 4) | 1);
last_byte_bits_ = 7;
}
EncodeWindowBits(params_.lgwin, &last_byte_, &last_byte_bits_);
// Initialize distance cache.
dist_cache_[0] = 4;
@ -213,13 +228,14 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
const bool force_flush,
size_t* out_size,
uint8_t** output) {
const size_t bytes = input_pos_ - last_processed_pos_;
const uint64_t delta = input_pos_ - last_processed_pos_;
const uint8_t* data = ringbuffer_->start();
const size_t mask = ringbuffer_->mask();
const uint32_t mask = ringbuffer_->mask();
if (bytes > input_block_size()) {
if (delta > input_block_size() || (delta == 0 && !is_last)) {
return false;
}
const uint32_t bytes = static_cast<uint32_t>(delta);
// Theoretical max number of commands is 1 per 2 bytes.
size_t newsize = num_commands_ + bytes / 2 + 1;
@ -232,7 +248,8 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
static_cast<Command*>(realloc(commands_, sizeof(Command) * newsize));
}
CreateBackwardReferences(bytes, last_processed_pos_, data, mask,
CreateBackwardReferences(bytes, WrapPosition(last_processed_pos_),
is_last, data, mask,
max_backward_distance_,
params_.quality,
hashers_,
@ -262,7 +279,8 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
last_insert_len_ = 0;
}
return WriteMetaBlockInternal(is_last, out_size, output);
WriteMetaBlockInternal(is_last, out_size, output);
return true;
}
// Decide about the context map based on the ability of the prediction
@ -273,42 +291,43 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
// BitsEntropy will assume that symbol to be stored alone using Huffman
// coding.
void ChooseContextMap(int quality,
int* bigram_histo,
int* num_literal_contexts,
const int** literal_context_map) {
int monogram_histo[3] = { 0 };
int two_prefix_histo[6] = { 0 };
int total = 0;
for (int i = 0; i < 9; ++i) {
uint32_t* bigram_histo,
size_t* num_literal_contexts,
const uint32_t** literal_context_map) {
uint32_t monogram_histo[3] = { 0 };
uint32_t two_prefix_histo[6] = { 0 };
size_t total = 0;
for (size_t i = 0; i < 9; ++i) {
total += bigram_histo[i];
monogram_histo[i % 3] += bigram_histo[i];
int j = i;
size_t j = i;
if (j >= 6) {
j -= 6;
}
two_prefix_histo[j] += bigram_histo[i];
}
int dummy;
size_t dummy;
double entropy1 = ShannonEntropy(monogram_histo, 3, &dummy);
double entropy2 = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
double entropy3 = 0;
for (int k = 0; k < 3; ++k) {
for (size_t k = 0; k < 3; ++k) {
entropy3 += ShannonEntropy(bigram_histo + 3 * k, 3, &dummy);
}
assert(total != 0);
entropy1 *= (1.0 / total);
entropy2 *= (1.0 / total);
entropy3 *= (1.0 / total);
double scale = 1.0 / static_cast<double>(total);
entropy1 *= scale;
entropy2 *= scale;
entropy3 *= scale;
static const int kStaticContextMapContinuation[64] = {
static const uint32_t kStaticContextMapContinuation[64] = {
1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
static const int kStaticContextMapSimpleUTF8[64] = {
static const uint32_t kStaticContextMapSimpleUTF8[64] = {
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -337,9 +356,9 @@ void DecideOverLiteralContextModeling(const uint8_t* input,
size_t length,
size_t mask,
int quality,
int* literal_context_mode,
int* num_literal_contexts,
const int** literal_context_map) {
ContextType* literal_context_mode,
size_t* num_literal_contexts,
const uint32_t** literal_context_map) {
if (quality < kMinQualityForContextModeling || length < 64) {
return;
}
@ -347,7 +366,7 @@ void DecideOverLiteralContextModeling(const uint8_t* input,
// UTF8 data faster we only examine 64 byte long strides at every 4kB
// intervals.
const size_t end_pos = start_pos + length;
int bigram_prefix_histo[9] = { 0 };
uint32_t bigram_prefix_histo[9] = { 0 };
for (; start_pos + 64 <= end_pos; start_pos += 4096) {
static const int lut[4] = { 0, 0, 1, 2 };
const size_t stride_end_pos = start_pos + 64;
@ -363,27 +382,33 @@ void DecideOverLiteralContextModeling(const uint8_t* input,
literal_context_map);
}
bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
void BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
size_t* out_size,
uint8_t** output) {
const size_t bytes = input_pos_ - last_flush_pos_;
assert(input_pos_ >= last_flush_pos_);
assert(input_pos_ > last_flush_pos_ || is_last);
assert(input_pos_ - last_flush_pos_ <= 1u << 24);
const uint32_t bytes = static_cast<uint32_t>(input_pos_ - last_flush_pos_);
const uint8_t* data = ringbuffer_->start();
const size_t mask = ringbuffer_->mask();
const uint32_t mask = ringbuffer_->mask();
const size_t max_out_size = 2 * bytes + 500;
uint8_t* storage = GetBrotliStorage(max_out_size);
storage[0] = last_byte_;
int storage_ix = last_byte_bits_;
size_t storage_ix = last_byte_bits_;
bool uncompressed = false;
if (num_commands_ < (bytes >> 8) + 2) {
if (num_literals_ > 0.99 * static_cast<double>(bytes)) {
int literal_histo[256] = { 0 };
static const int kSampleRate = 13;
uint32_t literal_histo[256] = { 0 };
static const uint32_t kSampleRate = 13;
static const double kMinEntropy = 7.92;
const double bit_cost_threshold =
static_cast<double>(bytes) * kMinEntropy / kSampleRate;
for (size_t i = last_flush_pos_; i < input_pos_; i += kSampleRate) {
++literal_histo[data[i & mask]];
size_t t = (bytes + kSampleRate - 1) / kSampleRate;
uint32_t pos = static_cast<uint32_t>(last_flush_pos_);
for (size_t i = 0; i < t; i++) {
++literal_histo[data[pos & mask]];
pos += kSampleRate;
}
if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
uncompressed = true;
@ -392,23 +417,20 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
}
if (bytes == 0) {
if (!StoreCompressedMetaBlockHeader(is_last, 0, &storage_ix, &storage[0])) {
return false;
}
storage_ix = (storage_ix + 7) & ~7;
// Write the ISLAST and ISEMPTY bits.
WriteBits(2, 3, &storage_ix, &storage[0]);
storage_ix = (storage_ix + 7u) & ~7u;
} else if (uncompressed) {
// Restore the distance cache, as its last update by
// CreateBackwardReferences is now unused.
memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
if (!StoreUncompressedMetaBlock(is_last,
data, last_flush_pos_, mask, bytes,
&storage_ix,
&storage[0])) {
return false;
}
StoreUncompressedMetaBlock(is_last, data,
WrapPosition(last_flush_pos_), mask, bytes,
&storage_ix,
&storage[0]);
} else {
int num_direct_distance_codes = 0;
int distance_postfix_bits = 0;
uint32_t num_direct_distance_codes = 0;
uint32_t distance_postfix_bits = 0;
if (params_.quality > 9 && params_.mode == BrotliParams::MODE_FONT) {
num_direct_distance_codes = 12;
distance_postfix_bits = 1;
@ -418,29 +440,30 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
distance_postfix_bits);
}
if (params_.quality < kMinQualityForBlockSplit) {
if (!StoreMetaBlockTrivial(data, last_flush_pos_, bytes, mask, is_last,
commands_, num_commands_,
&storage_ix,
&storage[0])) {
return false;
}
StoreMetaBlockTrivial(data, WrapPosition(last_flush_pos_),
bytes, mask, is_last,
commands_, num_commands_,
&storage_ix,
&storage[0]);
} else {
MetaBlockSplit mb;
int literal_context_mode = CONTEXT_UTF8;
ContextType literal_context_mode = CONTEXT_UTF8;
if (params_.quality <= 9) {
int num_literal_contexts = 1;
const int* literal_context_map = NULL;
DecideOverLiteralContextModeling(data, last_flush_pos_, bytes, mask,
size_t num_literal_contexts = 1;
const uint32_t* literal_context_map = NULL;
DecideOverLiteralContextModeling(data, WrapPosition(last_flush_pos_),
bytes, mask,
params_.quality,
&literal_context_mode,
&num_literal_contexts,
&literal_context_map);
if (literal_context_map == NULL) {
BuildMetaBlockGreedy(data, last_flush_pos_, mask,
BuildMetaBlockGreedy(data, WrapPosition(last_flush_pos_), mask,
commands_, num_commands_,
&mb);
} else {
BuildMetaBlockGreedyWithContexts(data, last_flush_pos_, mask,
BuildMetaBlockGreedyWithContexts(data, WrapPosition(last_flush_pos_),
mask,
prev_byte_, prev_byte2_,
literal_context_mode,
num_literal_contexts,
@ -449,10 +472,11 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
&mb);
}
} else {
if (!IsMostlyUTF8(data, last_flush_pos_, mask, bytes, kMinUTF8Ratio)) {
if (!IsMostlyUTF8(
data, WrapPosition(last_flush_pos_), mask, bytes, kMinUTF8Ratio)) {
literal_context_mode = CONTEXT_SIGNED;
}
BuildMetaBlock(data, last_flush_pos_, mask,
BuildMetaBlock(data, WrapPosition(last_flush_pos_), mask,
prev_byte_, prev_byte2_,
commands_, num_commands_,
literal_context_mode,
@ -463,36 +487,33 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
distance_postfix_bits,
&mb);
}
if (!StoreMetaBlock(data, last_flush_pos_, bytes, mask,
prev_byte_, prev_byte2_,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands_, num_commands_,
mb,
&storage_ix,
&storage[0])) {
return false;
}
StoreMetaBlock(data, WrapPosition(last_flush_pos_), bytes, mask,
prev_byte_, prev_byte2_,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands_, num_commands_,
mb,
&storage_ix,
&storage[0]);
}
if (bytes + 4 < static_cast<size_t>(storage_ix >> 3)) {
if (bytes + 4 < (storage_ix >> 3)) {
// Restore the distance cache and last byte.
memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
storage[0] = last_byte_;
storage_ix = last_byte_bits_;
if (!StoreUncompressedMetaBlock(is_last, data, last_flush_pos_, mask,
bytes, &storage_ix, &storage[0])) {
return false;
}
StoreUncompressedMetaBlock(is_last, data,
WrapPosition(last_flush_pos_), mask,
bytes, &storage_ix, &storage[0]);
}
}
last_byte_ = storage[storage_ix >> 3];
last_byte_bits_ = storage_ix & 7;
last_byte_bits_ = storage_ix & 7u;
last_flush_pos_ = input_pos_;
last_processed_pos_ = input_pos_;
prev_byte_ = data[(last_flush_pos_ - 1) & mask];
prev_byte2_ = data[(last_flush_pos_ - 2) & mask];
prev_byte_ = data[(static_cast<uint32_t>(last_flush_pos_) - 1) & mask];
prev_byte2_ = data[(static_cast<uint32_t>(last_flush_pos_) - 2) & mask];
num_commands_ = 0;
num_literals_ = 0;
// Save the state of the distance cache in case we need to restore it for
@ -500,7 +521,6 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
*output = &storage[0];
*out_size = storage_ix >> 3;
return true;
}
bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
@ -532,21 +552,22 @@ bool BrotliCompressor::WriteMetadata(const size_t input_size,
}
uint64_t hdr_buffer_data[2];
uint8_t* hdr_buffer = reinterpret_cast<uint8_t*>(&hdr_buffer_data[0]);
int storage_ix = last_byte_bits_;
size_t storage_ix = last_byte_bits_;
hdr_buffer[0] = last_byte_;
WriteBits(1, 0, &storage_ix, hdr_buffer);
WriteBits(2, 3, &storage_ix, hdr_buffer);
WriteBits(1, 0, &storage_ix, hdr_buffer);
if (input_size == 0) {
WriteBits(2, 0, &storage_ix, hdr_buffer);
*encoded_size = (storage_ix + 7) >> 3;
*encoded_size = (storage_ix + 7u) >> 3;
memcpy(encoded_buffer, hdr_buffer, *encoded_size);
} else {
int nbits = Log2Floor(static_cast<uint32_t>(input_size) - 1) + 1;
int nbytes = (nbits + 7) / 8;
uint32_t nbits = (input_size == 1) ? 0 : (Log2FloorNonZero(
static_cast<uint32_t>(input_size) - 1) + 1);
uint32_t nbytes = (nbits + 7) / 8;
WriteBits(2, nbytes, &storage_ix, hdr_buffer);
WriteBits(8 * nbytes, input_size - 1, &storage_ix, hdr_buffer);
size_t hdr_size = (storage_ix + 7) >> 3;
size_t hdr_size = (storage_ix + 7u) >> 3;
memcpy(encoded_buffer, hdr_buffer, hdr_size);
memcpy(&encoded_buffer[hdr_size], input_buffer, input_size);
*encoded_size = hdr_size + input_size;
@ -582,38 +603,52 @@ int BrotliCompressBuffer(BrotliParams params,
return 1;
}
size_t CopyOneBlockToRingBuffer(BrotliIn* r, BrotliCompressor* compressor) {
const size_t block_size = compressor->input_block_size();
size_t bytes_read = 0;
const uint8_t* data = reinterpret_cast<const uint8_t*>(
r->Read(block_size, &bytes_read));
if (data == NULL) {
return 0;
}
compressor->CopyInputToRingBuffer(bytes_read, data);
// Read more bytes until block_size is filled or an EOF (data == NULL) is
// received. This is useful to get deterministic compressed output for the
// same input no matter how r->Read splits the input into chunks.
for (size_t remaining = block_size - bytes_read; remaining > 0; ) {
size_t more_bytes_read = 0;
data = reinterpret_cast<const uint8_t*>(
r->Read(remaining, &more_bytes_read));
if (data == NULL) {
break;
}
compressor->CopyInputToRingBuffer(more_bytes_read, data);
bytes_read += more_bytes_read;
remaining -= more_bytes_read;
}
return bytes_read;
}
bool BrotliInIsFinished(BrotliIn* r) {
size_t read_bytes;
return r->Read(0, &read_bytes) == NULL;
}
const uint8_t* BrotliInReadAndCheckEnd(const size_t block_size,
BrotliIn* r,
size_t* bytes_read,
bool* is_last) {
*bytes_read = 0;
const uint8_t* data = reinterpret_cast<const uint8_t*>(
r->Read(block_size, bytes_read));
assert((data == NULL) == (*bytes_read == 0));
*is_last = BrotliInIsFinished(r);
return data;
}
bool CopyOneBlockToRingBuffer(BrotliIn* r,
BrotliCompressor* compressor,
size_t* bytes_read,
bool* is_last) {
const size_t block_size = compressor->input_block_size();
const uint8_t* data = BrotliInReadAndCheckEnd(block_size, r,
bytes_read, is_last);
if (data == NULL) {
return *is_last;
}
compressor->CopyInputToRingBuffer(*bytes_read, data);
// Read more bytes until block_size is filled or an EOF (data == NULL) is
// received. This is useful to get deterministic compressed output for the
// same input no matter how r->Read splits the input into chunks.
for (size_t remaining = block_size - *bytes_read; remaining > 0; ) {
size_t more_bytes_read = 0;
data = BrotliInReadAndCheckEnd(remaining, r, &more_bytes_read, is_last);
if (data == NULL) {
return *is_last;
}
compressor->CopyInputToRingBuffer(more_bytes_read, data);
*bytes_read += more_bytes_read;
remaining -= more_bytes_read;
}
return true;
}
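
The reason for the coalescing loop is easiest to see against a source that fragments its output. In the toy model below, ChunkedSource is invented for this sketch and is not the real BrotliIn interface; it never returns more than 3 bytes per call, yet the loop still fills the block deterministically:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <string>

// A toy source that never returns more than 3 bytes per Read call.
struct ChunkedSource {
  explicit ChunkedSource(const std::string& s) : data(s), pos(0) {}
  const char* Read(size_t n, size_t* nread) {
    *nread = std::min(std::min<size_t>(n, 3), data.size() - pos);
    if (*nread == 0) return NULL;
    const char* p = data.data() + pos;
    pos += *nread;
    return p;
  }
  std::string data;
  size_t pos;
};

int main() {
  ChunkedSource src("0123456789");
  std::string block;
  size_t nread = 0;
  // Coalesce reads until the block is full, as the loop above does, so the
  // block contents do not depend on how the source chops up the input.
  for (size_t remaining = 8; remaining > 0; ) {
    const char* chunk = src.Read(remaining, &nread);
    if (chunk == NULL) break;
    block.append(chunk, nread);
    remaining -= nread;
  }
  assert(block == "01234567");  // full block despite 3-byte chunks
  return 0;
}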
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out) {
return BrotliCompressWithCustomDictionary(0, 0, params, in, out);
}
@ -628,8 +663,9 @@ int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
BrotliCompressor compressor(params);
if (dictsize != 0) compressor.BrotliSetCustomDictionary(dictsize, dict);
while (!final_block) {
in_bytes = CopyOneBlockToRingBuffer(in, &compressor);
final_block = in_bytes == 0 || BrotliInIsFinished(in);
if (!CopyOneBlockToRingBuffer(in, &compressor, &in_bytes, &final_block)) {
return false;
}
out_bytes = 0;
if (!compressor.WriteBrotliData(final_block,
/* force_flush = */ false,

View File

@ -115,9 +115,9 @@ class BrotliCompressor {
// the new output meta-block, or to zero if no new output meta-block was
// created (in this case the processed input data is buffered internally).
// If *out_size is positive, *output points to the start of the output data.
// Returns false if the size of the input data is larger than
// input_block_size() or if there was an error during writing the output.
// If is_last or force_flush is true, an output meta-block is always created.
// Returns false if the size of the input data is larger than
// input_block_size() or if there is no new input data and is_last is false.
bool WriteBrotliData(const bool is_last, const bool force_flush,
size_t* out_size, uint8_t** output);
@ -134,23 +134,23 @@ class BrotliCompressor {
private:
uint8_t* GetBrotliStorage(size_t size);
bool WriteMetaBlockInternal(const bool is_last,
void WriteMetaBlockInternal(const bool is_last,
size_t* out_size,
uint8_t** output);
BrotliParams params_;
int max_backward_distance_;
size_t max_backward_distance_;
Hashers* hashers_;
int hash_type_;
size_t input_pos_;
uint64_t input_pos_;
RingBuffer* ringbuffer_;
size_t cmd_alloc_size_;
Command* commands_;
size_t num_commands_;
int num_literals_;
int last_insert_len_;
size_t last_flush_pos_;
size_t last_processed_pos_;
size_t num_literals_;
size_t last_insert_len_;
uint64_t last_flush_pos_;
uint64_t last_processed_pos_;
int dist_cache_[4];
int saved_dist_cache_[4];
uint8_t last_byte_;

View File

@ -32,8 +32,8 @@ namespace brotli {
namespace {
void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
int num_direct_distance_codes,
int distance_postfix_bits) {
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits) {
if (num_direct_distance_codes == 0 &&
distance_postfix_bits == 0) {
return;
@ -51,21 +51,20 @@ void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
}
bool WriteMetaBlockParallel(const BrotliParams& params,
const size_t block_size,
const uint32_t input_size,
const uint8_t* input_buffer,
const size_t prefix_size,
const uint32_t prefix_size,
const uint8_t* prefix_buffer,
const bool is_first,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer) {
if (block_size == 0) {
if (input_size == 0) {
return false;
}
const size_t input_size = block_size;
// Copy prefix + next input block into a continuous area.
size_t input_pos = prefix_size;
uint32_t input_pos = prefix_size;
// CreateBackwardReferences reads up to 3 bytes past the end of input if the
// mask points past the end of input.
// FindMatchLengthWithLimit could do another 8 bytes look-forward.
@ -75,7 +74,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
// Since we don't have a ringbuffer, masking is a no-op.
// We use one less bit than the full range because some of the code uses
// mask + 1 as the size of the ringbuffer.
const size_t mask = std::numeric_limits<size_t>::max() >> 1;
const uint32_t mask = std::numeric_limits<uint32_t>::max() >> 1;
uint8_t prev_byte = input_pos > 0 ? input[(input_pos - 1) & mask] : 0;
uint8_t prev_byte2 = input_pos > 1 ? input[(input_pos - 2) & mask] : 0;
@ -91,10 +90,10 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
hashers->Init(hash_type);
// Compute backward references.
int last_insert_len = 0;
size_t last_insert_len = 0;
size_t num_commands = 0;
int num_literals = 0;
int max_backward_distance = (1 << params.lgwin) - 16;
size_t num_literals = 0;
uint32_t max_backward_distance = (1 << params.lgwin) - 16;
int dist_cache[4] = { -4, -4, -4, -4 };
Command* commands = static_cast<Command*>(
malloc(sizeof(Command) * ((input_size + 1) >> 1)));
@ -103,7 +102,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
return false;
}
CreateBackwardReferences(
input_size, input_pos,
input_size, input_pos, is_last,
&input[0], mask,
max_backward_distance,
params.quality,
@ -123,10 +122,11 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
// Build the meta-block.
MetaBlockSplit mb;
int num_direct_distance_codes =
uint32_t num_direct_distance_codes =
params.mode == BrotliParams::MODE_FONT ? 12 : 0;
int distance_postfix_bits = params.mode == BrotliParams::MODE_FONT ? 1 : 0;
int literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
uint32_t distance_postfix_bits =
params.mode == BrotliParams::MODE_FONT ? 1 : 0;
ContextType literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
RecomputeDistancePrefixes(commands, num_commands,
num_direct_distance_codes,
distance_postfix_bits);
@ -145,8 +145,8 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
// Set up the temporary output storage.
const size_t max_out_size = 2 * input_size + 500;
std::vector<uint8_t> storage(max_out_size);
int first_byte = 0;
int first_byte_bits = 0;
uint8_t first_byte = 0;
size_t first_byte_bits = 0;
if (is_first) {
if (params.lgwin == 16) {
first_byte = 0;
@ -155,26 +155,23 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
first_byte = 1;
first_byte_bits = 7;
} else {
first_byte = ((params.lgwin - 17) << 1) | 1;
first_byte = static_cast<uint8_t>(((params.lgwin - 17) << 1) | 1);
first_byte_bits = 4;
}
}
storage[0] = static_cast<uint8_t>(first_byte);
int storage_ix = first_byte_bits;
size_t storage_ix = first_byte_bits;
// Store the meta-block to the temporary output.
if (!StoreMetaBlock(&input[0], input_pos, input_size, mask,
prev_byte, prev_byte2,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands, num_commands,
mb,
&storage_ix, &storage[0])) {
free(commands);
return false;
}
StoreMetaBlock(&input[0], input_pos, input_size, mask,
prev_byte, prev_byte2,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands, num_commands,
mb,
&storage_ix, &storage[0]);
free(commands);
// If this is not the last meta-block, store an empty metadata
@ -189,11 +186,9 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
if (input_size + 4 < output_size) {
storage[0] = static_cast<uint8_t>(first_byte);
storage_ix = first_byte_bits;
if (!StoreUncompressedMetaBlock(is_last, &input[0], input_pos, mask,
input_size,
&storage_ix, &storage[0])) {
return false;
}
StoreUncompressedMetaBlock(is_last, &input[0], input_pos, mask,
input_size,
&storage_ix, &storage[0]);
output_size = storage_ix >> 3;
}
@ -239,19 +234,23 @@ int BrotliCompressBufferParallel(BrotliParams params,
params.lgblock = kMaxInputBlockBits;
}
size_t max_input_block_size = 1 << params.lgblock;
size_t max_prefix_size = 1u << params.lgwin;
std::vector<std::vector<uint8_t> > compressed_pieces;
// Compress block-by-block independently.
for (size_t pos = 0; pos < input_size; ) {
size_t input_block_size = std::min(max_input_block_size, input_size - pos);
uint32_t input_block_size =
static_cast<uint32_t>(std::min(max_input_block_size, input_size - pos));
uint32_t prefix_size =
static_cast<uint32_t>(std::min(max_prefix_size, pos));
size_t out_size = input_block_size + (input_block_size >> 3) + 1024;
std::vector<uint8_t> out(out_size);
if (!WriteMetaBlockParallel(params,
input_block_size,
&input_buffer[pos],
pos,
input_buffer,
prefix_size,
&input_buffer[pos - prefix_size],
pos == 0,
pos + input_block_size == input_size,
&out_size,

View File

@ -19,24 +19,6 @@
namespace brotli {
namespace {
struct HuffmanTree {
HuffmanTree(int count, int16_t left, int16_t right)
: total_count_(count),
index_left_(left),
index_right_or_value_(right) {
}
int total_count_;
int16_t index_left_;
int16_t index_right_or_value_;
};
// Sort the root nodes, least popular first.
bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) {
return v0.total_count_ < v1.total_count_;
}
void SetDepth(const HuffmanTree &p,
HuffmanTree *pool,
uint8_t *depth,
@ -50,8 +32,6 @@ void SetDepth(const HuffmanTree &p,
}
}
} // namespace
// This function will create a Huffman tree.
//
// The catch here is that the tree cannot be arbitrarily deep.
@ -67,26 +47,27 @@ void SetDepth(const HuffmanTree &p,
// we are not planning to use this with extremely long blocks.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const int *data,
const int length,
void CreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
uint8_t *depth) {
// For block sizes below 64 kB, we never need to do a second iteration
// of this loop. Probably all of our block sizes will be smaller than
// that, so this loop is mostly of academic interest. If we actually
// would need this, we would be better off with the Katajainen algorithm.
for (int count_limit = 1; ; count_limit *= 2) {
for (uint32_t count_limit = 1; ; count_limit *= 2) {
std::vector<HuffmanTree> tree;
tree.reserve(2 * length + 1);
for (int i = length - 1; i >= 0; --i) {
for (size_t i = length; i != 0;) {
--i;
if (data[i]) {
const int count = std::max(data[i], count_limit);
const uint32_t count = std::max(data[i], count_limit);
tree.push_back(HuffmanTree(count, -1, static_cast<int16_t>(i)));
}
}
const int n = static_cast<int>(tree.size());
const size_t n = tree.size();
if (n == 1) {
depth[tree[0].index_right_or_value_] = 1; // Only one element.
break;
@ -101,14 +82,14 @@ void CreateHuffmanTree(const int *data,
// (n+1). These are naturally in ascending order.
// [2n]: we add a sentinel at the end as well.
// There will be (2n+1) elements at the end.
const HuffmanTree sentinel(std::numeric_limits<int>::max(), -1, -1);
const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
tree.push_back(sentinel);
tree.push_back(sentinel);
int i = 0; // Points to the next leaf node.
int j = n + 1; // Points to the next non-leaf node.
for (int k = n - 1; k > 0; --k) {
int left, right;
size_t i = 0; // Points to the next leaf node.
size_t j = n + 1; // Points to the next non-leaf node.
for (size_t k = n - 1; k != 0; --k) {
size_t left, right;
if (tree[i].total_count_ <= tree[j].total_count_) {
left = i;
++i;
@ -125,7 +106,7 @@ void CreateHuffmanTree(const int *data,
}
// The sentinel node becomes the parent node.
int j_end = static_cast<int>(tree.size()) - 1;
size_t j_end = tree.size() - 1;
tree[j_end].total_count_ =
tree[left].total_count_ + tree[right].total_count_;
tree[j_end].index_left_ = static_cast<int16_t>(left);
@ -134,7 +115,7 @@ void CreateHuffmanTree(const int *data,
// Add back the last sentinel node.
tree.push_back(sentinel);
}
BROTLI_DCHECK(tree.size() == 2 * n + 1);
assert(tree.size() == 2 * n + 1);
SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
// We need to pack the Huffman tree in tree_limit bits.
@ -146,7 +127,7 @@ void CreateHuffmanTree(const int *data,
}
}
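A minimal usage sketch of the retyped pair of entry points (BuildLiteralCode is a hypothetical helper, not part of this commit; assumes these declarations live in entropy_encode.h and a 15-bit depth limit, the deepest code length the format serializes):

#include <stdint.h>
#include <stddef.h>
#include "./entropy_encode.h"

// Build a depth-limited prefix code for a 256-symbol literal alphabet
// using the new unsigned signatures.
void BuildLiteralCode(const uint32_t histogram[256],
                      uint8_t depth[256], uint16_t bits[256]) {
  // Depths come out in [1..15]; 0 marks an absent symbol.
  brotli::CreateHuffmanTree(histogram, 256, 15, depth);
  // Turn the depths into the actual (bit-reversed) code words.
  brotli::ConvertBitDepthsToSymbols(depth, 256, bits);
}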
void Reverse(std::vector<uint8_t>* v, int start, int end) {
void Reverse(std::vector<uint8_t>* v, size_t start, size_t end) {
--end;
while (start < end) {
uint8_t tmp = (*v)[start];
@ -160,9 +141,10 @@ void Reverse(std::vector<uint8_t>* v, int start, int end) {
void WriteHuffmanTreeRepetitions(
const uint8_t previous_value,
const uint8_t value,
int repetitions,
size_t repetitions,
std::vector<uint8_t> *tree,
std::vector<uint8_t> *extra_bits_data) {
assert(repetitions > 0);
if (previous_value != value) {
tree->push_back(value);
extra_bits_data->push_back(0);
@ -174,26 +156,29 @@ void WriteHuffmanTreeRepetitions(
--repetitions;
}
if (repetitions < 3) {
for (int i = 0; i < repetitions; ++i) {
for (size_t i = 0; i < repetitions; ++i) {
tree->push_back(value);
extra_bits_data->push_back(0);
}
} else {
repetitions -= 3;
int start = static_cast<int>(tree->size());
while (repetitions >= 0) {
size_t start = tree->size();
while (true) {
tree->push_back(16);
extra_bits_data->push_back(repetitions & 0x3);
repetitions >>= 2;
if (repetitions == 0) {
break;
}
--repetitions;
}
Reverse(tree, start, static_cast<int>(tree->size()));
Reverse(extra_bits_data, start, static_cast<int>(tree->size()));
Reverse(tree, start, tree->size());
Reverse(extra_bits_data, start, tree->size());
}
}
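As a worked example of the rewritten loop: a fresh run of ten equal non-zero values (previous_value != value) first emits the value itself, leaving repetitions = 9; the else branch then encodes 9 - 3 = 6 as two code-16 symbols with extra bits 2 and 0, generated least-significant digit first and reversed to (16, extra 0), (16, extra 2). Under the format's repeat rule (RFC 7932), the first 16 expands to 3 + 0 = 3 repeats and the consecutive second one to 4 * (3 - 2) + 3 + 2 = 9 in total, giving 1 + 9 = 10 values.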
void WriteHuffmanTreeRepetitionsZeros(
int repetitions,
size_t repetitions,
std::vector<uint8_t> *tree,
std::vector<uint8_t> *extra_bits_data) {
if (repetitions == 11) {
@ -202,32 +187,36 @@ void WriteHuffmanTreeRepetitionsZeros(
--repetitions;
}
if (repetitions < 3) {
for (int i = 0; i < repetitions; ++i) {
for (size_t i = 0; i < repetitions; ++i) {
tree->push_back(0);
extra_bits_data->push_back(0);
}
} else {
repetitions -= 3;
int start = static_cast<int>(tree->size());
while (repetitions >= 0) {
size_t start = tree->size();
while (true) {
tree->push_back(17);
extra_bits_data->push_back(repetitions & 0x7);
repetitions >>= 3;
if (repetitions == 0) {
break;
}
--repetitions;
}
Reverse(tree, start, static_cast<int>(tree->size()));
Reverse(extra_bits_data, start, static_cast<int>(tree->size()));
Reverse(tree, start, tree->size());
Reverse(extra_bits_data, start, tree->size());
}
}
int OptimizeHuffmanCountsForRle(int length, int* counts) {
int nonzero_count = 0;
int stride;
int limit;
int sum;
bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts) {
size_t nonzero_count = 0;
size_t stride;
size_t limit;
size_t sum;
const size_t streak_limit = 1240;
uint8_t* good_for_rle;
// Let's make the Huffman code more compatible with rle encoding.
int i;
size_t i;
for (i = 0; i < length; i++) {
if (counts[i]) {
++nonzero_count;
@ -236,18 +225,16 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) {
if (nonzero_count < 16) {
return 1;
}
for (; length >= 0; --length) {
if (length == 0) {
return 1; // All zeros.
}
if (counts[length - 1] != 0) {
// Now counts[0..length - 1] does not have trailing zeros.
break;
}
while (length != 0 && counts[length - 1] == 0) {
--length;
}
if (length == 0) {
return 1; // All zeros.
}
// Now counts[0..length - 1] does not have trailing zeros.
{
int nonzeros = 0;
int smallest_nonzero = 1 << 30;
size_t nonzeros = 0;
uint32_t smallest_nonzero = 1 << 30;
for (i = 0; i < length; ++i) {
if (counts[i] != 0) {
++nonzeros;
@ -260,7 +247,7 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) {
// A small histogram will model it well.
return 1;
}
int zeros = length - nonzeros;
size_t zeros = length - nonzeros;
if (smallest_nonzero < 4) {
if (zeros < 6) {
for (i = 1; i < length - 1; ++i) {
@ -284,41 +271,40 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) {
// Let's not spoil any of the existing good rle codes.
// Mark any seq of 0's that is longer than 5 as a good_for_rle.
// Mark any seq of non-0's that is longer than 7 as a good_for_rle.
int symbol = counts[0];
int stride = 0;
for (i = 0; i < length + 1; ++i) {
uint32_t symbol = counts[0];
size_t step = 0;
for (i = 0; i <= length; ++i) {
if (i == length || counts[i] != symbol) {
if ((symbol == 0 && stride >= 5) ||
(symbol != 0 && stride >= 7)) {
int k;
for (k = 0; k < stride; ++k) {
if ((symbol == 0 && step >= 5) ||
(symbol != 0 && step >= 7)) {
size_t k;
for (k = 0; k < step; ++k) {
good_for_rle[i - k - 1] = 1;
}
}
stride = 1;
step = 1;
if (i != length) {
symbol = counts[i];
}
} else {
++stride;
++step;
}
}
}
// 3) Let's replace those population counts that lead to more rle codes.
// Math here is in 24.8 fixed point representation.
const int streak_limit = 1240;
stride = 0;
limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
sum = 0;
for (i = 0; i < length + 1; ++i) {
for (i = 0; i <= length; ++i) {
if (i == length || good_for_rle[i] ||
(i != 0 && good_for_rle[i - 1]) ||
abs(256 * counts[i] - limit) >= streak_limit) {
(256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) {
if (stride >= 4 || (stride >= 3 && sum == 0)) {
int k;
size_t k;
// The stride must end, collapse what we have, if we have enough (4).
int count = (sum + stride / 2) / stride;
if (count < 1) {
size_t count = (sum + stride / 2) / stride;
if (count == 0) {
count = 1;
}
if (sum == 0) {
@ -328,7 +314,7 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) {
for (k = 0; k < stride; ++k) {
// We don't want to change the value at counts[i],
// which already belongs to the next stride. Hence the - 1.
counts[i - k - 1] = count;
counts[i - k - 1] = static_cast<uint32_t>(count);
}
}
stride = 0;
@ -358,17 +344,17 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) {
return 1;
}
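A note on the rewritten threshold test above: with unsigned arithmetic, abs(256 * counts[i] - limit) >= streak_limit is expressed without abs() by shifting the window, since |x - limit| < streak_limit is equivalent to x - limit + streak_limit landing in [0, 2 * streak_limit); when x is far below limit, the unsigned expression wraps around to a huge value and still correctly fails the range check.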
static void DecideOverRleUse(const uint8_t* depth, const int length,
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
bool *use_rle_for_non_zero,
bool *use_rle_for_zero) {
int total_reps_zero = 0;
int total_reps_non_zero = 0;
int count_reps_zero = 0;
int count_reps_non_zero = 0;
for (int i = 0; i < length;) {
const int value = depth[i];
int reps = 1;
for (int k = i + 1; k < length && depth[k] == value; ++k) {
size_t total_reps_zero = 0;
size_t total_reps_non_zero = 0;
size_t count_reps_zero = 1;
size_t count_reps_non_zero = 1;
for (size_t i = 0; i < length;) {
const uint8_t value = depth[i];
size_t reps = 1;
for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
++reps;
}
if (reps >= 3 && value == 0) {
@ -381,21 +367,19 @@ static void DecideOverRleUse(const uint8_t* depth, const int length,
}
i += reps;
}
total_reps_non_zero -= count_reps_non_zero * 2;
total_reps_zero -= count_reps_zero * 2;
*use_rle_for_non_zero = total_reps_non_zero > 2;
*use_rle_for_zero = total_reps_zero > 2;
*use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
*use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
}
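The counter changes above preserve the old decision rule in unsigned arithmetic: previously the code computed total_reps -= count_reps * 2 and tested total_reps > 2, i.e. total > 2 * count + 2. Seeding each counter at 1 and testing total_reps > count_reps * 2 checks the same inequality, total > 2 * old_count + 2, while sidestepping a subtraction that with size_t would silently rely on total >= 2 * count never being violated.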
void WriteHuffmanTree(const uint8_t* depth,
uint32_t length,
size_t length,
std::vector<uint8_t> *tree,
std::vector<uint8_t> *extra_bits_data) {
uint8_t previous_value = 8;
// Throw away trailing zeros.
uint32_t new_length = length;
for (uint32_t i = 0; i < length; ++i) {
size_t new_length = length;
for (size_t i = 0; i < length; ++i) {
if (depth[length - i - 1] == 0) {
--new_length;
} else {
@ -414,12 +398,12 @@ void WriteHuffmanTree(const uint8_t* depth,
}
// Actual rle coding.
for (uint32_t i = 0; i < new_length;) {
for (size_t i = 0; i < new_length;) {
const uint8_t value = depth[i];
int reps = 1;
size_t reps = 1;
if ((value != 0 && use_rle_for_non_zero) ||
(value == 0 && use_rle_for_zero)) {
for (uint32_t k = i + 1; k < new_length && depth[k] == value; ++k) {
for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
++reps;
}
}
@ -453,13 +437,15 @@ uint16_t ReverseBits(int num_bits, uint16_t bits) {
} // namespace
void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits) {
void ConvertBitDepthsToSymbols(const uint8_t *depth,
size_t len,
uint16_t *bits) {
// In Brotli, all bit depths are [1..15]
// 0 bit depth means that the symbol does not exist.
const int kMaxBits = 16; // 0..15 are values for bits
uint16_t bl_count[kMaxBits] = { 0 };
{
for (int i = 0; i < len; ++i) {
for (size_t i = 0; i < len; ++i) {
++bl_count[depth[i]];
}
bl_count[0] = 0;
@ -473,7 +459,7 @@ void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits) {
next_code[bits] = static_cast<uint16_t>(code);
}
}
for (int i = 0; i < len; ++i) {
for (size_t i = 0; i < len; ++i) {
if (depth[i]) {
bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
}
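To illustrate the canonical-code construction: for depths {1, 2, 3, 3} the bl_count histogram is {0, 1, 1, 2}, so next_code becomes 0b0 for length 1, 0b10 for length 2 and 0b110 for length 3, assigning the four symbols the codes 0, 10, 110, 111. ReverseBits then flips each code within its own length (10 -> 01, 110 -> 011, 111 -> 111) because the bit stream is written least-significant-bit first, yielding bits[] = {0, 1, 3, 7}.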

View File

@ -17,6 +17,26 @@
namespace brotli {
// A node of a Huffman tree.
struct HuffmanTree {
HuffmanTree(uint32_t count, int16_t left, int16_t right)
: total_count_(count),
index_left_(left),
index_right_or_value_(right) {
}
uint32_t total_count_;
int16_t index_left_;
int16_t index_right_or_value_;
};
// Sort the root nodes, least popular first.
inline bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) {
return v0.total_count_ < v1.total_count_;
}
void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
uint8_t *depth, uint8_t level);
// This function will create a Huffman tree.
//
// The (data,length) contains the population counts.
@ -26,8 +46,8 @@ namespace brotli {
// the symbol.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const int *data,
const int length,
void CreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
uint8_t *depth);
@ -37,18 +57,20 @@ void CreateHuffmanTree(const int *data,
//
// length contains the size of the histogram.
// counts contains the population counts.
int OptimizeHuffmanCountsForRle(int length, int* counts);
bool OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts);
// Write a Huffman tree from bit depths into the bitstream representation
// of a Huffman tree. The generated Huffman tree is to be compressed once
// more using a Huffman tree
void WriteHuffmanTree(const uint8_t* depth,
uint32_t num,
size_t num,
std::vector<uint8_t> *tree,
std::vector<uint8_t> *extra_bits_data);
// Get the actual bit values for a tree of bit depths.
void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits);
void ConvertBitDepthsToSymbols(const uint8_t *depth,
size_t len,
uint16_t *bits);
template<int kSize>
struct EntropyCode {

View File

@ -16,49 +16,16 @@
namespace brotli {
// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
inline int Log2Floor(uint32_t n) {
#if defined(__clang__) || \
(defined(__GNUC__) && \
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4))
return n == 0 ? -1 : 31 ^ __builtin_clz(n);
#else
if (n == 0)
return -1;
int log = 0;
uint32_t value = n;
for (int i = 4; i >= 0; --i) {
int shift = (1 << i);
uint32_t x = value >> shift;
if (x != 0) {
value = x;
log += shift;
}
}
assert(value == 1);
return log;
#endif
}
static inline int Log2FloorNonZero(uint32_t n) {
static inline uint32_t Log2FloorNonZero(size_t n) {
#ifdef __GNUC__
return 31 ^ __builtin_clz(n);
return 31u ^ static_cast<uint32_t>(__builtin_clz(static_cast<uint32_t>(n)));
#else
unsigned int result = 0;
uint32_t result = 0;
while (n >>= 1) result++;
return result;
#endif
}
// Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0.
inline int Log2Ceiling(uint32_t n) {
int floor = Log2Floor(n);
if (n == (n &~ (n - 1))) // zero or a power of two
return floor;
else
return floor + 1;
}
// A lookup table for small values of log2(int) to be used in entropy
// computation.
//
@ -153,8 +120,8 @@ static const float kLog2Table[] = {
};
// Faster logarithm for small integers, with the property of log2(0) == 0.
static inline double FastLog2(int v) {
if (v < (int)(sizeof(kLog2Table) / sizeof(kLog2Table[0]))) {
static inline double FastLog2(size_t v) {
if (v < sizeof(kLog2Table) / sizeof(kLog2Table[0])) {
return kLog2Table[v];
}
#if defined(_MSC_VER) && _MSC_VER <= 1600

View File

@ -18,10 +18,10 @@ namespace brotli {
// Separate implementation for little-endian 64-bit targets, for speed.
#if defined(__GNUC__) && defined(_LP64) && defined(IS_LITTLE_ENDIAN)
static inline int FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
int matched = 0;
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
size_t matched = 0;
size_t limit2 = (limit >> 3) + 1; // + 1 is for pre-decrement in while
while (PREDICT_TRUE(--limit2)) {
if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
@ -31,7 +31,7 @@ static inline int FindMatchLengthWithLimit(const uint8_t* s1,
} else {
uint64_t x =
BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
int matching_bits = __builtin_ctzll(x);
size_t matching_bits = static_cast<size_t>(__builtin_ctzll(x));
matched += matching_bits >> 3;
return matched;
}
@ -48,10 +48,10 @@ static inline int FindMatchLengthWithLimit(const uint8_t* s1,
return matched;
}
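The 64-bit fast path is worth a concrete trace: comparing "abcdefgh" against "abcXefgh" on a little-endian target, the XOR of the two 8-byte loads has its lowest set bit inside byte 3, so __builtin_ctzll(x) returns a value in [24, 31] and matching_bits >> 3 adds exactly the 3 bytes that matched before the mismatch.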
#else
static inline int FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
int matched = 0;
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
size_t matched = 0;
const uint8_t* s2_limit = s2 + limit;
const uint8_t* s2_ptr = s2;
// Find out how long the match is. We loop over the data 32 bits at a

View File

@ -10,12 +10,10 @@
#ifndef BROTLI_ENC_HASH_H_
#define BROTLI_ENC_HASH_H_
#include <string.h>
#include <sys/types.h>
#include <algorithm>
#include <cstdlib>
#include <memory>
#include <string>
#include <cstring>
#include <vector>
#include "./dictionary_hash.h"
#include "./fast_log.h"
@ -28,15 +26,17 @@
namespace brotli {
static const int kDistanceCacheIndex[] = {
static const uint32_t kDistanceCacheIndex[] = {
0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
};
static const int kDistanceCacheOffset[] = {
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
};
static const int kCutoffTransformsCount = 10;
static const int kCutoffTransforms[] = {0, 12, 27, 23, 42, 63, 56, 48, 59, 64};
static const uint32_t kCutoffTransformsCount = 10;
static const uint8_t kCutoffTransforms[] = {
0, 12, 27, 23, 42, 63, 56, 48, 59, 64
};
// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
@ -68,41 +68,47 @@ inline uint32_t Hash(const uint8_t *data) {
// This function is used to sometimes discard a longer backward reference
// when it is only slightly longer and the bit cost of encoding it exceeds
// the bits saved on the literals it replaces.
inline double BackwardReferenceScore(int copy_length,
int backward_reference_offset) {
return 5.4 * copy_length - 1.20 * Log2Floor(backward_reference_offset);
//
// backward_reference_offset MUST be positive.
inline double BackwardReferenceScore(size_t copy_length,
size_t backward_reference_offset) {
return 5.4 * static_cast<double>(copy_length) -
1.20 * Log2FloorNonZero(backward_reference_offset);
}
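For intuition on the weights: a 6-byte copy at distance 64 scores 5.4 * 6 - 1.2 * 6 = 25.2, while a 5-byte copy at distance 2 scores 5.4 * 5 - 1.2 * 1 = 25.8, so the heuristic prefers the shorter but much closer match -- exactly the discard behavior the comment describes. The switch to Log2FloorNonZero is safe because a zero offset is now excluded by contract.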
inline double BackwardReferenceScoreUsingLastDistance(int copy_length,
int distance_short_code) {
inline double BackwardReferenceScoreUsingLastDistance(size_t copy_length,
size_t distance_short_code) {
static const double kDistanceShortCodeBitCost[16] = {
-0.6, 0.95, 1.17, 1.27,
0.93, 0.93, 0.96, 0.96, 0.99, 0.99,
1.05, 1.05, 1.15, 1.15, 1.25, 1.25
};
return 5.4 * copy_length - kDistanceShortCodeBitCost[distance_short_code];
return 5.4 * static_cast<double>(copy_length) -
kDistanceShortCodeBitCost[distance_short_code];
}
struct BackwardMatch {
BackwardMatch() : distance(0), length_and_code(0) {}
BackwardMatch(int dist, int len)
: distance(dist), length_and_code((len << 5)) {}
BackwardMatch(size_t dist, size_t len)
: distance(static_cast<uint32_t>(dist))
, length_and_code(static_cast<uint32_t>(len << 5)) {}
BackwardMatch(int dist, int len, int len_code)
: distance(dist),
length_and_code((len << 5) | (len == len_code ? 0 : len_code)) {}
BackwardMatch(size_t dist, size_t len, size_t len_code)
: distance(static_cast<uint32_t>(dist))
, length_and_code(static_cast<uint32_t>(
(len << 5) | (len == len_code ? 0 : len_code))) {}
int length() const {
size_t length() const {
return length_and_code >> 5;
}
int length_code() const {
int code = length_and_code & 31;
size_t length_code() const {
size_t code = length_and_code & 31;
return code ? code : length();
}
int distance;
int length_and_code;
uint32_t distance;
uint32_t length_and_code;
};
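A quick check of the packing: BackwardMatch(100, 10, 12) stores length_and_code = (10 << 5) | 12 = 332, so length() recovers 332 >> 5 = 10 and length_code() recovers 332 & 31 = 12; when len == len_code the low five bits are zero and length_code() falls back to length(). The five-bit field assumes len_code < 32.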
// A (forgetful) hash table to the data seen by the compressor, to
@ -146,27 +152,27 @@ class HashLongestMatchQuickly {
inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer,
const size_t ring_buffer_mask,
const int* __restrict distance_cache,
const uint32_t cur_ix,
const int max_length,
const uint32_t max_backward,
int * __restrict best_len_out,
int * __restrict best_len_code_out,
int * __restrict best_distance_out,
const size_t cur_ix,
const size_t max_length,
const size_t max_backward,
size_t * __restrict best_len_out,
size_t * __restrict best_len_code_out,
size_t * __restrict best_distance_out,
double* __restrict best_score_out) {
const int best_len_in = *best_len_out;
const size_t best_len_in = *best_len_out;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
int compare_char = ring_buffer[cur_ix_masked + best_len_in];
double best_score = *best_score_out;
int best_len = best_len_in;
int cached_backward = distance_cache[0];
uint32_t prev_ix = cur_ix - cached_backward;
size_t best_len = best_len_in;
size_t cached_backward = static_cast<size_t>(distance_cache[0]);
size_t prev_ix = cur_ix - cached_backward;
bool match_found = false;
if (prev_ix < cur_ix) {
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
if (compare_char == ring_buffer[prev_ix + best_len]) {
int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
best_len = len;
@ -187,7 +193,7 @@ class HashLongestMatchQuickly {
if (kBucketSweep == 1) {
// Only one to look for, don't bother to prepare for a loop.
prev_ix = buckets_[key];
uint32_t backward = cur_ix - prev_ix;
size_t backward = cur_ix - prev_ix;
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
if (compare_char != ring_buffer[prev_ix + best_len_in]) {
return false;
@ -195,9 +201,9 @@ class HashLongestMatchQuickly {
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
return false;
}
const int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
*best_len_out = len;
*best_len_code_out = len;
@ -209,7 +215,7 @@ class HashLongestMatchQuickly {
uint32_t *bucket = buckets_ + key;
prev_ix = *bucket++;
for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) {
const uint32_t backward = cur_ix - prev_ix;
const size_t backward = cur_ix - prev_ix;
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
if (compare_char != ring_buffer[prev_ix + best_len]) {
continue;
@ -217,10 +223,9 @@ class HashLongestMatchQuickly {
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
continue;
}
const int len =
FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
const size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
const double score = BackwardReferenceScore(len, backward);
if (best_score < score) {
@ -242,19 +247,20 @@ class HashLongestMatchQuickly {
const uint32_t dict_key = Hash<14>(&ring_buffer[cur_ix_masked]) << 1;
const uint16_t v = kStaticDictionaryHash[dict_key];
if (v > 0) {
const int len = v & 31;
const int dist = v >> 5;
const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
const uint32_t len = v & 31;
const uint32_t dist = v >> 5;
const size_t offset =
kBrotliDictionaryOffsetsByLength[len] + len * dist;
if (len <= max_length) {
const int matchlen =
const size_t matchlen =
FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
&kBrotliDictionary[offset], len);
if (matchlen > len - kCutoffTransformsCount && matchlen > 0) {
const int transform_id = kCutoffTransforms[len - matchlen];
const int word_id =
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
const size_t transform_id = kCutoffTransforms[len - matchlen];
const size_t word_id =
transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
dist;
const int backward = max_backward + word_id + 1;
const size_t backward = max_backward + word_id + 1;
const double score = BackwardReferenceScore(matchlen, backward);
if (best_score < score) {
++num_dict_matches_;
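The rearranged dictionary condition above (matchlen + kCutoffTransformsCount > len instead of matchlen > len - kCutoffTransformsCount) is the unsigned-safe form: now that len is unsigned, len - kCutoffTransformsCount would wrap to a huge value whenever len < 10 and accept bogus matches, so the subtraction moves to the other side as an addition; behavior is unchanged for the lengths that previously compared as ints. The same rewrite appears in HashLongestMatch below.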
@ -295,7 +301,7 @@ class HashLongestMatchQuickly {
};
// The maximum length for which the zopflification uses distinct distances.
static const int kMaxZopfliLen = 325;
static const uint16_t kMaxZopfliLen = 325;
// A (forgetful) hash table to the data seen by the compressor, to
// help create backward references to previous data.
@ -339,41 +345,42 @@ class HashLongestMatch {
bool FindLongestMatch(const uint8_t * __restrict data,
const size_t ring_buffer_mask,
const int* __restrict distance_cache,
const uint32_t cur_ix,
const int max_length,
const uint32_t max_backward,
int * __restrict best_len_out,
int * __restrict best_len_code_out,
int * __restrict best_distance_out,
const size_t cur_ix,
const size_t max_length,
const size_t max_backward,
size_t * __restrict best_len_out,
size_t * __restrict best_len_code_out,
size_t * __restrict best_distance_out,
double * __restrict best_score_out) {
*best_len_code_out = 0;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
bool match_found = false;
// Don't accept a short copy from far away.
double best_score = *best_score_out;
int best_len = *best_len_out;
size_t best_len = *best_len_out;
*best_len_out = 0;
// Try last distance first.
for (int i = 0; i < kNumLastDistancesToCheck; ++i) {
const int idx = kDistanceCacheIndex[i];
const int backward = distance_cache[idx] + kDistanceCacheOffset[i];
uint32_t prev_ix = cur_ix - backward;
for (size_t i = 0; i < kNumLastDistancesToCheck; ++i) {
const size_t idx = kDistanceCacheIndex[i];
const size_t backward =
static_cast<size_t>(distance_cache[idx] + kDistanceCacheOffset[i]);
size_t prev_ix = static_cast<size_t>(cur_ix - backward);
if (prev_ix >= cur_ix) {
continue;
}
if (PREDICT_FALSE(backward > (int)max_backward)) {
if (PREDICT_FALSE(backward > max_backward)) {
continue;
}
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const int len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
&data[cur_ix_masked],
max_length);
if (len >= 3 || (len == 2 && i < 2)) {
// Comparing for >= 2 does not change the semantics, but just saves a
// few unnecessary binary logarithms in the backward reference score,
@ -392,22 +399,23 @@ class HashLongestMatch {
}
const uint32_t key = HashBytes(&data[cur_ix_masked]);
const uint32_t * __restrict const bucket = &buckets_[key][0];
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (int i = num_[key] - 1; i >= down; --i) {
uint32_t prev_ix = bucket[i & kBlockMask];
const uint32_t backward = cur_ix - prev_ix;
const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (size_t i = num_[key]; i > down;) {
--i;
size_t prev_ix = bucket[i & kBlockMask];
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
break;
}
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const int len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
&data[cur_ix_masked],
max_length);
if (len >= 4) {
// Comparing for >= 3 does not change the semantics, but just saves a
// few unnecessary binary logarithms in the backward reference
@ -425,24 +433,25 @@ class HashLongestMatch {
}
}
if (!match_found && num_dict_matches_ >= (num_dict_lookups_ >> 7)) {
uint32_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1;
size_t dict_key = Hash<14>(&data[cur_ix_masked]) << 1;
for (int k = 0; k < 2; ++k, ++dict_key) {
++num_dict_lookups_;
const uint16_t v = kStaticDictionaryHash[dict_key];
if (v > 0) {
const int len = v & 31;
const int dist = v >> 5;
const int offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
const size_t len = v & 31;
const size_t dist = v >> 5;
const size_t offset =
kBrotliDictionaryOffsetsByLength[len] + len * dist;
if (len <= max_length) {
const int matchlen =
const size_t matchlen =
FindMatchLengthWithLimit(&data[cur_ix_masked],
&kBrotliDictionary[offset], len);
if (matchlen > len - kCutoffTransformsCount && matchlen > 0) {
const int transform_id = kCutoffTransforms[len - matchlen];
const int word_id =
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
const size_t transform_id = kCutoffTransforms[len - matchlen];
const size_t word_id =
transform_id * (1 << kBrotliDictionarySizeBitsByLength[len]) +
dist;
const int backward = max_backward + word_id + 1;
const size_t backward = max_backward + word_id + 1;
double score = BackwardReferenceScore(matchlen, backward);
if (best_score < score) {
++num_dict_matches_;
@ -471,19 +480,18 @@ class HashLongestMatch {
// longest match.
//
// Requires that at least kMaxZopfliLen space is available in matches.
void FindAllMatches(const uint8_t* data,
const size_t ring_buffer_mask,
const uint32_t cur_ix,
const int max_length,
const uint32_t max_backward,
int* num_matches,
BackwardMatch* matches) const {
size_t FindAllMatches(const uint8_t* data,
const size_t ring_buffer_mask,
const size_t cur_ix,
const size_t max_length,
const size_t max_backward,
BackwardMatch* matches) const {
BackwardMatch* const orig_matches = matches;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
int best_len = 1;
int stop = static_cast<int>(cur_ix) - 64;
if (stop < 0) { stop = 0; }
for (int i = cur_ix - 1; i > stop && best_len <= 2; --i) {
size_t best_len = 1;
size_t stop = cur_ix - 64;
if (cur_ix < 64) { stop = 0; }
for (size_t i = cur_ix - 1; i > stop && best_len <= 2; --i) {
size_t prev_ix = i;
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
@ -494,33 +502,7 @@ class HashLongestMatch {
data[cur_ix_masked + 1] != data[prev_ix + 1]) {
continue;
}
const int len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len > best_len) {
best_len = len;
if (len > kMaxZopfliLen) {
matches = orig_matches;
}
*matches++ = BackwardMatch(static_cast<int>(backward), len);
}
}
const uint32_t key = HashBytes(&data[cur_ix_masked]);
const uint32_t * __restrict const bucket = &buckets_[key][0];
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (int i = num_[key] - 1; i >= down; --i) {
uint32_t prev_ix = bucket[i & kBlockMask];
const uint32_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
break;
}
prev_ix &= static_cast<uint32_t>(ring_buffer_mask);
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const int len =
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len > best_len) {
@ -531,20 +513,48 @@ class HashLongestMatch {
*matches++ = BackwardMatch(backward, len);
}
}
std::vector<int> dict_matches(kMaxDictionaryMatchLen + 1, kInvalidMatch);
int minlen = std::max<int>(4, best_len + 1);
const uint32_t key = HashBytes(&data[cur_ix_masked]);
const uint32_t * __restrict const bucket = &buckets_[key][0];
const size_t down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
for (size_t i = num_[key]; i > down;) {
--i;
size_t prev_ix = bucket[i & kBlockMask];
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len > best_len) {
best_len = len;
if (len > kMaxZopfliLen) {
matches = orig_matches;
}
*matches++ = BackwardMatch(backward, len);
}
}
std::vector<uint32_t> dict_matches(kMaxDictionaryMatchLen + 1,
kInvalidMatch);
size_t minlen = std::max<size_t>(4, best_len + 1);
if (FindAllStaticDictionaryMatches(&data[cur_ix_masked], minlen, max_length,
&dict_matches[0])) {
int maxlen = std::min<int>(kMaxDictionaryMatchLen, max_length);
for (int l = minlen; l <= maxlen; ++l) {
int dict_id = dict_matches[l];
size_t maxlen = std::min<size_t>(kMaxDictionaryMatchLen, max_length);
for (size_t l = minlen; l <= maxlen; ++l) {
uint32_t dict_id = dict_matches[l];
if (dict_id < kInvalidMatch) {
*matches++ = BackwardMatch(max_backward + (dict_id >> 5) + 1, l,
dict_id & 31);
}
}
}
*num_matches += static_cast<int>(matches - orig_matches);
return static_cast<size_t>(matches - orig_matches);
}
enum { kHashLength = 4 };

View File

@ -28,7 +28,7 @@ void BuildHistograms(
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const std::vector<int>& context_modes,
const std::vector<ContextType>& context_modes,
std::vector<HistogramLiteral>* literal_histograms,
std::vector<HistogramCommand>* insert_and_copy_histograms,
std::vector<HistogramDistance>* copy_dist_histograms) {
@ -41,9 +41,9 @@ void BuildHistograms(
insert_and_copy_it.Next();
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
cmd.cmd_prefix_);
for (int j = 0; j < cmd.insert_len_; ++j) {
for (size_t j = cmd.insert_len_; j != 0; --j) {
literal_it.Next();
int context = (literal_it.type_ << kLiteralContextBits) +
size_t context = (literal_it.type_ << kLiteralContextBits) +
Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
(*literal_histograms)[context].Add(ringbuffer[pos & mask]);
prev_byte2 = prev_byte;
@ -56,7 +56,7 @@ void BuildHistograms(
prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
dist_it.Next();
int context = (dist_it.type_ << kDistanceContextBits) +
size_t context = (dist_it.type_ << kDistanceContextBits) +
cmd.DistanceContext();
(*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
}

View File

@ -9,10 +9,10 @@
#ifndef BROTLI_ENC_HISTOGRAM_H_
#define BROTLI_ENC_HISTOGRAM_H_
#include <string.h>
#include <cstring>
#include <limits>
#include <vector>
#include <utility>
#include "./context.h"
#include "./command.h"
#include "./fast_log.h"
#include "./prefix.h"
@ -33,29 +33,29 @@ struct Histogram {
total_count_ = 0;
bit_cost_ = std::numeric_limits<double>::infinity();
}
void Add(int val) {
void Add(size_t val) {
++data_[val];
++total_count_;
}
void Remove(int val) {
void Remove(size_t val) {
--data_[val];
--total_count_;
}
template<typename DataType>
void Add(const DataType *p, size_t n) {
total_count_ += static_cast<int>(n);
total_count_ += n;
n += 1;
while(--n) ++data_[*p++];
}
void AddHistogram(const Histogram& v) {
total_count_ += v.total_count_;
for (int i = 0; i < kDataSize; ++i) {
for (size_t i = 0; i < kDataSize; ++i) {
data_[i] += v.data_[i];
}
}
int data_[kDataSize];
int total_count_;
uint32_t data_[kDataSize];
size_t total_count_;
double bit_cost_;
};
@ -70,8 +70,8 @@ typedef Histogram<272> HistogramContextMap;
// Block type histogram, 256 block types + 2 special symbols.
typedef Histogram<258> HistogramBlockType;
static const int kLiteralContextBits = 6;
static const int kDistanceContextBits = 2;
static const size_t kLiteralContextBits = 6;
static const size_t kDistanceContextBits = 2;
void BuildHistograms(
const Command* cmds,
@ -84,7 +84,7 @@ void BuildHistograms(
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const std::vector<int>& context_modes,
const std::vector<ContextType>& context_modes,
std::vector<HistogramLiteral>* literal_histograms,
std::vector<HistogramCommand>* insert_and_copy_histograms,
std::vector<HistogramDistance>* copy_dist_histograms);

View File

@ -17,29 +17,29 @@
namespace brotli {
static int UTF8Position(int last, int c, int clamp) {
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
if (c < 128) {
return 0; // Next one is the 'Byte 1' again.
} else if (c >= 192) {
return std::min(1, clamp); // Next one is the 'Byte 2' of utf-8 encoding.
} else if (c >= 192) { // Next one is the 'Byte 2' of utf-8 encoding.
return std::min<size_t>(1, clamp);
} else {
// Let's decide over the last byte if this ends the sequence.
if (last < 0xe0) {
return 0; // Completed two or three byte coding.
} else {
return std::min(2, clamp); // Next one is the 'Byte 3' of utf-8 encoding.
} else { // Next one is the 'Byte 3' of utf-8 encoding.
return std::min<size_t>(2, clamp);
}
}
}
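Tracing UTF8Position on the three-byte sequence 0xE2 0x82 0xAC (U+20AC): the lead byte 0xE2 is >= 192, so the next position is min(1, clamp); the continuation byte 0x82 with last = 0xE2 >= 0xE0 yields min(2, clamp); and the final byte 0xAC with last = 0x82 < 0xE0 returns 0, closing the sequence. With clamp = 1 the second and third states collapse into the cheaper two-state model that, per the comment below, tends to compress better in practice.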
static int DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
const uint8_t *data) {
int counts[3] = { 0 };
int max_utf8 = 1; // should be 2, but 1 compresses better.
int last_c = 0;
int utf8_pos = 0;
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
const uint8_t *data) {
size_t counts[3] = { 0 };
size_t max_utf8 = 1; // should be 2, but 1 compresses better.
size_t last_c = 0;
size_t utf8_pos = 0;
for (size_t i = 0; i < len; ++i) {
int c = data[(pos + i) & mask];
size_t c = data[(pos + i) & mask];
utf8_pos = UTF8Position(last_c, c, 2);
++counts[utf8_pos];
last_c = c;
@ -58,17 +58,17 @@ void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
// max_utf8 is 0 (normal ascii single byte modeling),
// 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
const int max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
int histogram[3][256] = { { 0 } };
int window_half = 495;
int in_window = std::min(window_half, static_cast<int>(len));
int in_window_utf8[3] = { 0 };
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
size_t histogram[3][256] = { { 0 } };
size_t window_half = 495;
size_t in_window = std::min(window_half, len);
size_t in_window_utf8[3] = { 0 };
// Bootstrap histograms.
int last_c = 0;
int utf8_pos = 0;
for (int i = 0; i < in_window; ++i) {
int c = data[(pos + i) & mask];
size_t last_c = 0;
size_t utf8_pos = 0;
for (size_t i = 0; i < in_window; ++i) {
size_t c = data[(pos + i) & mask];
++histogram[utf8_pos][c];
++in_window_utf8[utf8_pos];
utf8_pos = UTF8Position(last_c, c, max_utf8);
@ -76,30 +76,30 @@ void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
}
// Compute bit costs with sliding window.
for (int i = 0; i < static_cast<int>(len); ++i) {
if (i - window_half >= 0) {
for (size_t i = 0; i < len; ++i) {
if (i >= window_half) {
// Remove a byte in the past.
int c = (i - window_half - 1) < 0 ?
size_t c = i < window_half + 1 ?
0 : data[(pos + i - window_half - 1) & mask];
int last_c = (i - window_half - 2) < 0 ?
size_t last_c = i < window_half + 2 ?
0 : data[(pos + i - window_half - 2) & mask];
int utf8_pos2 = UTF8Position(last_c, c, max_utf8);
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
--histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
--in_window_utf8[utf8_pos2];
}
if (i + window_half < static_cast<int>(len)) {
if (i + window_half < len) {
// Add a byte in the future.
int c = data[(pos + i + window_half - 1) & mask];
int last_c = data[(pos + i + window_half - 2) & mask];
int utf8_pos2 = UTF8Position(last_c, c, max_utf8);
size_t c = data[(pos + i + window_half - 1) & mask];
size_t last_c = data[(pos + i + window_half - 2) & mask];
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
++in_window_utf8[utf8_pos2];
}
int c = i < 1 ? 0 : data[(pos + i - 1) & mask];
int last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
int utf8_pos = UTF8Position(last_c, c, max_utf8);
size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
size_t masked_pos = (pos + i) & mask;
int histo = histogram[utf8_pos][data[masked_pos]];
size_t histo = histogram[utf8_pos][data[masked_pos]];
if (histo == 0) {
histo = 1;
}
@ -114,7 +114,7 @@ void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
// rapidly in the beginning of the file, perhaps because the beginning
// of the data is a statistical "anomaly".
if (i < 2000) {
lit_cost += 0.7 - ((2000 - i) / 2000.0 * 0.35);
lit_cost += 0.7 - (static_cast<double>(2000 - i) / 2000.0 * 0.35);
}
cost[i] = static_cast<float>(lit_cost);
}
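For the record, the early-file surcharge above adds 0.7 - (2000 - i) / 2000 * 0.35 bits per literal: 0.35 bits at i = 0, rising to just under 0.7 bits at i = 1999, and nothing from i = 2000 on.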
@ -126,28 +126,28 @@ void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
return;
}
int histogram[256] = { 0 };
int window_half = 2000;
int in_window = std::min(window_half, static_cast<int>(len));
size_t histogram[256] = { 0 };
size_t window_half = 2000;
size_t in_window = std::min(window_half, len);
// Bootstrap histogram.
for (int i = 0; i < in_window; ++i) {
for (size_t i = 0; i < in_window; ++i) {
++histogram[data[(pos + i) & mask]];
}
// Compute bit costs with sliding window.
for (int i = 0; i < static_cast<int>(len); ++i) {
if (i - window_half >= 0) {
for (size_t i = 0; i < len; ++i) {
if (i >= window_half) {
// Remove a byte in the past.
--histogram[data[(pos + i - window_half) & mask]];
--in_window;
}
if (i + window_half < static_cast<int>(len)) {
if (i + window_half < len) {
// Add a byte in the future.
++histogram[data[(pos + i + window_half) & mask]];
++in_window;
}
int histo = histogram[data[(pos + i) & mask]];
size_t histo = histogram[data[(pos + i) & mask]];
if (histo == 0) {
histo = 1;
}

View File

@ -23,7 +23,7 @@ void BuildMetaBlock(const uint8_t* ringbuffer,
uint8_t prev_byte2,
const Command* cmds,
size_t num_commands,
int literal_context_mode,
ContextType literal_context_mode,
MetaBlockSplit* mb) {
SplitBlock(cmds, num_commands,
ringbuffer, pos, mask,
@ -31,12 +31,12 @@ void BuildMetaBlock(const uint8_t* ringbuffer,
&mb->command_split,
&mb->distance_split);
std::vector<int> literal_context_modes(mb->literal_split.num_types,
literal_context_mode);
std::vector<ContextType> literal_context_modes(mb->literal_split.num_types,
literal_context_mode);
int num_literal_contexts =
size_t num_literal_contexts =
mb->literal_split.num_types << kLiteralContextBits;
int num_distance_contexts =
size_t num_distance_contexts =
mb->distance_split.num_types << kDistanceContextBits;
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
mb->command_histograms.resize(mb->command_split.num_types);
@ -58,17 +58,15 @@ void BuildMetaBlock(const uint8_t* ringbuffer,
// Histogram ids need to fit in one byte.
static const size_t kMaxNumberOfHistograms = 256;
mb->literal_histograms = literal_histograms;
ClusterHistograms(literal_histograms,
1 << kLiteralContextBits,
1u << kLiteralContextBits,
mb->literal_split.num_types,
kMaxNumberOfHistograms,
&mb->literal_histograms,
&mb->literal_context_map);
mb->distance_histograms = distance_histograms;
ClusterHistograms(distance_histograms,
1 << kDistanceContextBits,
1u << kDistanceContextBits,
mb->distance_split.num_types,
kMaxNumberOfHistograms,
&mb->distance_histograms,
@ -79,10 +77,10 @@ void BuildMetaBlock(const uint8_t* ringbuffer,
template<typename HistogramType>
class BlockSplitter {
public:
BlockSplitter(int alphabet_size,
int min_block_size,
BlockSplitter(size_t alphabet_size,
size_t min_block_size,
double split_threshold,
int num_symbols,
size_t num_symbols,
BlockSplit* split,
std::vector<HistogramType>* histograms)
: alphabet_size_(alphabet_size),
@ -95,10 +93,10 @@ class BlockSplitter {
block_size_(0),
curr_histogram_ix_(0),
merge_last_count_(0) {
int max_num_blocks = num_symbols / min_block_size + 1;
size_t max_num_blocks = num_symbols / min_block_size + 1;
// We have to allocate one more histogram than the maximum number of block
// types for the current histogram when the meta-block is too big.
int max_num_types = std::min(max_num_blocks, kMaxBlockTypes + 1);
size_t max_num_types = std::min<size_t>(max_num_blocks, kMaxBlockTypes + 1);
split_->lengths.resize(max_num_blocks);
split_->types.resize(max_num_blocks);
histograms_->resize(max_num_types);
@ -107,7 +105,7 @@ class BlockSplitter {
// Adds the next symbol to the current histogram. When the current histogram
// reaches the target size, decides on merging the block.
void AddSymbol(int symbol) {
void AddSymbol(size_t symbol) {
(*histograms_)[curr_histogram_ix_].Add(symbol);
++block_size_;
if (block_size_ == target_block_size_) {
@ -125,7 +123,7 @@ class BlockSplitter {
}
if (num_blocks_ == 0) {
// Create first block.
split_->lengths[0] = block_size_;
split_->lengths[0] = static_cast<uint32_t>(block_size_);
split_->types[0] = 0;
last_entropy_[0] =
BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_);
@ -140,8 +138,8 @@ class BlockSplitter {
HistogramType combined_histo[2];
double combined_entropy[2];
double diff[2];
for (int j = 0; j < 2; ++j) {
int last_histogram_ix = last_histogram_ix_[j];
for (size_t j = 0; j < 2; ++j) {
size_t last_histogram_ix = last_histogram_ix_[j];
combined_histo[j] = (*histograms_)[curr_histogram_ix_];
combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]);
combined_entropy[j] = BitsEntropy(
@ -153,10 +151,10 @@ class BlockSplitter {
diff[0] > split_threshold_ &&
diff[1] > split_threshold_) {
// Create new block.
split_->lengths[num_blocks_] = block_size_;
split_->types[num_blocks_] = split_->num_types;
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
last_histogram_ix_[1] = last_histogram_ix_[0];
last_histogram_ix_[0] = split_->num_types;
last_histogram_ix_[0] = static_cast<uint8_t>(split_->num_types);
last_entropy_[1] = last_entropy_[0];
last_entropy_[0] = entropy;
++num_blocks_;
@ -167,7 +165,7 @@ class BlockSplitter {
target_block_size_ = min_block_size_;
} else if (diff[1] < diff[0] - 20.0) {
// Combine this block with second last block.
split_->lengths[num_blocks_] = block_size_;
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
(*histograms_)[last_histogram_ix_[0]] = combined_histo[1];
@ -180,7 +178,7 @@ class BlockSplitter {
target_block_size_ = min_block_size_;
} else {
// Combine this block with last block.
split_->lengths[num_blocks_ - 1] += block_size_;
split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
(*histograms_)[last_histogram_ix_[0]] = combined_histo[0];
last_entropy_[0] = combined_entropy[0];
if (split_->num_types == 1) {
@ -201,35 +199,35 @@ class BlockSplitter {
}
private:
static const int kMaxBlockTypes = 256;
static const uint16_t kMaxBlockTypes = 256;
// Alphabet size of a particular block category.
const int alphabet_size_;
const size_t alphabet_size_;
// We collect at least this many symbols for each block.
const int min_block_size_;
const size_t min_block_size_;
// We merge histograms A and B if
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
// where A is the current histogram and B is the histogram of the last or the
// second last block type.
const double split_threshold_;
int num_blocks_;
size_t num_blocks_;
BlockSplit* split_; // not owned
std::vector<HistogramType>* histograms_; // not owned
// The number of symbols that we want to collect before deciding on whether
// or not to merge the block with a previous one or emit a new block.
int target_block_size_;
size_t target_block_size_;
// The number of symbols in the current histogram.
int block_size_;
size_t block_size_;
// Offset of the current histogram.
int curr_histogram_ix_;
size_t curr_histogram_ix_;
// Offset of the histograms of the previous two block types.
int last_histogram_ix_[2];
size_t last_histogram_ix_[2];
// Entropy of the previous two block types.
double last_entropy_[2];
// The number of times we merged the current block with the last one.
int merge_last_count_;
size_t merge_last_count_;
};
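To make the split rule concrete: suppose the current command histogram alone has a BitsEntropy of 1000 bits, the last block type's stored entropy is 4000 bits, and their combined histogram costs 5600 bits. Then diff[0] = 5600 - 1000 - 4000 = 600 > 500 (the command splitter's threshold below), and if the second-last type fares no better, a new block type is created; otherwise the block is merged, preferring the second-last type only when it is at least 20 bits cheaper than the last.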
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
@ -238,7 +236,7 @@ void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb) {
int num_literals = 0;
size_t num_literals = 0;
for (size_t i = 0; i < n_commands; ++i) {
num_literals += commands[i].insert_len_;
}
@ -247,16 +245,16 @@ void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
256, 512, 400.0, num_literals,
&mb->literal_split, &mb->literal_histograms);
BlockSplitter<HistogramCommand> cmd_blocks(
kNumCommandPrefixes, 1024, 500.0, static_cast<int>(n_commands),
kNumCommandPrefixes, 1024, 500.0, n_commands,
&mb->command_split, &mb->command_histograms);
BlockSplitter<HistogramDistance> dist_blocks(
64, 512, 100.0, static_cast<int>(n_commands),
64, 512, 100.0, n_commands,
&mb->distance_split, &mb->distance_histograms);
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
for (int j = 0; j < cmd.insert_len_; ++j) {
for (size_t j = cmd.insert_len_; j != 0; --j) {
lit_blocks.AddSymbol(ringbuffer[pos & mask]);
++pos;
}
@ -276,11 +274,11 @@ void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
template<typename HistogramType>
class ContextBlockSplitter {
public:
ContextBlockSplitter(int alphabet_size,
int num_contexts,
int min_block_size,
ContextBlockSplitter(size_t alphabet_size,
size_t num_contexts,
size_t min_block_size,
double split_threshold,
int num_symbols,
size_t num_symbols,
BlockSplit* split,
std::vector<HistogramType>* histograms)
: alphabet_size_(alphabet_size),
@ -296,10 +294,10 @@ class ContextBlockSplitter {
curr_histogram_ix_(0),
last_entropy_(2 * num_contexts),
merge_last_count_(0) {
int max_num_blocks = num_symbols / min_block_size + 1;
size_t max_num_blocks = num_symbols / min_block_size + 1;
// We have to allocate one more histogram than the maximum number of block
// types for the current histogram when the meta-block is too big.
int max_num_types = std::min(max_num_blocks, max_block_types_ + 1);
size_t max_num_types = std::min(max_num_blocks, max_block_types_ + 1);
split_->lengths.resize(max_num_blocks);
split_->types.resize(max_num_blocks);
histograms_->resize(max_num_types * num_contexts);
@ -308,7 +306,7 @@ class ContextBlockSplitter {
// Adds the next symbol to the current block type and context. When the
// current block reaches the target size, decides on merging the block.
void AddSymbol(int symbol, int context) {
void AddSymbol(size_t symbol, size_t context) {
(*histograms_)[curr_histogram_ix_ + context].Add(symbol);
++block_size_;
if (block_size_ == target_block_size_) {
@ -326,9 +324,9 @@ class ContextBlockSplitter {
}
if (num_blocks_ == 0) {
// Create first block.
split_->lengths[0] = block_size_;
split_->lengths[0] = static_cast<uint32_t>(block_size_);
split_->types[0] = 0;
for (int i = 0; i < num_contexts_; ++i) {
for (size_t i = 0; i < num_contexts_; ++i) {
last_entropy_[i] =
BitsEntropy(&(*histograms_)[i].data_[0], alphabet_size_);
last_entropy_[num_contexts_ + i] = last_entropy_[i];
@ -346,13 +344,13 @@ class ContextBlockSplitter {
std::vector<HistogramType> combined_histo(2 * num_contexts_);
std::vector<double> combined_entropy(2 * num_contexts_);
double diff[2] = { 0.0 };
for (int i = 0; i < num_contexts_; ++i) {
int curr_histo_ix = curr_histogram_ix_ + i;
for (size_t i = 0; i < num_contexts_; ++i) {
size_t curr_histo_ix = curr_histogram_ix_ + i;
entropy[i] = BitsEntropy(&(*histograms_)[curr_histo_ix].data_[0],
alphabet_size_);
for (int j = 0; j < 2; ++j) {
int jx = j * num_contexts_ + i;
int last_histogram_ix = last_histogram_ix_[j] + i;
for (size_t j = 0; j < 2; ++j) {
size_t jx = j * num_contexts_ + i;
size_t last_histogram_ix = last_histogram_ix_[j] + i;
combined_histo[jx] = (*histograms_)[curr_histo_ix];
combined_histo[jx].AddHistogram((*histograms_)[last_histogram_ix]);
combined_entropy[jx] = BitsEntropy(
@ -365,11 +363,11 @@ class ContextBlockSplitter {
diff[0] > split_threshold_ &&
diff[1] > split_threshold_) {
// Create new block.
split_->lengths[num_blocks_] = block_size_;
split_->types[num_blocks_] = split_->num_types;
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
last_histogram_ix_[1] = last_histogram_ix_[0];
last_histogram_ix_[0] = split_->num_types * num_contexts_;
for (int i = 0; i < num_contexts_; ++i) {
for (size_t i = 0; i < num_contexts_; ++i) {
last_entropy_[num_contexts_ + i] = last_entropy_[i];
last_entropy_[i] = entropy[i];
}
@ -381,10 +379,10 @@ class ContextBlockSplitter {
target_block_size_ = min_block_size_;
} else if (diff[1] < diff[0] - 20.0) {
// Combine this block with second last block.
split_->lengths[num_blocks_] = block_size_;
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
for (int i = 0; i < num_contexts_; ++i) {
for (size_t i = 0; i < num_contexts_; ++i) {
(*histograms_)[last_histogram_ix_[0] + i] =
combined_histo[num_contexts_ + i];
last_entropy_[num_contexts_ + i] = last_entropy_[i];
@ -397,8 +395,8 @@ class ContextBlockSplitter {
target_block_size_ = min_block_size_;
} else {
// Combine this block with last block.
split_->lengths[num_blocks_ - 1] += block_size_;
for (int i = 0; i < num_contexts_; ++i) {
split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
for (size_t i = 0; i < num_contexts_; ++i) {
(*histograms_)[last_histogram_ix_[0] + i] = combined_histo[i];
last_entropy_[i] = combined_entropy[i];
if (split_->num_types == 1) {
@ -423,34 +421,34 @@ class ContextBlockSplitter {
static const int kMaxBlockTypes = 256;
// Alphabet size of a particular block category.
const int alphabet_size_;
const int num_contexts_;
const int max_block_types_;
const size_t alphabet_size_;
const size_t num_contexts_;
const size_t max_block_types_;
// We collect at least this many symbols for each block.
const int min_block_size_;
const size_t min_block_size_;
// We merge histograms A and B if
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
// where A is the current histogram and B is the histogram of the last or the
// second last block type.
const double split_threshold_;
int num_blocks_;
size_t num_blocks_;
BlockSplit* split_; // not owned
std::vector<HistogramType>* histograms_; // not owned
// The number of symbols that we want to collect before deciding on whether
// or not to merge the block with a previous one or emit a new block.
int target_block_size_;
size_t target_block_size_;
// The number of symbols in the current histogram.
int block_size_;
size_t block_size_;
// Offset of the current histogram.
int curr_histogram_ix_;
size_t curr_histogram_ix_;
// Offset of the histograms of the previous two block types.
int last_histogram_ix_[2];
size_t last_histogram_ix_[2];
// Entropy of the previous two block types.
std::vector<double> last_entropy_;
// The number of times we merged the current block with the last one.
int merge_last_count_;
size_t merge_last_count_;
};
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
@ -458,13 +456,13 @@ void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
int literal_context_mode,
int num_contexts,
const int* static_context_map,
ContextType literal_context_mode,
size_t num_contexts,
const uint32_t* static_context_map,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb) {
int num_literals = 0;
size_t num_literals = 0;
for (size_t i = 0; i < n_commands; ++i) {
num_literals += commands[i].insert_len_;
}
@ -473,17 +471,17 @@ void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
256, num_contexts, 512, 400.0, num_literals,
&mb->literal_split, &mb->literal_histograms);
BlockSplitter<HistogramCommand> cmd_blocks(
kNumCommandPrefixes, 1024, 500.0, static_cast<int>(n_commands),
kNumCommandPrefixes, 1024, 500.0, n_commands,
&mb->command_split, &mb->command_histograms);
BlockSplitter<HistogramDistance> dist_blocks(
64, 512, 100.0, static_cast<int>(n_commands),
64, 512, 100.0, n_commands,
&mb->distance_split, &mb->distance_histograms);
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
for (int j = 0; j < cmd.insert_len_; ++j) {
int context = Context(prev_byte, prev_byte2, literal_context_mode);
for (size_t j = cmd.insert_len_; j != 0; --j) {
size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
uint8_t literal = ringbuffer[pos & mask];
lit_blocks.AddSymbol(literal, static_context_map[context]);
prev_byte2 = prev_byte;
@ -506,16 +504,16 @@ void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
mb->literal_context_map.resize(
mb->literal_split.num_types << kLiteralContextBits);
for (int i = 0; i < mb->literal_split.num_types; ++i) {
for (int j = 0; j < (1 << kLiteralContextBits); ++j) {
for (size_t i = 0; i < mb->literal_split.num_types; ++i) {
for (size_t j = 0; j < (1u << kLiteralContextBits); ++j) {
mb->literal_context_map[(i << kLiteralContextBits) + j] =
i * num_contexts + static_context_map[j];
static_cast<uint32_t>(i * num_contexts) + static_context_map[j];
}
}
}
void OptimizeHistograms(int num_direct_distance_codes,
int distance_postfix_bits,
void OptimizeHistograms(size_t num_direct_distance_codes,
size_t distance_postfix_bits,
MetaBlockSplit* mb) {
for (size_t i = 0; i < mb->literal_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0]);
@ -524,9 +522,9 @@ void OptimizeHistograms(int num_direct_distance_codes,
OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
&mb->command_histograms[i].data_[0]);
}
int num_distance_codes =
size_t num_distance_codes =
kNumDistanceShortCodes + num_direct_distance_codes +
(48 << distance_postfix_bits);
(48u << distance_postfix_bits);
for (size_t i = 0; i < mb->distance_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(num_distance_codes,
&mb->distance_histograms[i].data_[0]);

View File

@ -20,17 +20,17 @@ namespace brotli {
struct BlockSplit {
BlockSplit() : num_types(0) {}
int num_types;
std::vector<int> types;
std::vector<int> lengths;
size_t num_types;
std::vector<uint8_t> types;
std::vector<uint32_t> lengths;
};
struct MetaBlockSplit {
BlockSplit literal_split;
BlockSplit command_split;
BlockSplit distance_split;
std::vector<int> literal_context_map;
std::vector<int> distance_context_map;
std::vector<uint32_t> literal_context_map;
std::vector<uint32_t> distance_context_map;
std::vector<HistogramLiteral> literal_histograms;
std::vector<HistogramCommand> command_histograms;
std::vector<HistogramDistance> distance_histograms;
@ -44,7 +44,7 @@ void BuildMetaBlock(const uint8_t* ringbuffer,
uint8_t prev_byte2,
const Command* cmds,
size_t num_commands,
int literal_context_mode,
ContextType literal_context_mode,
MetaBlockSplit* mb);
// Uses a fast greedy block splitter that tries to merge current block with the
@ -64,15 +64,15 @@ void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
int literal_context_mode,
int num_contexts,
const int* static_context_map,
ContextType literal_context_mode,
size_t num_contexts,
const uint32_t* static_context_map,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb);
void OptimizeHistograms(int num_direct_distance_codes,
int distance_postfix_bits,
void OptimizeHistograms(size_t num_direct_distance_codes,
size_t distance_postfix_bits,
MetaBlockSplit* mb);
} // namespace brotli

View File

@ -22,10 +22,9 @@
/* Let's try and follow the Linux convention */
#define __BYTE_ORDER BYTE_ORDER
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#define __BIG_ENDIAN BIG_ENDIAN
#endif
// define the macros IS_LITTLE_ENDIAN or IS_BIG_ENDIAN
// define the macro IS_LITTLE_ENDIAN
// using the above endian definitions from endian.h if
// endian.h was included
#ifdef __BYTE_ORDER
@ -33,19 +32,17 @@
#define IS_LITTLE_ENDIAN
#endif
#if __BYTE_ORDER == __BIG_ENDIAN
#define IS_BIG_ENDIAN
#endif
#else
#if defined(__LITTLE_ENDIAN__)
#define IS_LITTLE_ENDIAN
#elif defined(__BIG_ENDIAN__)
#define IS_BIG_ENDIAN
#endif
#endif // __BYTE_ORDER
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define IS_LITTLE_ENDIAN
#endif
// Enable little-endian optimization for x64 architecture on Windows.
#if (defined(_WIN32) || defined(_WIN64)) && defined(_M_X64)
#define IS_LITTLE_ENDIAN
@ -69,8 +66,8 @@
// On some platforms, like ARM, the copy functions can be more efficient
// than a load and a store.
#if defined(ARCH_PIII) || defined(ARCH_ATHLON) || \
defined(ARCH_K8) || defined(_ARCH_PPC)
#if defined(ARCH_PIII) || \
defined(ARCH_ATHLON) || defined(ARCH_K8) || defined(_ARCH_PPC)
// x86 and x86-64 can perform unaligned loads/stores directly;
// modern PowerPC hardware can also do unaligned integer loads and stores;
@ -142,10 +139,4 @@ inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
#endif
#ifdef BROTLI_ENCODE_DEBUG
#define BROTLI_DCHECK(x) assert(x)
#else
#define BROTLI_DCHECK(x)
#endif
#endif // BROTLI_ENC_PORT_H_
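A hypothetical illustration (not part of the patch) of what the IS_LITTLE_ENDIAN macro above enables: on little-endian targets the first mismatching byte of two 8-byte blocks falls out of one XOR plus a trailing-zero count, the trick behind fast match-length scanning. Load64, FirstDiffByte and the GCC/Clang intrinsic __builtin_ctzll are assumptions of this sketch:

#include <cstddef>
#include <cstdint>
#include <cstring>

static inline uint64_t Load64(const void* p) {
  uint64_t v;
  memcpy(&v, p, sizeof v);  // unaligned-safe load
  return v;
}

// Index of the first differing byte in [0, 8), or 8 if all equal.
static inline size_t FirstDiffByte(const uint8_t* a, const uint8_t* b) {
  uint64_t x = Load64(a) ^ Load64(b);
  if (x == 0) return 8;
#if defined(IS_LITTLE_ENDIAN)
  // The lowest-addressed byte is least significant, so the first
  // mismatch is the lowest set bit, divided by 8.
  return static_cast<size_t>(__builtin_ctzll(x)) >> 3;
#else
  size_t i = 0;
  while (a[i] == b[i]) ++i;  // byte-wise fallback
  return i;
#endif
}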

View File

@ -15,18 +15,18 @@
namespace brotli {
static const int kNumInsertLenPrefixes = 24;
static const int kNumCopyLenPrefixes = 24;
static const int kNumCommandPrefixes = 704;
static const int kNumBlockLenPrefixes = 26;
static const int kNumDistanceShortCodes = 16;
static const int kNumDistancePrefixes = 520;
static const uint32_t kNumInsertLenPrefixes = 24;
static const uint32_t kNumCopyLenPrefixes = 24;
static const uint32_t kNumCommandPrefixes = 704;
static const uint32_t kNumBlockLenPrefixes = 26;
static const uint32_t kNumDistanceShortCodes = 16;
static const uint32_t kNumDistancePrefixes = 520;
// Represents the range of values belonging to a prefix code:
// [offset, offset + 2^nbits)
struct PrefixCodeRange {
int offset;
int nbits;
uint32_t offset;
uint32_t nbits;
};
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
@ -39,8 +39,8 @@ static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
{8433, 13}, {16625, 24}
};
inline void GetBlockLengthPrefixCode(int len,
int* code, int* n_extra, int* extra) {
inline void GetBlockLengthPrefixCode(uint32_t len, uint32_t* code,
uint32_t* n_extra, uint32_t* extra) {
*code = 0;
while (*code < 25 && len >= kBlockLengthPrefixCode[*code + 1].offset) {
++(*code);
@ -49,9 +49,9 @@ inline void GetBlockLengthPrefixCode(int len,
*extra = len - kBlockLengthPrefixCode[*code].offset;
}
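A worked instance of the new unsigned signature, using only the two table rows visible above ({8433, 13} is code 24, {16625, 24} is code 25, so code 24 covers [8433, 16624]):

uint32_t code, n_extra, extra;
GetBlockLengthPrefixCode(10000, &code, &n_extra, &extra);
// The scan stops at code == 24 because 10000 < 16625;
// n_extra == 13, extra == 10000 - 8433 == 1567.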
inline void PrefixEncodeCopyDistance(int distance_code,
int num_direct_codes,
int postfix_bits,
inline void PrefixEncodeCopyDistance(size_t distance_code,
size_t num_direct_codes,
size_t postfix_bits,
uint16_t* code,
uint32_t* extra_bits) {
if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
@ -59,18 +59,19 @@ inline void PrefixEncodeCopyDistance(int distance_code,
*extra_bits = 0;
return;
}
distance_code -= kNumDistanceShortCodes + num_direct_codes;
distance_code += (1 << (postfix_bits + 2));
int bucket = Log2Floor(distance_code) - 1;
int postfix_mask = (1 << postfix_bits) - 1;
int postfix = distance_code & postfix_mask;
int prefix = (distance_code >> bucket) & 1;
int offset = (2 + prefix) << bucket;
int nbits = bucket - postfix_bits;
distance_code -= kNumDistanceShortCodes + num_direct_codes; /* >= 0 */
distance_code += (1 << (postfix_bits + 2)); /* > 0 */
size_t bucket = Log2FloorNonZero(distance_code) - 1;
size_t postfix_mask = (1 << postfix_bits) - 1;
size_t postfix = distance_code & postfix_mask;
size_t prefix = (distance_code >> bucket) & 1;
size_t offset = (2 + prefix) << bucket;
size_t nbits = bucket - postfix_bits;
*code = static_cast<uint16_t>(
(kNumDistanceShortCodes + num_direct_codes +
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
*extra_bits = (nbits << 24) | ((distance_code - offset) >> postfix_bits);
*extra_bits = static_cast<uint32_t>(
(nbits << 24) | ((distance_code - offset) >> postfix_bits));
}
} // namespace brotli
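To see the rebasing arithmetic at work, a traced call with num_direct_codes = 0 and postfix_bits = 0 on the smallest non-short input (the intermediate names match the locals above):

uint16_t code;
uint32_t extra_bits;
PrefixEncodeCopyDistance(16, 0, 0, &code, &extra_bits);
// distance_code: 16 - 16 == 0, then += (1 << 2), giving 4  (> 0)
// bucket  = Log2FloorNonZero(4) - 1 = 1
// postfix = 0, prefix = (4 >> 1) & 1 = 0
// offset  = (2 + 0) << 1 = 4, nbits = 1 - 0 = 1
// => code == 16, extra_bits == (1 << 24) | 0  (one extra bit, value 0)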

View File

@ -16,26 +16,34 @@
namespace brotli {
// A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
// data in a circular manner: writing a byte writes it to
// `position() % (1 << window_bits)'. For convenience, the RingBuffer array
// contains another copy of the first `1 << tail_bits' bytes:
// buffer_[i] == buffer_[i + (1 << window_bits)] if i < (1 << tail_bits).
// data in a circular manner: writing a byte writes it to:
// `position() % (1 << window_bits)'.
// For convenience, the RingBuffer array contains another copy of the
// first `1 << tail_bits' bytes:
// buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
// and another copy of the last two bytes:
// buffer_[-1] == buffer_[(1 << window_bits) - 1] and
// buffer_[-2] == buffer_[(1 << window_bits) - 2].
class RingBuffer {
public:
RingBuffer(int window_bits, int tail_bits)
: size_((size_t(1) << window_bits)),
mask_((size_t(1) << window_bits) - 1),
tail_size_(size_t(1) << tail_bits),
: size_(1u << window_bits),
mask_((1u << window_bits) - 1),
tail_size_(1u << tail_bits),
pos_(0) {
static const int kSlackForEightByteHashingEverywhere = 7;
static const size_t kSlackForEightByteHashingEverywhere = 7;
const size_t buflen = size_ + tail_size_;
buffer_ = new uint8_t[buflen + kSlackForEightByteHashingEverywhere];
for (int i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
data_ = new uint8_t[2 + buflen + kSlackForEightByteHashingEverywhere];
buffer_ = data_ + 2;
for (size_t i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
buffer_[buflen + i] = 0;
}
// Initialize the last two bytes and their copy to zero.
buffer_[-2] = buffer_[size_ - 2] = 0;
buffer_[-1] = buffer_[size_ - 1] = 0;
}
~RingBuffer() {
delete [] buffer_;
delete [] data_;
}
// Push bytes into the ring buffer.
@ -56,7 +64,12 @@ class RingBuffer {
memcpy(&buffer_[0], bytes + (size_ - masked_pos),
n - (size_ - masked_pos));
}
pos_ += n;
buffer_[-2] = buffer_[size_ - 2];
buffer_[-1] = buffer_[size_ - 1];
pos_ += static_cast<uint32_t>(n);
if (pos_ > (1u << 30)) { /* Wrap, but preserve not-a-first-lap feature. */
pos_ = (pos_ & ((1u << 30) - 1)) | (1u << 30);
}
}
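The wrap above cannot disturb the physical write position: mask_ covers at most the low 25 bits for Brotli's window sizes, far below bit 30, and keeping bit 30 set means position() never again looks like a first lap. A small numeric check of that invariant:

uint32_t pos = (1u << 31) + 5;  // pos_ grown past 2^31
uint32_t wrapped = (pos & ((1u << 30) - 1)) | (1u << 30);
// wrapped == (1u << 30) + 5: the low bits (hence pos & mask_) are
// unchanged, and bit 30 stays set, so wrapped is still large and nonzero.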
void Reset() {
@ -64,10 +77,10 @@ class RingBuffer {
}
// Logical cursor position in the ring buffer.
size_t position() const { return pos_; }
uint32_t position() const { return pos_; }
// Bit mask for getting the physical position for a logical position.
size_t mask() const { return mask_; }
uint32_t mask() const { return mask_; }
uint8_t *start() { return &buffer_[0]; }
const uint8_t *start() const { return &buffer_[0]; }
@ -83,14 +96,16 @@ class RingBuffer {
}
// Size of the ringbuffer is (1 << window_bits) + tail_size_.
const size_t size_;
const size_t mask_;
const size_t tail_size_;
const uint32_t size_;
const uint32_t mask_;
const uint32_t tail_size_;
// Position to write in the ring buffer.
size_t pos_;
// The actual ring buffer containing the data and the copy of the beginning
// as a tail.
uint32_t pos_;
// The actual ring buffer containing the copy of the last two bytes, the data,
// and the copy of the beginning as a tail.
uint8_t *data_;
// The start of the ringbuffer.
uint8_t *buffer_;
};
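A minimal usage sketch for the interface above (Write is the push method from the elided hunk; the size parameters are arbitrary):

RingBuffer rb(/* window_bits= */ 16, /* tail_bits= */ 6);
const uint8_t bytes[5] = { 'h', 'e', 'l', 'l', 'o' };
rb.Write(bytes, 5);
uint8_t h = rb.start()[(rb.position() - 5) & rb.mask()];  // 'h'
// Invariants kept by the constructor and Write, with size == 1 << 16:
//   start()[-1] == start()[size - 1], start()[-2] == start()[size - 2]
//   start()[i + size] == start()[i] for i < (1 << tail_bits)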

View File

@ -22,19 +22,24 @@ inline uint32_t Hash(const uint8_t *data) {
return h >> (32 - kDictNumBits);
}
inline void AddMatch(int distance, int len, int len_code, int* matches) {
matches[len] = std::min(matches[len], (distance << 5) + len_code);
inline void AddMatch(size_t distance, size_t len, size_t len_code,
uint32_t* matches) {
uint32_t match = static_cast<uint32_t>((distance << 5) + len_code);
matches[len] = std::min(matches[len], match);
}
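The packed layout is unchanged by the type switch: bits [5, 32) hold the (transform-adjusted) dictionary word id and bits [0, 5) the base word length, which fits because dictionary words are at most 24 bytes long. Decoding, the inverse of AddMatch:

uint32_t packed = matches[len];
if (packed != kInvalidMatch) {
  size_t word_id  = packed >> 5;  // word id, including the transform offset
  size_t len_code = packed & 31;  // base dictionary word length (<= 24)
}
// std::min in AddMatch therefore keeps the lowest word id seen for a
// given match length.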
inline int DictMatchLength(const uint8_t* data, int id, int len, int maxlen) {
const int offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
inline size_t DictMatchLength(const uint8_t* data,
size_t id,
size_t len,
size_t maxlen) {
const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,
std::min(len, maxlen));
}
inline bool IsMatch(DictWord w, const uint8_t* data, int max_length) {
inline bool IsMatch(DictWord w, const uint8_t* data, size_t max_length) {
if (w.len > max_length) return false;
const int offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;
const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;
const uint8_t* dict = &kBrotliDictionary[offset];
if (w.transform == 0) {
// Match against base dictionary word.
@ -44,12 +49,12 @@ inline bool IsMatch(DictWord w, const uint8_t* data, int max_length) {
// Note that there are only ASCII uppercase words in the lookup table.
return (dict[0] >= 'a' && dict[0] <= 'z' &&
(dict[0] ^ 32) == data[0] &&
FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1) ==
w.len - 1);
FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
w.len - 1u);
} else {
// Match against uppercase all transform.
// Note that there are only ASCII uppercase words in the lookup table.
for (int i = 0; i < w.len; ++i) {
for (size_t i = 0; i < w.len; ++i) {
if (dict[i] >= 'a' && dict[i] <= 'z') {
if ((dict[i] ^ 32) != data[i]) return false;
} else {
@ -61,22 +66,22 @@ inline bool IsMatch(DictWord w, const uint8_t* data, int max_length) {
}
bool FindAllStaticDictionaryMatches(const uint8_t* data,
int min_length,
int max_length,
int* matches) {
size_t min_length,
size_t max_length,
uint32_t* matches) {
bool found_match = false;
uint32_t key = Hash(data);
uint32_t bucket = kStaticDictionaryBuckets[key];
size_t key = Hash(data);
size_t bucket = kStaticDictionaryBuckets[key];
if (bucket != 0) {
int num = bucket & 0xff;
int offset = bucket >> 8;
for (int i = 0; i < num; ++i) {
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const int l = w.len;
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
const int id = w.idx;
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0) {
const int matchlen = DictMatchLength(data, id, l, max_length);
const size_t matchlen = DictMatchLength(data, id, l, max_length);
// Transform "" + kIdentity + ""
if (matchlen == l) {
AddMatch(id, l, l, matches);
@ -93,9 +98,10 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
found_match = true;
}
// Transform "" + kOmitLastN + "" (N = 2 .. 9)
int minlen = std::max<int>(min_length, l - 9);
int maxlen = std::min<int>(matchlen, l - 2);
for (int len = minlen; len <= maxlen; ++len) {
size_t minlen = min_length;
if (l > 9) minlen = std::max(minlen, l - 9);
size_t maxlen = std::min(matchlen, l - 2);
for (size_t len = minlen; len <= maxlen; ++len) {
AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
found_match = true;
}
@ -250,8 +256,9 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
}
}
} else {
// Set t=0 for kUppercaseFirst and t=1 for kUppercaseAll transform.
const int t = w.transform - 10;
// Set t=false for kUppercaseFirst and
// t=true otherwise (kUppercaseAll) transform.
const bool t = w.transform != kUppercaseFirst;
if (!IsMatch(w, data, max_length)) {
continue;
}
@ -299,13 +306,13 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
bool is_space = (data[0] == ' ');
key = Hash(&data[1]);
bucket = kStaticDictionaryBuckets[key];
int num = bucket & 0xff;
int offset = bucket >> 8;
for (int i = 0; i < num; ++i) {
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const int l = w.len;
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
const int id = w.idx;
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0) {
if (!IsMatch(w, &data[1], max_length - 1)) {
continue;
@ -342,8 +349,9 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
}
}
} else if (is_space) {
// Set t=0 for kUppercaseFirst and t=1 for kUppercaseAll transform.
const int t = w.transform - 10;
// Set t=false for kUppercaseFirst and
// t=true otherwise (kUppercaseAll) transform.
const bool t = w.transform != kUppercaseFirst;
if (!IsMatch(w, &data[1], max_length - 1)) {
continue;
}
@ -358,7 +366,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
if (s[0] == ' ') {
AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches);
} else if (s[0] == ',') {
if (t == 0) {
if (!t) {
AddMatch(id + 109 * n, l + 2, l, matches);
}
if (s[1] == ' ') {
@ -386,19 +394,19 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
(data[0] == 0xc2 && data[1] == 0xa0)) {
key = Hash(&data[2]);
bucket = kStaticDictionaryBuckets[key];
int num = bucket & 0xff;
int offset = bucket >> 8;
for (int i = 0; i < num; ++i) {
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const int l = w.len;
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
const int id = w.idx;
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
if (data[0] == 0xc2) {
AddMatch(id + 102 * n, l + 2, l, matches);
found_match = true;
} else if (l + 2 < max_length && data[l + 2] == ' ') {
int t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
AddMatch(id + t * n, l + 3, l, matches);
found_match = true;
}
@ -414,13 +422,13 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
data[3] == 'm' && data[4] == '/')) {
key = Hash(&data[5]);
bucket = kStaticDictionaryBuckets[key];
int num = bucket & 0xff;
int offset = bucket >> 8;
for (int i = 0; i < num; ++i) {
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const int l = w.len;
const int n = 1 << kBrotliDictionarySizeBitsByLength[l];
const int id = w.idx;
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
found_match = true;

View File

@ -13,8 +13,8 @@
namespace brotli {
static const int kMaxDictionaryMatchLen = 37;
static const int kInvalidMatch = 0xfffffff;
static const size_t kMaxDictionaryMatchLen = 37;
static const uint32_t kInvalidMatch = 0xfffffff;
// Matches data against static dictionary words, and for each length l
// for which a match is found, updates matches[l] to be the minimum possible
@ -23,9 +23,9 @@ static const int kInvalidMatch = 0xfffffff;
// matches array is at least kMaxDictionaryMatchLen + 1 long
// all elements are initialized to kInvalidMatch
bool FindAllStaticDictionaryMatches(const uint8_t* data,
int min_length,
int max_length,
int* matches);
size_t min_length,
size_t max_length,
uint32_t* matches);
} // namespace brotli
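A minimal calling sketch for the declaration above, following the stated requirements (the length-4 lower bound and the unpacking of matches[] entries are assumptions based on how the encoder uses this matcher, not shown in this header):

uint32_t matches[kMaxDictionaryMatchLen + 1];
for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
  matches[i] = kInvalidMatch;  // required initialization
}
if (FindAllStaticDictionaryMatches(data, 4, max_length, matches)) {
  for (size_t l = 4; l <= kMaxDictionaryMatchLen; ++l) {
    if (matches[l] == kInvalidMatch) continue;
    uint32_t word_id  = matches[l] >> 5;  // transformed word id
    uint32_t len_code = matches[l] & 31;  // base word length
    // ... turn (word_id, len_code, l) into a copy command ...
  }
}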

View File

@ -172,15 +172,11 @@ static const Transform kTransforms[] = {
static const size_t kNumTransforms =
sizeof(kTransforms) / sizeof(kTransforms[0]);
static const int kOmitFirstNTransforms[10] = {
0, 3, 11, 26, 34, 39, 40, 55, 0, 54
};
static const int kOmitLastNTransforms[10] = {
static const size_t kOmitLastNTransforms[10] = {
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
};
static int ToUpperCase(uint8_t *p, int len) {
static size_t ToUpperCase(uint8_t *p, size_t len) {
if (len == 1 || p[0] < 0xc0) {
if (p[0] >= 'a' && p[0] <= 'z') {
p[0] ^= 32;
@ -198,41 +194,50 @@ static int ToUpperCase(uint8_t *p, int len) {
return 3;
}
inline std::string ApplyTransform(
const Transform& t, const uint8_t* word, int len) {
std::string ret(t.prefix);
if (t.word_transform <= kOmitLast9) {
len -= t.word_transform;
inline std::string TransformWord(
WordTransformType transform_type, const uint8_t* word, size_t len) {
if (transform_type <= kOmitLast9) {
if (len <= transform_type) {
return std::string();
}
return std::string(word, word + len - transform_type);
}
if (len > 0) {
if (t.word_transform >= kOmitFirst1) {
const int skip = t.word_transform - (kOmitFirst1 - 1);
if (len > skip) {
ret += std::string(word + skip, word + len);
}
} else {
ret += std::string(word, word + len);
uint8_t *uppercase = reinterpret_cast<uint8_t*>(&ret[ret.size() - len]);
if (t.word_transform == kUppercaseFirst) {
ToUpperCase(uppercase, len);
} else if (t.word_transform == kUppercaseAll) {
while (len > 0) {
int step = ToUpperCase(uppercase, len);
uppercase += step;
len -= step;
}
}
if (transform_type >= kOmitFirst1) {
const size_t skip = transform_type - (kOmitFirst1 - 1);
if (len <= skip) {
return std::string();
}
return std::string(word + skip, word + len);
}
std::string ret = std::string(word, word + len);
uint8_t *uppercase = reinterpret_cast<uint8_t*>(&ret[0]);
if (transform_type == kUppercaseFirst) {
ToUpperCase(uppercase, len);
} else if (transform_type == kUppercaseAll) {
size_t position = 0;
while (position < len) {
size_t step = ToUpperCase(uppercase, len - position);
uppercase += step;
position += step;
}
}
ret += std::string(t.suffix);
return ret;
}
inline std::string GetTransformedDictionaryWord(int len_code, int word_id) {
int num_words = 1 << kBrotliDictionarySizeBitsByLength[len_code];
int offset = kBrotliDictionaryOffsetsByLength[len_code];
int t = word_id / num_words;
int word_idx = word_id % num_words;
inline std::string ApplyTransform(
const Transform& t, const uint8_t* word, size_t len) {
return std::string(t.prefix) +
TransformWord(t.word_transform, word, len) + std::string(t.suffix);
}
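Worked instances of the refactored helpers (the enum arithmetic above implies kOmitLastN == N and the kOmitFirstN values counting up from kOmitFirst1; the sample transform triple is hypothetical):

// word = "hello", len = 5:
//   TransformWord(kOmitLast2,      word, 5) == "hel"
//   TransformWord(kOmitFirst1,     word, 5) == "ello"
//   TransformWord(kUppercaseFirst, word, 5) == "Hello"
//   TransformWord(kUppercaseAll,   word, 5) == "HELLO"
// With t = { prefix " ", kUppercaseFirst, suffix "." }:
//   ApplyTransform(t, word, 5) == " Hello."
// The explicit len <= transform_type / len <= skip guards matter now that
// len is size_t: the word shrinks to empty instead of wrapping around.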
inline std::string GetTransformedDictionaryWord(size_t len_code,
size_t word_id) {
size_t num_words = 1u << kBrotliDictionarySizeBitsByLength[len_code];
size_t offset = kBrotliDictionaryOffsetsByLength[len_code];
size_t t = word_id / num_words;
size_t word_idx = word_id % num_words;
offset += len_code * word_idx;
const uint8_t* word = &kBrotliDictionary[offset];
return ApplyTransform(kTransforms[t], word, len_code);

View File

@ -14,7 +14,7 @@ namespace brotli {
namespace {
int ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
// ASCII
if ((input[0] & 0x80) == 0) {
*symbol = input[0];
@ -72,7 +72,8 @@ bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
size_t i = 0;
while (i < length) {
int symbol;
int bytes_read = ParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);
size_t bytes_read = ParseAsUTF8(
&symbol, &data[(pos + i) & mask], length - i);
i += bytes_read;
if (symbol < 0x110000) size_utf8 += bytes_read;
}
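For context, the counting loop above feeds a byte-ratio test. A standalone sketch of the heuristic on a contiguous buffer (the 0.75 cutoff is an assumption here; the library's constant may differ):

bool LooksMostlyUTF8(const uint8_t* data, size_t length) {
  size_t size_utf8 = 0;
  size_t i = 0;
  while (i < length) {
    int symbol;
    size_t bytes_read = ParseAsUTF8(&symbol, &data[i], length - i);
    i += bytes_read;
    if (symbol < 0x110000) size_utf8 += bytes_read;  // valid code point
  }
  return static_cast<double>(size_utf8) >
         0.75 * static_cast<double>(length);
}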

View File

@ -34,9 +34,9 @@ namespace brotli {
//
// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
// and locate the rest in BYTE+1, BYTE+2, etc.
inline void WriteBits(int n_bits,
inline void WriteBits(size_t n_bits,
uint64_t bits,
int * __restrict pos,
size_t * __restrict pos,
uint8_t * __restrict array) {
#ifdef BIT_WRITER_DEBUG
printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos);
@ -57,11 +57,11 @@ inline void WriteBits(int n_bits,
#else
// implicit & 0xff is assumed for uint8_t arithmetic
uint8_t *array_pos = &array[*pos >> 3];
const int bits_reserved_in_first_byte = (*pos & 7);
const size_t bits_reserved_in_first_byte = (*pos & 7);
bits <<= bits_reserved_in_first_byte;
*array_pos++ |= static_cast<uint8_t>(bits);
for (int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte;
bits_left_to_write >= 1;
for (size_t bits_left_to_write = n_bits + bits_reserved_in_first_byte;
bits_left_to_write >= 9;
bits_left_to_write -= 8) {
bits >>= 8;
*array_pos++ = static_cast<uint8_t>(bits);
@ -71,7 +71,7 @@ inline void WriteBits(int n_bits,
#endif
}
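The rewritten loop bound (start at n_bits + bits_reserved_in_first_byte, run while >= 9) is the unsigned-safe form of the old n_bits - 8 + bits_reserved_in_first_byte >= 1, which would wrap for small n_bits now that the counters are size_t. A worked instance of the layout described in the header comment:

size_t pos = 0;
uint8_t storage[16] = { 0 };  // bytes being written into must start as zero
WriteBits(5, 7, &pos, storage);
// storage[0] == 0x07 (bits 0..4), pos == 5
WriteBits(4, 0xF, &pos, storage);
// three bits land in storage[0] (now 0xE7), one in storage[1] (0x01);
// pos == 9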
inline void WriteBitsPrepareStorage(int pos, uint8_t *array) {
inline void WriteBitsPrepareStorage(size_t pos, uint8_t *array) {
#ifdef BIT_WRITER_DEBUG
printf("WriteBitsPrepareStorage %10d\n", pos);
#endif