Convert encoder to plain C.

Eugene Kliuchnikov 2016-06-13 11:01:04 +02:00
parent 63111b21e8
commit b972c67780
62 changed files with 8103 additions and 6090 deletions


@@ -81,7 +81,7 @@
#endif
#if defined(BROTLI_DEBUG) || defined(BROTLI_ENABLE_LOG)
static inline void BrotliDump(const char* f, int l, const char* fn) {
static BROTLI_INLINE void BrotliDump(const char* f, int l, const char* fn) {
fprintf(stderr, "%s:%d (%s)\n", f, l, fn);
fflush(stderr);
}
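/* Illustrative use (not part of this diff): the helper is meant to be called
   with the predefined location macros, typically through a wrapper such as
     #define BROTLI_DUMP() BrotliDump(__FILE__, __LINE__, __FUNCTION__)
   so every trace line reports the caller's file, line and function. */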


@@ -2,10 +2,10 @@
include ../shared.mk
OBJS = backward_references.o block_splitter.o brotli_bit_stream.o \
compress_fragment.o compress_fragment_two_pass.o encode.o \
encode_parallel.o entropy_encode.o histogram.o literal_cost.o \
metablock.o static_dict.o streams.o utf8_util.o
OBJS = backward_references.o bit_cost.o block_splitter.o brotli_bit_stream.o \
cluster.o compress_fragment.o compress_fragment_two_pass.o compressor.o \
encode.o encode_parallel.o entropy_encode.o histogram.o literal_cost.o \
memory.o metablock.o static_dict.o streams.o utf8_util.o
all : $(OBJS)
clean :

File diff suppressed because it is too large.


@@ -9,63 +9,37 @@
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
#include <vector>
#include "../common/types.h"
#include "./command.h"
#include "./hash.h"
#include "./memory.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
/* "commands" points to the next output command to write to, "*num_commands" is
initially the total amount of commands output by previous
CreateBackwardReferences calls, and must be incremented by the amount written
by this call. */
void CreateBackwardReferences(size_t num_bytes,
size_t position,
bool is_last,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const int quality,
const int lgwin,
Hashers* hashers,
int hash_type,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
size_t* num_literals);
static const float kInfinity = std::numeric_limits<float>::infinity();
struct ZopfliNode {
ZopfliNode(void) : length(1),
distance(0),
insert_length(0),
cost(kInfinity) {}
inline uint32_t copy_length() const {
return length & 0xffffff;
}
inline uint32_t length_code() const {
const uint32_t modifier = length >> 24;
return copy_length() + 9u - modifier;
}
inline uint32_t copy_distance() const {
return distance & 0x1ffffff;
}
inline uint32_t distance_code() const {
const uint32_t short_code = distance >> 25;
return short_code == 0 ? copy_distance() + 15 : short_code - 1;
}
inline uint32_t command_length() const {
return copy_length() + insert_length;
}
BROTLI_INTERNAL void BrotliCreateBackwardReferences(MemoryManager* m,
size_t num_bytes,
size_t position,
int is_last,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const int quality,
const int lgwin,
Hashers* hashers,
int hash_type,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
size_t* num_literals);
typedef struct ZopfliNode {
/* best length to get up to this byte (not including this byte itself);
   the highest 8 bits are used to reconstruct the length code */
uint32_t length;
@@ -75,9 +49,21 @@ struct ZopfliNode {
uint32_t distance;
/* number of literal inserts before this copy */
uint32_t insert_length;
/* This union holds information used by dynamic programming. During the
   forward pass |cost| is used to store the goal function. On the path
   backtracing pass |next| is assigned the offset to the next node on the
   path. As |cost| is not used after the forward pass, it shares memory
   with |next|. */
union {
/* Smallest cost to get to this byte from the beginning, as found so far. */
float cost;
};
float cost;
/* Offset to the next node on the path. Equals the command_length() of the
   next node on the path; for the last node it equals BROTLI_UINT32_MAX */
uint32_t next;
} u;
} ZopfliNode;
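/* For reference, the removed C++ accessors above translate into plain-C
   helpers along the following lines (a sketch; actual names and placement
   in the .c file may differ): */
static BROTLI_INLINE uint32_t ZopfliNodeCopyLength(const ZopfliNode* self) {
  return self->length & 0xffffff;
}
static BROTLI_INLINE uint32_t ZopfliNodeLengthCode(const ZopfliNode* self) {
  const uint32_t modifier = self->length >> 24;
  return ZopfliNodeCopyLength(self) + 9u - modifier;
}
static BROTLI_INLINE uint32_t ZopfliNodeDistanceCode(const ZopfliNode* self) {
  const uint32_t short_code = self->distance >> 25;
  return short_code == 0 ? (self->distance & 0x1ffffff) + 15 : short_code - 1;
}
static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) {
  return ZopfliNodeCopyLength(self) + self->insert_length;
}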
BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
/* Computes the shortest path of commands from position to at most
position + num_bytes.
@@ -92,26 +78,28 @@ struct ZopfliNode {
(1) nodes[i].copy_length() >= 2
(2) nodes[i].command_length() <= i and
(3) nodes[i - nodes[i].command_length()].cost < kInfinity */
void ZopfliComputeShortestPath(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const size_t max_backward_limit,
const int* dist_cache,
Hashers::H10* hasher,
ZopfliNode* nodes,
std::vector<uint32_t>* path);
BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(
MemoryManager* m, size_t num_bytes, size_t position,
const uint8_t* ringbuffer, size_t ringbuffer_mask, const int quality,
const size_t max_backward_limit, const int* dist_cache, H10* hasher,
ZopfliNode* nodes);
void ZopfliCreateCommands(const size_t num_bytes,
const size_t block_start,
const size_t max_backward_limit,
const std::vector<uint32_t>& path,
const ZopfliNode* nodes,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_literals);
BROTLI_INTERNAL void BrotliZopfliCreateCommands(const size_t num_bytes,
const size_t block_start,
const size_t max_backward_limit,
const ZopfliNode* nodes,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_literals);
} // namespace brotli
/* Maximum distance, see section 9.1. of the spec. */
static BROTLI_INLINE size_t MaxBackwardLimit(int lgwin) {
return (1u << lgwin) - 16;
}
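/* Example: with the default window lgwin = 22 this gives
   (1u << 22) - 16 = 4194288 bytes of maximum backward distance. */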
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_BACKWARD_REFERENCES_H_ */


@@ -0,0 +1,156 @@
/* NOLINT(build/header_guard) */
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: FN */
#define Hasher HASHER()
static void FN(CreateBackwardReferences)(MemoryManager* m,
size_t num_bytes,
size_t position,
int is_last,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const int quality,
const int lgwin,
Hasher* hasher,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
size_t* num_literals) {
/* Set maximum distance, see section 9.1. of the spec. */
const size_t max_backward_limit = MaxBackwardLimit(lgwin);
const Command * const orig_commands = commands;
size_t insert_length = *last_insert_len;
const size_t pos_end = position + num_bytes;
const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
position + num_bytes - FN(StoreLookahead)() + 1 : position;
/* Window size for the speed-up heuristics on random data. */
const size_t random_heuristics_window_size = quality < 9 ? 64 : 512;
size_t apply_random_heuristics = position + random_heuristics_window_size;
/* Minimum score to accept a backward reference. */
const double kMinScore = 4.0;
FN(Init)(m, hasher, ringbuffer, lgwin, position, num_bytes, is_last);
if (BROTLI_IS_OOM(m)) return;
FN(StitchToPreviousBlock)(hasher, num_bytes, position,
ringbuffer, ringbuffer_mask);
while (position + FN(HashTypeLength)() < pos_end) {
size_t max_length = pos_end - position;
size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
size_t best_len = 0;
size_t best_len_code = 0;
size_t best_dist = 0;
double best_score = kMinScore;
int is_match_found = FN(FindLongestMatch)(hasher, ringbuffer,
ringbuffer_mask, dist_cache, position, max_length, max_distance,
&best_len, &best_len_code, &best_dist, &best_score);
if (is_match_found) {
/* Found a match. Let's look for something even better ahead. */
int delayed_backward_references_in_row = 0;
--max_length;
for (;; --max_length) {
size_t best_len_2 =
quality < 5 ? BROTLI_MIN(size_t, best_len - 1, max_length) : 0;
size_t best_len_code_2 = 0;
size_t best_dist_2 = 0;
double best_score_2 = kMinScore;
const double cost_diff_lazy = 7.0;
max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
is_match_found = FN(FindLongestMatch)(hasher, ringbuffer,
ringbuffer_mask, dist_cache, position + 1, max_length, max_distance,
&best_len_2, &best_len_code_2, &best_dist_2, &best_score_2);
if (is_match_found && best_score_2 >= best_score + cost_diff_lazy) {
/* Ok, let's just write one byte for now and start a match from the
next byte. */
++position;
++insert_length;
best_len = best_len_2;
best_len_code = best_len_code_2;
best_dist = best_dist_2;
best_score = best_score_2;
if (++delayed_backward_references_in_row < 4 &&
position + FN(HashTypeLength)() < pos_end) {
continue;
}
}
break;
}
apply_random_heuristics =
position + 2 * best_len + random_heuristics_window_size;
max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
{
/* The first 16 codes are special shortcodes,
and the minimum offset is 1. */
size_t distance_code =
ComputeDistanceCode(best_dist, max_distance, quality, dist_cache);
if (best_dist <= max_distance && distance_code > 0) {
dist_cache[3] = dist_cache[2];
dist_cache[2] = dist_cache[1];
dist_cache[1] = dist_cache[0];
dist_cache[0] = (int)best_dist;
}
InitCommand(
commands++, insert_length, best_len, best_len_code, distance_code);
}
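/* Short code 0 means "same distance as the most recent one", so the cache
   is rotated only when the match is not an exact repeat of dist_cache[0]
   (and the distance actually fits in the window). */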
*num_literals += insert_length;
insert_length = 0;
/* Put the hash keys into the table, if there are enough bytes left.
Depending on the hasher implementation, it can push all positions
in the given range or only a subset of them. */
FN(StoreRange)(hasher, ringbuffer, ringbuffer_mask, position + 2,
BROTLI_MIN(size_t, position + best_len, store_end));
position += best_len;
} else {
++insert_length;
++position;
/* If we have not seen matches for a long time, we can skip some
   match lookups. Unsuccessful match lookups are very expensive,
   and this kind of heuristic speeds up compression quite
   a lot. */
if (position > apply_random_heuristics) {
/* Going through incompressible data, jump. */
if (position >
apply_random_heuristics + 4 * random_heuristics_window_size) {
/* It has been a long time since we saw a copy, so we assume
   this data is not compressible and store hashes less
   often. Hashes of incompressible data are less likely to
   turn out to be useful in the future, too, so we store fewer of
   them, to avoid flooding the hash table that serves good,
   compressible data. */
const size_t kMargin =
BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
size_t pos_jump =
BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
for (; position < pos_jump; position += 4) {
FN(Store)(hasher, ringbuffer, ringbuffer_mask, position);
insert_length += 4;
}
} else {
const size_t kMargin =
BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
size_t pos_jump =
BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
for (; position < pos_jump; position += 2) {
FN(Store)(hasher, ringbuffer, ringbuffer_mask, position);
insert_length += 2;
}
}
}
}
}
insert_length += pos_end - position;
*last_insert_len = insert_length;
*num_commands += (size_t)(commands - orig_commands);
}
#undef Hasher
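/* How such a single-file "template" is instantiated (the hasher tag below is
   hypothetical; the real instantiations live with the hasher definitions):

     #define HASHER() H2
     #define FN(X) X ## H2
     (include of this file)  =>  defines CreateBackwardReferencesH2()
     #undef FN
     #undef HASHER

   FN(X) token-pastes the hasher tag onto X, so each hasher gets its own
   monomorphic copy of the search loop; bit_cost.c below applies the same
   pattern to the three histogram types. */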

enc/bit_cost.c (new file, 35 lines)

@@ -0,0 +1,35 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Functions to estimate the bit cost of Huffman trees. */
#include "./bit_cost.h"
#include "../common/constants.h"
#include "../common/types.h"
#include "./fast_log.h"
#include "./histogram.h"
#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#define FN(X) X ## Literal
#include "./bit_cost_inc.h" /* NOLINT(build/include) */
#undef FN
#define FN(X) X ## Command
#include "./bit_cost_inc.h" /* NOLINT(build/include) */
#undef FN
#define FN(X) X ## Distance
#include "./bit_cost_inc.h" /* NOLINT(build/include) */
#undef FN
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif


@@ -10,13 +10,16 @@
#define BROTLI_ENC_BIT_COST_H_
#include "../common/types.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./histogram.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static inline double ShannonEntropy(const uint32_t *population, size_t size,
size_t *total) {
static BROTLI_INLINE double ShannonEntropy(const uint32_t *population,
size_t size, size_t *total) {
size_t sum = 0;
double retval = 0;
const uint32_t *population_end = population + size;
@@ -27,135 +30,34 @@ static inline double ShannonEntropy(const uint32_t *population, size_t size,
while (population < population_end) {
p = *population++;
sum += p;
retval -= static_cast<double>(p) * FastLog2(p);
retval -= (double)p * FastLog2(p);
odd_number_of_elements_left:
p = *population++;
sum += p;
retval -= static_cast<double>(p) * FastLog2(p);
retval -= (double)p * FastLog2(p);
}
if (sum) retval += static_cast<double>(sum) * FastLog2(sum);
if (sum) retval += (double)sum * FastLog2(sum);
*total = sum;
return retval;
}
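/* Worked example (illustrative): for population {2, 2, 4}, sum = 8 and
   retval = 8*log2(8) - (2*log2(2) + 2*log2(2) + 4*log2(4))
          = 24 - 12 = 12 bits total, i.e. 1.5 bits per symbol. */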
static inline double BitsEntropy(const uint32_t *population, size_t size) {
static BROTLI_INLINE double BitsEntropy(
const uint32_t *population, size_t size) {
size_t sum;
double retval = ShannonEntropy(population, size, &sum);
if (retval < sum) {
/* At least one bit per literal is needed. */
retval = static_cast<double>(sum);
retval = (double)sum;
}
return retval;
}
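/* E.g. the degenerate population {1000} has Shannon entropy 0; the clamp
   charges 1000 bits instead, one bit per emitted symbol. */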
template<int kSize>
double PopulationCost(const Histogram<kSize>& histogram) {
static const double kOneSymbolHistogramCost = 12;
static const double kTwoSymbolHistogramCost = 20;
static const double kThreeSymbolHistogramCost = 28;
static const double kFourSymbolHistogramCost = 37;
if (histogram.total_count_ == 0) {
return kOneSymbolHistogramCost;
}
int count = 0;
int s[5];
for (int i = 0; i < kSize; ++i) {
if (histogram.data_[i] > 0) {
s[count] = i;
++count;
if (count > 4) break;
}
}
if (count == 1) {
return kOneSymbolHistogramCost;
}
if (count == 2) {
return (kTwoSymbolHistogramCost +
static_cast<double>(histogram.total_count_));
}
if (count == 3) {
const uint32_t histo0 = histogram.data_[s[0]];
const uint32_t histo1 = histogram.data_[s[1]];
const uint32_t histo2 = histogram.data_[s[2]];
const uint32_t histomax = std::max(histo0, std::max(histo1, histo2));
return (kThreeSymbolHistogramCost +
2 * (histo0 + histo1 + histo2) - histomax);
}
if (count == 4) {
uint32_t histo[4];
for (int i = 0; i < 4; ++i) {
histo[i] = histogram.data_[s[i]];
}
// Sort
for (int i = 0; i < 4; ++i) {
for (int j = i + 1; j < 4; ++j) {
if (histo[j] > histo[i]) {
std::swap(histo[j], histo[i]);
}
}
}
const uint32_t h23 = histo[2] + histo[3];
const uint32_t histomax = std::max(h23, histo[0]);
return (kFourSymbolHistogramCost +
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
}
BROTLI_INTERNAL double BrotliPopulationCostLiteral(const HistogramLiteral*);
BROTLI_INTERNAL double BrotliPopulationCostCommand(const HistogramCommand*);
BROTLI_INTERNAL double BrotliPopulationCostDistance(const HistogramDistance*);
// In this loop we compute the entropy of the histogram and simultaneously
// build a simplified histogram of the code length codes where we use the
// zero repeat code 17, but we don't use the non-zero repeat code 16.
double bits = 0;
size_t max_depth = 1;
uint32_t depth_histo[kCodeLengthCodes] = { 0 };
const double log2total = FastLog2(histogram.total_count_);
for (size_t i = 0; i < kSize;) {
if (histogram.data_[i] > 0) {
// Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
// = log2(total_count) - log2(count(symbol))
double log2p = log2total - FastLog2(histogram.data_[i]);
// Approximate the bit depth by round(-log2(P(symbol)))
size_t depth = static_cast<size_t>(log2p + 0.5);
bits += histogram.data_[i] * log2p;
if (depth > 15) {
depth = 15;
}
if (depth > max_depth) {
max_depth = depth;
}
++depth_histo[depth];
++i;
} else {
// Compute the run length of zeros and add the appropriate number of 0 and
// 17 code length codes to the code length code histogram.
uint32_t reps = 1;
for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
++reps;
}
i += reps;
if (i == kSize) {
// Don't add any cost for the last zero run, since these are encoded
// only implicitly.
break;
}
if (reps < 3) {
depth_histo[0] += reps;
} else {
reps -= 2;
while (reps > 0) {
++depth_histo[17];
// Add the 3 extra bits for the 17 code length code.
bits += 3;
reps >>= 3;
}
}
}
}
// Add the estimated encoding cost of the code length code histogram.
bits += static_cast<double>(18 + 2 * max_depth);
// Add the entropy of the code length code histogram.
bits += BitsEntropy(depth_histo, kCodeLengthCodes);
return bits;
}
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_BIT_COST_H_ */

enc/bit_cost_inc.h (new file, 127 lines)

@@ -0,0 +1,127 @@
/* NOLINT(build/header_guard) */
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: FN */
#define HistogramType FN(Histogram)
double FN(BrotliPopulationCost)(const HistogramType* histogram) {
static const double kOneSymbolHistogramCost = 12;
static const double kTwoSymbolHistogramCost = 20;
static const double kThreeSymbolHistogramCost = 28;
static const double kFourSymbolHistogramCost = 37;
const size_t data_size = FN(HistogramDataSize)();
int count = 0;
size_t s[5];
double bits = 0.0;
size_t i;
if (histogram->total_count_ == 0) {
return kOneSymbolHistogramCost;
}
for (i = 0; i < data_size; ++i) {
if (histogram->data_[i] > 0) {
s[count] = i;
++count;
if (count > 4) break;
}
}
if (count == 1) {
return kOneSymbolHistogramCost;
}
if (count == 2) {
return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
}
if (count == 3) {
const uint32_t histo0 = histogram->data_[s[0]];
const uint32_t histo1 = histogram->data_[s[1]];
const uint32_t histo2 = histogram->data_[s[2]];
const uint32_t histomax =
BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2));
return (kThreeSymbolHistogramCost +
2 * (histo0 + histo1 + histo2) - histomax);
}
if (count == 4) {
uint32_t histo[4];
uint32_t h23;
uint32_t histomax;
for (i = 0; i < 4; ++i) {
histo[i] = histogram->data_[s[i]];
}
/* Sort */
for (i = 0; i < 4; ++i) {
size_t j;
for (j = i + 1; j < 4; ++j) {
if (histo[j] > histo[i]) {
BROTLI_SWAP(uint32_t, histo, j, i);
}
}
}
h23 = histo[2] + histo[3];
histomax = BROTLI_MAX(uint32_t, h23, histo[0]);
return (kFourSymbolHistogramCost +
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
}
{
/* In this loop we compute the entropy of the histogram and simultaneously
build a simplified histogram of the code length codes where we use the
zero repeat code 17, but we don't use the non-zero repeat code 16. */
size_t max_depth = 1;
uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
const double log2total = FastLog2(histogram->total_count_);
for (i = 0; i < data_size;) {
if (histogram->data_[i] > 0) {
/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
= log2(total_count) - log2(count(symbol)) */
double log2p = log2total - FastLog2(histogram->data_[i]);
/* Approximate the bit depth by round(-log2(P(symbol))) */
size_t depth = (size_t)(log2p + 0.5);
bits += histogram->data_[i] * log2p;
if (depth > 15) {
depth = 15;
}
if (depth > max_depth) {
max_depth = depth;
}
++depth_histo[depth];
++i;
} else {
/* Compute the run length of zeros and add the appropriate number of 0
and 17 code length codes to the code length code histogram. */
uint32_t reps = 1;
size_t k;
for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
++reps;
}
i += reps;
if (i == data_size) {
/* Don't add any cost for the last zero run, since these are encoded
only implicitly. */
break;
}
if (reps < 3) {
depth_histo[0] += reps;
} else {
reps -= 2;
while (reps > 0) {
++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
/* Add the 3 extra bits for the 17 code length code. */
bits += 3;
reps >>= 3;
}
}
}
}
/* Add the estimated encoding cost of the code length code histogram. */
bits += (double)(18 + 2 * max_depth);
/* Add the entropy of the code length code histogram. */
bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
}
return bits;
}
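/* Quick check of the shortcut costs (illustrative): a histogram with exactly
   two distinct symbols and total_count_ == 100 returns 20 + 100 = 120 bits,
   i.e. one bit per symbol plus a small header charge, without entering the
   full entropy loop. */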
#undef HistogramType

enc/block_encoder_inc.h (new file, 33 lines)

@@ -0,0 +1,33 @@
/* NOLINT(build/header_guard) */
/* Copyright 2014 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: FN */
#define HistogramType FN(Histogram)
/* Creates entropy codes for all block types and stores them to the bit
stream. */
static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self,
const HistogramType* histograms, const size_t histograms_size,
HuffmanTree* tree, size_t* storage_ix, uint8_t* storage) {
const size_t alphabet_size = self->alphabet_size_;
const size_t table_size = histograms_size * alphabet_size;
self->depths_ = BROTLI_ALLOC(m, uint8_t, table_size);
self->bits_ = BROTLI_ALLOC(m, uint16_t, table_size);
if (BROTLI_IS_OOM(m)) return;
{
size_t i;
for (i = 0; i < histograms_size; ++i) {
size_t ix = i * alphabet_size;
BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size, tree,
&self->depths_[ix], &self->bits_[ix], storage_ix, storage);
}
}
}
#undef HistogramType


@@ -9,18 +9,19 @@
#include "./block_splitter.h"
#include <assert.h>
#include <math.h>
#include <algorithm>
#include <cstring>
#include <vector>
#include <string.h> /* memcpy, memset */
#include "./bit_cost.h"
#include "./cluster.h"
#include "./command.h"
#include "./fast_log.h"
#include "./histogram.h"
#include "./memory.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static const size_t kMaxLiteralHistograms = 100;
static const size_t kMaxCommandHistograms = 50;
@@ -36,45 +37,43 @@ static const size_t kMinLengthForBlockSplitting = 128;
static const size_t kIterMulForRefining = 2;
static const size_t kMinItersForRefining = 100;
void CopyLiteralsToByteArray(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t offset,
const size_t mask,
std::vector<uint8_t>* literals) {
static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
/* Count how many we have. */
size_t total_length = 0;
for (size_t i = 0; i < num_commands; ++i) {
size_t i;
for (i = 0; i < num_commands; ++i) {
total_length += cmds[i].insert_len_;
}
if (total_length == 0) {
return;
}
return total_length;
}
// Allocate.
literals->resize(total_length);
// Loop again, and copy this time.
static void CopyLiteralsToByteArray(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t offset,
const size_t mask,
uint8_t* literals) {
size_t pos = 0;
size_t from_pos = offset & mask;
for (size_t i = 0; i < num_commands && pos < total_length; ++i) {
size_t i;
for (i = 0; i < num_commands; ++i) {
size_t insert_len = cmds[i].insert_len_;
if (from_pos + insert_len > mask) {
size_t head_size = mask + 1 - from_pos;
memcpy(&(*literals)[pos], data + from_pos, head_size);
memcpy(literals + pos, data + from_pos, head_size);
from_pos = 0;
pos += head_size;
insert_len -= head_size;
}
if (insert_len > 0) {
memcpy(&(*literals)[pos], data + from_pos, insert_len);
memcpy(literals + pos, data + from_pos, insert_len);
pos += insert_len;
}
from_pos = (from_pos + insert_len + cmds[i].copy_len()) & mask;
from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
}
}
inline static unsigned int MyRand(unsigned int* seed) {
static BROTLI_INLINE unsigned int MyRand(unsigned int* seed) {
*seed *= 16807U;
if (*seed == 0) {
*seed = 1;
@@ -82,424 +81,116 @@ inline static unsigned int MyRand(unsigned int* seed) {
return *seed;
}
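/* 16807 = 7^5 is the Park-Miller "minimal standard" LCG multiplier; here the
   product simply wraps modulo 2^32 on typical targets, and the zero check
   keeps the sequence from sticking at the fixed point 0. */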
template<typename HistogramType, typename DataType>
void InitialEntropyCodes(const DataType* data, size_t length,
size_t stride,
size_t num_histograms,
HistogramType* histograms) {
for (size_t i = 0; i < num_histograms; ++i) {
histograms[i].Clear();
}
unsigned int seed = 7;
size_t block_length = length / num_histograms;
for (size_t i = 0; i < num_histograms; ++i) {
size_t pos = length * i / num_histograms;
if (i != 0) {
pos += MyRand(&seed) % block_length;
}
if (pos + stride >= length) {
pos = length - stride - 1;
}
histograms[i].Add(data + pos, stride);
}
}
template<typename HistogramType, typename DataType>
void RandomSample(unsigned int* seed,
const DataType* data,
size_t length,
size_t stride,
HistogramType* sample) {
size_t pos = 0;
if (stride >= length) {
pos = 0;
stride = length;
} else {
pos = MyRand(seed) % (length - stride + 1);
}
sample->Add(data + pos, stride);
}
template<typename HistogramType, typename DataType>
void RefineEntropyCodes(const DataType* data, size_t length,
size_t stride,
size_t num_histograms,
HistogramType* histograms) {
size_t iters =
kIterMulForRefining * length / stride + kMinItersForRefining;
unsigned int seed = 7;
iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
for (size_t iter = 0; iter < iters; ++iter) {
HistogramType sample;
RandomSample(&seed, data, length, stride, &sample);
size_t ix = iter % num_histograms;
histograms[ix].AddHistogram(sample);
}
}
inline static double BitCost(size_t count) {
static BROTLI_INLINE double BitCost(size_t count) {
return count == 0 ? -2.0 : FastLog2(count);
}
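/* Consequence for FindBlocks (block_splitter_inc.h): a symbol absent from
   histogram j costs insert_cost[j] - BitCost(0) = FastLog2(total_count_) + 2
   bits, so entropy codes are penalized for symbols they have never seen. */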
// Assigns a block id from the range [0, vec.size()) to each data element
// in data[0..length) and fills in block_id[0..length) with the assigned values.
// Returns the number of blocks, i.e. one plus the number of block switches.
template<typename DataType, int kSize>
size_t FindBlocks(const DataType* data, const size_t length,
const double block_switch_bitcost,
const size_t num_histograms,
const Histogram<kSize>* histograms,
double* insert_cost,
double* cost,
uint8_t* switch_signal,
uint8_t *block_id) {
if (num_histograms <= 1) {
for (size_t i = 0; i < length; ++i) {
block_id[i] = 0;
}
return 1;
}
const size_t bitmaplen = (num_histograms + 7) >> 3;
assert(num_histograms <= 256);
memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * num_histograms);
for (size_t j = 0; j < num_histograms; ++j) {
insert_cost[j] = FastLog2(static_cast<uint32_t>(
histograms[j].total_count_));
}
for (size_t i = kSize; i != 0;) {
--i;
for (size_t j = 0; j < num_histograms; ++j) {
insert_cost[i * num_histograms + j] =
insert_cost[j] - BitCost(histograms[j].data_[i]);
}
}
memset(cost, 0, sizeof(cost[0]) * num_histograms);
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
// After each iteration of this loop, cost[k] will contain the difference
// between the minimum cost of arriving at the current byte position using
// entropy code k, and the minimum cost of arriving at the current byte
// position. This difference is capped at the block switch cost, and if it
// reaches block switch cost, it means that when we trace back from the last
// position, we need to switch here.
for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
size_t ix = byte_ix * bitmaplen;
size_t insert_cost_ix = data[byte_ix] * num_histograms;
double min_cost = 1e99;
for (size_t k = 0; k < num_histograms; ++k) {
// We are coding the symbol in data[byte_ix] with entropy code k.
cost[k] += insert_cost[insert_cost_ix + k];
if (cost[k] < min_cost) {
min_cost = cost[k];
block_id[byte_ix] = static_cast<uint8_t>(k);
}
}
double block_switch_cost = block_switch_bitcost;
// More blocks for the beginning.
if (byte_ix < 2000) {
block_switch_cost *= 0.77 + 0.07 * static_cast<double>(byte_ix) / 2000;
}
for (size_t k = 0; k < num_histograms; ++k) {
cost[k] -= min_cost;
if (cost[k] >= block_switch_cost) {
cost[k] = block_switch_cost;
const uint8_t mask = static_cast<uint8_t>(1u << (k & 7));
assert((k >> 3) < bitmaplen);
switch_signal[ix + (k >> 3)] |= mask;
}
}
}
// Now trace back from the last position and switch at the marked places.
size_t byte_ix = length - 1;
size_t ix = byte_ix * bitmaplen;
uint8_t cur_id = block_id[byte_ix];
size_t num_blocks = 1;
while (byte_ix > 0) {
--byte_ix;
ix -= bitmaplen;
const uint8_t mask = static_cast<uint8_t>(1u << (cur_id & 7));
assert((static_cast<size_t>(cur_id) >> 3) < bitmaplen);
if (switch_signal[ix + (cur_id >> 3)] & mask) {
if (cur_id != block_id[byte_ix]) {
cur_id = block_id[byte_ix];
++num_blocks;
}
}
block_id[byte_ix] = cur_id;
}
return num_blocks;
#define HISTOGRAMS_PER_BATCH 64
#define CLUSTERS_PER_BATCH 16
#define FN(X) X ## Literal
#define DataType uint8_t
/* NOLINTNEXTLINE(build/include) */
#include "./block_splitter_inc.h"
#undef DataType
#undef FN
#define FN(X) X ## Command
#define DataType uint16_t
/* NOLINTNEXTLINE(build/include) */
#include "./block_splitter_inc.h"
#undef FN
#define FN(X) X ## Distance
/* NOLINTNEXTLINE(build/include) */
#include "./block_splitter_inc.h"
#undef DataType
#undef FN
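/* Note: DataType is left defined as uint16_t across the Command and Distance
   instantiations, so distance prefixes are split with the same 16-bit symbol
   type as commands; only literals use uint8_t. */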
void BrotliInitBlockSplit(BlockSplit* self) {
self->num_types = 0;
self->num_blocks = 0;
self->types = 0;
self->lengths = 0;
self->types_alloc_size = 0;
self->lengths_alloc_size = 0;
}
static size_t RemapBlockIds(uint8_t* block_ids, const size_t length,
uint16_t* new_id, const size_t num_histograms) {
static const uint16_t kInvalidId = 256;
for (size_t i = 0; i < num_histograms; ++i) {
new_id[i] = kInvalidId;
}
uint16_t next_id = 0;
for (size_t i = 0; i < length; ++i) {
assert(block_ids[i] < num_histograms);
if (new_id[block_ids[i]] == kInvalidId) {
new_id[block_ids[i]] = next_id++;
}
}
for (size_t i = 0; i < length; ++i) {
block_ids[i] = static_cast<uint8_t>(new_id[block_ids[i]]);
assert(block_ids[i] < num_histograms);
}
assert(next_id <= num_histograms);
return next_id;
void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
BROTLI_FREE(m, self->types);
BROTLI_FREE(m, self->lengths);
}
template<typename HistogramType, typename DataType>
void BuildBlockHistograms(const DataType* data, const size_t length,
const uint8_t* block_ids,
const size_t num_histograms,
HistogramType* histograms) {
for (size_t i = 0; i < num_histograms; ++i) {
histograms[i].Clear();
}
for (size_t i = 0; i < length; ++i) {
histograms[block_ids[i]].Add(data[i]);
}
}
template<typename HistogramType, typename DataType>
void ClusterBlocks(const DataType* data, const size_t length,
const size_t num_blocks,
uint8_t* block_ids,
BlockSplit* split) {
static const size_t kMaxNumberOfBlockTypes = 256;
static const size_t kHistogramsPerBatch = 64;
static const size_t kClustersPerBatch = 16;
std::vector<uint32_t> histogram_symbols(num_blocks);
std::vector<uint32_t> block_lengths(num_blocks);
size_t block_idx = 0;
for (size_t i = 0; i < length; ++i) {
assert(block_idx < num_blocks);
++block_lengths[block_idx];
if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
++block_idx;
}
}
assert(block_idx == num_blocks);
const size_t expected_num_clusters =
kClustersPerBatch *
(num_blocks + kHistogramsPerBatch - 1) / kHistogramsPerBatch;
std::vector<HistogramType> all_histograms;
std::vector<uint32_t> cluster_size;
all_histograms.reserve(expected_num_clusters);
cluster_size.reserve(expected_num_clusters);
size_t num_clusters = 0;
std::vector<HistogramType> histograms(
std::min(num_blocks, kHistogramsPerBatch));
size_t max_num_pairs = kHistogramsPerBatch * kHistogramsPerBatch / 2;
std::vector<HistogramPair> pairs(max_num_pairs + 1);
size_t pos = 0;
for (size_t i = 0; i < num_blocks; i += kHistogramsPerBatch) {
const size_t num_to_combine = std::min(num_blocks - i, kHistogramsPerBatch);
uint32_t sizes[kHistogramsPerBatch];
uint32_t clusters[kHistogramsPerBatch];
uint32_t symbols[kHistogramsPerBatch];
uint32_t remap[kHistogramsPerBatch];
for (size_t j = 0; j < num_to_combine; ++j) {
histograms[j].Clear();
for (size_t k = 0; k < block_lengths[i + j]; ++k) {
histograms[j].Add(data[pos++]);
}
histograms[j].bit_cost_ = PopulationCost(histograms[j]);
symbols[j] = clusters[j] = static_cast<uint32_t>(j);
sizes[j] = 1;
}
size_t num_new_clusters = HistogramCombine(
&histograms[0], sizes, symbols, clusters, &pairs[0], num_to_combine,
num_to_combine, kHistogramsPerBatch, max_num_pairs);
for (size_t j = 0; j < num_new_clusters; ++j) {
all_histograms.push_back(histograms[clusters[j]]);
cluster_size.push_back(sizes[clusters[j]]);
remap[clusters[j]] = static_cast<uint32_t>(j);
}
for (size_t j = 0; j < num_to_combine; ++j) {
histogram_symbols[i + j] =
static_cast<uint32_t>(num_clusters) + remap[symbols[j]];
}
num_clusters += num_new_clusters;
assert(num_clusters == cluster_size.size());
assert(num_clusters == all_histograms.size());
}
max_num_pairs =
std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
pairs.resize(max_num_pairs + 1);
std::vector<uint32_t> clusters(num_clusters);
for (size_t i = 0; i < num_clusters; ++i) {
clusters[i] = static_cast<uint32_t>(i);
}
size_t num_final_clusters =
HistogramCombine(&all_histograms[0], &cluster_size[0],
&histogram_symbols[0],
&clusters[0], &pairs[0], num_clusters,
num_blocks, kMaxNumberOfBlockTypes, max_num_pairs);
static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
std::vector<uint32_t> new_index(num_clusters, kInvalidIndex);
uint32_t next_index = 0;
pos = 0;
for (size_t i = 0; i < num_blocks; ++i) {
HistogramType histo;
for (size_t j = 0; j < block_lengths[i]; ++j) {
histo.Add(data[pos++]);
}
uint32_t best_out =
i == 0 ? histogram_symbols[0] : histogram_symbols[i - 1];
double best_bits = HistogramBitCostDistance(
histo, all_histograms[best_out]);
for (size_t j = 0; j < num_final_clusters; ++j) {
const double cur_bits = HistogramBitCostDistance(
histo, all_histograms[clusters[j]]);
if (cur_bits < best_bits) {
best_bits = cur_bits;
best_out = clusters[j];
}
}
histogram_symbols[i] = best_out;
if (new_index[best_out] == kInvalidIndex) {
new_index[best_out] = next_index++;
}
}
uint8_t max_type = 0;
uint32_t cur_length = 0;
block_idx = 0;
split->types.resize(num_blocks);
split->lengths.resize(num_blocks);
for (size_t i = 0; i < num_blocks; ++i) {
cur_length += block_lengths[i];
if (i + 1 == num_blocks ||
histogram_symbols[i] != histogram_symbols[i + 1]) {
const uint8_t id = static_cast<uint8_t>(new_index[histogram_symbols[i]]);
split->types[block_idx] = id;
split->lengths[block_idx] = cur_length;
max_type = std::max(max_type, id);
cur_length = 0;
++block_idx;
}
}
split->types.resize(block_idx);
split->lengths.resize(block_idx);
split->num_types = static_cast<size_t>(max_type) + 1;
}
template<int kSize, typename DataType>
void SplitByteVector(const std::vector<DataType>& data,
const size_t literals_per_histogram,
const size_t max_histograms,
const size_t sampling_stride_length,
const double block_switch_cost,
BlockSplit* split) {
if (data.empty()) {
split->num_types = 1;
return;
} else if (data.size() < kMinLengthForBlockSplitting) {
split->num_types = 1;
split->types.push_back(0);
split->lengths.push_back(static_cast<uint32_t>(data.size()));
return;
}
size_t num_histograms = data.size() / literals_per_histogram + 1;
if (num_histograms > max_histograms) {
num_histograms = max_histograms;
}
Histogram<kSize>* histograms = new Histogram<kSize>[num_histograms];
// Find good entropy codes.
InitialEntropyCodes(&data[0], data.size(),
sampling_stride_length,
num_histograms, histograms);
RefineEntropyCodes(&data[0], data.size(),
sampling_stride_length,
num_histograms, histograms);
// Find a good path through literals with the good entropy codes.
std::vector<uint8_t> block_ids(data.size());
size_t num_blocks;
const size_t bitmaplen = (num_histograms + 7) >> 3;
double* insert_cost = new double[kSize * num_histograms];
double *cost = new double[num_histograms];
uint8_t* switch_signal = new uint8_t[data.size() * bitmaplen];
uint16_t* new_id = new uint16_t[num_histograms];
for (size_t i = 0; i < 10; ++i) {
num_blocks = FindBlocks(&data[0], data.size(),
block_switch_cost,
num_histograms, histograms,
insert_cost, cost, switch_signal,
&block_ids[0]);
num_histograms = RemapBlockIds(&block_ids[0], data.size(),
new_id, num_histograms);
BuildBlockHistograms(&data[0], data.size(), &block_ids[0],
num_histograms, histograms);
}
delete[] insert_cost;
delete[] cost;
delete[] switch_signal;
delete[] new_id;
delete[] histograms;
ClusterBlocks<Histogram<kSize> >(&data[0], data.size(), num_blocks,
&block_ids[0], split);
}
void SplitBlock(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t pos,
const size_t mask,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split) {
void BrotliSplitBlock(MemoryManager* m,
const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t pos,
const size_t mask,
const int quality,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split) {
{
size_t literals_count = CountLiterals(cmds, num_commands);
uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
if (BROTLI_IS_OOM(m)) return;
/* Create a continuous array of literals. */
std::vector<uint8_t> literals;
CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, &literals);
CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
/* Create the block split on the array of literals.
Literal histograms have alphabet size 256. */
SplitByteVector<256>(
literals,
SplitByteVectorLiteral(
m, literals, literals_count,
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
kLiteralStrideLength, kLiteralBlockSwitchCost,
kLiteralStrideLength, kLiteralBlockSwitchCost, quality,
literal_split);
if (BROTLI_IS_OOM(m)) return;
BROTLI_FREE(m, literals);
}
{
/* Compute prefix codes for commands. */
std::vector<uint16_t> insert_and_copy_codes(num_commands);
for (size_t i = 0; i < num_commands; ++i) {
uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
size_t i;
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < num_commands; ++i) {
insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
}
/* Create the block split on the array of command prefixes. */
SplitByteVector<kNumCommandPrefixes>(
insert_and_copy_codes,
SplitByteVectorCommand(
m, insert_and_copy_codes, num_commands,
kSymbolsPerCommandHistogram, kMaxCommandHistograms,
kCommandStrideLength, kCommandBlockSwitchCost,
kCommandStrideLength, kCommandBlockSwitchCost, quality,
insert_and_copy_split);
if (BROTLI_IS_OOM(m)) return;
/* TODO: reuse for distances? */
BROTLI_FREE(m, insert_and_copy_codes);
}
{
/* Create a continuous array of distance prefixes. */
std::vector<uint16_t> distance_prefixes(num_commands);
size_t pos = 0;
for (size_t i = 0; i < num_commands; ++i) {
const Command& cmd = cmds[i];
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
distance_prefixes[pos++] = cmd.dist_prefix_;
uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
size_t j = 0;
size_t i;
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < num_commands; ++i) {
const Command* cmd = &cmds[i];
if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
distance_prefixes[j++] = cmd->dist_prefix_;
}
}
distance_prefixes.resize(pos);
/* Create the block split on the array of distance prefixes. */
SplitByteVector<kNumDistancePrefixes>(
distance_prefixes,
SplitByteVectorDistance(
m, distance_prefixes, j,
kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
kCommandStrideLength, kDistanceBlockSwitchCost,
kCommandStrideLength, kDistanceBlockSwitchCost, quality,
dist_split);
if (BROTLI_IS_OOM(m)) return;
BROTLI_FREE(m, distance_prefixes);
}
}
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif


@@ -9,53 +9,42 @@
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
#define BROTLI_ENC_BLOCK_SPLITTER_H_
#include <vector>
#include "../common/types.h"
#include "./command.h"
#include "./metablock.h"
#include "./memory.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
struct BlockSplitIterator {
explicit BlockSplitIterator(const BlockSplit& split)
: split_(split), idx_(0), type_(0), length_(0) {
if (!split.lengths.empty()) {
length_ = split.lengths[0];
}
}
typedef struct BlockSplit {
size_t num_types; /* Number of distinct block types */
size_t num_blocks; /* Number of values in types and lengths */
uint8_t* types;
uint32_t* lengths;
void Next(void) {
if (length_ == 0) {
++idx_;
type_ = split_.types[idx_];
length_ = split_.lengths[idx_];
}
--length_;
}
size_t types_alloc_size;
size_t lengths_alloc_size;
} BlockSplit;
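/* Illustration (invented values): num_types = 2, num_blocks = 3,
   types = {0, 1, 0}, lengths = {5, 3, 7} describes a 15-symbol stream whose
   first 5 symbols use block type 0, the next 3 use type 1, and the last 7
   reuse type 0. */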
const BlockSplit& split_;
size_t idx_;
size_t type_;
size_t length_;
};
BROTLI_INTERNAL void BrotliInitBlockSplit(BlockSplit* self);
BROTLI_INTERNAL void BrotliDestroyBlockSplit(MemoryManager* m,
BlockSplit* self);
void CopyLiteralsToByteArray(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t offset,
const size_t mask,
std::vector<uint8_t>* literals);
BROTLI_INTERNAL void BrotliSplitBlock(MemoryManager* m,
const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t offset,
const size_t mask,
const int quality,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split);
void SplitBlock(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t offset,
const size_t mask,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split);
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_BLOCK_SPLITTER_H_ */

enc/block_splitter_inc.h (new file, 431 lines)

@@ -0,0 +1,431 @@
/* NOLINT(build/header_guard) */
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: FN, DataType */
#define HistogramType FN(Histogram)
static void FN(InitialEntropyCodes)(const DataType* data, size_t length,
size_t stride,
size_t num_histograms,
HistogramType* histograms) {
unsigned int seed = 7;
size_t block_length = length / num_histograms;
size_t i;
FN(ClearHistograms)(histograms, num_histograms);
for (i = 0; i < num_histograms; ++i) {
size_t pos = length * i / num_histograms;
if (i != 0) {
pos += MyRand(&seed) % block_length;
}
if (pos + stride >= length) {
pos = length - stride - 1;
}
FN(HistogramAddVector)(&histograms[i], data + pos, stride);
}
}
static void FN(RandomSample)(unsigned int* seed,
const DataType* data,
size_t length,
size_t stride,
HistogramType* sample) {
size_t pos = 0;
if (stride >= length) {
pos = 0;
stride = length;
} else {
pos = MyRand(seed) % (length - stride + 1);
}
FN(HistogramAddVector)(sample, data + pos, stride);
}
static void FN(RefineEntropyCodes)(const DataType* data, size_t length,
size_t stride,
size_t num_histograms,
HistogramType* histograms) {
size_t iters =
kIterMulForRefining * length / stride + kMinItersForRefining;
unsigned int seed = 7;
size_t iter;
iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
for (iter = 0; iter < iters; ++iter) {
HistogramType sample;
FN(HistogramClear)(&sample);
FN(RandomSample)(&seed, data, length, stride, &sample);
FN(HistogramAddHistogram)(&histograms[iter % num_histograms], &sample);
}
}
/* Assigns a block id from the range [0, num_histograms) to each data element
in data[0..length) and fills in block_id[0..length) with the assigned values.
Returns the number of blocks, i.e. one plus the number of block switches. */
static size_t FN(FindBlocks)(const DataType* data, const size_t length,
const double block_switch_bitcost,
const size_t num_histograms,
const HistogramType* histograms,
double* insert_cost,
double* cost,
uint8_t* switch_signal,
uint8_t *block_id) {
const size_t data_size = FN(HistogramDataSize)();
const size_t bitmaplen = (num_histograms + 7) >> 3;
size_t num_blocks = 1;
size_t i;
size_t j;
assert(num_histograms <= 256);
if (num_histograms <= 1) {
for (i = 0; i < length; ++i) {
block_id[i] = 0;
}
return 1;
}
memset(insert_cost, 0, sizeof(insert_cost[0]) * data_size * num_histograms);
for (i = 0; i < num_histograms; ++i) {
insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_);
}
for (i = data_size; i != 0;) {
--i;
for (j = 0; j < num_histograms; ++j) {
insert_cost[i * num_histograms + j] =
insert_cost[j] - BitCost(histograms[j].data_[i]);
}
}
memset(cost, 0, sizeof(cost[0]) * num_histograms);
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
/* After each iteration of this loop, cost[k] will contain the difference
between the minimum cost of arriving at the current byte position using
entropy code k, and the minimum cost of arriving at the current byte
position. This difference is capped at the block switch cost, and if it
reaches block switch cost, it means that when we trace back from the last
position, we need to switch here. */
for (i = 0; i < length; ++i) {
const size_t byte_ix = i;
size_t ix = byte_ix * bitmaplen;
size_t insert_cost_ix = data[byte_ix] * num_histograms;
double min_cost = 1e99;
double block_switch_cost = block_switch_bitcost;
size_t k;
for (k = 0; k < num_histograms; ++k) {
/* We are coding the symbol in data[byte_ix] with entropy code k. */
cost[k] += insert_cost[insert_cost_ix + k];
if (cost[k] < min_cost) {
min_cost = cost[k];
block_id[byte_ix] = (uint8_t)k;
}
}
/* More blocks for the beginning. */
if (byte_ix < 2000) {
block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000;
}
for (k = 0; k < num_histograms; ++k) {
cost[k] -= min_cost;
if (cost[k] >= block_switch_cost) {
const uint8_t mask = (uint8_t)(1u << (k & 7));
cost[k] = block_switch_cost;
assert((k >> 3) < bitmaplen);
switch_signal[ix + (k >> 3)] |= mask;
}
}
}
{ /* Trace back from the last position and switch at the marked places. */
size_t byte_ix = length - 1;
size_t ix = byte_ix * bitmaplen;
uint8_t cur_id = block_id[byte_ix];
while (byte_ix > 0) {
const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
assert(((size_t)cur_id >> 3) < bitmaplen);
--byte_ix;
ix -= bitmaplen;
if (switch_signal[ix + (cur_id >> 3)] & mask) {
if (cur_id != block_id[byte_ix]) {
cur_id = block_id[byte_ix];
++num_blocks;
}
}
block_id[byte_ix] = cur_id;
}
}
return num_blocks;
}
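/* In recurrence form, the loop above maintains, for each entropy code k and
   symbol s = data[i] (writing ic for insert_cost):

     cost[k] <- min(cost[k] + ic[s][k] - min_j(cost[j] + ic[s][j]),
                    block_switch_cost)

   and records a switch bit whenever the cap is hit. */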
static size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length,
uint16_t* new_id, const size_t num_histograms) {
static const uint16_t kInvalidId = 256;
uint16_t next_id = 0;
size_t i;
for (i = 0; i < num_histograms; ++i) {
new_id[i] = kInvalidId;
}
for (i = 0; i < length; ++i) {
assert(block_ids[i] < num_histograms);
if (new_id[block_ids[i]] == kInvalidId) {
new_id[block_ids[i]] = next_id++;
}
}
for (i = 0; i < length; ++i) {
block_ids[i] = (uint8_t)new_id[block_ids[i]];
assert(block_ids[i] < num_histograms);
}
assert(next_id <= num_histograms);
return next_id;
}
static void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
const uint8_t* block_ids,
const size_t num_histograms,
HistogramType* histograms) {
size_t i;
FN(ClearHistograms)(histograms, num_histograms);
for (i = 0; i < length; ++i) {
FN(HistogramAdd)(&histograms[block_ids[i]], data[i]);
}
}
static void FN(ClusterBlocks)(MemoryManager* m,
const DataType* data, const size_t length,
const size_t num_blocks,
uint8_t* block_ids,
BlockSplit* split) {
uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
uint32_t* block_lengths = BROTLI_ALLOC(m, uint32_t, num_blocks);
const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
(num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
size_t all_histograms_size = 0;
size_t all_histograms_capacity = expected_num_clusters;
HistogramType* all_histograms =
BROTLI_ALLOC(m, HistogramType, all_histograms_capacity);
size_t cluster_size_size = 0;
size_t cluster_size_capacity = expected_num_clusters;
uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity);
size_t num_clusters = 0;
HistogramType* histograms = BROTLI_ALLOC(m, HistogramType,
BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH));
size_t max_num_pairs =
HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2;
size_t pairs_capacity = max_num_pairs + 1;
HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity);
size_t pos = 0;
uint32_t* clusters;
size_t num_final_clusters;
static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
uint32_t* new_index;
uint8_t max_type = 0;
size_t i;
if (BROTLI_IS_OOM(m)) return;
memset(block_lengths, 0, num_blocks * sizeof(uint32_t));
{
size_t block_idx = 0;
for (i = 0; i < length; ++i) {
assert(block_idx < num_blocks);
++block_lengths[block_idx];
if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
++block_idx;
}
}
assert(block_idx == num_blocks);
}
for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) {
const size_t num_to_combine =
BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH);
uint32_t sizes[HISTOGRAMS_PER_BATCH];
uint32_t new_clusters[HISTOGRAMS_PER_BATCH];
uint32_t symbols[HISTOGRAMS_PER_BATCH];
uint32_t remap[HISTOGRAMS_PER_BATCH];
size_t num_new_clusters;
size_t j;
for (j = 0; j < num_to_combine; ++j) {
size_t k;
FN(HistogramClear)(&histograms[j]);
for (k = 0; k < block_lengths[i + j]; ++k) {
FN(HistogramAdd)(&histograms[j], data[pos++]);
}
histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]);
symbols[j] = new_clusters[j] = (uint32_t)j;
sizes[j] = 1;
}
num_new_clusters = FN(BrotliHistogramCombine)(
histograms, sizes, symbols, new_clusters, pairs, num_to_combine,
num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
all_histograms_capacity, all_histograms_size + num_new_clusters);
BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size,
cluster_size_capacity, cluster_size_size + num_new_clusters);
if (BROTLI_IS_OOM(m)) return;
for (j = 0; j < num_new_clusters; ++j) {
all_histograms[all_histograms_size++] = histograms[new_clusters[j]];
cluster_size[cluster_size_size++] = sizes[new_clusters[j]];
remap[new_clusters[j]] = (uint32_t)j;
}
for (j = 0; j < num_to_combine; ++j) {
histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]];
}
num_clusters += num_new_clusters;
assert(num_clusters == cluster_size_size);
assert(num_clusters == all_histograms_size);
}
BROTLI_FREE(m, histograms);
max_num_pairs =
BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
if (pairs_capacity < max_num_pairs + 1) {
BROTLI_FREE(m, pairs);
pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1);
if (BROTLI_IS_OOM(m)) return;
}
clusters = BROTLI_ALLOC(m, uint32_t, num_clusters);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < num_clusters; ++i) {
clusters[i] = (uint32_t)i;
}
num_final_clusters = FN(BrotliHistogramCombine)(
all_histograms, cluster_size, histogram_symbols, clusters, pairs,
num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
max_num_pairs);
BROTLI_FREE(m, pairs);
BROTLI_FREE(m, cluster_size);
new_index = BROTLI_ALLOC(m, uint32_t, num_clusters);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex;
pos = 0;
{
uint32_t next_index = 0;
for (i = 0; i < num_blocks; ++i) {
HistogramType histo;
size_t j;
uint32_t best_out;
double best_bits;
FN(HistogramClear)(&histo);
for (j = 0; j < block_lengths[i]; ++j) {
FN(HistogramAdd)(&histo, data[pos++]);
}
best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
best_bits =
FN(BrotliHistogramBitCostDistance)(&histo, &all_histograms[best_out]);
for (j = 0; j < num_final_clusters; ++j) {
const double cur_bits = FN(BrotliHistogramBitCostDistance)(
&histo, &all_histograms[clusters[j]]);
if (cur_bits < best_bits) {
best_bits = cur_bits;
best_out = clusters[j];
}
}
histogram_symbols[i] = best_out;
if (new_index[best_out] == kInvalidIndex) {
new_index[best_out] = next_index++;
}
}
}
BROTLI_FREE(m, clusters);
BROTLI_FREE(m, all_histograms);
BROTLI_ENSURE_CAPACITY(
m, uint8_t, split->types, split->types_alloc_size, num_blocks);
BROTLI_ENSURE_CAPACITY(
m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks);
if (BROTLI_IS_OOM(m)) return;
{
uint32_t cur_length = 0;
size_t block_idx = 0;
for (i = 0; i < num_blocks; ++i) {
cur_length += block_lengths[i];
if (i + 1 == num_blocks ||
histogram_symbols[i] != histogram_symbols[i + 1]) {
const uint8_t id = (uint8_t)new_index[histogram_symbols[i]];
split->types[block_idx] = id;
split->lengths[block_idx] = cur_length;
max_type = BROTLI_MAX(uint8_t, max_type, id);
cur_length = 0;
++block_idx;
}
}
split->num_blocks = block_idx;
split->num_types = (size_t)max_type + 1;
}
BROTLI_FREE(m, new_index);
BROTLI_FREE(m, block_lengths);
BROTLI_FREE(m, histogram_symbols);
}
static void FN(SplitByteVector)(MemoryManager* m,
const DataType* data, const size_t length,
const size_t literals_per_histogram,
const size_t max_histograms,
const size_t sampling_stride_length,
const double block_switch_cost,
const int quality,
BlockSplit* split) {
const size_t data_size = FN(HistogramDataSize)();
size_t num_histograms = length / literals_per_histogram + 1;
HistogramType* histograms;
if (num_histograms > max_histograms) {
num_histograms = max_histograms;
}
if (length == 0) {
split->num_types = 1;
return;
} else if (length < kMinLengthForBlockSplitting) {
BROTLI_ENSURE_CAPACITY(m, uint8_t,
split->types, split->types_alloc_size, split->num_blocks + 1);
BROTLI_ENSURE_CAPACITY(m, uint32_t,
split->lengths, split->lengths_alloc_size, split->num_blocks + 1);
if (BROTLI_IS_OOM(m)) return;
split->num_types = 1;
split->types[split->num_blocks] = 0;
split->lengths[split->num_blocks] = (uint32_t)length;
split->num_blocks++;
return;
}
histograms = BROTLI_ALLOC(m, HistogramType, num_histograms);
if (BROTLI_IS_OOM(m)) return;
/* Find good entropy codes. */
FN(InitialEntropyCodes)(data, length,
sampling_stride_length,
num_histograms, histograms);
FN(RefineEntropyCodes)(data, length,
sampling_stride_length,
num_histograms, histograms);
{
/* Find a good path through literals with the good entropy codes. */
uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
size_t num_blocks;
const size_t bitmaplen = (num_histograms + 7) >> 3;
double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms);
double* cost = BROTLI_ALLOC(m, double, num_histograms);
uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen);
uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms);
const size_t iters = quality <= 10 ? 3 : 10;
size_t i;
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < iters; ++i) {
num_blocks = FN(FindBlocks)(data, length,
block_switch_cost,
num_histograms, histograms,
insert_cost, cost, switch_signal,
block_ids);
num_histograms = FN(RemapBlockIds)(block_ids, length,
new_id, num_histograms);
FN(BuildBlockHistograms)(data, length, block_ids,
num_histograms, histograms);
}
BROTLI_FREE(m, insert_cost);
BROTLI_FREE(m, cost);
BROTLI_FREE(m, switch_signal);
BROTLI_FREE(m, new_id);
BROTLI_FREE(m, histograms);
FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split);
if (BROTLI_IS_OOM(m)) return;
BROTLI_FREE(m, block_ids);
}
}
#undef HistogramType

File diff suppressed because it is too large.


@@ -16,164 +16,92 @@
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
#include <vector>
#include "../common/types.h"
#include "./command.h"
#include "./context.h"
#include "./entropy_encode.h"
#include "./memory.h"
#include "./metablock.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
/* All Store functions here will use a storage_ix, which is always the bit
position for the current storage. */
// Stores a number between 0 and 255.
void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage);
BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
HuffmanTree* tree, size_t *storage_ix, uint8_t *storage);
// Stores the compressed meta-block header.
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreCompressedMetaBlockHeader(bool final_block,
size_t length,
size_t* storage_ix,
uint8_t* storage);
// Stores the uncompressed meta-block header.
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
void StoreUncompressedMetaBlockHeader(size_t length,
size_t* storage_ix,
uint8_t* storage);
// Stores a context map where the histogram type is always the block type.
void StoreTrivialContextMap(size_t num_types,
size_t context_bits,
HuffmanTree* tree,
size_t* storage_ix,
uint8_t* storage);
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
const int num_codes,
const uint8_t *code_length_bitdepth,
size_t *storage_ix,
uint8_t *storage);
void StoreHuffmanTree(const uint8_t* depths, size_t num, HuffmanTree* tree,
size_t *storage_ix, uint8_t *storage);
// Builds a Huffman tree from histogram[0:length] into depth[0:length] and
// bits[0:length] and stores the encoded tree to the bit stream.
void BuildAndStoreHuffmanTree(const uint32_t *histogram,
const size_t length,
HuffmanTree* tree,
uint8_t* depth,
uint16_t* bits,
size_t* storage_ix,
uint8_t* storage);
void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
const size_t histogram_total,
const size_t max_bits,
uint8_t* depth,
uint16_t* bits,
size_t* storage_ix,
uint8_t* storage);
// Encodes the given context map to the bit stream. The number of different
// histogram ids is given by num_clusters.
void EncodeContextMap(const std::vector<uint32_t>& context_map,
size_t num_clusters,
HuffmanTree* tree,
size_t* storage_ix, uint8_t* storage);
// Data structure that stores everything that is needed to encode each block
// switch command.
struct BlockSplitCode {
std::vector<uint32_t> type_code;
std::vector<uint32_t> length_prefix;
std::vector<uint32_t> length_nextra;
std::vector<uint32_t> length_extra;
std::vector<uint8_t> type_depths;
std::vector<uint16_t> type_bits;
uint8_t length_depths[kNumBlockLenPrefixes];
uint16_t length_bits[kNumBlockLenPrefixes];
};
// Builds a BlockSplitCode data structure from the block split given by the
// vector of block types and block lengths and stores it to the bit stream.
void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
const std::vector<uint32_t>& lengths,
const size_t num_types,
BlockSplitCode* code,
size_t* storage_ix,
uint8_t* storage);
// Stores the block switch command with index block_ix to the bit stream.
void StoreBlockSwitch(const BlockSplitCode& code,
const size_t block_ix,
size_t* storage_ix,
uint8_t* storage);
BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
MemoryManager* m, const uint32_t* histogram, const size_t histogram_total,
const size_t max_bits, uint8_t* depth, uint16_t* bits, size_t* storage_ix,
uint8_t* storage);
/* REQUIRES: length > 0 */
/* REQUIRES: length <= (1 << 24) */
void StoreMetaBlock(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
bool final_block,
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits,
ContextType literal_context_mode,
const brotli::Command *commands,
size_t n_commands,
const MetaBlockSplit& mb,
size_t *storage_ix,
uint8_t *storage);
BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m,
const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
int is_final_block,
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits,
ContextType literal_context_mode,
const Command* commands,
size_t n_commands,
const MetaBlockSplit* mb,
size_t* storage_ix,
uint8_t* storage);
/* Stores the meta-block without doing any block splitting, just collects
one histogram per block category and uses that for entropy coding.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
void StoreMetaBlockTrivial(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
bool is_last,
const brotli::Command *commands,
size_t n_commands,
size_t *storage_ix,
uint8_t *storage);
BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m,
const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
int is_last,
const Command *commands,
size_t n_commands,
size_t* storage_ix,
uint8_t* storage);
/* Same as above, but uses static prefix codes for histograms with only a few
symbols, and uses static code length prefix codes for all other histograms.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
void StoreMetaBlockFast(const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
bool is_last,
const brotli::Command *commands,
size_t n_commands,
size_t *storage_ix,
uint8_t *storage);
BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m,
const uint8_t* input,
size_t start_pos,
size_t length,
size_t mask,
int is_last,
const Command *commands,
size_t n_commands,
size_t* storage_ix,
uint8_t* storage);
/* This is for storing uncompressed blocks (simple raw storage of
bytes-as-bytes).
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
void StoreUncompressedMetaBlock(bool final_block,
const uint8_t* input,
size_t position, size_t mask,
size_t len,
size_t* storage_ix,
uint8_t* storage);
BROTLI_INTERNAL void BrotliStoreUncompressedMetaBlock(
int is_final_block, const uint8_t* input, size_t position, size_t mask,
size_t len, size_t* storage_ix, uint8_t* storage);
/* Stores an empty metadata meta-block and syncs to a byte boundary. */
void StoreSyncMetaBlock(size_t* storage_ix, uint8_t* storage);
BROTLI_INTERNAL void BrotliStoreSyncMetaBlock(size_t* storage_ix,
uint8_t* storage);
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_BROTLI_BIT_STREAM_H_ */

enc/cluster.c Normal file
View File

@@ -0,0 +1,56 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Functions for clustering similar histograms together. */
#include "./cluster.h"
#include "../common/types.h"
#include "./bit_cost.h" /* BrotliPopulationCost */
#include "./fast_log.h"
#include "./histogram.h"
#include "./memory.h"
#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static BROTLI_INLINE int HistogramPairIsLess(
const HistogramPair* p1, const HistogramPair* p2) {
if (p1->cost_diff != p2->cost_diff) {
return (p1->cost_diff > p2->cost_diff) ? 1 : 0;
}
return ((p1->idx2 - p1->idx1) > (p2->idx2 - p2->idx1)) ? 1 : 0;
}
/* Returns entropy reduction of the context map when we combine two clusters. */
static BROTLI_INLINE double ClusterCostDiff(size_t size_a, size_t size_b) {
size_t size_c = size_a + size_b;
return (double)size_a * FastLog2(size_a) +
(double)size_b * FastLog2(size_b) -
(double)size_c * FastLog2(size_c);
}
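As a worked example (illustrative numbers, not from the source): merging two clusters of size 4 gives 4*FastLog2(4) + 4*FastLog2(4) - 8*FastLog2(8) = 8 + 8 - 24 = -8, i.e. the estimated context-map coding cost drops by 8 bits when the clusters are combined. The caller, FN(BrotliCompareAndPushToQueue), halves this value before subtracting the two histograms' bit costs.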
#define CODE(X) X
#define FN(X) X ## Literal
#include "./cluster_inc.h" /* NOLINT(build/include) */
#undef FN
#define FN(X) X ## Command
#include "./cluster_inc.h" /* NOLINT(build/include) */
#undef FN
#define FN(X) X ## Distance
#include "./cluster_inc.h" /* NOLINT(build/include) */
#undef FN
#undef CODE
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@@ -9,323 +9,40 @@
#ifndef BROTLI_ENC_CLUSTER_H_
#define BROTLI_ENC_CLUSTER_H_
#include <math.h>
#include <algorithm>
#include <utility>
#include <vector>
#include "../common/types.h"
#include "./bit_cost.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./histogram.h"
#include "./memory.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
struct HistogramPair {
typedef struct HistogramPair {
uint32_t idx1;
uint32_t idx2;
double cost_combo;
double cost_diff;
};
} HistogramPair;
inline bool operator<(const HistogramPair& p1, const HistogramPair& p2) {
if (p1.cost_diff != p2.cost_diff) {
return p1.cost_diff > p2.cost_diff;
}
return (p1.idx2 - p1.idx1) > (p2.idx2 - p2.idx1);
}
#define CODE(X) /* Declaration */;
// Returns entropy reduction of the context map when we combine two clusters.
inline double ClusterCostDiff(size_t size_a, size_t size_b) {
size_t size_c = size_a + size_b;
return static_cast<double>(size_a) * FastLog2(size_a) +
static_cast<double>(size_b) * FastLog2(size_b) -
static_cast<double>(size_c) * FastLog2(size_c);
}
#define FN(X) X ## Literal
#include "./cluster_inc.h" /* NOLINT(build/include) */
#undef FN
// Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
// it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue.
template<typename HistogramType>
void CompareAndPushToQueue(const HistogramType* out,
const uint32_t* cluster_size,
uint32_t idx1, uint32_t idx2,
size_t max_num_pairs,
HistogramPair* pairs,
size_t* num_pairs) {
if (idx1 == idx2) {
return;
}
if (idx2 < idx1) {
uint32_t t = idx2;
idx2 = idx1;
idx1 = t;
}
bool store_pair = false;
HistogramPair p;
p.idx1 = idx1;
p.idx2 = idx2;
p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
p.cost_diff -= out[idx1].bit_cost_;
p.cost_diff -= out[idx2].bit_cost_;
#define FN(X) X ## Command
#include "./cluster_inc.h" /* NOLINT(build/include) */
#undef FN
if (out[idx1].total_count_ == 0) {
p.cost_combo = out[idx2].bit_cost_;
store_pair = true;
} else if (out[idx2].total_count_ == 0) {
p.cost_combo = out[idx1].bit_cost_;
store_pair = true;
} else {
double threshold = *num_pairs == 0 ? 1e99 :
std::max(0.0, pairs[0].cost_diff);
HistogramType combo = out[idx1];
combo.AddHistogram(out[idx2]);
double cost_combo = PopulationCost(combo);
if (cost_combo < threshold - p.cost_diff) {
p.cost_combo = cost_combo;
store_pair = true;
}
}
if (store_pair) {
p.cost_diff += p.cost_combo;
if (*num_pairs > 0 && pairs[0] < p) {
// Replace the top of the queue if needed.
if (*num_pairs < max_num_pairs) {
pairs[*num_pairs] = pairs[0];
++(*num_pairs);
}
pairs[0] = p;
} else if (*num_pairs < max_num_pairs) {
pairs[*num_pairs] = p;
++(*num_pairs);
}
}
}
#define FN(X) X ## Distance
#include "./cluster_inc.h" /* NOLINT(build/include) */
#undef FN
template<typename HistogramType>
size_t HistogramCombine(HistogramType* out,
uint32_t* cluster_size,
uint32_t* symbols,
uint32_t* clusters,
HistogramPair* pairs,
size_t num_clusters,
size_t symbols_size,
size_t max_clusters,
size_t max_num_pairs) {
double cost_diff_threshold = 0.0;
size_t min_cluster_size = 1;
#undef CODE
// We maintain a vector of histogram pairs, with the property that the pair
// with the maximum bit cost reduction is the first.
size_t num_pairs = 0;
for (size_t idx1 = 0; idx1 < num_clusters; ++idx1) {
for (size_t idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
CompareAndPushToQueue(out, cluster_size, clusters[idx1], clusters[idx2],
max_num_pairs, &pairs[0], &num_pairs);
}
}
while (num_clusters > min_cluster_size) {
if (pairs[0].cost_diff >= cost_diff_threshold) {
cost_diff_threshold = 1e99;
min_cluster_size = max_clusters;
continue;
}
// Take the best pair from the top of heap.
uint32_t best_idx1 = pairs[0].idx1;
uint32_t best_idx2 = pairs[0].idx2;
out[best_idx1].AddHistogram(out[best_idx2]);
out[best_idx1].bit_cost_ = pairs[0].cost_combo;
cluster_size[best_idx1] += cluster_size[best_idx2];
for (size_t i = 0; i < symbols_size; ++i) {
if (symbols[i] == best_idx2) {
symbols[i] = best_idx1;
}
}
for (size_t i = 0; i < num_clusters; ++i) {
if (clusters[i] == best_idx2) {
memmove(&clusters[i], &clusters[i + 1],
(num_clusters - i - 1) * sizeof(clusters[0]));
break;
}
}
--num_clusters;
// Remove pairs intersecting the just combined best pair.
size_t copy_to_idx = 0;
for (size_t i = 0; i < num_pairs; ++i) {
HistogramPair& p = pairs[i];
if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
p.idx1 == best_idx2 || p.idx2 == best_idx2) {
// Remove invalid pair from the queue.
continue;
}
if (pairs[0] < p) {
// Replace the top of the queue if needed.
HistogramPair front = pairs[0];
pairs[0] = p;
pairs[copy_to_idx] = front;
} else {
pairs[copy_to_idx] = p;
}
++copy_to_idx;
}
num_pairs = copy_to_idx;
// Push new pairs formed with the combined histogram to the heap.
for (size_t i = 0; i < num_clusters; ++i) {
CompareAndPushToQueue(out, cluster_size, best_idx1, clusters[i],
max_num_pairs, &pairs[0], &num_pairs);
}
}
return num_clusters;
}
// -----------------------------------------------------------------------------
// Histogram refinement
// Returns the bit cost of moving the histogram from cur_symbol to candidate.
template<typename HistogramType>
double HistogramBitCostDistance(const HistogramType& histogram,
const HistogramType& candidate) {
if (histogram.total_count_ == 0) {
return 0.0;
}
HistogramType tmp = histogram;
tmp.AddHistogram(candidate);
return PopulationCost(tmp) - candidate.bit_cost_;
}
// Find the best 'out' histogram for each of the 'in' histograms.
// When called, clusters[0..num_clusters) contains the unique values from
// symbols[0..in_size), but this property is not preserved in this function.
// Note: we assume that out[]->bit_cost_ is already up-to-date.
template<typename HistogramType>
void HistogramRemap(const HistogramType* in, size_t in_size,
const uint32_t* clusters, size_t num_clusters,
HistogramType* out, uint32_t* symbols) {
for (size_t i = 0; i < in_size; ++i) {
uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
for (size_t j = 0; j < num_clusters; ++j) {
const double cur_bits = HistogramBitCostDistance(in[i], out[clusters[j]]);
if (cur_bits < best_bits) {
best_bits = cur_bits;
best_out = clusters[j];
}
}
symbols[i] = best_out;
}
// Recompute each out based on raw and symbols.
for (size_t j = 0; j < num_clusters; ++j) {
out[clusters[j]].Clear();
}
for (size_t i = 0; i < in_size; ++i) {
out[symbols[i]].AddHistogram(in[i]);
}
}
// Reorders elements of the out[0..length) array and changes values in
// symbols[0..length) array in the following way:
// * when called, symbols[] contains indexes into out[], and has N unique
// values (possibly N < length)
// * on return, symbols'[i] = f(symbols[i]) and
// out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
// where f is a bijection between the range of symbols[] and [0..N), and
// the first occurrences of values in symbols'[i] come in consecutive
// increasing order.
// Returns N, the number of unique values in symbols[].
template<typename HistogramType>
size_t HistogramReindex(HistogramType* out, uint32_t* symbols, size_t length) {
static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
std::vector<uint32_t> new_index(length, kInvalidIndex);
uint32_t next_index = 0;
for (size_t i = 0; i < length; ++i) {
if (new_index[symbols[i]] == kInvalidIndex) {
new_index[symbols[i]] = next_index;
++next_index;
}
}
std::vector<HistogramType> tmp(next_index);
next_index = 0;
for (size_t i = 0; i < length; ++i) {
if (new_index[symbols[i]] == next_index) {
tmp[next_index] = out[symbols[i]];
++next_index;
}
symbols[i] = new_index[symbols[i]];
}
for (size_t i = 0; i < next_index; ++i) {
out[i] = tmp[i];
}
return next_index;
}
// Clusters similar histograms in 'in' together, the selected histograms are
// placed in 'out', and for each index in 'in', *histogram_symbols will
// indicate which of the 'out' histograms is the best approximation.
template<typename HistogramType>
void ClusterHistograms(const std::vector<HistogramType>& in,
size_t num_contexts, size_t num_blocks,
size_t max_histograms,
std::vector<HistogramType>* out,
std::vector<uint32_t>* histogram_symbols) {
const size_t in_size = num_contexts * num_blocks;
assert(in_size == in.size());
std::vector<uint32_t> cluster_size(in_size, 1);
std::vector<uint32_t> clusters(in_size);
size_t num_clusters = 0;
out->resize(in_size);
histogram_symbols->resize(in_size);
for (size_t i = 0; i < in_size; ++i) {
(*out)[i] = in[i];
(*out)[i].bit_cost_ = PopulationCost(in[i]);
(*histogram_symbols)[i] = static_cast<uint32_t>(i);
}
const size_t max_input_histograms = 64;
// For the first pass of clustering, we allow all pairs.
size_t max_num_pairs = max_input_histograms * max_input_histograms / 2;
std::vector<HistogramPair> pairs(max_num_pairs + 1);
for (size_t i = 0; i < in_size; i += max_input_histograms) {
size_t num_to_combine = std::min(in_size - i, max_input_histograms);
for (size_t j = 0; j < num_to_combine; ++j) {
clusters[num_clusters + j] = static_cast<uint32_t>(i + j);
}
size_t num_new_clusters =
HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[i],
&clusters[num_clusters], &pairs[0],
num_to_combine, num_to_combine,
max_histograms, max_num_pairs);
num_clusters += num_new_clusters;
}
// For the second pass, we limit the total number of histogram pairs.
// After this limit is reached, we only keep searching for the best pair.
max_num_pairs =
std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
pairs.resize(max_num_pairs + 1);
// Collapse similar histograms.
num_clusters = HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[0], &clusters[0],
&pairs[0], num_clusters, in_size,
max_histograms, max_num_pairs);
// Find the optimal map from original histograms to the final ones.
HistogramRemap(&in[0], in_size, &clusters[0], num_clusters,
&(*out)[0], &(*histogram_symbols)[0]);
// Convert the context map to a canonical form.
size_t num_histograms =
HistogramReindex(&(*out)[0], &(*histogram_symbols)[0], in_size);
out->resize(num_histograms);
}
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_CLUSTER_H_ */

enc/cluster_inc.h Normal file
View File

@@ -0,0 +1,315 @@
/* NOLINT(build/header_guard) */
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: FN, CODE */
#define HistogramType FN(Histogram)
/* Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. */
BROTLI_INTERNAL void FN(BrotliCompareAndPushToQueue)(
const HistogramType* out, const uint32_t* cluster_size, uint32_t idx1,
uint32_t idx2, size_t max_num_pairs, HistogramPair* pairs,
size_t* num_pairs) CODE({
int is_good_pair = 0;
HistogramPair p;
if (idx1 == idx2) {
return;
}
if (idx2 < idx1) {
uint32_t t = idx2;
idx2 = idx1;
idx1 = t;
}
p.idx1 = idx1;
p.idx2 = idx2;
p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
p.cost_diff -= out[idx1].bit_cost_;
p.cost_diff -= out[idx2].bit_cost_;
if (out[idx1].total_count_ == 0) {
p.cost_combo = out[idx2].bit_cost_;
is_good_pair = 1;
} else if (out[idx2].total_count_ == 0) {
p.cost_combo = out[idx1].bit_cost_;
is_good_pair = 1;
} else {
double threshold = *num_pairs == 0 ? 1e99 :
BROTLI_MAX(double, 0.0, pairs[0].cost_diff);
HistogramType combo = out[idx1];
double cost_combo;
FN(HistogramAddHistogram)(&combo, &out[idx2]);
cost_combo = FN(BrotliPopulationCost)(&combo);
if (cost_combo < threshold - p.cost_diff) {
p.cost_combo = cost_combo;
is_good_pair = 1;
}
}
if (is_good_pair) {
p.cost_diff += p.cost_combo;
if (*num_pairs > 0 && HistogramPairIsLess(&pairs[0], &p)) {
/* Replace the top of the queue if needed. */
if (*num_pairs < max_num_pairs) {
pairs[*num_pairs] = pairs[0];
++(*num_pairs);
}
pairs[0] = p;
} else if (*num_pairs < max_num_pairs) {
pairs[*num_pairs] = p;
++(*num_pairs);
}
}
})
BROTLI_INTERNAL size_t FN(BrotliHistogramCombine)(HistogramType* out,
uint32_t* cluster_size,
uint32_t* symbols,
uint32_t* clusters,
HistogramPair* pairs,
size_t num_clusters,
size_t symbols_size,
size_t max_clusters,
size_t max_num_pairs) CODE({
double cost_diff_threshold = 0.0;
size_t min_cluster_size = 1;
size_t num_pairs = 0;
{
/* We maintain a vector of histogram pairs, with the property that the pair
with the maximum bit cost reduction is the first. */
size_t idx1;
for (idx1 = 0; idx1 < num_clusters; ++idx1) {
size_t idx2;
for (idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
FN(BrotliCompareAndPushToQueue)(out, cluster_size, clusters[idx1],
clusters[idx2], max_num_pairs, &pairs[0], &num_pairs);
}
}
}
while (num_clusters > min_cluster_size) {
uint32_t best_idx1;
uint32_t best_idx2;
size_t i;
if (pairs[0].cost_diff >= cost_diff_threshold) {
cost_diff_threshold = 1e99;
min_cluster_size = max_clusters;
continue;
}
/* Take the best pair from the top of heap. */
best_idx1 = pairs[0].idx1;
best_idx2 = pairs[0].idx2;
FN(HistogramAddHistogram)(&out[best_idx1], &out[best_idx2]);
out[best_idx1].bit_cost_ = pairs[0].cost_combo;
cluster_size[best_idx1] += cluster_size[best_idx2];
for (i = 0; i < symbols_size; ++i) {
if (symbols[i] == best_idx2) {
symbols[i] = best_idx1;
}
}
for (i = 0; i < num_clusters; ++i) {
if (clusters[i] == best_idx2) {
memmove(&clusters[i], &clusters[i + 1],
(num_clusters - i - 1) * sizeof(clusters[0]));
break;
}
}
--num_clusters;
{
/* Remove pairs intersecting the just combined best pair. */
size_t copy_to_idx = 0;
for (i = 0; i < num_pairs; ++i) {
HistogramPair* p = &pairs[i];
if (p->idx1 == best_idx1 || p->idx2 == best_idx1 ||
p->idx1 == best_idx2 || p->idx2 == best_idx2) {
/* Remove invalid pair from the queue. */
continue;
}
if (HistogramPairIsLess(&pairs[0], p)) {
/* Replace the top of the queue if needed. */
HistogramPair front = pairs[0];
pairs[0] = *p;
pairs[copy_to_idx] = front;
} else {
pairs[copy_to_idx] = *p;
}
++copy_to_idx;
}
num_pairs = copy_to_idx;
}
/* Push new pairs formed with the combined histogram to the heap. */
for (i = 0; i < num_clusters; ++i) {
FN(BrotliCompareAndPushToQueue)(out, cluster_size, best_idx1, clusters[i],
max_num_pairs, &pairs[0], &num_pairs);
}
}
return num_clusters;
})
/* Returns the bit cost of moving the histogram from cur_symbol to candidate. */
BROTLI_INTERNAL double FN(BrotliHistogramBitCostDistance)(
const HistogramType* histogram, const HistogramType* candidate) CODE({
if (histogram->total_count_ == 0) {
return 0.0;
} else {
HistogramType tmp = *histogram;
FN(HistogramAddHistogram)(&tmp, candidate);
return FN(BrotliPopulationCost)(&tmp) - candidate->bit_cost_;
}
})
/* Find the best 'out' histogram for each of the 'in' histograms.
When called, clusters[0..num_clusters) contains the unique values from
symbols[0..in_size), but this property is not preserved in this function.
Note: we assume that out[]->bit_cost_ is already up-to-date. */
BROTLI_INTERNAL void FN(BrotliHistogramRemap)(const HistogramType* in,
size_t in_size, const uint32_t* clusters, size_t num_clusters,
HistogramType* out, uint32_t* symbols) CODE({
size_t i;
for (i = 0; i < in_size; ++i) {
uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
double best_bits =
FN(BrotliHistogramBitCostDistance)(&in[i], &out[best_out]);
size_t j;
for (j = 0; j < num_clusters; ++j) {
const double cur_bits =
FN(BrotliHistogramBitCostDistance)(&in[i], &out[clusters[j]]);
if (cur_bits < best_bits) {
best_bits = cur_bits;
best_out = clusters[j];
}
}
symbols[i] = best_out;
}
/* Recompute each out based on raw and symbols. */
for (i = 0; i < num_clusters; ++i) {
FN(HistogramClear)(&out[clusters[i]]);
}
for (i = 0; i < in_size; ++i) {
FN(HistogramAddHistogram)(&out[symbols[i]], &in[i]);
}
})
/* Reorders elements of the out[0..length) array and changes values in
symbols[0..length) array in the following way:
* when called, symbols[] contains indexes into out[], and has N unique
values (possibly N < length)
* on return, symbols'[i] = f(symbols[i]) and
out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
where f is a bijection between the range of symbols[] and [0..N), and
the first occurrences of values in symbols'[i] come in consecutive
increasing order.
Returns N, the number of unique values in symbols[]. */
BROTLI_INTERNAL size_t FN(BrotliHistogramReindex)(MemoryManager* m,
HistogramType* out, uint32_t* symbols, size_t length) CODE({
static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
uint32_t* new_index = BROTLI_ALLOC(m, uint32_t, length);
uint32_t next_index;
HistogramType* tmp;
size_t i;
if (BROTLI_IS_OOM(m)) return 0;
for (i = 0; i < length; ++i) {
new_index[i] = kInvalidIndex;
}
next_index = 0;
for (i = 0; i < length; ++i) {
if (new_index[symbols[i]] == kInvalidIndex) {
new_index[symbols[i]] = next_index;
++next_index;
}
}
/* TODO: by using the idea of "cycle-sort" we can avoid the allocation of
tmp and reduce the amount of copying by a factor of 2. */
tmp = BROTLI_ALLOC(m, HistogramType, next_index);
if (BROTLI_IS_OOM(m)) return 0;
next_index = 0;
for (i = 0; i < length; ++i) {
if (new_index[symbols[i]] == next_index) {
tmp[next_index] = out[symbols[i]];
++next_index;
}
symbols[i] = new_index[symbols[i]];
}
BROTLI_FREE(m, new_index);
for (i = 0; i < next_index; ++i) {
out[i] = tmp[i];
}
BROTLI_FREE(m, tmp);
return next_index;
})
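A small worked example of this contract (illustrative only): if symbols = {3, 1, 3, 0} on entry, the first occurrences appear in the order 3, 1, 0, so f maps 3->0, 1->1, 0->2; on return symbols = {0, 1, 0, 2}, out[0], out[1], out[2] hold the old out[3], out[1], out[0] respectively, and the function returns N = 3.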
BROTLI_INTERNAL void FN(BrotliClusterHistograms)(
MemoryManager* m, const HistogramType* in, const size_t in_size,
size_t max_histograms, HistogramType* out, size_t* out_size,
uint32_t* histogram_symbols) CODE({
uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, in_size);
uint32_t* clusters = BROTLI_ALLOC(m, uint32_t, in_size);
size_t num_clusters = 0;
const size_t max_input_histograms = 64;
size_t pairs_capacity = max_input_histograms * max_input_histograms / 2;
/* For the first pass of clustering, we allow all pairs. */
HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity + 1);
size_t i;
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < in_size; ++i) {
cluster_size[i] = 1;
}
for (i = 0; i < in_size; ++i) {
out[i] = in[i];
out[i].bit_cost_ = FN(BrotliPopulationCost)(&in[i]);
histogram_symbols[i] = (uint32_t)i;
}
for (i = 0; i < in_size; i += max_input_histograms) {
size_t num_to_combine =
BROTLI_MIN(size_t, in_size - i, max_input_histograms);
size_t num_new_clusters;
size_t j;
for (j = 0; j < num_to_combine; ++j) {
clusters[num_clusters + j] = (uint32_t)(i + j);
}
num_new_clusters =
FN(BrotliHistogramCombine)(out, cluster_size,
&histogram_symbols[i],
&clusters[num_clusters], pairs,
num_to_combine, num_to_combine,
max_histograms, pairs_capacity);
num_clusters += num_new_clusters;
}
{
/* For the second pass, we limit the total number of histogram pairs.
After this limit is reached, we only keep searching for the best pair. */
size_t max_num_pairs = BROTLI_MIN(size_t,
64 * num_clusters, (num_clusters / 2) * num_clusters);
BROTLI_ENSURE_CAPACITY(
m, HistogramPair, pairs, pairs_capacity, max_num_pairs + 1);
if (BROTLI_IS_OOM(m)) return;
/* Collapse similar histograms. */
num_clusters = FN(BrotliHistogramCombine)(out, cluster_size,
histogram_symbols, clusters,
pairs, num_clusters, in_size,
max_histograms, max_num_pairs);
}
BROTLI_FREE(m, pairs);
BROTLI_FREE(m, cluster_size);
/* Find the optimal map from original histograms to the final ones. */
FN(BrotliHistogramRemap)(in, in_size, clusters, num_clusters,
out, histogram_symbols);
BROTLI_FREE(m, clusters);
/* Convert the context map to a canonical form. */
*out_size = FN(BrotliHistogramReindex)(m, out, histogram_symbols, in_size);
if (BROTLI_IS_OOM(m)) return;
})
#undef HistogramType
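For orientation, a hedged caller-side sketch of the Literal instantiation (HistogramLiteral and BrotliClusterHistogramsLiteral are the FN-expanded names; kMaxLiteralHistograms is a hypothetical caller-chosen bound, and m, in, in_size come from the caller's context):
/* Illustrative only: out must have room for in_size histograms and
   symbols for in_size entries. */
HistogramLiteral* out = BROTLI_ALLOC(m, HistogramLiteral, in_size);
uint32_t* symbols = BROTLI_ALLOC(m, uint32_t, in_size);
size_t out_size;
if (BROTLI_IS_OOM(m)) return;
BrotliClusterHistogramsLiteral(m, in, in_size, kMaxLiteralHistograms,
                               out, &out_size, symbols);
if (BROTLI_IS_OOM(m)) return;
/* out[0..out_size) now holds the clustered histograms and symbols[i]
   names the one that best approximates in[i]. */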

View File

@@ -10,10 +10,13 @@
#define BROTLI_ENC_COMMAND_H_
#include "../common/types.h"
#include "../common/port.h"
#include "./fast_log.h"
#include "./prefix.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static uint32_t kInsBase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50,
66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
@@ -24,15 +27,14 @@ static uint32_t kCopyBase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30,
static uint32_t kCopyExtra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
4, 4, 5, 5, 6, 7, 8, 9, 10, 24 };
static inline uint16_t GetInsertLengthCode(size_t insertlen) {
static BROTLI_INLINE uint16_t GetInsertLengthCode(size_t insertlen) {
if (insertlen < 6) {
return static_cast<uint16_t>(insertlen);
return (uint16_t)insertlen;
} else if (insertlen < 130) {
insertlen -= 2;
uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
return static_cast<uint16_t>((nbits << 1) + (insertlen >> nbits) + 2);
uint32_t nbits = Log2FloorNonZero(insertlen - 2) - 1u;
return (uint16_t)((nbits << 1) + ((insertlen - 2) >> nbits) + 2);
} else if (insertlen < 2114) {
return static_cast<uint16_t>(Log2FloorNonZero(insertlen - 66) + 10);
return (uint16_t)(Log2FloorNonZero(insertlen - 66) + 10);
} else if (insertlen < 6210) {
return 21u;
} else if (insertlen < 22594) {
@@ -42,24 +44,23 @@ static inline uint16_t GetInsertLengthCode(size_t insertlen) {
}
}
static inline uint16_t GetCopyLengthCode(size_t copylen) {
static BROTLI_INLINE uint16_t GetCopyLengthCode(size_t copylen) {
if (copylen < 10) {
return static_cast<uint16_t>(copylen - 2);
return (uint16_t)(copylen - 2);
} else if (copylen < 134) {
copylen -= 6;
uint32_t nbits = Log2FloorNonZero(copylen) - 1u;
return static_cast<uint16_t>((nbits << 1) + (copylen >> nbits) + 4);
uint32_t nbits = Log2FloorNonZero(copylen - 6) - 1u;
return (uint16_t)((nbits << 1) + ((copylen - 6) >> nbits) + 4);
} else if (copylen < 2118) {
return static_cast<uint16_t>(Log2FloorNonZero(copylen - 70) + 12);
return (uint16_t)(Log2FloorNonZero(copylen - 70) + 12);
} else {
return 23u;
}
}
static inline uint16_t CombineLengthCodes(
uint16_t inscode, uint16_t copycode, bool use_last_distance) {
static BROTLI_INLINE uint16_t CombineLengthCodes(
uint16_t inscode, uint16_t copycode, int use_last_distance) {
uint16_t bits64 =
static_cast<uint16_t>((copycode & 0x7u) | ((inscode & 0x7u) << 3));
(uint16_t)((copycode & 0x7u) | ((inscode & 0x7u) << 3));
if (use_last_distance && inscode < 8 && copycode < 16) {
return (copycode < 8) ? bits64 : (bits64 | 64);
} else {
@@ -71,86 +72,91 @@ static inline uint16_t CombineLengthCodes(
}
}
static inline void GetLengthCode(size_t insertlen, size_t copylen,
bool use_last_distance,
uint16_t* code) {
static BROTLI_INLINE void GetLengthCode(size_t insertlen, size_t copylen,
int use_last_distance,
uint16_t* code) {
uint16_t inscode = GetInsertLengthCode(insertlen);
uint16_t copycode = GetCopyLengthCode(copylen);
*code = CombineLengthCodes(inscode, copycode, use_last_distance);
}
static inline uint32_t GetInsertBase(uint16_t inscode) {
static BROTLI_INLINE uint32_t GetInsertBase(uint16_t inscode) {
return kInsBase[inscode];
}
static inline uint32_t GetInsertExtra(uint16_t inscode) {
static BROTLI_INLINE uint32_t GetInsertExtra(uint16_t inscode) {
return kInsExtra[inscode];
}
static inline uint32_t GetCopyBase(uint16_t copycode) {
static BROTLI_INLINE uint32_t GetCopyBase(uint16_t copycode) {
return kCopyBase[copycode];
}
static inline uint32_t GetCopyExtra(uint16_t copycode) {
static BROTLI_INLINE uint32_t GetCopyExtra(uint16_t copycode) {
return kCopyExtra[copycode];
}
struct Command {
// distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1.
Command(size_t insertlen, size_t copylen, size_t copylen_code,
size_t distance_code)
: insert_len_(static_cast<uint32_t>(insertlen)) {
copy_len_ = static_cast<uint32_t>(
copylen | ((copylen_code ^ copylen) << 24));
// The distance prefix and extra bits are stored in this Command as if
// npostfix and ndirect were 0, they are only recomputed later after the
// clustering if needed.
PrefixEncodeCopyDistance(distance_code, 0, 0, &dist_prefix_, &dist_extra_);
GetLengthCode(insertlen, copylen_code, dist_prefix_ == 0,
&cmd_prefix_);
}
explicit Command(size_t insertlen)
: insert_len_(static_cast<uint32_t>(insertlen))
, copy_len_(4 << 24), dist_extra_(0), dist_prefix_(16) {
GetLengthCode(insertlen, 4, dist_prefix_ == 0, &cmd_prefix_);
}
uint32_t DistanceCode(void) const {
if (dist_prefix_ < 16) {
return dist_prefix_;
}
uint32_t nbits = dist_extra_ >> 24;
uint32_t extra = dist_extra_ & 0xffffff;
uint32_t prefix = dist_prefix_ - 12 - 2 * nbits;
return (prefix << nbits) + extra + 12;
}
uint32_t DistanceContext(void) const {
uint32_t r = cmd_prefix_ >> 6;
uint32_t c = cmd_prefix_ & 7;
if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) {
return c;
}
return 3;
}
inline uint32_t copy_len(void) const {
return copy_len_ & 0xFFFFFF;
}
inline uint32_t copy_len_code(void) const {
return (copy_len_ & 0xFFFFFF) ^ (copy_len_ >> 24);
}
typedef struct Command {
uint32_t insert_len_;
/* Stores copy_len in low 24 bits and copy_len XOR copy_code in high 8 bits. */
uint32_t copy_len_;
uint32_t dist_extra_;
uint16_t cmd_prefix_;
uint16_t dist_prefix_;
};
} Command;
} // namespace brotli
/* distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1. */
static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen,
size_t copylen, size_t copylen_code, size_t distance_code) {
self->insert_len_ = (uint32_t)insertlen;
self->copy_len_ = (uint32_t)(copylen | ((copylen_code ^ copylen) << 24));
/* The distance prefix and extra bits are stored in this Command as if
npostfix and ndirect were 0, they are only recomputed later after the
clustering if needed. */
PrefixEncodeCopyDistance(
distance_code, 0, 0, &self->dist_prefix_, &self->dist_extra_);
GetLengthCode(
insertlen, copylen_code, self->dist_prefix_ == 0, &self->cmd_prefix_);
}
static BROTLI_INLINE void InitInsertCommand(Command* self, size_t insertlen) {
self->insert_len_ = (uint32_t)insertlen;
self->copy_len_ = 4 << 24;
self->dist_extra_ = 0;
self->dist_prefix_ = 16;
GetLengthCode(insertlen, 4, 0, &self->cmd_prefix_);
}
static BROTLI_INLINE uint32_t CommandDistanceCode(const Command* self) {
if (self->dist_prefix_ < 16) {
return self->dist_prefix_;
} else {
uint32_t nbits = self->dist_extra_ >> 24;
uint32_t extra = self->dist_extra_ & 0xffffff;
uint32_t prefix = self->dist_prefix_ - 12u - 2u * nbits;
return (prefix << nbits) + extra + 12;
}
}
static BROTLI_INLINE uint32_t CommandDistanceContext(const Command* self) {
uint32_t r = self->cmd_prefix_ >> 6;
uint32_t c = self->cmd_prefix_ & 7;
if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) {
return c;
}
return 3;
}
static BROTLI_INLINE uint32_t CommandCopyLen(const Command* self) {
return self->copy_len_ & 0xFFFFFF;
}
static BROTLI_INLINE uint32_t CommandCopyLenCode(const Command* self) {
return (self->copy_len_ & 0xFFFFFF) ^ (self->copy_len_ >> 24);
}
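To illustrate the copy_len_ packing (illustrative values): InitCommand with copylen = 10 and copylen_code = 12 stores copy_len_ = 10 | ((12 ^ 10) << 24); CommandCopyLen then recovers 10 from the low 24 bits, and CommandCopyLenCode recovers 10 ^ (12 ^ 10) = 12 again. When copylen and copylen_code agree, the high byte is zero.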
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_COMMAND_H_ */

View File

@@ -14,18 +14,21 @@
#include "./compress_fragment.h"
#include <algorithm>
#include <cstring>
#include <string.h> /* memcmp, memcpy, memset */
#include "../common/types.h"
#include "./brotli_bit_stream.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./memory.h"
#include "./port.h"
#include "./write_bits.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
/* kHashMul32 multiplier has these properties:
* The multiplier must be odd. Otherwise we may lose the highest bit.
@@ -35,19 +38,22 @@ namespace brotli {
* The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;
static inline uint32_t Hash(const uint8_t* p, size_t shift) {
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 24) * kHashMul32;
return static_cast<uint32_t>(h >> shift);
return (uint32_t)(h >> shift);
}
static inline uint32_t HashBytesAtOffset(uint64_t v, int offset, size_t shift) {
static BROTLI_INLINE uint32_t HashBytesAtOffset(
uint64_t v, int offset, size_t shift) {
assert(offset >= 0);
assert(offset <= 3);
const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
return static_cast<uint32_t>(h >> shift);
{
const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
return (uint32_t)(h >> shift);
}
}
static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
static BROTLI_INLINE int IsMatch(const uint8_t* p1, const uint8_t* p2) {
return (BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
p1[4] == p2[4]);
}
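As a hedged illustration of how Hash, the table and IsMatch cooperate (a simplified single probe; the real loop further down also adapts its step size and checks the last used distance first):
/* Illustrative only: one hash-table probe for a 5-byte match at ip.
   table_size must be a power of two and shift = 64 - Log2FloorNonZero(
   table_size), as asserted in BrotliCompressFragmentFast below. */
static BROTLI_INLINE const uint8_t* SketchProbe(
    const uint8_t* base_ip, const uint8_t* ip, int* table, size_t shift) {
  const uint32_t hash = Hash(ip, shift);
  const uint8_t* candidate = base_ip + table[hash];
  table[hash] = (int)(ip - base_ip);  /* Remember the newest position. */
  return IsMatch(ip, candidate) ? candidate : NULL;
}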
@@ -57,281 +63,295 @@ static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
Note that the prefix code here is built from the pre-LZ77 input, therefore
we can only approximate the statistics of the actual literal stream.
Moreover, for long inputs we build a histogram from a sample of the input
and thus have to assign a non-zero depth for each literal. */
static void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
const size_t input_size,
uint8_t depths[256],
uint16_t bits[256],
size_t* storage_ix,
uint8_t* storage) {
and thus have to assign a non-zero depth for each literal.
Returns the estimated compression ratio, in millibytes per character, for
encoding the given input with the generated code. */
static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
const uint8_t* input,
const size_t input_size,
uint8_t depths[256],
uint16_t bits[256],
size_t* storage_ix,
uint8_t* storage) {
uint32_t histogram[256] = { 0 };
size_t histogram_total;
size_t i;
if (input_size < (1 << 15)) {
for (size_t i = 0; i < input_size; ++i) {
for (i = 0; i < input_size; ++i) {
++histogram[input[i]];
}
histogram_total = input_size;
for (size_t i = 0; i < 256; ++i) {
for (i = 0; i < 256; ++i) {
/* We weigh the first 11 samples with weight 3 to account for the
balancing effect of the LZ77 phase on the histogram. */
const uint32_t adjust = 2 * std::min(histogram[i], 11u);
const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
histogram[i] += adjust;
histogram_total += adjust;
}
} else {
static const size_t kSampleRate = 29;
for (size_t i = 0; i < input_size; i += kSampleRate) {
for (i = 0; i < input_size; i += kSampleRate) {
++histogram[input[i]];
}
histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
for (size_t i = 0; i < 256; ++i) {
for (i = 0; i < 256; ++i) {
/* We add 1 to each population count to avoid 0 bit depths (since this is
only a sample and we don't know if the symbol appears or not), and we
weigh the first 11 samples with weight 3 to account for the balancing
effect of the LZ77 phase on the histogram (more frequent symbols are
more likely to be in backward references instead as literals). */
const uint32_t adjust = 1 + 2 * std::min(histogram[i], 11u);
const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
histogram[i] += adjust;
histogram_total += adjust;
}
}
BuildAndStoreHuffmanTreeFast(histogram, histogram_total,
/* max_bits = */ 8,
depths, bits, storage_ix, storage);
BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
/* max_bits = */ 8,
depths, bits, storage_ix, storage);
if (BROTLI_IS_OOM(m)) return 0;
{
size_t literal_ratio = 0;
for (i = 0; i < 256; ++i) {
if (histogram[i]) literal_ratio += histogram[i] * depths[i];
}
/* Estimated encoding ratio, millibytes per symbol. */
return (literal_ratio * 125) / histogram_total;
}
}
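The 125 constant follows from the units: one bit is 1/8 of a byte, i.e. 125 millibytes, so sum(histogram[i] * depths[i]) * 125 / histogram_total is the expected cost in millibytes per literal. For example, an average depth of 4 bits yields 500, i.e. half a byte per character.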
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
"bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
uint8_t depth[128],
uint16_t bits[128],
size_t* storage_ix,
uint8_t* storage) {
uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
uint8_t* storage) {
/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
static const size_t kTreeSize = 129;
HuffmanTree tree[kTreeSize];
CreateHuffmanTree(histogram, 64, 15, tree, depth);
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
HuffmanTree tree[129];
uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
uint16_t cmd_bits[64];
BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
/* We have to jump through a few hoops here in order to compute
the command bits because the symbols are in a different order than in
the full alphabet. This looks complicated, but having the symbols
in this order in the command bits saves a few branches in the Emit*
functions. */
uint8_t cmd_depth[64];
uint16_t cmd_bits[64];
memcpy(cmd_depth, depth, 24);
memcpy(cmd_depth + 24, depth + 40, 8);
memcpy(cmd_depth + 32, depth + 24, 8);
memcpy(cmd_depth + 40, depth + 48, 8);
memcpy(cmd_depth + 48, depth + 32, 8);
memcpy(cmd_depth + 56, depth + 56, 8);
ConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
memcpy(bits, cmd_bits, 48);
memcpy(bits + 24, cmd_bits + 32, 16);
memcpy(bits + 32, cmd_bits + 48, 16);
memcpy(bits + 40, cmd_bits + 24, 16);
memcpy(bits + 48, cmd_bits + 40, 16);
memcpy(bits + 56, cmd_bits + 56, 16);
ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
{
/* Create the bit length array for the full command alphabet. */
uint8_t cmd_depth[704] = { 0 };
size_t i;
memset(cmd_depth, 0, 64); /* only 64 first values were used */
memcpy(cmd_depth, depth, 8);
memcpy(cmd_depth + 64, depth + 8, 8);
memcpy(cmd_depth + 128, depth + 16, 8);
memcpy(cmd_depth + 192, depth + 24, 8);
memcpy(cmd_depth + 384, depth + 32, 8);
for (size_t i = 0; i < 8; ++i) {
for (i = 0; i < 8; ++i) {
cmd_depth[128 + 8 * i] = depth[40 + i];
cmd_depth[256 + 8 * i] = depth[48 + i];
cmd_depth[448 + 8 * i] = depth[56 + i];
}
StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
BrotliStoreHuffmanTree(
cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
}
StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}
/* REQUIRES: insertlen < 6210 */
inline void EmitInsertLen(size_t insertlen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (insertlen < 6) {
const size_t code = insertlen + 40;
WriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
++histo[code];
} else if (insertlen < 130) {
insertlen -= 2;
const uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
const size_t prefix = insertlen >> nbits;
const size_t tail = insertlen - 2;
const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
const size_t prefix = tail >> nbits;
const size_t inscode = (nbits << 1) + prefix + 42;
WriteBits(depth[inscode], bits[inscode], storage_ix, storage);
WriteBits(nbits, insertlen - (prefix << nbits), storage_ix, storage);
BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
++histo[inscode];
} else if (insertlen < 2114) {
insertlen -= 66;
const uint32_t nbits = Log2FloorNonZero(insertlen);
const size_t tail = insertlen - 66;
const uint32_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 50;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(nbits, insertlen - (1 << nbits), storage_ix, storage);
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(nbits, tail - (1u << nbits), storage_ix, storage);
++histo[code];
} else {
WriteBits(depth[61], bits[61], storage_ix, storage);
WriteBits(12, insertlen - 2114, storage_ix, storage);
BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
++histo[21];
}
}
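A worked example of the middle branch (illustrative): insertlen = 20 is in [6, 130), so tail = 18, nbits = Log2FloorNonZero(18) - 1 = 3, prefix = 18 >> 3 = 2, inscode = (3 << 1) + 2 + 42 = 50, and the three extra bits carry 18 - (2 << 3) = 2.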
inline void EmitLongInsertLen(size_t insertlen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (insertlen < 22594) {
WriteBits(depth[62], bits[62], storage_ix, storage);
WriteBits(14, insertlen - 6210, storage_ix, storage);
BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
++histo[22];
} else {
WriteBits(depth[63], bits[63], storage_ix, storage);
WriteBits(24, insertlen - 22594, storage_ix, storage);
BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
++histo[23];
}
}
inline void EmitCopyLen(size_t copylen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
static BROTLI_INLINE void EmitCopyLen(size_t copylen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (copylen < 10) {
WriteBits(depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
BrotliWriteBits(
depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
++histo[copylen + 14];
} else if (copylen < 134) {
copylen -= 6;
const uint32_t nbits = Log2FloorNonZero(copylen) - 1u;
const size_t prefix = copylen >> nbits;
const size_t tail = copylen - 6;
const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
const size_t prefix = tail >> nbits;
const size_t code = (nbits << 1) + prefix + 20;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(nbits, copylen - (prefix << nbits), storage_ix, storage);
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
++histo[code];
} else if (copylen < 2118) {
copylen -= 70;
const uint32_t nbits = Log2FloorNonZero(copylen);
const size_t tail = copylen - 70;
const uint32_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 28;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(nbits, copylen - (1 << nbits), storage_ix, storage);
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(nbits, tail - (1u << nbits), storage_ix, storage);
++histo[code];
} else {
WriteBits(depth[39], bits[39], storage_ix, storage);
WriteBits(24, copylen - 2118, storage_ix, storage);
BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
++histo[47];
}
}
inline void EmitCopyLenLastDistance(size_t copylen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (copylen < 12) {
WriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
++histo[copylen - 4];
} else if (copylen < 72) {
copylen -= 8;
const uint32_t nbits = Log2FloorNonZero(copylen) - 1;
const size_t prefix = copylen >> nbits;
const size_t tail = copylen - 8;
const uint32_t nbits = Log2FloorNonZero(tail) - 1;
const size_t prefix = tail >> nbits;
const size_t code = (nbits << 1) + prefix + 4;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(nbits, copylen - (prefix << nbits), storage_ix, storage);
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
++histo[code];
} else if (copylen < 136) {
copylen -= 8;
const size_t code = (copylen >> 5) + 30;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(5, copylen & 31, storage_ix, storage);
WriteBits(depth[64], bits[64], storage_ix, storage);
const size_t tail = copylen - 8;
const size_t code = (tail >> 5) + 30;
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(5, tail & 31, storage_ix, storage);
BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
++histo[code];
++histo[64];
} else if (copylen < 2120) {
copylen -= 72;
const uint32_t nbits = Log2FloorNonZero(copylen);
const size_t tail = copylen - 72;
const uint32_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 28;
WriteBits(depth[code], bits[code], storage_ix, storage);
WriteBits(nbits, copylen - (1 << nbits), storage_ix, storage);
WriteBits(depth[64], bits[64], storage_ix, storage);
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(nbits, tail - (1u << nbits), storage_ix, storage);
BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
++histo[code];
++histo[64];
} else {
WriteBits(depth[39], bits[39], storage_ix, storage);
WriteBits(24, copylen - 2120, storage_ix, storage);
WriteBits(depth[64], bits[64], storage_ix, storage);
BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
++histo[47];
++histo[64];
}
}
inline void EmitDistance(size_t distance,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix, uint8_t* storage) {
distance += 3;
const uint32_t nbits = Log2FloorNonZero(distance) - 1u;
const size_t prefix = (distance >> nbits) & 1;
static BROTLI_INLINE void EmitDistance(size_t distance,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix, uint8_t* storage) {
const size_t d = distance + 3;
const uint32_t nbits = Log2FloorNonZero(d) - 1u;
const size_t prefix = (d >> nbits) & 1;
const size_t offset = (2 + prefix) << nbits;
const size_t distcode = 2 * (nbits - 1) + prefix + 80;
WriteBits(depth[distcode], bits[distcode], storage_ix, storage);
WriteBits(nbits, distance - offset, storage_ix, storage);
BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
BrotliWriteBits(nbits, d - offset, storage_ix, storage);
++histo[distcode];
}
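Worked example (illustrative): distance = 9 gives d = 12, nbits = Log2FloorNonZero(12) - 1 = 2, prefix = (12 >> 2) & 1 = 1, offset = (2 + 1) << 2 = 12, distcode = 2 * (2 - 1) + 1 + 80 = 83, and the two extra bits carry d - offset = 0.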
inline void EmitLiterals(const uint8_t* input, const size_t len,
const uint8_t depth[256], const uint16_t bits[256],
size_t* storage_ix, uint8_t* storage) {
for (size_t j = 0; j < len; j++) {
static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
const uint8_t depth[256],
const uint16_t bits[256],
size_t* storage_ix, uint8_t* storage) {
size_t j;
for (j = 0; j < len; j++) {
const uint8_t lit = input[j];
WriteBits(depth[lit], bits[lit], storage_ix, storage);
BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
}
}
/* REQUIRES: len <= 1 << 20. */
static void StoreMetaBlockHeader(
size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
static void BrotliStoreMetaBlockHeader(
size_t len, int is_uncompressed, size_t* storage_ix, uint8_t* storage) {
/* ISLAST */
WriteBits(1, 0, storage_ix, storage);
BrotliWriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
/* MNIBBLES is 4 */
WriteBits(2, 0, storage_ix, storage);
WriteBits(16, len - 1, storage_ix, storage);
BrotliWriteBits(2, 0, storage_ix, storage);
BrotliWriteBits(16, len - 1, storage_ix, storage);
} else {
/* MNIBBLES is 5 */
WriteBits(2, 1, storage_ix, storage);
WriteBits(20, len - 1, storage_ix, storage);
BrotliWriteBits(2, 1, storage_ix, storage);
BrotliWriteBits(20, len - 1, storage_ix, storage);
}
/* ISUNCOMPRESSED */
WriteBits(1, is_uncompressed, storage_ix, storage);
BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}
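For instance (illustrative): len = 100000 exceeds 1 << 16, so the header is ISLAST = 0 (one bit), the MNIBBLES-is-5 code 1 (two bits), then 99999 in twenty bits, followed by the ISUNCOMPRESSED bit.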
static void UpdateBits(size_t n_bits,
uint32_t bits,
size_t pos,
uint8_t *array) {
static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
uint8_t *array) {
while (n_bits > 0) {
size_t byte_pos = pos >> 3;
size_t n_unchanged_bits = pos & 7;
size_t n_changed_bits = std::min(n_bits, 8 - n_unchanged_bits);
size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
size_t total_bits = n_unchanged_bits + n_changed_bits;
uint32_t mask = (~((1 << total_bits) - 1)) | ((1 << n_unchanged_bits) - 1);
uint32_t mask =
(~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
uint32_t unchanged_bits = array[byte_pos] & mask;
uint32_t changed_bits = bits & ((1 << n_changed_bits) - 1);
uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
array[byte_pos] =
static_cast<uint8_t>((changed_bits << n_unchanged_bits) |
unchanged_bits);
(uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
n_bits -= n_changed_bits;
bits >>= n_changed_bits;
pos += n_changed_bits;
@@ -342,69 +362,72 @@ static void RewindBitPosition(const size_t new_storage_ix,
size_t* storage_ix, uint8_t* storage) {
const size_t bitpos = new_storage_ix & 7;
const size_t mask = (1u << bitpos) - 1;
storage[new_storage_ix >> 3] &= static_cast<uint8_t>(mask);
storage[new_storage_ix >> 3] &= (uint8_t)mask;
*storage_ix = new_storage_ix;
}
static bool ShouldMergeBlock(const uint8_t* data, size_t len,
const uint8_t* depths) {
static int ShouldMergeBlock(const uint8_t* data, size_t len,
const uint8_t* depths) {
size_t histo[256] = { 0 };
static const size_t kSampleRate = 43;
for (size_t i = 0; i < len; i += kSampleRate) {
size_t i;
for (i = 0; i < len; i += kSampleRate) {
++histo[data[i]];
}
const size_t total = (len + kSampleRate - 1) / kSampleRate;
double r = (FastLog2(total) + 0.5) * static_cast<double>(total) + 200;
for (size_t i = 0; i < 256; ++i) {
r -= static_cast<double>(histo[i]) * (depths[i] + FastLog2(histo[i]));
{
const size_t total = (len + kSampleRate - 1) / kSampleRate;
double r = (FastLog2(total) + 0.5) * (double)total + 200;
for (i = 0; i < 256; ++i) {
r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
}
return (r >= 0.0) ? 1 : 0;
}
return r >= 0.0;
}
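In other words (a reading of the formula, not a quote from the source): since total * FastLog2(total) - sum(histo[i] * FastLog2(histo[i])) is the sample's order-0 entropy, r >= 0 holds exactly when the cost under the existing depths, sum(histo[i] * depths[i]), stays within that entropy plus 0.5 bits per sample plus a 200-bit slack, i.e. the old prefix code is still close enough to ideal for the new data.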
inline bool ShouldUseUncompressedMode(const uint8_t* metablock_start,
const uint8_t* next_emit,
const size_t insertlen,
const uint8_t literal_depths[256]) {
const size_t compressed = static_cast<size_t>(next_emit - metablock_start);
/* Acceptable loss for uncompressible speedup is 2% */
#define MIN_RATIO 980
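(For scale: literal_ratio is in millibytes per character, so 980 means the estimated literal code already needs 0.98 bytes per input byte; storing such a block uncompressed costs at most the 2% mentioned above.)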
static BROTLI_INLINE int ShouldUseUncompressedMode(
const uint8_t* metablock_start, const uint8_t* next_emit,
const size_t insertlen, const size_t literal_ratio) {
const size_t compressed = (size_t)(next_emit - metablock_start);
if (compressed * 50 > insertlen) {
return false;
return 0;
} else {
return (literal_ratio > MIN_RATIO) ? 1 : 0;
}
static const double kAcceptableLossForUncompressibleSpeedup = 0.02;
static const double kMinEntropy =
8 * (1.0 - kAcceptableLossForUncompressibleSpeedup);
uint32_t sum = 0;
for (int i = 0; i < 256; ++i) {
const uint32_t n = literal_depths[i];
sum += n << (15 - n);
}
return sum > static_cast<uint32_t>((1 << 15) * kMinEntropy);
}
static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
const size_t storage_ix_start,
size_t* storage_ix, uint8_t* storage) {
const size_t len = static_cast<size_t>(end - begin);
const size_t len = (size_t)(end - begin);
RewindBitPosition(storage_ix_start, storage_ix, storage);
StoreMetaBlockHeader(len, 1, storage_ix, storage);
BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
memcpy(&storage[*storage_ix >> 3], begin, len);
*storage_ix += len << 3;
storage[*storage_ix >> 3] = 0;
}
void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
bool is_last,
static uint32_t kCmdHistoSeed[128] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 0,
};
void BrotliCompressFragmentFast(MemoryManager* m,
const uint8_t* input, size_t input_size,
int is_last,
int* table, size_t table_size,
uint8_t cmd_depth[128], uint16_t cmd_bits[128],
size_t* cmd_code_numbits, uint8_t* cmd_code,
size_t* storage_ix, uint8_t* storage) {
if (input_size == 0) {
assert(is_last);
WriteBits(1, 1, storage_ix, storage); // islast
WriteBits(1, 1, storage_ix, storage); // isempty
*storage_ix = (*storage_ix + 7u) & ~7u;
return;
}
uint32_t cmd_histo[128];
const uint8_t* ip_end;
/* "next_emit" is a pointer to the first byte that is not covered by a
previous copy. Bytes between "next_emit" and the start of the next copy or
@@ -417,66 +440,81 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
static const size_t kFirstBlockSize = 3 << 15;
static const size_t kMergeBlockSize = 1 << 16;
const size_t kInputMarginBytes = 16;
const size_t kMinMatchLen = 5;
const uint8_t* metablock_start = input;
size_t block_size = std::min(input_size, kFirstBlockSize);
size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
size_t total_block_size = block_size;
/* Save the bit position of the MLEN field of the meta-block header, so that
we can update it later if we decide to extend this meta-block. */
size_t mlen_storage_ix = *storage_ix + 3;
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
// No block splits, no contexts.
WriteBits(13, 0, storage_ix, storage);
uint8_t lit_depth[256] = { 0 };
uint16_t lit_bits[256] = { 0 };
BuildAndStoreLiteralPrefixCode(input, block_size, lit_depth, lit_bits,
storage_ix, storage);
uint8_t lit_depth[256];
uint16_t lit_bits[256];
// Store the pre-compressed command and distance prefix codes.
for (size_t i = 0; i + 7 < *cmd_code_numbits; i += 8) {
WriteBits(8, cmd_code[i >> 3], storage_ix, storage);
size_t literal_ratio;
const uint8_t* ip;
int last_distance;
const size_t shift = 64u - Log2FloorNonZero(table_size);
assert(table_size);
assert(table_size <= (1u << 31));
/* table must be power of two */
assert((table_size & (table_size - 1)) == 0);
assert(table_size - 1 ==
(size_t)(MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
if (input_size == 0) {
assert(is_last);
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
return;
}
WriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
storage_ix, storage);
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
/* No block splits, no contexts. */
BrotliWriteBits(13, 0, storage_ix, storage);
literal_ratio = BuildAndStoreLiteralPrefixCode(
m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
{
/* Store the pre-compressed command and distance prefix codes. */
size_t i;
for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
}
}
BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
storage_ix, storage);
emit_commands:
/* Initialize the command and distance histograms. We will gather
statistics of command and distance codes during the processing
of this block and use it to update the command and distance
prefix codes for the next block. */
uint32_t cmd_histo[128] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 0,
};
memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
// "ip" is the input pointer.
const uint8_t* ip = input;
assert(table_size);
assert(table_size <= (1u << 31));
assert((table_size & (table_size - 1)) == 0); // table must be power of two
const size_t shift = 64u - Log2FloorNonZero(table_size);
assert(table_size - 1 == static_cast<size_t>(
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
const uint8_t* ip_end = input + block_size;
/* "ip" is the input pointer. */
ip = input;
last_distance = -1;
ip_end = input + block_size;
int last_distance = -1;
const size_t kInputMarginBytes = 16;
const size_t kMinMatchLen = 5;
if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
/* For the last block, we need to keep a 16-byte margin so that we can be
sure that all distances are at most window size - 16.
For all other blocks, we only need to keep a margin of 5 bytes so that
we don't go over the block size with a copy. */
const size_t len_limit = std::min(block_size - kMinMatchLen,
input_size - kInputMarginBytes);
const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
input_size - kInputMarginBytes);
const uint8_t* ip_limit = input + len_limit;
for (uint32_t next_hash = Hash(++ip, shift); ; ) {
assert(next_emit < ip);
uint32_t next_hash;
for (next_hash = Hash(++ip, shift); ; ) {
/* Step 1: Scan forward in the input looking for a 5-byte-long match.
If we get close to exhausting the input then goto emit_remainder.
@ -496,11 +534,13 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
const uint8_t* next_ip = ip;
const uint8_t* candidate;
assert(next_emit < ip);
do {
ip = next_ip;
uint32_t hash = next_hash;
assert(hash == Hash(ip, shift));
uint32_t bytes_between_hash_lookups = skip++ >> 5;
assert(hash == Hash(next_ip, shift));
ip = next_ip;
next_ip = ip + bytes_between_hash_lookups;
if (PREDICT_FALSE(next_ip > ip_limit)) {
goto emit_remainder;
@ -509,7 +549,7 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
candidate = ip - last_distance;
if (IsMatch(ip, candidate)) {
if (PREDICT_TRUE(candidate < ip)) {
table[hash] = static_cast<int>(ip - base_ip);
table[hash] = (int)(ip - base_ip);
break;
}
}
@ -517,33 +557,32 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
assert(candidate >= base_ip);
assert(candidate < ip);
table[hash] = static_cast<int>(ip - base_ip);
table[hash] = (int)(ip - base_ip);
} while (PREDICT_TRUE(!IsMatch(ip, candidate)));
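/* Reviewer note (illustrative): the probe stride "skip++ >> 5" above
   lengthens by one byte every 32 missed hash lookups, so long runs of
   incompressible input are scanned progressively faster, while compressible
   regions still get probed densely. */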
/* Step 2: Emit the found match together with the literal bytes from
"next_emit" to the bit stream, and then see if we can find a next macth
immediately afterwards. Repeat until we find no match for the input
without emitting some literal bytes. */
uint64_t input_bytes;
{
/* We have a 5-byte match at ip, and we need to emit bytes in
[next_emit, ip). */
const uint8_t* base = ip;
size_t matched = 5 + FindMatchLengthWithLimit(
candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
int distance = (int)(base - candidate); /* > 0 */
size_t insert = (size_t)(base - next_emit);
ip += matched;
int distance = static_cast<int>(base - candidate); /* > 0 */
size_t insert = static_cast<size_t>(base - next_emit);
assert(0 == memcmp(base, candidate, matched));
if (PREDICT_TRUE(insert < 6210)) {
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
lit_depth)) {
literal_ratio)) {
EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
storage_ix, storage);
input_size -= static_cast<size_t>(base - input);
input_size -= (size_t)(base - input);
input = base;
next_emit = input;
goto next_block;
@ -554,10 +593,10 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
EmitLiterals(next_emit, insert, lit_depth, lit_bits,
storage_ix, storage);
if (distance == last_distance) {
WriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
++cmd_histo[64];
} else {
EmitDistance(static_cast<size_t>(distance), cmd_depth, cmd_bits,
EmitDistance((size_t)distance, cmd_depth, cmd_bits,
cmd_histo, storage_ix, storage);
last_distance = distance;
}
@ -571,17 +610,19 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some positions
within the last copy. */
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
{
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
table[prev_hash] = (int)(ip - base_ip - 3);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = (int)(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = (int)(ip - base_ip - 1);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
candidate = base_ip + table[cur_hash];
table[cur_hash] = static_cast<int>(ip - base_ip);
candidate = base_ip + table[cur_hash];
table[cur_hash] = (int)(ip - base_ip);
}
}
while (IsMatch(ip, candidate)) {
@ -589,13 +630,13 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
prior to ip. */
const uint8_t* base = ip;
size_t matched = 5 + FindMatchLengthWithLimit(
candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
ip += matched;
last_distance = static_cast<int>(base - candidate); /* > 0 */
last_distance = (int)(base - candidate); /* > 0 */
assert(0 == memcmp(base, candidate, matched));
EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
EmitDistance(static_cast<size_t>(last_distance), cmd_depth, cmd_bits,
EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
cmd_histo, storage_ix, storage);
next_emit = ip;
@ -605,17 +646,19 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some positions
within the last copy. */
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
{
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
table[prev_hash] = (int)(ip - base_ip - 3);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = (int)(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = (int)(ip - base_ip - 1);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
candidate = base_ip + table[cur_hash];
table[cur_hash] = static_cast<int>(ip - base_ip);
candidate = base_ip + table[cur_hash];
table[cur_hash] = (int)(ip - base_ip);
}
}
next_hash = Hash(++ip, shift);
@ -626,7 +669,7 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
assert(next_emit <= ip_end);
input += block_size;
input_size -= block_size;
block_size = std::min(input_size, kMergeBlockSize);
block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);
/* Decide if we want to continue this meta-block instead of emitting the
last insert-only command. */
@ -638,20 +681,19 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
We can do this because the current size and the new size both have 5
nibbles. */
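/* Concretely: kFirstBlockSize = 3 << 15 = 98304 exceeds 1 << 16, so the
   header was emitted with MNIBBLES = 5, i.e. a 20-bit MLEN field, and any
   merged total up to 1 << 20 can be patched into those same 20 bits. */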
total_block_size += block_size;
UpdateBits(20, static_cast<uint32_t>(total_block_size - 1),
mlen_storage_ix, storage);
UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
goto emit_commands;
}
/* Emit the remaining bytes as literals. */
if (next_emit < ip_end) {
const size_t insert = static_cast<size_t>(ip_end - next_emit);
const size_t insert = (size_t)(ip_end - next_emit);
if (PREDICT_TRUE(insert < 6210)) {
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
lit_depth)) {
literal_ratio)) {
EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
storage_ix, storage);
} else {
@ -668,26 +710,25 @@ next_block:
then continue emitting commands. */
if (input_size > 0) {
metablock_start = input;
block_size = std::min(input_size, kFirstBlockSize);
block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
total_block_size = block_size;
/* Save the bit position of the MLEN field of the meta-block header, so that
we can update it later if we decide to extend this meta-block. */
mlen_storage_ix = *storage_ix + 3;
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
/* No block splits, no contexts. */
WriteBits(13, 0, storage_ix, storage);
memset(lit_depth, 0, sizeof(lit_depth));
memset(lit_bits, 0, sizeof(lit_bits));
BuildAndStoreLiteralPrefixCode(input, block_size, lit_depth, lit_bits,
storage_ix, storage);
BrotliWriteBits(13, 0, storage_ix, storage);
literal_ratio = BuildAndStoreLiteralPrefixCode(
m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
storage_ix, storage);
goto emit_commands;
}
if (is_last) {
WriteBits(1, 1, storage_ix, storage); /* islast */
WriteBits(1, 1, storage_ix, storage); /* isempty */
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
} else {
/* If this is not the last block, update the command and distance prefix
@ -699,4 +740,6 @@ next_block:
}
}
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif


@ -13,8 +13,12 @@
#define BROTLI_ENC_COMPRESS_FRAGMENT_H_
#include "../common/types.h"
#include "./memory.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
/* Compresses "input" string to the "*storage" buffer as one or more complete
meta-blocks, and updates the "*storage_ix" bit position.
@ -35,13 +39,20 @@ namespace brotli {
REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
REQUIRES: "table_size" is a power of two */
void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
bool is_last,
int* table, size_t table_size,
uint8_t cmd_depth[128], uint16_t cmd_bits[128],
size_t* cmd_code_numbits, uint8_t* cmd_code,
size_t* storage_ix, uint8_t* storage);
BROTLI_INTERNAL void BrotliCompressFragmentFast(MemoryManager* m,
const uint8_t* input,
size_t input_size,
int is_last,
int* table, size_t table_size,
uint8_t cmd_depth[128],
uint16_t cmd_bits[128],
size_t* cmd_code_numbits,
uint8_t* cmd_code,
size_t* storage_ix,
uint8_t* storage);
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_COMPRESS_FRAGMENT_H_ */


@ -12,7 +12,7 @@
#include "./compress_fragment_two_pass.h"
#include <algorithm>
#include <string.h> /* memcmp, memcpy, memset */
#include "../common/types.h"
#include "./bit_cost.h"
@ -20,10 +20,14 @@
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./memory.h"
#include "./port.h"
#include "./write_bits.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
/* kHashMul32 multiplier has these properties:
* The multiplier must be odd. Otherwise we may lose the highest bit.
@ -33,19 +37,22 @@ namespace brotli {
* The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;
static inline uint32_t Hash(const uint8_t* p, size_t shift) {
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 16) * kHashMul32;
return static_cast<uint32_t>(h >> shift);
return (uint32_t)(h >> shift);
}
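For intuition, here is a small self-contained check of the shift arithmetic
(the table size and probe word below are illustrative, not taken from this
diff):
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
/* Mirrors Hash(): fold the 64-bit product down to a table index. */
static uint32_t ToyHash(uint64_t word, size_t shift) {
  const uint64_t kToyHashMul32 = 0x1e35a7bd;
  return (uint32_t)(((word << 16) * kToyHashMul32) >> shift);
}
int main(void) {
  const size_t table_bits = 15;           /* table_size = 1 << 15 */
  const size_t shift = 64u - table_bits;  /* 49 */
  uint32_t bucket = ToyHash(UINT64_C(0x0102030405060708), shift);
  assert(bucket < ((uint32_t)1 << table_bits));  /* index always in range */
  (void)bucket;  /* silence unused warning under NDEBUG */
  return 0;
}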
static inline uint32_t HashBytesAtOffset(uint64_t v, int offset, size_t shift) {
static BROTLI_INLINE uint32_t HashBytesAtOffset(
uint64_t v, int offset, size_t shift) {
assert(offset >= 0);
assert(offset <= 2);
const uint64_t h = ((v >> (8 * offset)) << 16) * kHashMul32;
return static_cast<uint32_t>(h >> shift);
{
const uint64_t h = ((v >> (8 * offset)) << 16) * kHashMul32;
return (uint32_t)(h >> shift);
}
}
static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
static BROTLI_INLINE int IsMatch(const uint8_t* p1, const uint8_t* p2) {
return (BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
p1[4] == p2[4] &&
p1[5] == p2[5]);
@ -58,64 +65,66 @@ static void BuildAndStoreCommandPrefixCode(
uint8_t depth[128], uint16_t bits[128],
size_t* storage_ix, uint8_t* storage) {
/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
static const size_t kTreeSize = 129;
HuffmanTree tree[kTreeSize];
CreateHuffmanTree(histogram, 64, 15, tree, depth);
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
HuffmanTree tree[129];
uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
uint16_t cmd_bits[64];
BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
/* We have to jump through a few hoops here in order to compute
the command bits because the symbols are in a different order than in
the full alphabet. This looks complicated, but having the symbols
in this order in the command bits saves a few branches in the Emit*
functions. */
uint8_t cmd_depth[64];
uint16_t cmd_bits[64];
memcpy(cmd_depth, depth + 24, 24);
memcpy(cmd_depth + 24, depth, 8);
memcpy(cmd_depth + 32, depth + 48, 8);
memcpy(cmd_depth + 40, depth + 8, 8);
memcpy(cmd_depth + 48, depth + 56, 8);
memcpy(cmd_depth + 56, depth + 16, 8);
ConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
memcpy(bits, cmd_bits + 24, 16);
memcpy(bits + 8, cmd_bits + 40, 16);
memcpy(bits + 16, cmd_bits + 56, 16);
memcpy(bits + 24, cmd_bits, 48);
memcpy(bits + 48, cmd_bits + 32, 16);
memcpy(bits + 56, cmd_bits + 48, 16);
ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
{
/* Create the bit length array for the full command alphabet. */
uint8_t cmd_depth[704] = { 0 };
size_t i;
memset(cmd_depth, 0, 64); /* only 64 first values were used */
memcpy(cmd_depth, depth + 24, 8);
memcpy(cmd_depth + 64, depth + 32, 8);
memcpy(cmd_depth + 128, depth + 40, 8);
memcpy(cmd_depth + 192, depth + 48, 8);
memcpy(cmd_depth + 384, depth + 56, 8);
for (size_t i = 0; i < 8; ++i) {
for (i = 0; i < 8; ++i) {
cmd_depth[128 + 8 * i] = depth[i];
cmd_depth[256 + 8 * i] = depth[8 + i];
cmd_depth[448 + 8 * i] = depth[16 + i];
}
StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
BrotliStoreHuffmanTree(
cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
}
StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}
inline void EmitInsertLen(uint32_t insertlen, uint32_t** commands) {
static BROTLI_INLINE void EmitInsertLen(
uint32_t insertlen, uint32_t** commands) {
if (insertlen < 6) {
**commands = insertlen;
} else if (insertlen < 130) {
insertlen -= 2;
const uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
const uint32_t prefix = insertlen >> nbits;
const uint32_t tail = insertlen - 2;
const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
const uint32_t prefix = tail >> nbits;
const uint32_t inscode = (nbits << 1) + prefix + 2;
const uint32_t extra = insertlen - (prefix << nbits);
const uint32_t extra = tail - (prefix << nbits);
**commands = inscode | (extra << 8);
} else if (insertlen < 2114) {
insertlen -= 66;
const uint32_t nbits = Log2FloorNonZero(insertlen);
const uint32_t tail = insertlen - 66;
const uint32_t nbits = Log2FloorNonZero(tail);
const uint32_t code = nbits + 10;
const uint32_t extra = insertlen - (1 << nbits);
const uint32_t extra = tail - (1u << nbits);
**commands = code | (extra << 8);
} else if (insertlen < 6210) {
const uint32_t extra = insertlen - 2114;
@ -130,108 +139,103 @@ inline void EmitInsertLen(uint32_t insertlen, uint32_t** commands) {
++(*commands);
}
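/* Worked example: insertlen = 100 takes the [6, 130) branch: tail = 98,
   nbits = Log2FloorNonZero(98) - 1 = 5, prefix = 98 >> 5 = 3,
   inscode = (5 << 1) + 3 + 2 = 15, extra = 98 - (3 << 5) = 2, so the
   packed command word is 15 | (2 << 8). */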
inline void EmitCopyLen(size_t copylen, uint32_t** commands) {
static BROTLI_INLINE void EmitCopyLen(size_t copylen, uint32_t** commands) {
if (copylen < 10) {
**commands = static_cast<uint32_t>(copylen + 38);
**commands = (uint32_t)(copylen + 38);
} else if (copylen < 134) {
copylen -= 6;
const size_t nbits = Log2FloorNonZero(copylen) - 1;
const size_t prefix = copylen >> nbits;
const size_t tail = copylen - 6;
const size_t nbits = Log2FloorNonZero(tail) - 1;
const size_t prefix = tail >> nbits;
const size_t code = (nbits << 1) + prefix + 44;
const size_t extra = copylen - (prefix << nbits);
**commands = static_cast<uint32_t>(code | (extra << 8));
const size_t extra = tail - (prefix << nbits);
**commands = (uint32_t)(code | (extra << 8));
} else if (copylen < 2118) {
copylen -= 70;
const size_t nbits = Log2FloorNonZero(copylen);
const size_t tail = copylen - 70;
const size_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 52;
const size_t extra = copylen - (1 << nbits);
**commands = static_cast<uint32_t>(code | (extra << 8));
const size_t extra = tail - (1u << nbits);
**commands = (uint32_t)(code | (extra << 8));
} else {
const size_t extra = copylen - 2118;
**commands = static_cast<uint32_t>(63 | (extra << 8));
**commands = (uint32_t)(63 | (extra << 8));
}
++(*commands);
}
inline void EmitCopyLenLastDistance(size_t copylen, uint32_t** commands) {
static BROTLI_INLINE void EmitCopyLenLastDistance(
size_t copylen, uint32_t** commands) {
if (copylen < 12) {
**commands = static_cast<uint32_t>(copylen + 20);
**commands = (uint32_t)(copylen + 20);
++(*commands);
} else if (copylen < 72) {
copylen -= 8;
const size_t nbits = Log2FloorNonZero(copylen) - 1;
const size_t prefix = copylen >> nbits;
const size_t tail = copylen - 8;
const size_t nbits = Log2FloorNonZero(tail) - 1;
const size_t prefix = tail >> nbits;
const size_t code = (nbits << 1) + prefix + 28;
const size_t extra = copylen - (prefix << nbits);
**commands = static_cast<uint32_t>(code | (extra << 8));
const size_t extra = tail - (prefix << nbits);
**commands = (uint32_t)(code | (extra << 8));
++(*commands);
} else if (copylen < 136) {
copylen -= 8;
const size_t code = (copylen >> 5) + 54;
const size_t extra = copylen & 31;
**commands = static_cast<uint32_t>(code | (extra << 8));
const size_t tail = copylen - 8;
const size_t code = (tail >> 5) + 54;
const size_t extra = tail & 31;
**commands = (uint32_t)(code | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
} else if (copylen < 2120) {
copylen -= 72;
const size_t nbits = Log2FloorNonZero(copylen);
const size_t tail = copylen - 72;
const size_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 52;
const size_t extra = copylen - (1 << nbits);
**commands = static_cast<uint32_t>(code | (extra << 8));
const size_t extra = tail - (1u << nbits);
**commands = (uint32_t)(code | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
} else {
const size_t extra = copylen - 2120;
**commands = static_cast<uint32_t>(63 | (extra << 8));
**commands = (uint32_t)(63 | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
}
}
inline void EmitDistance(uint32_t distance, uint32_t** commands) {
distance += 3;
uint32_t nbits = Log2FloorNonZero(distance) - 1;
const uint32_t prefix = (distance >> nbits) & 1;
static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
uint32_t d = distance + 3;
uint32_t nbits = Log2FloorNonZero(d) - 1;
const uint32_t prefix = (d >> nbits) & 1;
const uint32_t offset = (2 + prefix) << nbits;
const uint32_t distcode = 2 * (nbits - 1) + prefix + 80;
uint32_t extra = distance - offset;
uint32_t extra = d - offset;
**commands = distcode | (extra << 8);
++(*commands);
}
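/* Worked example: distance = 100 gives d = 103,
   nbits = Log2FloorNonZero(103) - 1 = 5, prefix = (103 >> 5) & 1 = 1,
   offset = (2 + 1) << 5 = 96, distcode = 2 * (5 - 1) + 1 + 80 = 89,
   extra = 103 - 96 = 7; the emitted word is 89 | (7 << 8). */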
/* REQUIRES: len <= 1 << 20. */
static void StoreMetaBlockHeader(
size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
static void BrotliStoreMetaBlockHeader(
size_t len, int is_uncompressed, size_t* storage_ix, uint8_t* storage) {
/* ISLAST */
WriteBits(1, 0, storage_ix, storage);
BrotliWriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
/* MNIBBLES is 4 */
WriteBits(2, 0, storage_ix, storage);
WriteBits(16, len - 1, storage_ix, storage);
BrotliWriteBits(2, 0, storage_ix, storage);
BrotliWriteBits(16, len - 1, storage_ix, storage);
} else {
/* MNIBBLES is 5 */
WriteBits(2, 1, storage_ix, storage);
WriteBits(20, len - 1, storage_ix, storage);
BrotliWriteBits(2, 1, storage_ix, storage);
BrotliWriteBits(20, len - 1, storage_ix, storage);
}
/* ISUNCOMPRESSED */
WriteBits(1, is_uncompressed, storage_ix, storage);
BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}
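/* Example: len = 4096 fits in 16 bits, so the header costs
   1 (ISLAST = 0) + 2 (MNIBBLES code) + 16 (MLEN - 1) + 1 (ISUNCOMPRESSED)
   = 20 bits; lengths above 1 << 16 take the 20-bit MLEN form, 24 bits in
   total. */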
static void CreateCommands(const uint8_t* input, size_t block_size,
size_t input_size, const uint8_t* base_ip,
int* table, size_t table_size,
uint8_t** literals, uint32_t** commands) {
size_t input_size, const uint8_t* base_ip, int* table, size_t table_size,
uint8_t** literals, uint32_t** commands) {
/* "ip" is the input pointer. */
const uint8_t* ip = input;
assert(table_size);
assert(table_size <= (1u << 31));
assert((table_size & (table_size - 1)) == 0); // table must be power of two
const size_t shift = 64u - Log2FloorNonZero(table_size);
assert(table_size - 1 == static_cast<size_t>(
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
const uint8_t* ip_end = input + block_size;
/* "next_emit" is a pointer to the first byte that is not covered by a
previous copy. Bytes between "next_emit" and the start of the next copy or
@ -241,17 +245,25 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
int last_distance = -1;
const size_t kInputMarginBytes = 16;
const size_t kMinMatchLen = 6;
assert(table_size);
assert(table_size <= (1u << 31));
/* table must be power of two */
assert((table_size & (table_size - 1)) == 0);
assert(table_size - 1 ==
(size_t)(MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
/* For the last block, we need to keep a 16-byte margin so that we can be
sure that all distances are at most window size - 16.
For all other blocks, we only need to keep a margin of 5 bytes so that
we don't go over the block size with a copy. */
const size_t len_limit = std::min(block_size - kMinMatchLen,
input_size - kInputMarginBytes);
const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
input_size - kInputMarginBytes);
const uint8_t* ip_limit = input + len_limit;
for (uint32_t next_hash = Hash(++ip, shift); ; ) {
assert(next_emit < ip);
uint32_t next_hash;
for (next_hash = Hash(++ip, shift); ; ) {
/* Step 1: Scan forward in the input looking for a 6-byte-long match.
If we get close to exhausting the input then goto emit_remainder.
@ -271,11 +283,14 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
const uint8_t* next_ip = ip;
const uint8_t* candidate;
assert(next_emit < ip);
do {
ip = next_ip;
uint32_t hash = next_hash;
assert(hash == Hash(ip, shift));
uint32_t bytes_between_hash_lookups = skip++ >> 5;
ip = next_ip;
assert(hash == Hash(ip, shift));
next_ip = ip + bytes_between_hash_lookups;
if (PREDICT_FALSE(next_ip > ip_limit)) {
goto emit_remainder;
@ -284,7 +299,7 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
candidate = ip - last_distance;
if (IsMatch(ip, candidate)) {
if (PREDICT_TRUE(candidate < ip)) {
table[hash] = static_cast<int>(ip - base_ip);
table[hash] = (int)(ip - base_ip);
break;
}
}
@ -292,33 +307,32 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
assert(candidate >= base_ip);
assert(candidate < ip);
table[hash] = static_cast<int>(ip - base_ip);
table[hash] = (int)(ip - base_ip);
} while (PREDICT_TRUE(!IsMatch(ip, candidate)));
/* Step 2: Emit the found match together with the literal bytes from
"next_emit", and then see if we can find a next macth immediately
afterwards. Repeat until we find no match for the input
without emitting some literal bytes. */
uint64_t input_bytes;
{
/* We have a 6-byte match at ip, and we need to emit bytes in
[next_emit, ip). */
const uint8_t* base = ip;
size_t matched = 6 + FindMatchLengthWithLimit(
candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
candidate + 6, ip + 6, (size_t)(ip_end - ip) - 6);
int distance = (int)(base - candidate); /* > 0 */
int insert = (int)(base - next_emit);
ip += matched;
int distance = static_cast<int>(base - candidate); /* > 0 */
int insert = static_cast<int>(base - next_emit);
assert(0 == memcmp(base, candidate, matched));
EmitInsertLen(static_cast<uint32_t>(insert), commands);
memcpy(*literals, next_emit, static_cast<size_t>(insert));
EmitInsertLen((uint32_t)insert, commands);
memcpy(*literals, next_emit, (size_t)insert);
*literals += insert;
if (distance == last_distance) {
**commands = 64;
++(*commands);
} else {
EmitDistance(static_cast<uint32_t>(distance), commands);
EmitDistance((uint32_t)distance, commands);
last_distance = distance;
}
EmitCopyLenLastDistance(matched, commands);
@ -327,25 +341,28 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
{
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some
positions within the last copy. */
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 5);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 4);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
uint32_t cur_hash;
table[prev_hash] = (int)(ip - base_ip - 5);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = (int)(ip - base_ip - 4);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = (int)(ip - base_ip - 3);
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = (int)(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = (int)(ip - base_ip - 1);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
candidate = base_ip + table[cur_hash];
table[cur_hash] = static_cast<int>(ip - base_ip);
candidate = base_ip + table[cur_hash];
table[cur_hash] = (int)(ip - base_ip);
}
}
while (IsMatch(ip, candidate)) {
@ -353,36 +370,39 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
literal bytes prior to ip. */
const uint8_t* base = ip;
size_t matched = 6 + FindMatchLengthWithLimit(
candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
candidate + 6, ip + 6, (size_t)(ip_end - ip) - 6);
ip += matched;
last_distance = static_cast<int>(base - candidate); /* > 0 */
last_distance = (int)(base - candidate); /* > 0 */
assert(0 == memcmp(base, candidate, matched));
EmitCopyLen(matched, commands);
EmitDistance(static_cast<uint32_t>(last_distance), commands);
EmitDistance((uint32_t)last_distance, commands);
next_emit = ip;
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
{
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some
positions within the last copy. */
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 5);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 4);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
uint32_t cur_hash;
table[prev_hash] = (int)(ip - base_ip - 5);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = (int)(ip - base_ip - 4);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = (int)(ip - base_ip - 3);
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = (int)(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = (int)(ip - base_ip - 1);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
candidate = base_ip + table[cur_hash];
table[cur_hash] = static_cast<int>(ip - base_ip);
candidate = base_ip + table[cur_hash];
table[cur_hash] = (int)(ip - base_ip);
}
}
next_hash = Hash(++ip, shift);
@ -393,40 +413,17 @@ emit_remainder:
assert(next_emit <= ip_end);
/* Emit the remaining bytes as literals. */
if (next_emit < ip_end) {
const uint32_t insert = static_cast<uint32_t>(ip_end - next_emit);
const uint32_t insert = (uint32_t)(ip_end - next_emit);
EmitInsertLen(insert, commands);
memcpy(*literals, next_emit, insert);
*literals += insert;
}
}
static void StoreCommands(const uint8_t* literals, const size_t num_literals,
static void StoreCommands(MemoryManager* m,
const uint8_t* literals, const size_t num_literals,
const uint32_t* commands, const size_t num_commands,
size_t* storage_ix, uint8_t* storage) {
uint8_t lit_depths[256] = { 0 };
uint16_t lit_bits[256] = { 0 };
uint32_t lit_histo[256] = { 0 };
for (size_t i = 0; i < num_literals; ++i) {
++lit_histo[literals[i]];
}
BuildAndStoreHuffmanTreeFast(lit_histo, num_literals,
/* max_bits = */ 8,
lit_depths, lit_bits,
storage_ix, storage);
uint8_t cmd_depths[128] = { 0 };
uint16_t cmd_bits[128] = { 0 };
uint32_t cmd_histo[128] = { 0 };
for (size_t i = 0; i < num_commands; ++i) {
++cmd_histo[commands[i] & 0xff];
}
cmd_histo[1] += 1;
cmd_histo[2] += 1;
cmd_histo[64] += 1;
cmd_histo[84] += 1;
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
storage_ix, storage);
static const uint32_t kNumExtraBits[128] = {
0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
@ -441,45 +438,73 @@ static void StoreCommands(const uint8_t* literals, const size_t num_literals,
1090, 2114, 6210, 22594,
};
for (size_t i = 0; i < num_commands; ++i) {
uint8_t lit_depths[256];
uint16_t lit_bits[256];
uint32_t lit_histo[256] = { 0 };
uint8_t cmd_depths[128] = { 0 };
uint16_t cmd_bits[128] = { 0 };
uint32_t cmd_histo[128] = { 0 };
size_t i;
for (i = 0; i < num_literals; ++i) {
++lit_histo[literals[i]];
}
BrotliBuildAndStoreHuffmanTreeFast(m, lit_histo, num_literals,
/* max_bits = */ 8,
lit_depths, lit_bits,
storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < num_commands; ++i) {
++cmd_histo[commands[i] & 0xff];
}
cmd_histo[1] += 1;
cmd_histo[2] += 1;
cmd_histo[64] += 1;
cmd_histo[84] += 1;
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
storage_ix, storage);
for (i = 0; i < num_commands; ++i) {
const uint32_t cmd = commands[i];
const uint32_t code = cmd & 0xff;
const uint32_t extra = cmd >> 8;
WriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
WriteBits(kNumExtraBits[code], extra, storage_ix, storage);
BrotliWriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
if (code < 24) {
const uint32_t insert = kInsertOffset[code] + extra;
for (uint32_t j = 0; j < insert; ++j) {
uint32_t j;
for (j = 0; j < insert; ++j) {
const uint8_t lit = *literals;
WriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
BrotliWriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
++literals;
}
}
}
}
static bool ShouldCompress(const uint8_t* input, size_t input_size,
size_t num_literals) {
static const double kAcceptableLossForUncompressibleSpeedup = 0.02;
static const double kMaxRatioOfLiterals =
1.0 - kAcceptableLossForUncompressibleSpeedup;
if (num_literals < kMaxRatioOfLiterals * static_cast<double>(input_size)) {
return true;
/* Acceptable loss for uncompressible speedup is 2% */
#define MIN_RATIO 0.98
#define SAMPLE_RATE 43
static int ShouldCompress(const uint8_t* input, size_t input_size,
size_t num_literals) {
double corpus_size = (double)input_size;
if (num_literals < MIN_RATIO * corpus_size) {
return 1;
} else {
uint32_t literal_histo[256] = { 0 };
const double max_total_bit_cost = corpus_size * 8 * MIN_RATIO / SAMPLE_RATE;
size_t i;
for (i = 0; i < input_size; i += SAMPLE_RATE) {
++literal_histo[input[i]];
}
return BitsEntropy(literal_histo, 256) < max_total_bit_cost;
}
uint32_t literal_histo[256] = { 0 };
static const uint32_t kSampleRate = 43;
static const double kMaxEntropy =
8 * (1.0 - kAcceptableLossForUncompressibleSpeedup);
const double max_total_bit_cost =
static_cast<double>(input_size) * kMaxEntropy / kSampleRate;
for (size_t i = 0; i < input_size; i += kSampleRate) {
++literal_histo[input[i]];
}
return BitsEntropy(literal_histo, 256) < max_total_bit_cost;
}
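In effect, the sampled test above budgets 8 * MIN_RATIO = 7.84 bits per
sampled byte: for a full 1 << 17 input that is roughly 3,000 samples against
a total allowance of about 23,899 bits, so only near-random data takes the
uncompressed fallback.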
void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
bool is_last,
void BrotliCompressFragmentTwoPass(MemoryManager* m,
const uint8_t* input, size_t input_size,
int is_last,
uint32_t* command_buf, uint8_t* literal_buf,
int* table, size_t table_size,
size_t* storage_ix, uint8_t* storage) {
@ -488,24 +513,27 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
const uint8_t* base_ip = input;
while (input_size > 0) {
size_t block_size = std::min(input_size, kCompressFragmentTwoPassBlockSize);
size_t block_size =
BROTLI_MIN(size_t, input_size, kCompressFragmentTwoPassBlockSize);
uint32_t* commands = command_buf;
uint8_t* literals = literal_buf;
size_t num_literals;
CreateCommands(input, block_size, input_size, base_ip, table, table_size,
&literals, &commands);
const size_t num_literals = static_cast<size_t>(literals - literal_buf);
const size_t num_commands = static_cast<size_t>(commands - command_buf);
num_literals = (size_t)(literals - literal_buf);
if (ShouldCompress(input, block_size, num_literals)) {
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
const size_t num_commands = (size_t)(commands - command_buf);
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
/* No block splits, no contexts. */
WriteBits(13, 0, storage_ix, storage);
StoreCommands(literal_buf, num_literals, command_buf, num_commands,
BrotliWriteBits(13, 0, storage_ix, storage);
StoreCommands(m, literal_buf, num_literals, command_buf, num_commands,
storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
} else {
/* Since we did not find many backward references and the entropy of
the data is close to 8 bits, we can simply emit an uncompressed block.
This makes compression of uncompressible data about 3x faster. */
StoreMetaBlockHeader(block_size, 1, storage_ix, storage);
BrotliStoreMetaBlockHeader(block_size, 1, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
memcpy(&storage[*storage_ix >> 3], input, block_size);
*storage_ix += block_size << 3;
@ -516,10 +544,12 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
}
if (is_last) {
WriteBits(1, 1, storage_ix, storage); /* islast */
WriteBits(1, 1, storage_ix, storage); /* isempty */
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
}
}
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif


@ -14,8 +14,12 @@
#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
#include "../common/types.h"
#include "./memory.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
@ -29,12 +33,19 @@ static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
kCompressFragmentTwoPassBlockSize long arrays.
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
REQUIRES: "table_size" is a power of two */
void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
bool is_last,
uint32_t* command_buf, uint8_t* literal_buf,
int* table, size_t table_size,
size_t* storage_ix, uint8_t* storage);
BROTLI_INTERNAL void BrotliCompressFragmentTwoPass(MemoryManager* m,
const uint8_t* input,
size_t input_size,
int is_last,
uint32_t* command_buf,
uint8_t* literal_buf,
int* table,
size_t table_size,
size_t* storage_ix,
uint8_t* storage);
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ */
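A hypothetical driver for the two-pass entry point, sized per the REQUIRES
comments above; it assumes an already-initialised MemoryManager (its
constructor is not part of this diff) and a |storage| buffer large enough for
the worst-case output:
#include <stdlib.h>  /* malloc, free */
static int TwoPassDemo(MemoryManager* m, const uint8_t* input, size_t size,
                       uint8_t* storage, size_t* storage_ix) {
  /* Zero-initialised, power-of-two-sized table; a real caller would re-zero
     it between independent streams. */
  static int table[1 << 14];
  uint32_t* command_buf = (uint32_t*)malloc(
      kCompressFragmentTwoPassBlockSize * sizeof(uint32_t));
  uint8_t* literal_buf = (uint8_t*)malloc(kCompressFragmentTwoPassBlockSize);
  if (command_buf == 0 || literal_buf == 0) {
    free(command_buf);
    free(literal_buf);
    return 0;
  }
  BrotliCompressFragmentTwoPass(m, input, size, /* is_last = */ 1,
                                command_buf, literal_buf,
                                table, sizeof(table) / sizeof(table[0]),
                                storage_ix, storage);
  free(command_buf);
  free(literal_buf);
  return BROTLI_IS_OOM(m) ? 0 : 1;
}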

enc/compressor.cc Normal file

@ -0,0 +1,138 @@
/* Copyright 2016 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Brotli compressor API C++ wrapper and utilities. */
#include "./compressor.h"
#include <cstdlib> /* exit */
namespace brotli {
static void ConvertParams(const BrotliParams* from, BrotliEncoderParams* to) {
BrotliEncoderParamsSetDefault(to);
if (from->mode == BrotliParams::MODE_TEXT) {
to->mode = BROTLI_MODE_TEXT;
} else if (from->mode == BrotliParams::MODE_FONT) {
to->mode = BROTLI_MODE_FONT;
}
to->quality = from->quality;
to->lgwin = from->lgwin;
to->lgblock = from->lgblock;
}
BrotliCompressor::BrotliCompressor(BrotliParams params) {
BrotliEncoderParams encoder_params;
ConvertParams(&params, &encoder_params);
state_ = BrotliEncoderCreateState(&encoder_params, 0, 0, 0);
if (state_ == 0) std::exit(EXIT_FAILURE); /* OOM */
}
BrotliCompressor::~BrotliCompressor(void) { BrotliEncoderDestroyState(state_); }
bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
const uint8_t* input_buffer,
const bool is_last, size_t* encoded_size,
uint8_t* encoded_buffer) {
return !!BrotliEncoderWriteMetaBlock(state_, input_size, input_buffer,
is_last ? 1 : 0, encoded_size,
encoded_buffer);
}
bool BrotliCompressor::WriteMetadata(const size_t input_size,
const uint8_t* input_buffer,
const bool is_last, size_t* encoded_size,
uint8_t* encoded_buffer) {
return !!BrotliEncoderWriteMetadata(state_, input_size, input_buffer,
is_last ? 1 : 0, encoded_size,
encoded_buffer);
}
bool BrotliCompressor::FinishStream(size_t* encoded_size,
uint8_t* encoded_buffer) {
return !!BrotliEncoderFinishStream(state_, encoded_size, encoded_buffer);
}
void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
const uint8_t* input_buffer) {
BrotliEncoderCopyInputToRingBuffer(state_, input_size, input_buffer);
}
bool BrotliCompressor::WriteBrotliData(const bool is_last,
const bool force_flush, size_t* out_size,
uint8_t** output) {
return !!BrotliEncoderWriteData(
state_, is_last ? 1 : 0, force_flush ? 1 : 0, out_size, output);
}
void BrotliCompressor::BrotliSetCustomDictionary(size_t size,
const uint8_t* dict) {
BrotliEncoderSetCustomDictionary(state_, size, dict);
}
int BrotliCompressBuffer(BrotliParams params, size_t input_size,
const uint8_t* input_buffer, size_t* encoded_size,
uint8_t* encoded_buffer) {
return BrotliEncoderCompress(params.quality, params.lgwin,
(BrotliEncoderMode)params.mode, input_size, input_buffer,
encoded_size, encoded_buffer);
}
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out) {
return BrotliCompressWithCustomDictionary(0, 0, params, in, out);
}
int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
BrotliParams params, BrotliIn* in,
BrotliOut* out) {
const size_t kOutputBufferSize = 65536;
uint8_t* output_buffer;
bool result = true;
size_t available_in = 0;
const uint8_t* next_in = NULL;
size_t total_out = 0;
bool end_of_input = false;
BrotliEncoderParams encoder_params;
BrotliEncoderState* s;
ConvertParams(&params, &encoder_params);
s = BrotliEncoderCreateState(&encoder_params, 0, 0, 0);
if (!s) return 0;
BrotliEncoderSetCustomDictionary(s, dictsize, dict);
output_buffer = new uint8_t[kOutputBufferSize];
while (true) {
if (available_in == 0 && !end_of_input) {
next_in = reinterpret_cast<const uint8_t*>(
in->Read(BrotliEncoderInputBlockSize(s), &available_in));
if (!next_in) {
end_of_input = true;
available_in = 0;
} else if (available_in == 0) {
continue;
}
}
size_t available_out = kOutputBufferSize;
uint8_t* next_out = output_buffer;
result = !!BrotliEncoderCompressStream(
s, end_of_input ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
&available_in, &next_in, &available_out, &next_out, &total_out);
if (!result) break;
size_t used_output = kOutputBufferSize - available_out;
if (used_output != 0) {
result = out->Write(output_buffer, used_output);
if (!result) break;
}
if (BrotliEncoderIsFinished(s)) break;
}
delete[] output_buffer;
BrotliEncoderDestroyState(s);
return result ? 1 : 0;
}
} /* namespace brotli */
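For comparison with the C++ loop above, a minimal memory-to-memory sketch
that drives the new C entry points directly (hypothetical helper; error
handling abbreviated, and it assumes ./encode.h is included and the output
buffer is large enough):
static int CompressOneShot(const uint8_t* in, size_t in_size,
                           uint8_t* out, size_t out_cap, size_t* out_size) {
  BrotliEncoderState* s = BrotliEncoderCreateInstance(0, 0, 0);
  size_t available_in = in_size;
  const uint8_t* next_in = in;
  size_t available_out = out_cap;
  uint8_t* next_out = out;
  size_t total_out = 0;
  int ok;
  if (!s) return 0;
  BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, 9);
  /* With BROTLI_OPERATION_FINISH and all input visible, a single call
     suffices when the output buffer is large enough. */
  ok = BrotliEncoderCompressStream(
      s, BROTLI_OPERATION_FINISH,
      &available_in, &next_in, &available_out, &next_out, &total_out);
  ok = ok && BrotliEncoderIsFinished(s);
  *out_size = total_out;
  BrotliEncoderDestroyInstance(s);
  return ok;
}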


@ -12,4 +12,145 @@
#include "./encode.h"
#include "./streams.h"
namespace brotli {
struct BrotliParams {
BrotliParams(void)
: mode(MODE_GENERIC),
quality(11),
lgwin(22),
lgblock(0),
enable_dictionary(true),
enable_transforms(false),
greedy_block_split(false),
enable_context_modeling(true) {}
enum Mode {
/* Default compression mode. The compressor does not know anything in
advance about the properties of the input. */
MODE_GENERIC = 0,
/* Compression mode for UTF-8 format text input. */
MODE_TEXT = 1,
/* Compression mode used in WOFF 2.0. */
MODE_FONT = 2
};
Mode mode;
/* Controls the compression-speed vs compression-density tradeoffs. The higher
the |quality|, the slower the compression. Range is 0 to 11. */
int quality;
/* Base 2 logarithm of the sliding window size. Range is 10 to 24. */
int lgwin;
/* Base 2 logarithm of the maximum input block size. Range is 16 to 24.
If set to 0, the value will be set based on the quality. */
int lgblock;
/* These settings are deprecated and will be ignored.
All speed vs. size compromises are controlled by the |quality| param. */
bool enable_dictionary;
bool enable_transforms;
bool greedy_block_split;
bool enable_context_modeling;
};
/* An instance cannot be reused for multiple brotli streams. */
class BrotliCompressor {
public:
explicit BrotliCompressor(BrotliParams params);
~BrotliCompressor(void);
/* The maximum input size that can be processed at once. */
size_t input_block_size(void) const {
return BrotliEncoderInputBlockSize(state_);
}
/* Encodes the data in |input_buffer| as a meta-block and writes it to
|encoded_buffer| (|*encoded_size| should be set to the size of
|encoded_buffer|) and sets |*encoded_size| to the number of bytes that
was written. The |input_size| must not be greater than input_block_size().
Returns false if there was an error and true otherwise. */
bool WriteMetaBlock(const size_t input_size,
const uint8_t* input_buffer,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer);
/* Writes a metadata meta-block containing the given input to encoded_buffer.
|*encoded_size| should be set to the size of the encoded_buffer.
Sets |*encoded_size| to the number of bytes that was written.
Note that the given input data will not be part of the sliding window and
thus no backward references can be made to this data from subsequent
metablocks. |input_size| must not be greater than 2^24 and provided
|*encoded_size| must not be less than |input_size| + 6.
Returns false if there was an error and true otherwise. */
bool WriteMetadata(const size_t input_size,
const uint8_t* input_buffer,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer);
/* Writes a zero-length meta-block with end-of-input bit set to the
internal output buffer and copies the output buffer to |encoded_buffer|
(|*encoded_size| should be set to the size of |encoded_buffer|) and sets
|*encoded_size| to the number of bytes written.
Returns false if there was an error and true otherwise. */
bool FinishStream(size_t* encoded_size, uint8_t* encoded_buffer);
/* Copies the given input data to the internal ring buffer of the compressor.
No processing of the data occurs at this time and this function can be
called multiple times before calling WriteBrotliData() to process the
accumulated input. At most input_block_size() bytes of input data can be
copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
*/
void CopyInputToRingBuffer(const size_t input_size,
const uint8_t* input_buffer);
/* Processes the accumulated input data and sets |*out_size| to the length of
the new output meta-block, or to zero if no new output meta-block has been
created (in this case the processed input data is buffered internally).
If |*out_size| is positive, |*output| points to the start of the output
data. If |is_last| or |force_flush| is true, an output meta-block is always
created. However, until |is_last| is true the encoder may retain up to 7 bits
of the last byte of output. To force the encoder to dump the remaining bits,
use WriteMetadata() to append an empty meta-data block.
Returns false if the size of the input data is larger than
input_block_size(). */
bool WriteBrotliData(const bool is_last, const bool force_flush,
size_t* out_size, uint8_t** output);
/* Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
e.g. for custom static dictionaries for data formats.
Not to be confused with the built-in transformable dictionary of Brotli.
To decode, use BrotliSetCustomDictionary() of the decoder with the same
dictionary. */
void BrotliSetCustomDictionary(size_t size, const uint8_t* dict);
/* No-op, but we keep it here for API backward-compatibility. */
void WriteStreamHeader(void) {}
private:
BrotliEncoderState* state_;
};
/* Compresses the data in |input_buffer| into |encoded_buffer|, and sets
|*encoded_size| to the compressed length.
Returns 0 if there was an error and 1 otherwise. */
int BrotliCompressBuffer(BrotliParams params,
size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer);
/* Same as above, but uses the specified input and output classes instead
of reading from and writing to pre-allocated memory buffers. */
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out);
/* Before compressing the data, sets a custom LZ77 dictionary with
BrotliCompressor::BrotliSetCustomDictionary. */
int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
BrotliParams params,
BrotliIn* in, BrotliOut* out);
} /* namespace brotli */
#endif /* BROTLI_ENC_COMPRESSOR_H_ */


@ -10,8 +10,11 @@
#define BROTLI_ENC_CONTEXT_H_
#include "../common/types.h"
#include "../common/port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
/* Second-order context lookup table for UTF8 byte streams.
@ -151,29 +154,31 @@ static const uint8_t kSigned3BitContextLookup[] = {
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
};
enum ContextType {
typedef enum ContextType {
CONTEXT_LSB6 = 0,
CONTEXT_MSB6 = 1,
CONTEXT_UTF8 = 2,
CONTEXT_SIGNED = 3
};
} ContextType;
static inline uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
static BROTLI_INLINE uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
switch (mode) {
case CONTEXT_LSB6:
return p1 & 0x3f;
case CONTEXT_MSB6:
return static_cast<uint8_t>(p1 >> 2);
return (uint8_t)(p1 >> 2);
case CONTEXT_UTF8:
return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
case CONTEXT_SIGNED:
return static_cast<uint8_t>((kSigned3BitContextLookup[p1] << 3) +
kSigned3BitContextLookup[p2]);
return (uint8_t)((kSigned3BitContextLookup[p1] << 3) +
kSigned3BitContextLookup[p2]);
default:
return 0;
}
}
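/* Example: in CONTEXT_SIGNED mode the two 3-bit lookups combine into a
   context ID in [0, 63]; in CONTEXT_MSB6 the ID is simply p1 >> 2, also in
   [0, 63]. */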
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_CONTEXT_H_ */


@ -11,7 +11,9 @@
#include "../common/types.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static const uint16_t kStaticDictionaryHash[] = {
0x7d48, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
@ -4112,6 +4114,8 @@ static const uint16_t kStaticDictionaryHash[] = {
0x0000, 0x0000, 0x0d88, 0x4ac5, 0x0000, 0x0000, 0x0000, 0x0000,
};
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_DICTIONARY_HASH_H_ */

File diff suppressed because it is too large


@ -9,45 +9,44 @@
#ifndef BROTLI_ENC_ENCODE_H_
#define BROTLI_ENC_ENCODE_H_
#include <string>
#include <vector>
#include "../common/types.h"
#include "./command.h"
#include "./hash.h"
#include "./ringbuffer.h"
#include "./static_dict.h"
#include "./streams.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static const int kMaxWindowBits = 24;
static const int kMinWindowBits = 10;
static const int kMinInputBlockBits = 16;
static const int kMaxInputBlockBits = 24;
static const int kBrotliMaxWindowBits = 24;
static const int kBrotliMinWindowBits = 10;
static const int kBrotliMinInputBlockBits = 16;
static const int kBrotliMaxInputBlockBits = 24;
struct BrotliParams {
BrotliParams(void)
: mode(MODE_GENERIC),
quality(11),
lgwin(22),
lgblock(0),
enable_dictionary(true),
enable_transforms(false),
greedy_block_split(false),
enable_context_modeling(true) {}
enum Mode {
typedef enum BrotliEncoderMode {
/* Default compression mode. The compressor does not know anything in
advance about the properties of the input. */
MODE_GENERIC = 0,
BROTLI_MODE_GENERIC = 0,
/* Compression mode for UTF-8 format text input. */
MODE_TEXT = 1,
BROTLI_MODE_TEXT = 1,
/* Compression mode used in WOFF 2.0. */
MODE_FONT = 2
};
Mode mode;
BROTLI_MODE_FONT = 2
} BrotliEncoderMode;
#define BROTLI_DEFAULT_QUALITY 11
#define BROTLI_DEFAULT_WINDOW 22
#define BROTLI_DEFAULT_MODE BROTLI_MODE_GENERIC
typedef enum BrotliEncoderOperation {
BROTLI_OPERATION_PROCESS = 0,
/* Request output stream to flush. Performed when input stream is depleted
and there is enough space in output stream. */
BROTLI_OPERATION_FLUSH = 1,
/* Request output stream to finish. Performed when input stream is depleted
and there is enough space in output stream. */
BROTLI_OPERATION_FINISH = 2
} BrotliEncoderOperation;
/* DEPRECATED */
typedef struct BrotliEncoderParams {
BrotliEncoderMode mode;
/* Controls the compression-speed vs compression-density tradeoffs. The higher
the |quality|, the slower the compression. Range is 0 to 11. */
int quality;
@ -56,155 +55,202 @@ struct BrotliParams {
/* Base 2 logarithm of the maximum input block size. Range is 16 to 24.
If set to 0, the value will be set based on the quality. */
int lgblock;
} BrotliEncoderParams;
// These settings are deprecated and will be ignored.
// All speed vs. size compromises are controlled by the quality param.
bool enable_dictionary;
bool enable_transforms;
bool greedy_block_split;
bool enable_context_modeling;
};
typedef enum BrotliEncoderParameter {
BROTLI_PARAM_MODE = 0,
/* Controls the compression-speed vs compression-density tradeoffs. The higher
the quality, the slower the compression. Range is 0 to 11. */
BROTLI_PARAM_QUALITY = 1,
/* Base 2 logarithm of the sliding window size. Range is 10 to 24. */
BROTLI_PARAM_LGWIN = 2,
/* Base 2 logarithm of the maximum input block size. Range is 16 to 24.
If set to 0, the value will be set based on the quality. */
BROTLI_PARAM_LGBLOCK = 3
} BrotliEncoderParameter;
// An instance can not be reused for multiple brotli streams.
class BrotliCompressor {
public:
explicit BrotliCompressor(BrotliParams params);
~BrotliCompressor(void);
/* DEPRECATED */
void BrotliEncoderParamsSetDefault(BrotliEncoderParams* params);
// The maximum input size that can be processed at once.
size_t input_block_size(void) const { return size_t(1) << params_.lgblock; }
/* A state cannot be reused for multiple brotli streams. */
typedef struct BrotliEncoderStateStruct BrotliEncoderState;
// Encodes the data in input_buffer as a meta-block and writes it to
// encoded_buffer (*encoded_size should be set to the size of
// encoded_buffer) and sets *encoded_size to the number of bytes that
// was written. The input_size must be <= input_block_size().
// Returns 0 if there was an error and 1 otherwise.
bool WriteMetaBlock(const size_t input_size,
const uint8_t* input_buffer,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer);
int BrotliEncoderSetParameter(
BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value);
// Writes a metadata meta-block containing the given input to encoded_buffer.
// *encoded_size should be set to the size of the encoded_buffer.
// Sets *encoded_size to the number of bytes that was written.
// Note that the given input data will not be part of the sliding window and
// thus no backward references can be made to this data from subsequent
// metablocks.
bool WriteMetadata(const size_t input_size,
const uint8_t* input_buffer,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer);
/* Creates the instance of BrotliEncoderState and initializes it.
|alloc_func| and |free_func| MUST be both zero or both non-zero. In the case
they are both zero, default memory allocators are used. |opaque| is passed to
|alloc_func| and |free_func| when they are called. */
BrotliEncoderState* BrotliEncoderCreateInstance(brotli_alloc_func alloc_func,
brotli_free_func free_func,
void* opaque);
/* DEPRECATED */
static inline BrotliEncoderState* BrotliEncoderCreateState(
const BrotliEncoderParams* params, brotli_alloc_func alloc_func,
brotli_free_func free_func, void* opaque) {
BrotliEncoderState* result = BrotliEncoderCreateInstance(
alloc_func, free_func, opaque);
if (!result) return result;
BrotliEncoderSetParameter(
result, BROTLI_PARAM_MODE, (uint32_t)params->mode);
BrotliEncoderSetParameter(
result, BROTLI_PARAM_QUALITY, (uint32_t)params->quality);
BrotliEncoderSetParameter(
result, BROTLI_PARAM_LGWIN, (uint32_t)params->lgwin);
BrotliEncoderSetParameter(
result, BROTLI_PARAM_LGBLOCK, (uint32_t)params->lgblock);
return result;
}
// Writes a zero-length meta-block with end-of-input bit set to the
// internal output buffer and copies the output buffer to encoded_buffer
// (*encoded_size should be set to the size of encoded_buffer) and sets
// *encoded_size to the number of bytes written. Returns false if there was
// an error and true otherwise.
bool FinishStream(size_t* encoded_size, uint8_t* encoded_buffer);
/* Deinitializes and frees BrotliEncoderState instance. */
void BrotliEncoderDestroyInstance(BrotliEncoderState* state);
/* DEPRECATED */
static inline void BrotliEncoderDestroyState(BrotliEncoderState* state) {
BrotliEncoderDestroyInstance(state);
}
// Copies the given input data to the internal ring buffer of the compressor.
// No processing of the data occurs at this time and this function can be
// called multiple times before calling WriteBrotliData() to process the
// accumulated input. At most input_block_size() bytes of input data can be
// copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
void CopyInputToRingBuffer(const size_t input_size,
const uint8_t* input_buffer);
/* The maximum input size that can be processed at once. */
size_t BrotliEncoderInputBlockSize(BrotliEncoderState* state);
// Processes the accumulated input data and sets *out_size to the length of
// the new output meta-block, or to zero if no new output meta-block was
// created (in this case the processed input data is buffered internally).
// If *out_size is positive, *output points to the start of the output data.
// If is_last or force_flush is true, an output meta-block is always created.
// Returns false if the size of the input data is larger than
// input_block_size().
bool WriteBrotliData(const bool is_last, const bool force_flush,
size_t* out_size, uint8_t** output);
/* Encodes the data in |input_buffer| as a meta-block and writes it to
|encoded_buffer| (|*encoded_size| should be set to the size of
|encoded_buffer|) and sets |*encoded_size| to the number of bytes
written. |input_size| must not be greater than input_block_size().
Returns 0 if there was an error and 1 otherwise. */
int BrotliEncoderWriteMetaBlock(BrotliEncoderState* state,
const size_t input_size,
const uint8_t* input_buffer, const int is_last,
size_t* encoded_size, uint8_t* encoded_buffer);
// Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
// e.g. for custom static dictionaries for data formats.
// Not to be confused with the built-in transformable dictionary of Brotli.
// To decode, use BrotliSetCustomDictionary of the decoder with the same
// dictionary.
void BrotliSetCustomDictionary(size_t size, const uint8_t* dict);
/* Writes a metadata meta-block containing the given input to encoded_buffer.
|*encoded_size| should be set to the size of the encoded_buffer.
Sets |*encoded_size| to the number of bytes written.
Note that the given input data will not be part of the sliding window and
thus no backward references can be made to this data from subsequent
meta-blocks. |input_size| must not be greater than 2^24 and the provided
|*encoded_size| must not be less than |input_size| + 6.
Returns 0 if there was an error and 1 otherwise. */
int BrotliEncoderWriteMetadata(BrotliEncoderState* state,
const size_t input_size,
const uint8_t* input_buffer, const int is_last,
size_t* encoded_size, uint8_t* encoded_buffer);
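/* Sketch honoring the documented bounds above (illustrative; |meta|,
   |meta_size| and the Example* name are hypothetical): */
static int ExampleWriteMetadata(BrotliEncoderState* s,
                                const uint8_t* meta, size_t meta_size,
                                uint8_t* out, size_t out_capacity) {
  size_t encoded_size = out_capacity;
  if (meta_size > (1u << 24) || out_capacity < meta_size + 6) return 0;
  return BrotliEncoderWriteMetadata(s, meta_size, meta, 0,
                                    &encoded_size, out);
}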
// No-op, but we keep it here for API backward-compatibility.
void WriteStreamHeader(void) {}
/* Writes a zero-length meta-block with end-of-input bit set to the
internal output buffer and copies the output buffer to |encoded_buffer|
(|*encoded_size| should be set to the size of |encoded_buffer|) and sets
|*encoded_size| to the number of bytes written.
Returns 0 if there was an error and 1 otherwise. */
int BrotliEncoderFinishStream(BrotliEncoderState* state, size_t* encoded_size,
uint8_t* encoded_buffer);
private:
uint8_t* GetBrotliStorage(size_t size);
/* Copies the given input data to the internal ring buffer of the compressor.
No processing of the data occurs at this time and this function can be
called multiple times before calling WriteBrotliData() to process the
accumulated input. At most input_block_size() bytes of input data can be
copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
*/
void BrotliEncoderCopyInputToRingBuffer(BrotliEncoderState* state,
const size_t input_size,
const uint8_t* input_buffer);
// Allocates and clears a hash table using memory in "*this",
// stores the number of buckets in "*table_size" and returns a pointer to
// the base of the hash table.
int* GetHashTable(int quality,
size_t input_size, size_t* table_size);
/* Processes the accumulated input data and sets |*out_size| to the length of
the new output meta-block, or to zero if no new output meta-block has been
created (in this case the processed input data is buffered internally).
If |*out_size| is positive, |*output| points to the start of the output
data. If |is_last| or |force_flush| is 1, an output meta-block is always
created. However, until |is_last| is 1 the encoder may retain up to 7 bits
of the last byte of output. To force the encoder to dump the remaining bits,
use WriteMetadata() to append an empty meta-data block.
Returns 0 if the size of the input data is larger than
input_block_size(). */
int BrotliEncoderWriteData(BrotliEncoderState* state, const int is_last,
const int force_flush, size_t* out_size,
uint8_t** output);
BrotliParams params_;
Hashers* hashers_;
int hash_type_;
uint64_t input_pos_;
RingBuffer* ringbuffer_;
size_t cmd_alloc_size_;
Command* commands_;
size_t num_commands_;
size_t num_literals_;
size_t last_insert_len_;
uint64_t last_flush_pos_;
uint64_t last_processed_pos_;
int dist_cache_[4];
int saved_dist_cache_[4];
uint8_t last_byte_;
uint8_t last_byte_bits_;
uint8_t prev_byte_;
uint8_t prev_byte2_;
size_t storage_size_;
uint8_t* storage_;
// Hash table for quality 0 mode.
int small_table_[1 << 10]; // 4KB (1024 entries of 4-byte int)
int* large_table_; // Allocated only when needed
// Command and distance prefix codes (each 64 symbols, stored back-to-back)
// used for the next block in quality 0. The command prefix code is over a
// smaller alphabet with the following 64 symbols:
// 0 - 15: insert length code 0, copy length code 0 - 15, same distance
// 16 - 39: insert length code 0, copy length code 0 - 23
// 40 - 63: insert length code 0 - 23, copy length code 0
// Note that symbols 16 and 40 represent the same code in the full alphabet,
// but we do not use either of them in quality 0.
uint8_t cmd_depths_[128];
uint16_t cmd_bits_[128];
// The compressed form of the command and distance prefix codes for the next
// block in quality 0.
uint8_t cmd_code_[512];
size_t cmd_code_numbits_;
// Command and literal buffers for quality 1.
uint32_t* command_buf_;
uint8_t* literal_buf_;
int is_last_block_emitted_;
};
/* Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
e.g. for custom static dictionaries for data formats.
Not to be confused with the built-in transformable dictionary of Brotli.
To decode, use BrotliSetCustomDictionary() of the decoder with the same
dictionary. */
void BrotliEncoderSetCustomDictionary(BrotliEncoderState* state, size_t size,
const uint8_t* dict);
// Compresses the data in input_buffer into encoded_buffer, and sets
// *encoded_size to the compressed length.
// Returns 0 if there was an error and 1 otherwise.
int BrotliCompressBuffer(BrotliParams params,
size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer);
/* Returns a buffer size that is large enough to contain BrotliEncoderCompress
output for any input.
Returns 0 if the result does not fit in size_t. */
size_t BrotliEncoderMaxCompressedSize(size_t input_size);
// Same as above, but uses the specified input and output classes instead
// of reading from and writing to pre-allocated memory buffers.
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out);
/* Compresses the data in |input_buffer| into |encoded_buffer|, and sets
|*encoded_size| to the compressed length.
BROTLI_DEFAULT_QUALITY, BROTLI_DEFAULT_WINDOW and BROTLI_DEFAULT_MODE should
be used as |quality|, |lgwin| and |mode| if there are no specific
requirements for encoder speed and compression ratio.
If compression fails, |*encoded_size| is set to 0.
If BrotliEncoderMaxCompressedSize(|input_size|) is not zero, then
|*encoded_size| is never set to a value bigger than that bound.
Returns 0 if there was an error and 1 otherwise. */
int BrotliEncoderCompress(int quality, int lgwin, BrotliEncoderMode mode,
size_t input_size, const uint8_t* input_buffer,
size_t* encoded_size, uint8_t* encoded_buffer);
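/* One-shot usage sketch tying the two functions above together
   (illustrative; the Example* name and the caller buffers are
   hypothetical): */
static size_t ExampleCompressBuffer(const uint8_t* in, size_t in_size,
                                    uint8_t* out, size_t out_capacity) {
  size_t encoded_size = out_capacity;
  size_t bound = BrotliEncoderMaxCompressedSize(in_size);
  if (bound == 0 || bound > out_capacity) return 0;  /* buffer too small */
  if (!BrotliEncoderCompress(BROTLI_DEFAULT_QUALITY, BROTLI_DEFAULT_WINDOW,
                             BROTLI_DEFAULT_MODE, in_size, in,
                             &encoded_size, out)) {
    return 0;
  }
  return encoded_size;
}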
// Before compressing the data, sets a custom LZ77 dictionary with
// BrotliCompressor::BrotliSetCustomDictionary.
int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
BrotliParams params,
BrotliIn* in, BrotliOut* out);
/* Progressively compresses the input stream and pushes produced bytes to the
output stream.
Internally the workflow consists of 3 tasks:
* (optional) copy input data to an internal buffer
* actually compress data and (optionally) store it in an internal buffer
* (optional) copy compressed bytes from the internal buffer to the output
Whenever all 3 tasks can no longer move forward, or an error occurs, this
method returns.
|available_in| and |next_in| represent the input stream; when X bytes of
input are consumed, X is subtracted from |available_in| and added to
|next_in|. |available_out| and |next_out| represent the output stream; when
Y bytes are pushed to the output, Y is subtracted from |available_out| and
added to |next_out|. |total_out|, if it is not a null-pointer, is set to the
total number of bytes pushed to the output by this instance of the encoder.
|op| is used to flush or finish the stream.
Flushing the stream means forcing encoding of all input passed to the
encoder and completing the current output block, so that it can be fully
decoded by a stream decoder. To perform a flush, |op| must be set to
BROTLI_OPERATION_FLUSH. Under some circumstances (e.g. lack of output stream
capacity) this operation may require several calls to
BrotliEncoderCompressStream; the method must be called again until the input
stream is depleted and the encoder has no more output (see
BrotliEncoderHasMoreOutput).
Finishing the stream means encoding all input passed to the encoder and
adding specific "final" marks, so that a stream decoder can determine that
the stream is complete. To perform a finish, |op| must be set to
BROTLI_OPERATION_FINISH. Under some circumstances (e.g. lack of output
stream capacity) this operation may require several calls to
BrotliEncoderCompressStream; the method must be called again until the input
stream is depleted and the encoder has no more output (see
BrotliEncoderHasMoreOutput).
WARNING: when flushing and finishing, |op| should not change until the
operation is complete; the input stream should not be refilled either.
Returns 0 if there was an error and 1 otherwise.
*/
int BrotliEncoderCompressStream(BrotliEncoderState* s,
BrotliEncoderOperation op, size_t* available_in,
const uint8_t** next_in, size_t* available_out,
uint8_t** next_out, size_t* total_out);
/* Checks whether the encoder is in the "finished" state, i.e. no more input
is accepted and no more output will be produced.
Works only with the BrotliEncoderCompressStream workflow.
Returns 1 if the stream is finished and 0 otherwise. */
int BrotliEncoderIsFinished(BrotliEncoderState* s);
/* Checks whether the encoder has more output bytes in its internal buffer.
Works only with the BrotliEncoderCompressStream workflow.
Returns 1 if there is more output (in the internal buffer) and 0 otherwise. */
int BrotliEncoderHasMoreOutput(BrotliEncoderState* s);
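/* Sketch of the finish loop implied by the documentation above
   (illustrative; |in|, |out_buf| and the Example* name are hypothetical): */
static int ExampleFinishStream(BrotliEncoderState* s,
                               const uint8_t* in, size_t in_size,
                               uint8_t* out_buf, size_t out_buf_size) {
  size_t available_in = in_size;
  const uint8_t* next_in = in;
  while (!BrotliEncoderIsFinished(s)) {
    size_t available_out = out_buf_size;
    uint8_t* next_out = out_buf;
    if (!BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
                                     &available_in, &next_in,
                                     &available_out, &next_out, 0)) {
      return 0;  /* error */
    }
    /* consume out_buf[0 .. out_buf_size - available_out) here */
  }
  return 1;
}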
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_ENCODE_H_ */

View File

@ -8,40 +8,34 @@
#include "./encode_parallel.h"
#include <algorithm>
#include <limits>
#include <vector>
#include "./backward_references.h"
#include "./bit_cost.h"
#include "./block_splitter.h"
#include "./brotli_bit_stream.h"
#include "./cluster.h"
#include "./context.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./hash.h"
#include "./histogram.h"
#include "./metablock.h"
#include "./port.h"
#include "./prefix.h"
#include "./transform.h"
#include "./utf8_util.h"
#include "./write_bits.h"
namespace brotli {
namespace {
void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits) {
static void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
uint32_t num_direct_distance_codes,
uint32_t distance_postfix_bits) {
if (num_direct_distance_codes == 0 &&
distance_postfix_bits == 0) {
return;
}
for (size_t i = 0; i < num_commands; ++i) {
Command* cmd = &cmds[i];
if (cmd->copy_len() && cmd->cmd_prefix_ >= 128) {
PrefixEncodeCopyDistance(cmd->DistanceCode(),
if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
PrefixEncodeCopyDistance(CommandDistanceCode(cmd),
num_direct_distance_codes,
distance_postfix_bits,
&cmd->dist_prefix_,
@ -50,102 +44,115 @@ void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
}
}
bool WriteMetaBlockParallel(const BrotliParams& params,
const uint32_t input_size,
const uint8_t* input_buffer,
const uint32_t prefix_size,
const uint8_t* prefix_buffer,
const bool is_first,
const bool is_last,
size_t* encoded_size,
uint8_t* encoded_buffer) {
/* Returns 1 on success, otherwise 0. */
int WriteMetaBlockParallel(const BrotliParams& params,
const uint32_t input_size,
const uint8_t* input_buffer,
const uint32_t prefix_size,
const uint8_t* prefix_buffer,
const int is_first,
const int is_last,
size_t* encoded_size,
uint8_t* encoded_buffer) {
if (input_size == 0) {
return false;
return 0;
}
MemoryManager memory_manager;
MemoryManager* m = &memory_manager;
BrotliInitMemoryManager(m, 0, 0, 0);
uint8_t* storage;
size_t storage_ix;
uint8_t first_byte;
size_t first_byte_bits;
size_t output_size;
uint32_t num_direct_distance_codes;
uint32_t distance_postfix_bits;
ContextType literal_context_mode;
size_t last_insert_len = 0;
size_t num_commands = 0;
size_t num_literals = 0;
int dist_cache[4] = { -4, -4, -4, -4 };
Command* commands;
int hash_type = BROTLI_MIN(int, 10, params.quality);
Hashers* hashers;
int use_utf8_mode;
uint8_t prev_byte;
uint8_t prev_byte2;
const uint32_t mask = BROTLI_UINT32_MAX >> 1;
/* Copy prefix + next input block into a contiguous area. */
uint32_t input_pos = prefix_size;
/* CreateBackwardReferences reads up to 3 bytes past the end of input if the
mask points past the end of input.
FindMatchLengthWithLimit could look forward another 8 bytes. */
std::vector<uint8_t> input(prefix_size + input_size + 4 + 8);
memcpy(&input[0], prefix_buffer, prefix_size);
memcpy(&input[input_pos], input_buffer, input_size);
uint8_t* input = BROTLI_ALLOC(m, uint8_t, prefix_size + input_size + 4 + 8);
if (BROTLI_IS_OOM(m)) goto oom;
memcpy(input, prefix_buffer, prefix_size);
memcpy(input + input_pos, input_buffer, input_size);
/* Since we don't have a ringbuffer, masking is a no-op.
We use one less bit than the full range because some of the code uses
mask + 1 as the size of the ringbuffer. */
const uint32_t mask = std::numeric_limits<uint32_t>::max() >> 1;
uint8_t prev_byte = input_pos > 0 ? input[(input_pos - 1) & mask] : 0;
uint8_t prev_byte2 = input_pos > 1 ? input[(input_pos - 2) & mask] : 0;
prev_byte = input_pos > 0 ? input[(input_pos - 1) & mask] : 0;
prev_byte2 = input_pos > 1 ? input[(input_pos - 2) & mask] : 0;
/* Decide about UTF8 mode. */
static const double kMinUTF8Ratio = 0.75;
bool utf8_mode = IsMostlyUTF8(&input[0], input_pos, mask, input_size,
kMinUTF8Ratio);
use_utf8_mode = BrotliIsMostlyUTF8(
input, input_pos, mask, input_size, kMinUTF8Ratio);
/* Initialize hashers. */
int hash_type = std::min(10, params.quality);
Hashers* hashers = new Hashers();
hashers->Init(hash_type);
hashers = BROTLI_ALLOC(m, Hashers, 1);
if (BROTLI_IS_OOM(m)) goto oom;
InitHashers(hashers);
HashersSetup(m, hashers, hash_type);
if (BROTLI_IS_OOM(m)) goto oom;
/* Compute backward references. */
size_t last_insert_len = 0;
size_t num_commands = 0;
size_t num_literals = 0;
int dist_cache[4] = { -4, -4, -4, -4 };
Command* commands = static_cast<Command*>(
malloc(sizeof(Command) * ((input_size + 1) >> 1)));
if (commands == 0) {
delete hashers;
return false;
}
CreateBackwardReferences(
input_size, input_pos, is_last,
&input[0], mask,
params.quality,
params.lgwin,
hashers,
hash_type,
dist_cache,
&last_insert_len,
commands,
&num_commands,
&num_literals);
delete hashers;
commands = BROTLI_ALLOC(m, Command, ((input_size + 1) >> 1));
if (BROTLI_IS_OOM(m)) goto oom;
BrotliCreateBackwardReferences(m, input_size, input_pos, is_last, input,
mask, params.quality, params.lgwin, hashers, hash_type, dist_cache,
&last_insert_len, commands, &num_commands, &num_literals);
if (BROTLI_IS_OOM(m)) goto oom;
DestroyHashers(m, hashers);
BROTLI_FREE(m, hashers);
if (last_insert_len > 0) {
commands[num_commands++] = Command(last_insert_len);
InitInsertCommand(&commands[num_commands++], last_insert_len);
num_literals += last_insert_len;
}
assert(num_commands != 0);
/* Build the meta-block. */
MetaBlockSplit mb;
uint32_t num_direct_distance_codes =
params.mode == BrotliParams::MODE_FONT ? 12 : 0;
uint32_t distance_postfix_bits =
params.mode == BrotliParams::MODE_FONT ? 1 : 0;
ContextType literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
InitMetaBlockSplit(&mb);
num_direct_distance_codes = params.mode == BrotliParams::MODE_FONT ? 12 : 0;
distance_postfix_bits = params.mode == BrotliParams::MODE_FONT ? 1 : 0;
literal_context_mode = use_utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
RecomputeDistancePrefixes(commands, num_commands,
num_direct_distance_codes,
distance_postfix_bits);
if (params.quality <= 9) {
BuildMetaBlockGreedy(&input[0], input_pos, mask,
commands, num_commands,
&mb);
BrotliBuildMetaBlockGreedy(m, input, input_pos, mask,
commands, num_commands,
&mb);
if (BROTLI_IS_OOM(m)) goto oom;
} else {
BuildMetaBlock(&input[0], input_pos, mask,
prev_byte, prev_byte2,
commands, num_commands,
literal_context_mode,
&mb);
BrotliBuildMetaBlock(m, input, input_pos, mask, params.quality,
prev_byte, prev_byte2,
commands, num_commands,
literal_context_mode,
&mb);
if (BROTLI_IS_OOM(m)) goto oom;
}
/* Set up the temporary output storage. */
const size_t max_out_size = 2 * input_size + 500;
std::vector<uint8_t> storage(max_out_size);
uint8_t first_byte = 0;
size_t first_byte_bits = 0;
storage = BROTLI_ALLOC(m, uint8_t, 2 * input_size + 500);
if (BROTLI_IS_OOM(m)) goto oom;
first_byte = 0;
first_byte_bits = 0;
if (is_first) {
if (params.lgwin == 16) {
first_byte = 0;
@ -159,45 +166,55 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
}
}
storage[0] = static_cast<uint8_t>(first_byte);
size_t storage_ix = first_byte_bits;
storage_ix = first_byte_bits;
/* Store the meta-block to the temporary output. */
StoreMetaBlock(&input[0], input_pos, input_size, mask,
prev_byte, prev_byte2,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands, num_commands,
mb,
&storage_ix, &storage[0]);
free(commands);
BrotliStoreMetaBlock(m, input, input_pos, input_size, mask,
prev_byte, prev_byte2,
is_last,
num_direct_distance_codes,
distance_postfix_bits,
literal_context_mode,
commands, num_commands,
&mb,
&storage_ix, storage);
if (BROTLI_IS_OOM(m)) goto oom;
DestroyMetaBlockSplit(m, &mb);
BROTLI_FREE(m, commands);
/* If this is not the last meta-block, store an empty metadata
meta-block so that the meta-block will end at a byte boundary. */
if (!is_last) {
StoreSyncMetaBlock(&storage_ix, &storage[0]);
BrotliStoreSyncMetaBlock(&storage_ix, storage);
}
/* If the compressed data is too large, fall back to an uncompressed
meta-block. */
size_t output_size = storage_ix >> 3;
output_size = storage_ix >> 3;
if (input_size + 4 < output_size) {
storage[0] = static_cast<uint8_t>(first_byte);
storage_ix = first_byte_bits;
StoreUncompressedMetaBlock(is_last, &input[0], input_pos, mask,
input_size,
&storage_ix, &storage[0]);
BrotliStoreUncompressedMetaBlock(is_last, input, input_pos, mask,
input_size,
&storage_ix, storage);
output_size = storage_ix >> 3;
}
/* Copy the temporary output with size-check to the output. */
if (output_size > *encoded_size) {
return false;
BROTLI_FREE(m, storage);
BROTLI_FREE(m, input);
return 0;
}
memcpy(encoded_buffer, &storage[0], output_size);
memcpy(encoded_buffer, storage, output_size);
*encoded_size = output_size;
return true;
BROTLI_FREE(m, storage);
BROTLI_FREE(m, input);
return 1;
oom:
BrotliWipeOutMemoryManager(m);
return 0;
}
} /* namespace */
@ -217,20 +234,20 @@ int BrotliCompressBufferParallel(BrotliParams params,
}
/* Sanitize params. */
if (params.lgwin < kMinWindowBits) {
params.lgwin = kMinWindowBits;
} else if (params.lgwin > kMaxWindowBits) {
params.lgwin = kMaxWindowBits;
if (params.lgwin < kBrotliMinWindowBits) {
params.lgwin = kBrotliMinWindowBits;
} else if (params.lgwin > kBrotliMaxWindowBits) {
params.lgwin = kBrotliMaxWindowBits;
}
if (params.lgblock == 0) {
params.lgblock = 16;
if (params.quality >= 9 && params.lgwin > params.lgblock) {
params.lgblock = std::min(21, params.lgwin);
params.lgblock = BROTLI_MIN(int, 21, params.lgwin);
}
} else if (params.lgblock < kMinInputBlockBits) {
params.lgblock = kMinInputBlockBits;
} else if (params.lgblock > kMaxInputBlockBits) {
params.lgblock = kMaxInputBlockBits;
} else if (params.lgblock < kBrotliMinInputBlockBits) {
params.lgblock = kBrotliMinInputBlockBits;
} else if (params.lgblock > kBrotliMaxInputBlockBits) {
params.lgblock = kBrotliMaxInputBlockBits;
}
size_t max_input_block_size = 1 << params.lgblock;
size_t max_prefix_size = 1u << params.lgwin;
@ -239,10 +256,10 @@ int BrotliCompressBufferParallel(BrotliParams params,
/* Compress block-by-block independently. */
for (size_t pos = 0; pos < input_size; ) {
uint32_t input_block_size =
static_cast<uint32_t>(std::min(max_input_block_size, input_size - pos));
uint32_t input_block_size = static_cast<uint32_t>(
BROTLI_MIN(size_t, max_input_block_size, input_size - pos));
uint32_t prefix_size =
static_cast<uint32_t>(std::min(max_prefix_size, pos));
static_cast<uint32_t>(BROTLI_MIN(size_t, max_prefix_size, pos));
size_t out_size = input_block_size + (input_block_size >> 3) + 1024;
std::vector<uint8_t> out(out_size);
if (!WriteMetaBlockParallel(params,
@ -250,11 +267,11 @@ int BrotliCompressBufferParallel(BrotliParams params,
&input_buffer[pos],
prefix_size,
&input_buffer[pos - prefix_size],
pos == 0,
pos + input_block_size == input_size,
(pos == 0) ? 1 : 0,
(pos + input_block_size == input_size) ? 1 : 0,
&out_size,
&out[0])) {
return false;
return 0;
}
out.resize(out_size);
compressed_pieces.push_back(out);
@ -266,14 +283,14 @@ int BrotliCompressBufferParallel(BrotliParams params,
for (size_t i = 0; i < compressed_pieces.size(); ++i) {
const std::vector<uint8_t>& out = compressed_pieces[i];
if (out_pos + out.size() > *encoded_size) {
return false;
return 0;
}
memcpy(&encoded_buffer[out_pos], &out[0], out.size());
out_pos += out.size();
}
*encoded_size = out_pos;
return true;
return 1;
}
} /* namespace brotli */

View File

@ -12,7 +12,7 @@
#define BROTLI_ENC_ENCODE_PARALLEL_H_
#include "../common/types.h"
#include "./encode.h"
#include "./compressor.h"
namespace brotli {

View File

@ -8,36 +8,46 @@
#include "./entropy_encode.h"
#include <algorithm>
#include <cstdlib>
#include <limits>
#include <string.h> /* memset */
#include "../common/constants.h"
#include "../common/types.h"
#include "./histogram.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
void SetDepth(const HuffmanTree &p,
HuffmanTree *pool,
uint8_t *depth,
uint8_t level) {
if (p.index_left_ >= 0) {
++level;
SetDepth(pool[p.index_left_], pool, depth, level);
SetDepth(pool[p.index_right_or_value_], pool, depth, level);
} else {
depth[p.index_right_or_value_] = level;
int BrotliSetDepth(int p0, HuffmanTree* pool, uint8_t* depth, int max_depth) {
int stack[16];
int level = 0;
int p = p0;
assert(max_depth <= 15);
stack[0] = -1;
while (1) {
if (pool[p].index_left_ >= 0) {
level++;
if (level > max_depth) return 0;
stack[level] = pool[p].index_right_or_value_;
p = pool[p].index_left_;
continue;
} else {
depth[pool[p].index_right_or_value_] = (uint8_t)level;
}
while (level >= 0 && stack[level] == -1) level--;
if (level < 0) return 1;
p = stack[level];
stack[level] = -1;
}
}
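/* Worked illustration of the iterative traversal above (hypothetical node
   layout; assumes InitHuffmanTree from entropy_encode.h): three leaves with
   values 0, 1 and 2, where pool[3] joins leaves 0 and 1, and pool[4] is the
   root. Leaves 0 and 1 end up at depth 2, leaf 2 at depth 1. */
static void ExampleSetDepth(void) {
  HuffmanTree pool[5];
  uint8_t depth[3] = { 0 };
  InitHuffmanTree(&pool[0], 1, -1, 0);  /* leaf, value 0 */
  InitHuffmanTree(&pool[1], 1, -1, 1);  /* leaf, value 1 */
  InitHuffmanTree(&pool[2], 2, -1, 2);  /* leaf, value 2 */
  InitHuffmanTree(&pool[3], 2, 0, 1);   /* internal: leaves 0 and 1 */
  InitHuffmanTree(&pool[4], 4, 3, 2);   /* root: node 3 and leaf 2 */
  BrotliSetDepth(4, pool, depth, 15);   /* depth == {2, 2, 1} */
}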
/* Sort the root nodes, least popular first. */
static inline bool SortHuffmanTree(const HuffmanTree& v0,
const HuffmanTree& v1) {
if (v0.total_count_ != v1.total_count_) {
return v0.total_count_ < v1.total_count_;
static inline int SortHuffmanTree(const HuffmanTree* v0,
const HuffmanTree* v1) {
if (v0->total_count_ != v1->total_count_) {
return (v0->total_count_ < v1->total_count_) ? 1 : 0;
}
return v0.index_right_or_value_ > v1.index_right_or_value_;
return (v0->index_right_or_value_ > v1->index_right_or_value_) ? 1 : 0;
}
/* This function will create a Huffman tree.
@ -55,31 +65,37 @@ static inline bool SortHuffmanTree(const HuffmanTree& v0,
we are not planning to use this with extremely long blocks.
See http://en.wikipedia.org/wiki/Huffman_coding */
void CreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
uint8_t *depth) {
void BrotliCreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
uint8_t *depth) {
uint32_t count_limit;
HuffmanTree sentinel;
InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
/* For block sizes below 64 kB, we never need to do a second iteration
of this loop. Probably all of our block sizes will be smaller than
that, so this loop is mostly of academic interest. If we actually
needed this, we would be better off with the Katajainen algorithm. */
for (uint32_t count_limit = 1; ; count_limit *= 2) {
for (count_limit = 1; ; count_limit *= 2) {
size_t n = 0;
for (size_t i = length; i != 0;) {
size_t i;
size_t j;
size_t k;
for (i = length; i != 0;) {
--i;
if (data[i]) {
const uint32_t count = std::max(data[i], count_limit);
tree[n++] = HuffmanTree(count, -1, static_cast<int16_t>(i));
const uint32_t count = BROTLI_MAX(uint32_t, data[i], count_limit);
InitHuffmanTree(&tree[n++], count, -1, (int16_t)i);
}
}
if (n == 1) {
depth[tree[0].index_right_or_value_] = 1; // Only one element.
depth[tree[0].index_right_or_value_] = 1; /* Only one element. */
break;
}
std::sort(tree, tree + n, SortHuffmanTree);
SortHuffmanTreeItems(tree, n, SortHuffmanTree);
/* The nodes are:
[0, n): the sorted leaf nodes that we start with.
@ -88,13 +104,12 @@ void CreateHuffmanTree(const uint32_t *data,
(n+1). These are naturally in ascending order.
[2n]: we add a sentinel at the end as well.
There will be (2n+1) elements at the end. */
const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
tree[n] = sentinel;
tree[n + 1] = sentinel;
size_t i = 0; /* Points to the next leaf node. */
size_t j = n + 1; /* Points to the next non-leaf node. */
for (size_t k = n - 1; k != 0; --k) {
i = 0; /* Points to the next leaf node. */
j = n + 1; /* Points to the next non-leaf node. */
for (k = n - 1; k != 0; --k) {
size_t left, right;
if (tree[i].total_count_ <= tree[j].total_count_) {
left = i;
@ -111,21 +126,21 @@ void CreateHuffmanTree(const uint32_t *data,
++j;
}
{
/* The sentinel node becomes the parent node. */
size_t j_end = 2 * n - k;
tree[j_end].total_count_ =
tree[left].total_count_ + tree[right].total_count_;
tree[j_end].index_left_ = static_cast<int16_t>(left);
tree[j_end].index_right_or_value_ = static_cast<int16_t>(right);
size_t j_end = 2 * n - k;
tree[j_end].total_count_ =
tree[left].total_count_ + tree[right].total_count_;
tree[j_end].index_left_ = (int16_t)left;
tree[j_end].index_right_or_value_ = (int16_t)right;
/* Add back the last sentinel node. */
tree[j_end + 1] = sentinel;
tree[j_end + 1] = sentinel;
}
}
SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
if (BrotliSetDepth((int)(2 * n - 1), &tree[0], depth, tree_limit)) {
/* We need to pack the Huffman tree in tree_limit bits. If this was not
successful, add fake entities to the lowest values and retry. */
if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
break;
}
}
@ -142,7 +157,7 @@ static void Reverse(uint8_t* v, size_t start, size_t end) {
}
}
static void WriteHuffmanTreeRepetitions(
static void BrotliWriteHuffmanTreeRepetitions(
const uint8_t previous_value,
const uint8_t value,
size_t repetitions,
@ -163,16 +178,17 @@ static void WriteHuffmanTreeRepetitions(
--repetitions;
}
if (repetitions < 3) {
for (size_t i = 0; i < repetitions; ++i) {
size_t i;
for (i = 0; i < repetitions; ++i) {
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
}
} else {
repetitions -= 3;
size_t start = *tree_size;
while (true) {
tree[*tree_size] = 16;
repetitions -= 3;
while (1) {
tree[*tree_size] = BROTLI_REPEAT_PREVIOUS_CODE_LENGTH;
extra_bits_data[*tree_size] = repetitions & 0x3;
++(*tree_size);
repetitions >>= 2;
@ -186,7 +202,7 @@ static void WriteHuffmanTreeRepetitions(
}
}
static void WriteHuffmanTreeRepetitionsZeros(
static void BrotliWriteHuffmanTreeRepetitionsZeros(
size_t repetitions,
size_t* tree_size,
uint8_t* tree,
@ -198,16 +214,17 @@ static void WriteHuffmanTreeRepetitionsZeros(
--repetitions;
}
if (repetitions < 3) {
for (size_t i = 0; i < repetitions; ++i) {
size_t i;
for (i = 0; i < repetitions; ++i) {
tree[*tree_size] = 0;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
}
} else {
repetitions -= 3;
size_t start = *tree_size;
while (true) {
tree[*tree_size] = 17;
repetitions -= 3;
while (1) {
tree[*tree_size] = BROTLI_REPEAT_ZERO_CODE_LENGTH;
extra_bits_data[*tree_size] = repetitions & 0x7;
++(*tree_size);
repetitions >>= 3;
@ -221,8 +238,8 @@ static void WriteHuffmanTreeRepetitionsZeros(
}
}
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
uint8_t* good_for_rle) {
void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
uint8_t* good_for_rle) {
size_t nonzero_count = 0;
size_t stride;
size_t limit;
@ -260,8 +277,8 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
/* Small histogram will model it well. */
return;
}
size_t zeros = length - nonzeros;
if (smallest_nonzero < 4) {
size_t zeros = length - nonzeros;
if (zeros < 6) {
for (i = 1; i < length - 1; ++i) {
if (counts[i - 1] != 0 && counts[i] == 0 && counts[i + 1] != 0) {
@ -324,7 +341,7 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
for (k = 0; k < stride; ++k) {
/* We don't want to change value at counts[i],
that is already belonging to the next stride. Thus - 1. */
counts[i - k - 1] = static_cast<uint32_t>(count);
counts[i - k - 1] = (uint32_t)count;
}
}
stride = 0;
@ -353,16 +370,18 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
}
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
bool *use_rle_for_non_zero,
bool *use_rle_for_zero) {
int *use_rle_for_non_zero,
int *use_rle_for_zero) {
size_t total_reps_zero = 0;
size_t total_reps_non_zero = 0;
size_t count_reps_zero = 1;
size_t count_reps_non_zero = 1;
for (size_t i = 0; i < length;) {
size_t i;
for (i = 0; i < length;) {
const uint8_t value = depth[i];
size_t reps = 1;
for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
size_t k;
for (k = i + 1; k < length && depth[k] == value; ++k) {
++reps;
}
if (reps >= 3 && value == 0) {
@ -375,20 +394,24 @@ static void DecideOverRleUse(const uint8_t* depth, const size_t length,
}
i += reps;
}
*use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
*use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
*use_rle_for_non_zero =
(total_reps_non_zero > count_reps_non_zero * 2) ? 1 : 0;
*use_rle_for_zero = (total_reps_zero > count_reps_zero * 2) ? 1 : 0;
}
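/* Illustration of the heuristic above (hypothetical depth array): two zero
   runs of 8 contribute total_reps_zero = 16 against count_reps_zero = 3
   (the counter starts at 1), and 16 > 3 * 2 enables RLE for zeros; the
   single run of 4 ones fails 4 > 2 * 2, so non-zero RLE stays off. */
static void ExampleDecideOverRleUse(void) {
  static const uint8_t depth[20] = {
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
  };
  int use_rle_for_non_zero;
  int use_rle_for_zero;
  DecideOverRleUse(depth, 20, &use_rle_for_non_zero, &use_rle_for_zero);
  /* use_rle_for_zero == 1, use_rle_for_non_zero == 0 */
}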
void WriteHuffmanTree(const uint8_t* depth,
size_t length,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
uint8_t previous_value = 8;
void BrotliWriteHuffmanTree(const uint8_t* depth,
size_t length,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
uint8_t previous_value = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
size_t i;
int use_rle_for_non_zero = 0;
int use_rle_for_zero = 0;
/* Throw away trailing zeros. */
size_t new_length = length;
for (size_t i = 0; i < length; ++i) {
for (i = 0; i < length; ++i) {
if (depth[length - i - 1] == 0) {
--new_length;
} else {
@ -397,8 +420,6 @@ void WriteHuffmanTree(const uint8_t* depth,
}
/* First gather statistics on if it is a good idea to do rle. */
bool use_rle_for_non_zero = false;
bool use_rle_for_zero = false;
if (length > 50) {
/* Find rle coding for longer codes.
Shorter codes seem not to benefit from rle. */
@ -407,73 +428,73 @@ void WriteHuffmanTree(const uint8_t* depth,
}
/* Actual rle coding. */
for (size_t i = 0; i < new_length;) {
for (i = 0; i < new_length;) {
const uint8_t value = depth[i];
size_t reps = 1;
if ((value != 0 && use_rle_for_non_zero) ||
(value == 0 && use_rle_for_zero)) {
for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
size_t k;
for (k = i + 1; k < new_length && depth[k] == value; ++k) {
++reps;
}
}
if (value == 0) {
WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
BrotliWriteHuffmanTreeRepetitionsZeros(
reps, tree_size, tree, extra_bits_data);
} else {
WriteHuffmanTreeRepetitions(previous_value,
value, reps, tree_size,
tree, extra_bits_data);
BrotliWriteHuffmanTreeRepetitions(previous_value,
value, reps, tree_size,
tree, extra_bits_data);
previous_value = value;
}
i += reps;
}
}
namespace {
uint16_t ReverseBits(int num_bits, uint16_t bits) {
static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) {
static const size_t kLut[16] = { /* Pre-reversed 4-bit values. */
0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
};
size_t retval = kLut[bits & 0xf];
for (int i = 4; i < num_bits; i += 4) {
size_t i;
for (i = 4; i < num_bits; i += 4) {
retval <<= 4;
bits = static_cast<uint16_t>(bits >> 4);
bits = (uint16_t)(bits >> 4);
retval |= kLut[bits & 0xf];
}
retval >>= (-num_bits & 0x3);
return static_cast<uint16_t>(retval);
return (uint16_t)retval;
}
} // namespace
/* 0..15 are values for bits */
#define MAX_HUFFMAN_BITS 16
void ConvertBitDepthsToSymbols(const uint8_t *depth,
size_t len,
uint16_t *bits) {
void BrotliConvertBitDepthsToSymbols(const uint8_t *depth,
size_t len,
uint16_t *bits) {
/* In Brotli, all bit depths are in [1..15];
a bit depth of 0 means that the symbol does not exist. */
const int kMaxBits = 16; // 0..15 are values for bits
uint16_t bl_count[kMaxBits] = { 0 };
{
for (size_t i = 0; i < len; ++i) {
++bl_count[depth[i]];
}
bl_count[0] = 0;
uint16_t bl_count[MAX_HUFFMAN_BITS] = { 0 };
uint16_t next_code[MAX_HUFFMAN_BITS];
size_t i;
int code = 0;
for (i = 0; i < len; ++i) {
++bl_count[depth[i]];
}
uint16_t next_code[kMaxBits];
bl_count[0] = 0;
next_code[0] = 0;
{
int code = 0;
for (int bits = 1; bits < kMaxBits; ++bits) {
code = (code + bl_count[bits - 1]) << 1;
next_code[bits] = static_cast<uint16_t>(code);
}
for (i = 1; i < MAX_HUFFMAN_BITS; ++i) {
code = (code + bl_count[i - 1]) << 1;
next_code[i] = (uint16_t)code;
}
for (size_t i = 0; i < len; ++i) {
for (i = 0; i < len; ++i) {
if (depth[i]) {
bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
bits[i] = BrotliReverseBits(depth[i], next_code[depth[i]]++);
}
}
}
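/* Worked example (hypothetical input): depths {2, 1, 3, 3} yield canonical
   first codes 0b0, 0b10, 0b110 and 0b111; after the LSB-first bit reversal
   above the stored values are {1, 0, 3, 7}. */
static void ExampleConvertBitDepths(void) {
  static const uint8_t depth[4] = { 2, 1, 3, 3 };
  uint16_t bits[4];
  BrotliConvertBitDepthsToSymbols(depth, 4, bits);
  /* bits == { 1, 0, 3, 7 } */
}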
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@ -9,29 +9,30 @@
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
#define BROTLI_ENC_ENTROPY_ENCODE_H_
#include <string.h>
#include "../common/types.h"
#include "./histogram.h"
#include "./prefix.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
/* A node of a Huffman tree. */
struct HuffmanTree {
HuffmanTree() {}
HuffmanTree(uint32_t count, int16_t left, int16_t right)
: total_count_(count),
index_left_(left),
index_right_or_value_(right) {
}
typedef struct HuffmanTree {
uint32_t total_count_;
int16_t index_left_;
int16_t index_right_or_value_;
};
} HuffmanTree;
void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
uint8_t *depth, uint8_t level);
static BROTLI_INLINE void InitHuffmanTree(HuffmanTree* self, uint32_t count,
int16_t left, int16_t right) {
self->total_count_ = count;
self->index_left_ = left;
self->index_right_or_value_ = right;
}
/* Returns 1 if the assignment of depths succeeded, otherwise 0. */
BROTLI_INTERNAL int BrotliSetDepth(
int p, HuffmanTree* pool, uint8_t* depth, int max_depth);
/* This function will create a Huffman tree.
@ -45,11 +46,11 @@ void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
be at least 2 * length + 1 long.
See http://en.wikipedia.org/wiki/Huffman_coding */
void CreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
uint8_t *depth);
BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
uint8_t *depth);
/* Change the population counts in a way that the consequent
Huffman tree compression, especially its rle-part will be more
@ -58,48 +59,63 @@ void CreateHuffmanTree(const uint32_t *data,
length contains the size of the histogram.
counts contains the population counts.
good_for_rle is a buffer of at least length size */
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
uint8_t* good_for_rle);
BROTLI_INTERNAL void BrotliOptimizeHuffmanCountsForRle(
size_t length, uint32_t* counts, uint8_t* good_for_rle);
/* Write a Huffman tree from bit depths into the bitstream representation
of a Huffman tree. The generated Huffman tree is to be compressed once
more using a Huffman tree. */
void WriteHuffmanTree(const uint8_t* depth,
size_t num,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data);
BROTLI_INTERNAL void BrotliWriteHuffmanTree(const uint8_t* depth,
size_t num,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data);
/* Get the actual bit values for a tree of bit depths. */
void ConvertBitDepthsToSymbols(const uint8_t *depth,
size_t len,
uint16_t *bits);
BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t *depth,
size_t len,
uint16_t *bits);
template<int kSize>
struct EntropyCode {
// How many bits for symbol.
uint8_t depth_[kSize];
// Actual bits used to represent the symbol.
uint16_t bits_[kSize];
// How many non-zero depth.
int count_;
// First four symbols with non-zero depth.
int symbols_[4];
};
/* Input size optimized Shell sort. */
typedef int (*HuffmanTreeComparator)(const HuffmanTree*, const HuffmanTree*);
static BROTLI_INLINE void SortHuffmanTreeItems(HuffmanTree* items,
const size_t n, HuffmanTreeComparator comparator) {
static const size_t gaps[] = {132, 57, 23, 10, 4, 1};
if (n < 13) {
/* Insertion sort. */
size_t i;
for (i = 1; i < n; ++i) {
HuffmanTree tmp = items[i];
size_t k = i;
size_t j = i - 1;
while (comparator(&tmp, &items[j])) {
items[k] = items[j];
k = j;
if (!j--) break;
}
items[k] = tmp;
}
return;
} else {
/* Shell sort. */
int g = n < 57 ? 2 : 0;
for (; g < 6; ++g) {
size_t gap = gaps[g];
size_t i;
for (i = gap; i < n; ++i) {
size_t j = i;
HuffmanTree tmp = items[i];
for (; j >= gap && comparator(&tmp, &items[j - gap]); j -= gap) {
items[j] = items[j - gap];
}
items[j] = tmp;
}
}
}
}
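/* Note: the gaps above are Ciura's sequence, truncated to the sizes that
   occur here. Minimal usage sketch (the Example* names and the ascending
   comparator are hypothetical): */
static int ExampleByCount(const HuffmanTree* a, const HuffmanTree* b) {
  return (a->total_count_ < b->total_count_) ? 1 : 0;
}
static BROTLI_INLINE void ExampleSortLeaves(HuffmanTree* tree, size_t n) {
  SortHuffmanTreeItems(tree, n, ExampleByCount);  /* least popular first */
}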
static const int kCodeLengthCodes = 18;
// Literal entropy code.
typedef EntropyCode<256> EntropyCodeLiteral;
// Prefix entropy codes.
typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
// Context map entropy code, 256 Huffman tree indexes + 16 run length codes.
typedef EntropyCode<272> EntropyCodeContextMap;
// Block type entropy code, 256 block types + 2 special symbols.
typedef EntropyCode<258> EntropyCodeBlockType;
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_ENTROPY_ENCODE_H_ */

View File

@ -9,17 +9,20 @@
#ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
#include "../common/constants.h"
#include "../common/port.h"
#include "../common/types.h"
#include "./prefix.h"
#include "./write_bits.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static const uint8_t kCodeLengthDepth[18] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 0, 4, 4,
};
static const uint8_t kStaticCommandCodeDepth[kNumCommandPrefixes] = {
static const uint8_t kStaticCommandCodeDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
@ -77,11 +80,13 @@ static const uint32_t kCodeLengthBits[18] = {
0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 15, 31, 0, 11, 7,
};
inline void StoreStaticCodeLengthCode(size_t* storage_ix, uint8_t* storage) {
WriteBits(40, MAKE_UINT64_T(0xff, 0x55555554), storage_ix, storage);
static BROTLI_INLINE void StoreStaticCodeLengthCode(
size_t* storage_ix, uint8_t* storage) {
BrotliWriteBits(
40, MAKE_UINT64_T(0x0000ffU, 0x55555554U), storage_ix, storage);
}
static const uint64_t kZeroRepsBits[704] = {
static const uint64_t kZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
0x00000000, 0x00000000, 0x00000000, 0x00000007, 0x00000017, 0x00000027,
0x00000037, 0x00000047, 0x00000057, 0x00000067, 0x00000077, 0x00000770,
0x00000b87, 0x00001387, 0x00001b87, 0x00002387, 0x00002b87, 0x00003387,
@ -202,7 +207,7 @@ static const uint64_t kZeroRepsBits[704] = {
0x06f9cb87, 0x08f9cb87,
};
static const uint32_t kZeroRepsDepth[704] = {
static const uint32_t kZeroRepsDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
0, 4, 8, 7, 7, 7, 7, 7, 7, 7, 7, 11, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
@ -249,7 +254,7 @@ static const uint32_t kZeroRepsDepth[704] = {
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
};
static const uint64_t kNonZeroRepsBits[704] = {
static const uint64_t kNonZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
0x0000000b, 0x0000001b, 0x0000002b, 0x0000003b, 0x000002cb, 0x000006cb,
0x00000acb, 0x00000ecb, 0x000002db, 0x000006db, 0x00000adb, 0x00000edb,
0x000002eb, 0x000006eb, 0x00000aeb, 0x00000eeb, 0x000002fb, 0x000006fb,
@ -370,7 +375,7 @@ static const uint64_t kNonZeroRepsBits[704] = {
0x2baeb6db, 0x3baeb6db,
};
static const uint32_t kNonZeroRepsDepth[704] = {
static const uint32_t kNonZeroRepsDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
6, 6, 6, 6, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
@ -417,47 +422,7 @@ static const uint32_t kNonZeroRepsDepth[704] = {
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
};
static const uint16_t kStaticLiteralCodeBits[256] = {
0, 128, 64, 192, 32, 160, 96, 224,
16, 144, 80, 208, 48, 176, 112, 240,
8, 136, 72, 200, 40, 168, 104, 232,
24, 152, 88, 216, 56, 184, 120, 248,
4, 132, 68, 196, 36, 164, 100, 228,
20, 148, 84, 212, 52, 180, 116, 244,
12, 140, 76, 204, 44, 172, 108, 236,
28, 156, 92, 220, 60, 188, 124, 252,
2, 130, 66, 194, 34, 162, 98, 226,
18, 146, 82, 210, 50, 178, 114, 242,
10, 138, 74, 202, 42, 170, 106, 234,
26, 154, 90, 218, 58, 186, 122, 250,
6, 134, 70, 198, 38, 166, 102, 230,
22, 150, 86, 214, 54, 182, 118, 246,
14, 142, 78, 206, 46, 174, 110, 238,
30, 158, 94, 222, 62, 190, 126, 254,
1, 129, 65, 193, 33, 161, 97, 225,
17, 145, 81, 209, 49, 177, 113, 241,
9, 137, 73, 201, 41, 169, 105, 233,
25, 153, 89, 217, 57, 185, 121, 249,
5, 133, 69, 197, 37, 165, 101, 229,
21, 149, 85, 213, 53, 181, 117, 245,
13, 141, 77, 205, 45, 173, 109, 237,
29, 157, 93, 221, 61, 189, 125, 253,
3, 131, 67, 195, 35, 163, 99, 227,
19, 147, 83, 211, 51, 179, 115, 243,
11, 139, 75, 203, 43, 171, 107, 235,
27, 155, 91, 219, 59, 187, 123, 251,
7, 135, 71, 199, 39, 167, 103, 231,
23, 151, 87, 215, 55, 183, 119, 247,
15, 143, 79, 207, 47, 175, 111, 239,
31, 159, 95, 223, 63, 191, 127, 255,
};
inline void StoreStaticLiteralHuffmanTree(size_t* storage_ix,
uint8_t* storage) {
WriteBits(32, 0x00010003U, storage_ix, storage);
}
static const uint16_t kStaticCommandCodeBits[kNumCommandPrefixes] = {
static const uint16_t kStaticCommandCodeBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
0, 256, 128, 384, 64, 320, 192, 448,
32, 288, 160, 416, 96, 352, 224, 480,
16, 272, 144, 400, 80, 336, 208, 464,
@ -548,10 +513,11 @@ static const uint16_t kStaticCommandCodeBits[kNumCommandPrefixes] = {
255, 1279, 767, 1791, 511, 1535, 1023, 2047,
};
inline void StoreStaticCommandHuffmanTree(size_t* storage_ix,
uint8_t* storage) {
WriteBits(28, 0x0000000006307003U, storage_ix, storage);
WriteBits(31, 0x0000000009262441U, storage_ix, storage);
static BROTLI_INLINE void StoreStaticCommandHuffmanTree(
size_t* storage_ix, uint8_t* storage) {
BrotliWriteBits(
56, MAKE_UINT64_T(0x926244U, 0x16307003U), storage_ix, storage);
BrotliWriteBits(3, 0x00000000U, storage_ix, storage);
}
static const uint16_t kStaticDistanceCodeBits[64] = {
@ -561,12 +527,13 @@ static const uint16_t kStaticDistanceCodeBits[64] = {
3, 35, 19, 51, 11, 43, 27, 59, 7, 39, 23, 55, 15, 47, 31, 63,
};
inline void StoreStaticDistanceHuffmanTree(size_t* storage_ix,
uint8_t* storage) {
WriteBits(18, 0x000000000001dc03U, storage_ix, storage);
WriteBits(10, 0x00000000000000daU, storage_ix, storage);
static BROTLI_INLINE void StoreStaticDistanceHuffmanTree(
size_t* storage_ix, uint8_t* storage) {
BrotliWriteBits(28, 0x0369dc03U, storage_ix, storage);
}
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ */

View File

@ -9,16 +9,18 @@
#ifndef BROTLI_ENC_FAST_LOG_H_
#define BROTLI_ENC_FAST_LOG_H_
#include <assert.h>
#include <math.h>
#include "../common/types.h"
#include "../common/port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static inline uint32_t Log2FloorNonZero(size_t n) {
static BROTLI_INLINE uint32_t Log2FloorNonZero(size_t n) {
#ifdef __GNUC__
return 31u ^ static_cast<uint32_t>(__builtin_clz(static_cast<uint32_t>(n)));
return 31u ^ (uint32_t)__builtin_clz((uint32_t)n);
#else
uint32_t result = 0;
while (n >>= 1) result++;
@ -120,7 +122,7 @@ static const float kLog2Table[] = {
};
/* Faster logarithm for small integers, with the property of log2(0) == 0. */
static inline double FastLog2(size_t v) {
static BROTLI_INLINE double FastLog2(size_t v) {
if (v < sizeof(kLog2Table) / sizeof(kLog2Table[0])) {
return kLog2Table[v];
}
@ -129,12 +131,14 @@ static inline double FastLog2(size_t v) {
/* Visual Studio 2010 and Android API levels < 18 do not have the log2()
* function defined, so we use log() and a multiplication instead. */
static const double kLog2Inv = 1.4426950408889634f;
return log(static_cast<double>(v)) * kLog2Inv;
return log((double)v) * kLog2Inv;
#else
return log2(static_cast<double>(v));
return log2((double)v);
#endif
}
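/* Sketch of the intended use (illustrative; the Example* name is
   hypothetical): the log2(0) == 0 convention makes the Shannon cost term
   count * (log2(total) - log2(count)) vanish for empty symbols without a
   special case. */
static BROTLI_INLINE double ExampleBitCost(size_t count, size_t total) {
  return (double)count * (FastLog2(total) - FastLog2(count));
}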
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_FAST_LOG_H_ */

View File

@ -12,14 +12,16 @@
#include "../common/types.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
/* Separate implementation for little-endian 64-bit targets, for speed. */
#if defined(__GNUC__) && defined(_LP64) && defined(IS_LITTLE_ENDIAN)
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
size_t matched = 0;
size_t limit2 = (limit >> 3) + 1; /* + 1 is for pre-decrement in while */
while (PREDICT_TRUE(--limit2)) {
@ -30,7 +32,7 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
} else {
uint64_t x =
BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
size_t matching_bits = static_cast<size_t>(__builtin_ctzll(x));
size_t matching_bits = (size_t)__builtin_ctzll(x);
matched += matching_bits >> 3;
return matched;
}
@ -47,9 +49,9 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
return matched;
}
#else
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
size_t matched = 0;
const uint8_t* s2_limit = s2 + limit;
const uint8_t* s2_ptr = s2;
@ -71,6 +73,8 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
}
#endif
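/* Example (illustrative; the Example* name and buffers are hypothetical):
   both variants agree, e.g. "abcdef" vs "abcxef" with limit 6 match for 3
   bytes; the 64-bit path finds the mismatching byte with ctz on the XOR of
   two unaligned loads. */
static void ExampleFindMatchLength(void) {
  static const uint8_t s1[8] = { 'a', 'b', 'c', 'd', 'e', 'f', 0, 0 };
  static const uint8_t s2[8] = { 'a', 'b', 'c', 'x', 'e', 'f', 0, 0 };
  size_t len = FindMatchLengthWithLimit(s1, s2, 6);  /* len == 3 */
}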
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_FIND_MATCH_LENGTH_H_ */

enc/hash.h (1334 changed lines)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,285 @@
/* NOLINT(build/header_guard) */
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: FN, BUCKET_BITS, BLOCK_BITS,
NUM_LAST_DISTANCES_TO_CHECK */
/* A (forgetful) hash table of the data seen by the compressor, used to
help create backward references to previous data.
This is a hash map of fixed size (BUCKET_SIZE) to a ring buffer of
fixed size (BLOCK_SIZE). The ring buffer contains the last BLOCK_SIZE
index positions of the given hash key in the compressed data. */
#define HashLongestMatch HASHER()
/* Number of hash buckets. */
#define BUCKET_SIZE (1 << BUCKET_BITS)
/* Only BLOCK_SIZE newest backward references are kept,
and the older ones are forgotten. */
#define BLOCK_SIZE (1u << BLOCK_BITS)
/* Mask for accessing entries in a block (in a ringbuffer manner). */
#define BLOCK_MASK ((1 << BLOCK_BITS) - 1)
#define HASH_MAP_SIZE (2 << BUCKET_BITS)
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
/* HashBytes is the function that chooses the bucket to place
the address in. The HashLongestMatch and HashLongestMatchQuickly
hashers have separate, different implementations of hashing. */
static uint32_t FN(HashBytes)(const uint8_t *data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
return h >> (32 - BUCKET_BITS);
}
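/* Multiplicative ("Fibonacci") hashing: the top bits of the 32-bit product
   are the best mixed, so the bucket index comes from there. A standalone
   equivalent for BUCKET_BITS == 14 (the Example* name and the kMul
   parameter are hypothetical; kHashMul32 itself is supplied by the
   including header): */
static uint32_t ExampleHashBytes14(const uint8_t* data, uint32_t kMul) {
  uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kMul;
  return h >> (32 - 14);  /* bucket index in [0, 1 << 14) */
}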
typedef struct HashLongestMatch {
/* Number of entries in a particular bucket. */
uint16_t num_[BUCKET_SIZE];
/* Buckets containing BLOCK_SIZE of backward references. */
uint32_t buckets_[BLOCK_SIZE << BUCKET_BITS];
/* True if num_ array needs to be initialized. */
int is_dirty_;
size_t num_dict_lookups_;
size_t num_dict_matches_;
} HashLongestMatch;
static void FN(Reset)(HashLongestMatch* self) {
self->is_dirty_ = 1;
self->num_dict_lookups_ = 0;
self->num_dict_matches_ = 0;
}
static void FN(InitEmpty)(HashLongestMatch* self) {
if (self->is_dirty_) {
memset(self->num_, 0, sizeof(self->num_));
self->is_dirty_ = 0;
}
}
static void FN(InitForData)(HashLongestMatch* self, const uint8_t* data,
size_t num) {
size_t i;
for (i = 0; i < num; ++i) {
const uint32_t key = FN(HashBytes)(&data[i]);
self->num_[key] = 0;
}
if (num != 0) {
self->is_dirty_ = 0;
}
}
static void FN(Init)(
MemoryManager* m, HashLongestMatch* self, const uint8_t* data, int lgwin,
size_t position, size_t bytes, int is_last) {
/* Choose which init method is faster.
InitEmpty() (a plain memset) is about 100 times faster per byte than
InitForData(), so InitForData() pays off only for very small inputs. */
const size_t kMaxBytesForPartialHashInit = HASH_MAP_SIZE >> 7;
BROTLI_UNUSED(m);
BROTLI_UNUSED(lgwin);
if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) {
FN(InitForData)(self, data, bytes);
} else {
FN(InitEmpty)(self);
}
}
/* Look at 4 bytes at &data[ix & mask].
Compute a hash from these, and store the value of ix at that position. */
static BROTLI_INLINE void FN(Store)(HashLongestMatch* self, const uint8_t *data,
const size_t mask, const size_t ix) {
const uint32_t key = FN(HashBytes)(&data[ix & mask]);
const size_t minor_ix = self->num_[key] & BLOCK_MASK;
self->buckets_[minor_ix + (key << BLOCK_BITS)] = (uint32_t)ix;
++self->num_[key];
}
static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* self,
const uint8_t *data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
FN(Store)(self, data, mask, i);
}
}
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashLongestMatch* self,
size_t num_bytes, size_t position, const uint8_t* ringbuffer,
size_t ringbuffer_mask) {
if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
/* Prepare the hashes for three last bytes of the last write.
These could not be calculated before, since they require knowledge
of both the previous and the current block. */
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
}
}
/* Finds the longest backward match of &data[cur_ix] up to the length of
max_length and stores the position cur_ix in the hash table.
Does not look for matches longer than max_length.
Does not look for matches further away than max_backward.
Writes the best found match length into best_len_out.
Writes the backward distance of the best match (the offset of &data[cur_ix]
from the match start) into best_distance_out.
Writes the score of the best match into best_score_out.
Returns 1 when a match is found, otherwise 0. */
static BROTLI_INLINE int FN(FindLongestMatch)(HashLongestMatch* self,
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
const size_t max_length, const size_t max_backward,
size_t* BROTLI_RESTRICT best_len_out,
size_t* BROTLI_RESTRICT best_len_code_out,
size_t* BROTLI_RESTRICT best_distance_out,
double* BROTLI_RESTRICT best_score_out) {
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
int is_match_found = 0;
/* Don't accept a short copy from far away. */
double best_score = *best_score_out;
size_t best_len = *best_len_out;
size_t i;
*best_len_code_out = 0;
*best_len_out = 0;
/* Try last distance first. */
for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
const size_t idx = kDistanceCacheIndex[i];
const size_t backward =
(size_t)(distance_cache[idx] + kDistanceCacheOffset[i]);
size_t prev_ix = (size_t)(cur_ix - backward);
if (prev_ix >= cur_ix) {
continue;
}
if (PREDICT_FALSE(backward > max_backward)) {
continue;
}
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
{
const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
&data[cur_ix_masked],
max_length);
if (len >= 3 || (len == 2 && i < 2)) {
/* Comparing for >= 2 does not change the semantics, but just saves
a few unnecessary binary logarithms in the backward reference score,
since we are not interested in such short matches. */
double score = BackwardReferenceScoreUsingLastDistance(len, i);
if (best_score < score) {
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
*best_score_out = best_score;
is_match_found = 1;
}
}
}
}
{
const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
const uint32_t * BROTLI_RESTRICT const bucket =
&self->buckets_[key << BLOCK_BITS];
const size_t down =
(self->num_[key] > BLOCK_SIZE) ? (self->num_[key] - BLOCK_SIZE) : 0u;
for (i = self->num_[key]; i > down;) {
size_t prev_ix = bucket[--i & BLOCK_MASK];
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
break;
}
prev_ix &= ring_buffer_mask;
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
{
const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
&data[cur_ix_masked],
max_length);
if (len >= 4) {
/* Comparing for >= 3 does not change the semantics, but just saves
a few unnecessary binary logarithms in the backward reference
score, since we are not interested in such short matches. */
double score = BackwardReferenceScore(len, backward);
if (best_score < score) {
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
*best_score_out = best_score;
is_match_found = 1;
}
}
}
}
self->buckets_[(key << BLOCK_BITS) + (self->num_[key] & BLOCK_MASK)] =
(uint32_t)cur_ix;
++self->num_[key];
}
if (!is_match_found &&
self->num_dict_matches_ >= (self->num_dict_lookups_ >> 7)) {
size_t dict_key = Hash14(&data[cur_ix_masked]) << 1;
int k;
for (k = 0; k < 2; ++k, ++dict_key) {
const uint16_t v = kStaticDictionaryHash[dict_key];
++self->num_dict_lookups_;
if (v > 0) {
const size_t len = v & 31;
const size_t dist = v >> 5;
const size_t offset =
kBrotliDictionaryOffsetsByLength[len] + len * dist;
if (len <= max_length) {
const size_t matchlen =
FindMatchLengthWithLimit(&data[cur_ix_masked],
&kBrotliDictionary[offset], len);
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
const size_t transform_id = kCutoffTransforms[len - matchlen];
const size_t word_id = dist +
transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]);
const size_t backward = max_backward + word_id + 1;
double score = BackwardReferenceScore(matchlen, backward);
if (best_score < score) {
++self->num_dict_matches_;
best_score = score;
best_len = matchlen;
*best_len_out = best_len;
*best_len_code_out = len;
*best_distance_out = backward;
*best_score_out = best_score;
is_match_found = 1;
}
}
}
}
}
}
return is_match_found;
}
#undef HASH_MAP_SIZE
#undef BLOCK_MASK
#undef BLOCK_SIZE
#undef BUCKET_SIZE
#undef HashLongestMatch

View File

@ -0,0 +1,268 @@
/* NOLINT(build/header_guard) */
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: FN, BUCKET_BITS, BUCKET_SWEEP, USE_DICTIONARY */
#define HashLongestMatchQuickly HASHER()
#define BUCKET_SIZE (1 << BUCKET_BITS)
#define HASH_MAP_SIZE (4 << BUCKET_BITS)
static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 8; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 8; }
/* HashBytes is the function that chooses the bucket to place
the address in. The HashLongestMatch and HashLongestMatchQuickly
classes have separate, different implementations of hashing. */
static uint32_t FN(HashBytes)(const uint8_t *data) {
/* Computing a hash based on 5 bytes works much better for
qualities 1 and 3, where the next hash value is likely to replace this one. */
uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
return (uint32_t)(h >> (64 - BUCKET_BITS));
}
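For reference, a standalone sketch of the hash above (little-endian load assumed, as with BROTLI_UNALIGNED_LOAD64 on the platforms this path targets; kHashMul32 is the multiplicative constant from hash.h, 0x1e35a7bd in this tree, and 16 merely stands in for the BUCKET_BITS template parameter):

#include <stdint.h>
#include <string.h>

/* Shifting the 64-bit load left by 24 discards the top 3 bytes, so exactly
   5 bytes feed the multiply; the top BUCKET_BITS bits of the product, which
   carry the most mixture, become the bucket index. */
static uint32_t Hash5Bytes(const uint8_t* data) {
  uint64_t v;
  memcpy(&v, data, sizeof(v));  /* portable stand-in for the unaligned load */
  return (uint32_t)(((v << 24) * 0x1e35a7bdULL) >> (64 - 16));
}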
/* A (forgetful) hash table to the data seen by the compressor, to
help create backward references to previous data.
This is a hash map of fixed size (BUCKET_SIZE). Starting from the
given index, BUCKET_SWEEP buckets are used to store values of a key. */
typedef struct HashLongestMatchQuickly {
uint32_t buckets_[BUCKET_SIZE + BUCKET_SWEEP];
/* True if buckets_ array needs to be initialized. */
int is_dirty_;
size_t num_dict_lookups_;
size_t num_dict_matches_;
} HashLongestMatchQuickly;
static void FN(Reset)(HashLongestMatchQuickly* self) {
self->is_dirty_ = 1;
self->num_dict_lookups_ = 0;
self->num_dict_matches_ = 0;
}
static void FN(InitEmpty)(HashLongestMatchQuickly* self) {
if (self->is_dirty_) {
/* It is not strictly necessary to fill this buffer here, but
leaving it unfilled makes the compression results stochastic
(though still correct), because random contents can cause the
system to find accidentally good backward references here and there. */
memset(&self->buckets_[0], 0, sizeof(self->buckets_));
self->is_dirty_ = 0;
}
}
static void FN(InitForData)(HashLongestMatchQuickly* self, const uint8_t* data,
size_t num) {
size_t i;
for (i = 0; i < num; ++i) {
const uint32_t key = FN(HashBytes)(&data[i]);
memset(&self->buckets_[key], 0, BUCKET_SWEEP * sizeof(self->buckets_[0]));
}
if (num != 0) {
self->is_dirty_ = 0;
}
}
static void FN(Init)(
MemoryManager* m, HashLongestMatchQuickly* self, const uint8_t* data,
int lgwin, size_t position, size_t bytes, int is_last) {
/* Choose which init method is faster.
InitEmpty() is about 100 times faster than InitForData(). */
const size_t kMaxBytesForPartialHashInit = HASH_MAP_SIZE >> 7;
BROTLI_UNUSED(m);
BROTLI_UNUSED(lgwin);
if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) {
FN(InitForData)(self, data, bytes);
} else {
FN(InitEmpty)(self);
}
}
/* Look at 5 bytes at &data[ix & mask].
Compute a hash from these, and store the value somewhere within
[ix .. ix+3]. */
static BROTLI_INLINE void FN(Store)(HashLongestMatchQuickly* self,
const uint8_t *data, const size_t mask, const size_t ix) {
const uint32_t key = FN(HashBytes)(&data[ix & mask]);
/* Wiggle the value with the bucket sweep range. */
const uint32_t off = (ix >> 3) % BUCKET_SWEEP;
self->buckets_[key + off] = (uint32_t)ix;
}
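The (ix >> 3) % BUCKET_SWEEP "wiggle" rotates consecutive stores through the sweep slots instead of overwriting a single entry. A toy trace (a BUCKET_SWEEP of 4 is assumed purely for illustration):

#include <stdint.h>
#include <stdio.h>

/* Prints which sweep slot each position lands in: every 8 bytes advance to
   the next of 4 slots, so one bucket keeps up to 4 recent candidates. */
int main(void) {
  const uint32_t kBucketSweep = 4;  /* illustrative sweep width */
  uint32_t ix;
  for (ix = 0; ix < 64; ix += 8) {
    printf("ix=%2u -> slot %u\n",
           (unsigned)ix, (unsigned)((ix >> 3) % kBucketSweep));
  }
  return 0;  /* slots cycle 0,1,2,3,0,1,2,3 */
}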
static BROTLI_INLINE void FN(StoreRange)(HashLongestMatchQuickly* self,
const uint8_t *data, const size_t mask, const size_t ix_start,
const size_t ix_end) {
size_t i;
for (i = ix_start; i < ix_end; ++i) {
FN(Store)(self, data, mask, i);
}
}
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
HashLongestMatchQuickly* self, size_t num_bytes, size_t position,
const uint8_t* ringbuffer, size_t ringbuffer_mask) {
if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
/* Prepare the hashes for three last bytes of the last write.
These could not be calculated before, since they require knowledge
of both the previous and the current block. */
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
}
}
/* Finds the longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
up to the length of max_length and stores the position cur_ix in the
hash table.
Does not look for matches longer than max_length.
Does not look for matches further away than max_backward.
Writes the best found match length into best_len_out.
Writes the backward distance of the best found match into
best_distance_out.
Returns 1 if match is found, otherwise 0. */
static BROTLI_INLINE int FN(FindLongestMatch)(HashLongestMatchQuickly* self,
const uint8_t* BROTLI_RESTRICT ring_buffer, const size_t ring_buffer_mask,
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
const size_t max_length, const size_t max_backward,
size_t* BROTLI_RESTRICT best_len_out,
size_t* BROTLI_RESTRICT best_len_code_out,
size_t* BROTLI_RESTRICT best_distance_out,
double* BROTLI_RESTRICT best_score_out) {
const size_t best_len_in = *best_len_out;
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
const uint32_t key = FN(HashBytes)(&ring_buffer[cur_ix_masked]);
int compare_char = ring_buffer[cur_ix_masked + best_len_in];
double best_score = *best_score_out;
size_t best_len = best_len_in;
size_t cached_backward = (size_t)distance_cache[0];
size_t prev_ix = cur_ix - cached_backward;
int is_match_found = 0;
if (prev_ix < cur_ix) {
prev_ix &= (uint32_t)ring_buffer_mask;
if (compare_char == ring_buffer[prev_ix + best_len]) {
size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
best_len = len;
*best_len_out = len;
*best_len_code_out = len;
*best_distance_out = cached_backward;
*best_score_out = best_score;
compare_char = ring_buffer[cur_ix_masked + best_len];
if (BUCKET_SWEEP == 1) {
self->buckets_[key] = (uint32_t)cur_ix;
return 1;
} else {
is_match_found = 1;
}
}
}
}
if (BUCKET_SWEEP == 1) {
size_t backward;
size_t len;
/* Only one to look for, don't bother to prepare for a loop. */
prev_ix = self->buckets_[key];
self->buckets_[key] = (uint32_t)cur_ix;
backward = cur_ix - prev_ix;
prev_ix &= (uint32_t)ring_buffer_mask;
if (compare_char != ring_buffer[prev_ix + best_len_in]) {
return 0;
}
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
return 0;
}
len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
*best_len_out = len;
*best_len_code_out = len;
*best_distance_out = backward;
*best_score_out = BackwardReferenceScore(len, backward);
return 1;
}
} else {
uint32_t *bucket = self->buckets_ + key;
int i;
prev_ix = *bucket++;
for (i = 0; i < BUCKET_SWEEP; ++i, prev_ix = *bucket++) {
const size_t backward = cur_ix - prev_ix;
size_t len;
prev_ix &= (uint32_t)ring_buffer_mask;
if (compare_char != ring_buffer[prev_ix + best_len]) {
continue;
}
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
continue;
}
len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
&ring_buffer[cur_ix_masked],
max_length);
if (len >= 4) {
const double score = BackwardReferenceScore(len, backward);
if (best_score < score) {
best_score = score;
best_len = len;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = backward;
*best_score_out = score;
compare_char = ring_buffer[cur_ix_masked + best_len];
is_match_found = 1;
}
}
}
}
if (USE_DICTIONARY && !is_match_found &&
self->num_dict_matches_ >= (self->num_dict_lookups_ >> 7)) {
const uint32_t dict_key = Hash14(&ring_buffer[cur_ix_masked]) << 1;
const uint16_t v = kStaticDictionaryHash[dict_key];
++self->num_dict_lookups_;
if (v > 0) {
const uint32_t len = v & 31;
const uint32_t dist = v >> 5;
const size_t offset =
kBrotliDictionaryOffsetsByLength[len] + len * dist;
if (len <= max_length) {
const size_t matchlen =
FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
&kBrotliDictionary[offset], len);
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
const size_t transform_id = kCutoffTransforms[len - matchlen];
const size_t word_id = dist +
transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]);
const size_t backward = max_backward + word_id + 1;
const double score = BackwardReferenceScore(matchlen, backward);
if (best_score < score) {
++self->num_dict_matches_;
best_score = score;
best_len = matchlen;
*best_len_out = best_len;
*best_len_code_out = len;
*best_distance_out = backward;
*best_score_out = best_score;
is_match_found = 1;
}
}
}
}
}
self->buckets_[key + ((cur_ix >> 3) % BUCKET_SWEEP)] = (uint32_t)cur_ix;
return is_match_found;
}
#undef HASH_MAP_SIZE
#undef BUCKET_SIZE
#undef HashLongestMatchQuickly

View File

@@ -8,60 +8,88 @@
#include "./histogram.h"
#include <cmath>
#include "./block_splitter.h"
#include "./command.h"
#include "./context.h"
#include "./prefix.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
void BuildHistograms(
const Command* cmds,
const size_t num_commands,
const BlockSplit& literal_split,
const BlockSplit& insert_and_copy_split,
const BlockSplit& dist_split,
const uint8_t* ringbuffer,
size_t start_pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const std::vector<ContextType>& context_modes,
std::vector<HistogramLiteral>* literal_histograms,
std::vector<HistogramCommand>* insert_and_copy_histograms,
std::vector<HistogramDistance>* copy_dist_histograms) {
typedef struct BlockSplitIterator {
const BlockSplit* split_; /* Not owned. */
size_t idx_;
size_t type_;
size_t length_;
} BlockSplitIterator;
static void InitBlockSplitIterator(BlockSplitIterator* self,
const BlockSplit* split) {
self->split_ = split;
self->idx_ = 0;
self->type_ = 0;
self->length_ = split->lengths ? split->lengths[0] : 0;
}
static void BlockSplitIteratorNext(BlockSplitIterator* self) {
if (self->length_ == 0) {
++self->idx_;
self->type_ = self->split_->types[self->idx_];
self->length_ = self->split_->lengths[self->idx_];
}
--self->length_;
}
void BrotliBuildHistogramsWithContext(
const Command* cmds, const size_t num_commands,
const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t start_pos,
size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
const ContextType* context_modes, HistogramLiteral* literal_histograms,
HistogramCommand* insert_and_copy_histograms,
HistogramDistance* copy_dist_histograms) {
size_t pos = start_pos;
BlockSplitIterator literal_it(literal_split);
BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
BlockSplitIterator dist_it(dist_split);
for (size_t i = 0; i < num_commands; ++i) {
const Command &cmd = cmds[i];
insert_and_copy_it.Next();
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
cmd.cmd_prefix_);
for (size_t j = cmd.insert_len_; j != 0; --j) {
literal_it.Next();
size_t context = (literal_it.type_ << kLiteralContextBits) +
BlockSplitIterator literal_it;
BlockSplitIterator insert_and_copy_it;
BlockSplitIterator dist_it;
size_t i;
InitBlockSplitIterator(&literal_it, literal_split);
InitBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split);
InitBlockSplitIterator(&dist_it, dist_split);
for (i = 0; i < num_commands; ++i) {
const Command* cmd = &cmds[i];
size_t j;
BlockSplitIteratorNext(&insert_and_copy_it);
HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
cmd->cmd_prefix_);
for (j = cmd->insert_len_; j != 0; --j) {
size_t context;
BlockSplitIteratorNext(&literal_it);
context = (literal_it.type_ << BROTLI_LITERAL_CONTEXT_BITS) +
Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
(*literal_histograms)[context].Add(ringbuffer[pos & mask]);
HistogramAddLiteral(&literal_histograms[context],
ringbuffer[pos & mask]);
prev_byte2 = prev_byte;
prev_byte = ringbuffer[pos & mask];
++pos;
}
pos += cmd.copy_len();
if (cmd.copy_len()) {
pos += CommandCopyLen(cmd);
if (CommandCopyLen(cmd)) {
prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
dist_it.Next();
size_t context = (dist_it.type_ << kDistanceContextBits) +
cmd.DistanceContext();
(*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
if (cmd->cmd_prefix_ >= 128) {
size_t context;
BlockSplitIteratorNext(&dist_it);
context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
CommandDistanceContext(cmd);
HistogramAddDistance(&copy_dist_histograms[context],
cmd->dist_prefix_);
}
}
}
}
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@@ -9,87 +9,52 @@
#ifndef BROTLI_ENC_HISTOGRAM_H_
#define BROTLI_ENC_HISTOGRAM_H_
#include <cstring>
#include <limits>
#include <vector>
#include <string.h> /* memset */
#include "../common/constants.h"
#include "../common/types.h"
#include "./block_splitter.h"
#include "./command.h"
#include "./context.h"
#include "./fast_log.h"
#include "./prefix.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
struct BlockSplit;
#define FN(X) X ## Literal
#define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS
#define DataType uint8_t
#include "./histogram_inc.h" /* NOLINT(build/include) */
#undef DataType
#undef DATA_SIZE
#undef FN
// A simple container for histograms of data in blocks.
template<int kDataSize>
struct Histogram {
Histogram(void) {
Clear();
}
void Clear(void) {
memset(data_, 0, sizeof(data_));
total_count_ = 0;
bit_cost_ = std::numeric_limits<double>::infinity();
}
void Add(size_t val) {
++data_[val];
++total_count_;
}
void Remove(size_t val) {
--data_[val];
--total_count_;
}
template<typename DataType>
void Add(const DataType *p, size_t n) {
total_count_ += n;
n += 1;
while(--n) ++data_[*p++];
}
void AddHistogram(const Histogram& v) {
total_count_ += v.total_count_;
for (size_t i = 0; i < kDataSize; ++i) {
data_[i] += v.data_[i];
}
}
#define FN(X) X ## Command
#define DataType uint16_t
#define DATA_SIZE BROTLI_NUM_COMMAND_SYMBOLS
#include "./histogram_inc.h" /* NOLINT(build/include) */
#undef DATA_SIZE
#undef FN
uint32_t data_[kDataSize];
size_t total_count_;
double bit_cost_;
};
#define FN(X) X ## Distance
#define DATA_SIZE BROTLI_NUM_DISTANCE_SYMBOLS
#include "./histogram_inc.h" /* NOLINT(build/include) */
#undef DataType
#undef DATA_SIZE
#undef FN
// Literal histogram.
typedef Histogram<256> HistogramLiteral;
// Prefix histograms.
typedef Histogram<kNumCommandPrefixes> HistogramCommand;
typedef Histogram<kNumDistancePrefixes> HistogramDistance;
typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
// Context map histogram, 256 Huffman tree indexes + 16 run length codes.
typedef Histogram<272> HistogramContextMap;
// Block type histogram, 256 block types + 2 special symbols.
typedef Histogram<258> HistogramBlockType;
BROTLI_INTERNAL void BrotliBuildHistogramsWithContext(
const Command* cmds, const size_t num_commands,
const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t pos,
size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
const ContextType* context_modes, HistogramLiteral* literal_histograms,
HistogramCommand* insert_and_copy_histograms,
HistogramDistance* copy_dist_histograms);
static const size_t kLiteralContextBits = 6;
static const size_t kDistanceContextBits = 2;
void BuildHistograms(
const Command* cmds,
const size_t num_commands,
const BlockSplit& literal_split,
const BlockSplit& insert_and_copy_split,
const BlockSplit& dist_split,
const uint8_t* ringbuffer,
size_t pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const std::vector<ContextType>& context_modes,
std::vector<HistogramLiteral>* literal_histograms,
std::vector<HistogramCommand>* insert_and_copy_histograms,
std::vector<HistogramDistance>* copy_dist_histograms);
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_HISTOGRAM_H_ */

enc/histogram_inc.h (new file, 51 lines)
View File

@@ -0,0 +1,51 @@
/* NOLINT(build/header_guard) */
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: Histogram, DATA_SIZE, DataType */
/* A simple container for histograms of data in blocks. */
typedef struct FN(Histogram) {
uint32_t data_[DATA_SIZE];
size_t total_count_;
double bit_cost_;
} FN(Histogram);
static BROTLI_INLINE void FN(HistogramClear)(FN(Histogram)* self) {
memset(self->data_, 0, sizeof(self->data_));
self->total_count_ = 0;
self->bit_cost_ = HUGE_VAL;
}
static BROTLI_INLINE void FN(ClearHistograms)(
FN(Histogram)* array, size_t length) {
size_t i;
for (i = 0; i < length; ++i) FN(HistogramClear)(array + i);
}
static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
++self->data_[val];
++self->total_count_;
}
static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
const DataType *p, size_t n) {
self->total_count_ += n;
n += 1;
while (--n) ++self->data_[*p++];
}
static BROTLI_INLINE void FN(HistogramAddHistogram)(FN(Histogram)* self,
const FN(Histogram)* v) {
size_t i;
self->total_count_ += v->total_count_;
for (i = 0; i < DATA_SIZE; ++i) {
self->data_[i] += v->data_[i];
}
}
static BROTLI_INLINE size_t FN(HistogramDataSize)(void) { return DATA_SIZE; }
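This header is the C replacement for the former Histogram<kDataSize> template: each `#define FN(X) X ## Literal`-style include stamps out a concrete struct and function set. A usage sketch of the generated Literal instance (names exactly as the macros above produce them):

#include <stdio.h>
#include "./histogram.h"  /* instantiates histogram_inc.h for each alphabet */

int main(void) {
  HistogramLiteral h;            /* FN(Histogram) with FN(X) = X ## Literal */
  HistogramClearLiteral(&h);     /* FN(HistogramClear) */
  HistogramAddLiteral(&h, 'a');  /* FN(HistogramAdd) */
  HistogramAddLiteral(&h, 'a');
  printf("count['a']=%u total=%u\n",
         (unsigned)h.data_['a'], (unsigned)h.total_count_);
  return 0;
}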

View File

@@ -9,27 +9,26 @@
#include "./literal_cost.h"
#include <math.h>
#include <algorithm>
#include "../common/types.h"
#include "./fast_log.h"
#include "./port.h"
#include "./utf8_util.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
if (c < 128) {
return 0; /* Next one is the 'Byte 1' again. */
} else if (c >= 192) { /* Next one is the 'Byte 2' of utf-8 encoding. */
return std::min<size_t>(1, clamp);
return BROTLI_MIN(size_t, 1, clamp);
} else {
/* Let's decide over the last byte if this ends the sequence. */
if (last < 0xe0) {
return 0; /* Completed two or three byte coding. */
} else { /* Next one is the 'Byte 3' of utf-8 encoding. */
return std::min<size_t>(2, clamp);
return BROTLI_MIN(size_t, 2, clamp);
}
}
}
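A few illustrative checks of the state machine above (hypothetical test code; it would have to live in the same translation unit, since the function is static):

#include <assert.h>
#include <stddef.h>

static void UTF8PositionExamples(void) {
  assert(UTF8Position(0x00, 'a', 2) == 0);   /* ASCII: next is 'Byte 1' */
  assert(UTF8Position(0x00, 0xC3, 2) == 1);  /* lead byte: next is 'Byte 2' */
  assert(UTF8Position(0xC3, 0xA9, 2) == 0);  /* two-byte sequence completed */
  assert(UTF8Position(0xE2, 0x82, 2) == 2);  /* inside a 3-byte sequence */
}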
@@ -40,7 +39,8 @@ static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
size_t max_utf8 = 1; /* should be 2, but 1 compresses better. */
size_t last_c = 0;
size_t utf8_pos = 0;
for (size_t i = 0; i < len; ++i) {
size_t i;
for (i = 0; i < len; ++i) {
size_t c = data[(pos + i) & mask];
utf8_pos = UTF8Position(last_c, c, 2);
++counts[utf8_pos];
@@ -62,28 +62,31 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
size_t histogram[3][256] = { { 0 } };
size_t window_half = 495;
size_t in_window = std::min(window_half, len);
size_t in_window = BROTLI_MIN(size_t, window_half, len);
size_t in_window_utf8[3] = { 0 };
/* Bootstrap histograms. */
size_t last_c = 0;
size_t utf8_pos = 0;
for (size_t i = 0; i < in_window; ++i) {
size_t c = data[(pos + i) & mask];
++histogram[utf8_pos][c];
++in_window_utf8[utf8_pos];
utf8_pos = UTF8Position(last_c, c, max_utf8);
last_c = c;
size_t i;
{ /* Bootstrap histograms. */
size_t last_c = 0;
size_t utf8_pos = 0;
for (i = 0; i < in_window; ++i) {
size_t c = data[(pos + i) & mask];
++histogram[utf8_pos][c];
++in_window_utf8[utf8_pos];
utf8_pos = UTF8Position(last_c, c, max_utf8);
last_c = c;
}
}
/* Compute bit costs with sliding window. */
for (size_t i = 0; i < len; ++i) {
for (i = 0; i < len; ++i) {
if (i >= window_half) {
/* Remove a byte in the past. */
size_t c = i < window_half + 1 ?
0 : data[(pos + i - window_half - 1) & mask];
size_t last_c = i < window_half + 2 ?
0 : data[(pos + i - window_half - 2) & mask];
size_t c =
i < window_half + 1 ? 0 : data[(pos + i - window_half - 1) & mask];
size_t last_c =
i < window_half + 2 ? 0 : data[(pos + i - window_half - 2) & mask];
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
--histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
--in_window_utf8[utf8_pos2];
@@ -96,71 +99,80 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
++in_window_utf8[utf8_pos2];
}
size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
size_t masked_pos = (pos + i) & mask;
size_t histo = histogram[utf8_pos][data[masked_pos]];
if (histo == 0) {
histo = 1;
}
double lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
lit_cost += 0.02905;
if (lit_cost < 1.0) {
lit_cost *= 0.5;
lit_cost += 0.5;
}
{
size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
size_t masked_pos = (pos + i) & mask;
size_t histo = histogram[utf8_pos][data[masked_pos]];
double lit_cost;
if (histo == 0) {
histo = 1;
}
lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
lit_cost += 0.02905;
if (lit_cost < 1.0) {
lit_cost *= 0.5;
lit_cost += 0.5;
}
/* Make the first bytes more expensive -- seems to help, not sure why.
Perhaps because the entropy source is changing its properties
rapidly in the beginning of the file, perhaps because the beginning
of the data is a statistical "anomaly". */
if (i < 2000) {
lit_cost += 0.7 - (static_cast<double>(2000 - i) / 2000.0 * 0.35);
if (i < 2000) {
lit_cost += 0.7 - ((double)(2000 - i) / 2000.0 * 0.35);
}
cost[i] = (float)lit_cost;
}
cost[i] = static_cast<float>(lit_cost);
}
}
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
const uint8_t *data, float *cost) {
if (IsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
const uint8_t *data, float *cost) {
if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
return;
}
size_t histogram[256] = { 0 };
size_t window_half = 2000;
size_t in_window = std::min(window_half, len);
} else {
size_t histogram[256] = { 0 };
size_t window_half = 2000;
size_t in_window = BROTLI_MIN(size_t, window_half, len);
/* Bootstrap histogram. */
for (size_t i = 0; i < in_window; ++i) {
++histogram[data[(pos + i) & mask]];
}
size_t i;
for (i = 0; i < in_window; ++i) {
++histogram[data[(pos + i) & mask]];
}
/* Compute bit costs with sliding window. */
for (size_t i = 0; i < len; ++i) {
if (i >= window_half) {
for (i = 0; i < len; ++i) {
size_t histo;
if (i >= window_half) {
/* Remove a byte in the past. */
--histogram[data[(pos + i - window_half) & mask]];
--in_window;
}
if (i + window_half < len) {
--histogram[data[(pos + i - window_half) & mask]];
--in_window;
}
if (i + window_half < len) {
/* Add a byte in the future. */
++histogram[data[(pos + i + window_half) & mask]];
++in_window;
++histogram[data[(pos + i + window_half) & mask]];
++in_window;
}
histo = histogram[data[(pos + i) & mask]];
if (histo == 0) {
histo = 1;
}
{
double lit_cost = FastLog2(in_window) - FastLog2(histo);
lit_cost += 0.029;
if (lit_cost < 1.0) {
lit_cost *= 0.5;
lit_cost += 0.5;
}
cost[i] = (float)lit_cost;
}
}
size_t histo = histogram[data[(pos + i) & mask]];
if (histo == 0) {
histo = 1;
}
double lit_cost = FastLog2(in_window) - FastLog2(histo);
lit_cost += 0.029;
if (lit_cost < 1.0) {
lit_cost *= 0.5;
lit_cost += 0.5;
}
cost[i] = static_cast<float>(lit_cost);
}
}
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@@ -11,15 +11,20 @@
#define BROTLI_ENC_LITERAL_COST_H_
#include "../common/types.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
/* Estimates how many bits the literals in the interval [pos, pos + len) in the
ringbuffer (data, mask) will take entropy coded and writes these estimates
to the cost[0..len) array. */
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
const uint8_t *data, float *cost);
BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals(
size_t pos, size_t len, size_t mask, const uint8_t *data, float *cost);
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_LITERAL_COST_H_ */
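A minimal caller sketch for this API (buffer and sizes hypothetical; for a flat, non-wrapping buffer a mask of (size_t)-1 turns (pos + i) & mask into a plain index):

#include <stdint.h>
#include "./literal_cost.h"

static void EstimateExample(const uint8_t* data, size_t len, float* cost) {
  BrotliEstimateBitCostsForLiterals(0, len, (size_t)-1, data, cost);
  /* cost[i] now estimates the entropy-coded bits for data[i]. */
}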

enc/memory.c (new file, 181 lines)
View File

@@ -0,0 +1,181 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Memory management of the Brotli encoder: allocation wrappers that track
   outstanding pointers and handle out-of-memory conditions. */
#include "./memory.h"
#include <assert.h>
#include <stdlib.h> /* exit, free, malloc */
#include <string.h> /* memcpy */
#include "../common/types.h"
#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#define MAX_PERM_ALLOCATED 128
#define MAX_NEW_ALLOCATED 64
#define MAX_NEW_FREED 64
#define PERM_ALLOCATED_OFFSET 0
#define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED
#define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED)
static void* DefaultAllocFunc(void* opaque, size_t size) {
BROTLI_UNUSED(opaque);
return malloc(size);
}
static void DefaultFreeFunc(void* opaque, void* address) {
BROTLI_UNUSED(opaque);
free(address);
}
void BrotliInitMemoryManager(
MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
void* opaque) {
if (!alloc_func) {
m->alloc_func = DefaultAllocFunc;
m->free_func = DefaultFreeFunc;
m->opaque = 0;
} else {
m->alloc_func = alloc_func;
m->free_func = free_func;
m->opaque = opaque;
}
#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
m->is_oom = 0;
m->perm_allocated = 0;
m->new_allocated = 0;
m->new_freed = 0;
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
}
#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
void* BrotliAllocate(MemoryManager* m, size_t n) {
void* result = m->alloc_func(m->opaque, n);
if (!result) exit(EXIT_FAILURE);
return result;
}
void BrotliFree(MemoryManager* m, void* p) {
m->free_func(m->opaque, p);
}
void BrotliWipeOutMemoryManager(MemoryManager* m) {
BROTLI_UNUSED(m);
}
#else /* BROTLI_ENCODER_EXIT_ON_OOM */
static void SortPointers(void** items, const size_t n) {
/* Shell sort. */
static const size_t gaps[] = {23, 10, 4, 1};
int g = 0;
for (; g < 4; ++g) {
size_t gap = gaps[g];
size_t i;
for (i = gap; i < n; ++i) {
size_t j = i;
void* tmp = items[i];
for (; j >= gap && tmp < items[j - gap]; j -= gap) {
items[j] = items[j - gap];
}
items[j] = tmp;
}
}
}
static size_t Annihilate(void** a, size_t a_len, void** b, size_t b_len) {
size_t a_read_index = 0;
size_t b_read_index = 0;
size_t a_write_index = 0;
size_t b_write_index = 0;
size_t annihilated = 0;
while (a_read_index < a_len && b_read_index < b_len) {
if (a[a_read_index] == b[b_read_index]) {
a_read_index++;
b_read_index++;
annihilated++;
} else if (a[a_read_index] < b[b_read_index]) {
a[a_write_index++] = a[a_read_index++];
} else {
b[b_write_index++] = b[b_read_index++];
}
}
while (a_read_index < a_len) a[a_write_index++] = a[a_read_index++];
while (b_read_index < b_len) b[b_write_index++] = b[b_read_index++];
return annihilated;
}
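A worked sketch of the cancellation pass (hypothetical test in the same translation unit, since Annihilate is static; array-element addresses are ordered, satisfying the SortPointers precondition):

#include <assert.h>
#include <stddef.h>

static void AnnihilateExample(void) {
  int items[4];
  void* a[3];  /* e.g. newly allocated pointers, sorted */
  void* b[2];  /* e.g. newly freed pointers, sorted */
  size_t n;
  a[0] = &items[0]; a[1] = &items[1]; a[2] = &items[2];
  b[0] = &items[1]; b[1] = &items[3];
  n = Annihilate(a, 3, b, 2);
  assert(n == 1);                  /* &items[1] cancelled from both lists */
  assert(a[0] == &items[0] && a[1] == &items[2]);
  assert(b[0] == &items[3]);       /* survivors compacted to the front */
}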
static void CollectGarbagePointers(MemoryManager* m) {
size_t annihilated;
SortPointers(m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated);
SortPointers(m->pointers + NEW_FREED_OFFSET, m->new_freed);
annihilated = Annihilate(
m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated,
m->pointers + NEW_FREED_OFFSET, m->new_freed);
m->new_allocated -= annihilated;
m->new_freed -= annihilated;
if (m->new_freed != 0) {
annihilated = Annihilate(
m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated,
m->pointers + NEW_FREED_OFFSET, m->new_freed);
m->perm_allocated -= annihilated;
m->new_freed -= annihilated;
assert(m->new_freed == 0);
}
if (m->new_allocated != 0) {
assert(m->perm_allocated + m->new_allocated <= MAX_PERM_ALLOCATED);
memcpy(m->pointers + PERM_ALLOCATED_OFFSET + m->perm_allocated,
m->pointers + NEW_ALLOCATED_OFFSET,
sizeof(void*) * m->new_allocated);
m->perm_allocated += m->new_allocated;
m->new_allocated = 0;
SortPointers(m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated);
}
}
void* BrotliAllocate(MemoryManager* m, size_t n) {
void* result = m->alloc_func(m->opaque, n);
if (!result) {
m->is_oom = 1;
return NULL;
}
if (m->new_allocated == MAX_NEW_ALLOCATED) CollectGarbagePointers(m);
m->pointers[NEW_ALLOCATED_OFFSET + (m->new_allocated++)] = result;
return result;
}
void BrotliFree(MemoryManager* m, void* p) {
if (!p) return;
m->free_func(m->opaque, p);
if (m->new_freed == MAX_NEW_FREED) CollectGarbagePointers(m);
m->pointers[NEW_FREED_OFFSET + (m->new_freed++)] = p;
}
void BrotliWipeOutMemoryManager(MemoryManager* m) {
size_t i;
CollectGarbagePointers(m);
/* Now all unfreed pointers are in the perm-allocated list. */
for (i = 0; i < m->perm_allocated; ++i) {
m->free_func(m->opaque, m->pointers[PERM_ALLOCATED_OFFSET + i]);
}
m->perm_allocated = 0;
}
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

enc/memory.h (new file, 62 lines)
View File

@@ -0,0 +1,62 @@
/* Copyright 2016 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Macros for memory management. */
#ifndef BROTLI_ENC_MEMORY_H_
#define BROTLI_ENC_MEMORY_H_
#include "../common/types.h"
#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#if !defined(BROTLI_ENCODER_CLEANUP_ON_OOM) && \
!defined(BROTLI_ENCODER_EXIT_ON_OOM)
#define BROTLI_ENCODER_EXIT_ON_OOM
#endif
typedef struct MemoryManager {
brotli_alloc_func alloc_func;
brotli_free_func free_func;
void* opaque;
#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
int is_oom;
size_t perm_allocated;
size_t new_allocated;
size_t new_freed;
void* pointers[256];
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
} MemoryManager;
BROTLI_INTERNAL void BrotliInitMemoryManager(
MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
void* opaque);
BROTLI_INTERNAL void* BrotliAllocate(MemoryManager* m, size_t n);
#define BROTLI_ALLOC(M, T, N) ((T*)BrotliAllocate((M), (N) * sizeof(T)))
BROTLI_INTERNAL void BrotliFree(MemoryManager* m, void* p);
#define BROTLI_FREE(M, P) { \
BrotliFree((M), (P)); \
P = NULL; \
}
#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
#define BROTLI_IS_OOM(M) (!!0)
#else /* BROTLI_ENCODER_EXIT_ON_OOM */
#define BROTLI_IS_OOM(M) (!!(M)->is_oom)
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
BROTLI_INTERNAL void BrotliWipeOutMemoryManager(MemoryManager* m);
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_MEMORY_H_ */
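A caller sketch for the protocol these macros define: check BROTLI_IS_OOM after every allocation and propagate failure, letting the top level call BrotliWipeOutMemoryManager to release whatever is still tracked (the function below is illustrative):

#include "./memory.h"

static int ExampleUser(MemoryManager* m) {
  uint32_t* buf = BROTLI_ALLOC(m, uint32_t, 1024);
  if (BROTLI_IS_OOM(m)) return 0;  /* caller wipes the manager */
  buf[0] = 42;
  BROTLI_FREE(m, buf);             /* frees and resets buf to NULL */
  return 1;
}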

View File

@@ -9,212 +9,199 @@
#include "./metablock.h"
#include "../common/constants.h"
#include "../common/types.h"
#include "./bit_cost.h"
#include "./block_splitter.h"
#include "./cluster.h"
#include "./context.h"
#include "./entropy_encode.h"
#include "./histogram.h"
#include "./memory.h"
#include "./port.h"
namespace brotli {
void BuildMetaBlock(const uint8_t* ringbuffer,
const size_t pos,
const size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const Command* cmds,
size_t num_commands,
ContextType literal_context_mode,
MetaBlockSplit* mb) {
SplitBlock(cmds, num_commands,
ringbuffer, pos, mask,
&mb->literal_split,
&mb->command_split,
&mb->distance_split);
std::vector<ContextType> literal_context_modes(mb->literal_split.num_types,
literal_context_mode);
size_t num_literal_contexts =
mb->literal_split.num_types << kLiteralContextBits;
size_t num_distance_contexts =
mb->distance_split.num_types << kDistanceContextBits;
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
mb->command_histograms.resize(mb->command_split.num_types);
std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
BuildHistograms(cmds, num_commands,
mb->literal_split,
mb->command_split,
mb->distance_split,
ringbuffer,
pos,
mask,
prev_byte,
prev_byte2,
literal_context_modes,
&literal_histograms,
&mb->command_histograms,
&distance_histograms);
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
void BrotliBuildMetaBlock(MemoryManager* m,
const uint8_t* ringbuffer,
const size_t pos,
const size_t mask,
const int quality,
uint8_t prev_byte,
uint8_t prev_byte2,
const Command* cmds,
size_t num_commands,
ContextType literal_context_mode,
MetaBlockSplit* mb) {
/* Histogram ids need to fit in one byte. */
static const size_t kMaxNumberOfHistograms = 256;
HistogramDistance* distance_histograms;
HistogramLiteral* literal_histograms;
ContextType* literal_context_modes;
size_t num_literal_contexts;
size_t num_distance_contexts;
size_t i;
ClusterHistograms(literal_histograms,
1u << kLiteralContextBits,
mb->literal_split.num_types,
kMaxNumberOfHistograms,
&mb->literal_histograms,
&mb->literal_context_map);
BrotliSplitBlock(m, cmds, num_commands,
ringbuffer, pos, mask, quality,
&mb->literal_split,
&mb->command_split,
&mb->distance_split);
if (BROTLI_IS_OOM(m)) return;
ClusterHistograms(distance_histograms,
1u << kDistanceContextBits,
mb->distance_split.num_types,
kMaxNumberOfHistograms,
&mb->distance_histograms,
&mb->distance_context_map);
literal_context_modes =
BROTLI_ALLOC(m, ContextType, mb->literal_split.num_types);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < mb->literal_split.num_types; ++i) {
literal_context_modes[i] = literal_context_mode;
}
num_literal_contexts =
mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
num_distance_contexts =
mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
literal_histograms = BROTLI_ALLOC(m, HistogramLiteral, num_literal_contexts);
if (BROTLI_IS_OOM(m)) return;
ClearHistogramsLiteral(literal_histograms, num_literal_contexts);
assert(mb->command_histograms == 0);
mb->command_histograms_size = mb->command_split.num_types;
mb->command_histograms =
BROTLI_ALLOC(m, HistogramCommand, mb->command_histograms_size);
if (BROTLI_IS_OOM(m)) return;
ClearHistogramsCommand(mb->command_histograms, mb->command_histograms_size);
distance_histograms =
BROTLI_ALLOC(m, HistogramDistance, num_distance_contexts);
if (BROTLI_IS_OOM(m)) return;
ClearHistogramsDistance(distance_histograms, num_distance_contexts);
BrotliBuildHistogramsWithContext(cmds, num_commands,
&mb->literal_split, &mb->command_split, &mb->distance_split,
ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes,
literal_histograms, mb->command_histograms, distance_histograms);
BROTLI_FREE(m, literal_context_modes);
assert(mb->literal_context_map == 0);
mb->literal_context_map_size =
mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
mb->literal_context_map =
BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
if (BROTLI_IS_OOM(m)) return;
assert(mb->literal_histograms == 0);
mb->literal_histograms_size = mb->literal_context_map_size;
mb->literal_histograms =
BROTLI_ALLOC(m, HistogramLiteral, mb->literal_histograms_size);
if (BROTLI_IS_OOM(m)) return;
BrotliClusterHistogramsLiteral(m, literal_histograms,
mb->literal_context_map_size,
kMaxNumberOfHistograms,
mb->literal_histograms,
&mb->literal_histograms_size,
mb->literal_context_map);
if (BROTLI_IS_OOM(m)) return;
BROTLI_FREE(m, literal_histograms);
assert(mb->distance_context_map == 0);
mb->distance_context_map_size =
mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
mb->distance_context_map =
BROTLI_ALLOC(m, uint32_t, mb->distance_context_map_size);
if (BROTLI_IS_OOM(m)) return;
assert(mb->distance_histograms == 0);
mb->distance_histograms_size = mb->distance_context_map_size;
mb->distance_histograms =
BROTLI_ALLOC(m, HistogramDistance, mb->distance_histograms_size);
if (BROTLI_IS_OOM(m)) return;
BrotliClusterHistogramsDistance(m, distance_histograms,
mb->distance_context_map_size,
kMaxNumberOfHistograms,
mb->distance_histograms,
&mb->distance_histograms_size,
mb->distance_context_map);
if (BROTLI_IS_OOM(m)) return;
BROTLI_FREE(m, distance_histograms);
}
// Greedy block splitter for one block category (literal, command or distance).
template<typename HistogramType>
class BlockSplitter {
public:
BlockSplitter(size_t alphabet_size,
size_t min_block_size,
double split_threshold,
size_t num_symbols,
BlockSplit* split,
std::vector<HistogramType>* histograms)
: alphabet_size_(alphabet_size),
min_block_size_(min_block_size),
split_threshold_(split_threshold),
num_blocks_(0),
split_(split),
histograms_(histograms),
target_block_size_(min_block_size),
block_size_(0),
curr_histogram_ix_(0),
merge_last_count_(0) {
size_t max_num_blocks = num_symbols / min_block_size + 1;
// We have to allocate one more histogram than the maximum number of block
// types for the current histogram when the meta-block is too big.
size_t max_num_types = std::min<size_t>(max_num_blocks, kMaxBlockTypes + 1);
split_->lengths.resize(max_num_blocks);
split_->types.resize(max_num_blocks);
histograms_->resize(max_num_types);
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
#define FN(X) X ## Literal
#include "./metablock_inc.h" /* NOLINT(build/include) */
#undef FN
#define FN(X) X ## Command
#include "./metablock_inc.h" /* NOLINT(build/include) */
#undef FN
#define FN(X) X ## Distance
#include "./metablock_inc.h" /* NOLINT(build/include) */
#undef FN
void BrotliBuildMetaBlockGreedy(MemoryManager* m,
const uint8_t* ringbuffer,
size_t pos,
size_t mask,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb) {
BlockSplitterLiteral lit_blocks;
BlockSplitterCommand cmd_blocks;
BlockSplitterDistance dist_blocks;
size_t num_literals = 0;
size_t i;
for (i = 0; i < n_commands; ++i) {
num_literals += commands[i].insert_len_;
}
// Adds the next symbol to the current histogram. When the current histogram
// reaches the target size, decides on merging the block.
void AddSymbol(size_t symbol) {
(*histograms_)[curr_histogram_ix_].Add(symbol);
++block_size_;
if (block_size_ == target_block_size_) {
FinishBlock(/* is_final = */ false);
InitBlockSplitterLiteral(m, &lit_blocks, 256, 512, 400.0, num_literals,
&mb->literal_split, &mb->literal_histograms,
&mb->literal_histograms_size);
if (BROTLI_IS_OOM(m)) return;
InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024,
500.0, n_commands, &mb->command_split, &mb->command_histograms,
&mb->command_histograms_size);
if (BROTLI_IS_OOM(m)) return;
InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands,
&mb->distance_split, &mb->distance_histograms,
&mb->distance_histograms_size);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
size_t j;
BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_);
for (j = cmd.insert_len_; j != 0; --j) {
BlockSplitterAddSymbolLiteral(&lit_blocks, ringbuffer[pos & mask]);
++pos;
}
pos += CommandCopyLen(&cmd);
if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) {
BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_);
}
}
// Does either of three things:
// (1) emits the current block with a new block type;
// (2) emits the current block with the type of the second last block;
// (3) merges the current block with the last block.
void FinishBlock(bool is_final) {
if (block_size_ < min_block_size_) {
block_size_ = min_block_size_;
}
if (num_blocks_ == 0) {
// Create first block.
split_->lengths[0] = static_cast<uint32_t>(block_size_);
split_->types[0] = 0;
last_entropy_[0] =
BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_);
last_entropy_[1] = last_entropy_[0];
++num_blocks_;
++split_->num_types;
++curr_histogram_ix_;
block_size_ = 0;
} else if (block_size_ > 0) {
double entropy = BitsEntropy(&(*histograms_)[curr_histogram_ix_].data_[0],
alphabet_size_);
HistogramType combined_histo[2];
double combined_entropy[2];
double diff[2];
for (size_t j = 0; j < 2; ++j) {
size_t last_histogram_ix = last_histogram_ix_[j];
combined_histo[j] = (*histograms_)[curr_histogram_ix_];
combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]);
combined_entropy[j] = BitsEntropy(
&combined_histo[j].data_[0], alphabet_size_);
diff[j] = combined_entropy[j] - entropy - last_entropy_[j];
}
if (split_->num_types < kMaxBlockTypes &&
diff[0] > split_threshold_ &&
diff[1] > split_threshold_) {
// Create new block.
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
last_histogram_ix_[1] = last_histogram_ix_[0];
last_histogram_ix_[0] = static_cast<uint8_t>(split_->num_types);
last_entropy_[1] = last_entropy_[0];
last_entropy_[0] = entropy;
++num_blocks_;
++split_->num_types;
++curr_histogram_ix_;
block_size_ = 0;
merge_last_count_ = 0;
target_block_size_ = min_block_size_;
} else if (diff[1] < diff[0] - 20.0) {
// Combine this block with second last block.
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
(*histograms_)[last_histogram_ix_[0]] = combined_histo[1];
last_entropy_[1] = last_entropy_[0];
last_entropy_[0] = combined_entropy[1];
++num_blocks_;
block_size_ = 0;
(*histograms_)[curr_histogram_ix_].Clear();
merge_last_count_ = 0;
target_block_size_ = min_block_size_;
} else {
// Combine this block with last block.
split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
(*histograms_)[last_histogram_ix_[0]] = combined_histo[0];
last_entropy_[0] = combined_entropy[0];
if (split_->num_types == 1) {
last_entropy_[1] = last_entropy_[0];
}
block_size_ = 0;
(*histograms_)[curr_histogram_ix_].Clear();
if (++merge_last_count_ > 1) {
target_block_size_ += min_block_size_;
}
}
}
if (is_final) {
(*histograms_).resize(split_->num_types);
split_->types.resize(num_blocks_);
split_->lengths.resize(num_blocks_);
}
}
private:
static const uint16_t kMaxBlockTypes = 256;
BlockSplitterFinishBlockLiteral(&lit_blocks, /* is_final = */ 1);
BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ 1);
BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ 1);
}
/* Greedy block splitter for one block category (literal, command or distance).
Gathers histograms for all context buckets. */
typedef struct ContextBlockSplitter {
/* Alphabet size of particular block category. */
const size_t alphabet_size_;
size_t alphabet_size_;
size_t num_contexts_;
size_t max_block_types_;
/* We collect at least this many symbols for each block. */
const size_t min_block_size_;
size_t min_block_size_;
/* We merge histograms A and B if
entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
where A is the current histogram and B is the histogram of the last or the
second last block type. */
const double split_threshold_;
double split_threshold_;
size_t num_blocks_;
BlockSplit* split_; /* not owned */
std::vector<HistogramType>* histograms_; /* not owned */
HistogramLiteral* histograms_; /* not owned */
size_t* histograms_size_; /* not owned */
/* The number of symbols that we want to collect before deciding on whether
or not to merge the block with a previous one or emit a new block. */
@@ -226,315 +213,302 @@ class BlockSplitter {
/* Offset of the histograms of the previous two block types. */
size_t last_histogram_ix_[2];
/* Entropy of the previous two block types. */
double last_entropy_[2];
double* last_entropy_;
/* The number of times we merged the current block with the last one. */
size_t merge_last_count_;
};
} ContextBlockSplitter;
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
size_t pos,
size_t mask,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb) {
size_t num_literals = 0;
for (size_t i = 0; i < n_commands; ++i) {
num_literals += commands[i].insert_len_;
}
static void InitContextBlockSplitter(
MemoryManager* m, ContextBlockSplitter* self, size_t alphabet_size,
size_t num_contexts, size_t min_block_size, double split_threshold,
size_t num_symbols, BlockSplit* split, HistogramLiteral** histograms,
size_t* histograms_size) {
size_t max_num_blocks = num_symbols / min_block_size + 1;
size_t max_num_types;
BlockSplitter<HistogramLiteral> lit_blocks(
256, 512, 400.0, num_literals,
&mb->literal_split, &mb->literal_histograms);
BlockSplitter<HistogramCommand> cmd_blocks(
kNumCommandPrefixes, 1024, 500.0, n_commands,
&mb->command_split, &mb->command_histograms);
BlockSplitter<HistogramDistance> dist_blocks(
64, 512, 100.0, n_commands,
&mb->distance_split, &mb->distance_histograms);
self->alphabet_size_ = alphabet_size;
self->num_contexts_ = num_contexts;
self->max_block_types_ = BROTLI_MAX_NUMBER_OF_BLOCK_TYPES / num_contexts;
self->min_block_size_ = min_block_size;
self->split_threshold_ = split_threshold;
self->num_blocks_ = 0;
self->split_ = split;
self->histograms_size_ = histograms_size;
self->target_block_size_ = min_block_size;
self->block_size_ = 0;
self->curr_histogram_ix_ = 0;
self->merge_last_count_ = 0;
for (size_t i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
for (size_t j = cmd.insert_len_; j != 0; --j) {
lit_blocks.AddSymbol(ringbuffer[pos & mask]);
++pos;
}
pos += cmd.copy_len();
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
dist_blocks.AddSymbol(cmd.dist_prefix_);
}
}
lit_blocks.FinishBlock(/* is_final = */ true);
cmd_blocks.FinishBlock(/* is_final = */ true);
dist_blocks.FinishBlock(/* is_final = */ true);
/* We have to allocate one more histogram than the maximum number of block
types for the current histogram when the meta-block is too big. */
max_num_types =
BROTLI_MIN(size_t, max_num_blocks, self->max_block_types_ + 1);
BROTLI_ENSURE_CAPACITY(m, uint8_t,
split->types, split->types_alloc_size, max_num_blocks);
BROTLI_ENSURE_CAPACITY(m, uint32_t,
split->lengths, split->lengths_alloc_size, max_num_blocks);
if (BROTLI_IS_OOM(m)) return;
split->num_blocks = max_num_blocks;
self->last_entropy_ = BROTLI_ALLOC(m, double, 2 * num_contexts);
if (BROTLI_IS_OOM(m)) return;
assert(*histograms == 0);
*histograms_size = max_num_types * num_contexts;
*histograms = BROTLI_ALLOC(m, HistogramLiteral, *histograms_size);
self->histograms_ = *histograms;
if (BROTLI_IS_OOM(m)) return;
/* Clear only the current set of histograms, one per context. */
ClearHistogramsLiteral(&self->histograms_[0], num_contexts);
self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
}
// Greedy block splitter for one block category (literal, command or distance).
// Gathers histograms for all context buckets.
template<typename HistogramType>
class ContextBlockSplitter {
public:
ContextBlockSplitter(size_t alphabet_size,
size_t num_contexts,
size_t min_block_size,
double split_threshold,
size_t num_symbols,
BlockSplit* split,
std::vector<HistogramType>* histograms)
: alphabet_size_(alphabet_size),
num_contexts_(num_contexts),
max_block_types_(kMaxBlockTypes / num_contexts),
min_block_size_(min_block_size),
split_threshold_(split_threshold),
num_blocks_(0),
split_(split),
histograms_(histograms),
target_block_size_(min_block_size),
block_size_(0),
curr_histogram_ix_(0),
last_entropy_(2 * num_contexts),
merge_last_count_(0) {
size_t max_num_blocks = num_symbols / min_block_size + 1;
// We have to allocate one more histogram than the maximum number of block
// types for the current histogram when the meta-block is too big.
size_t max_num_types = std::min(max_num_blocks, max_block_types_ + 1);
split_->lengths.resize(max_num_blocks);
split_->types.resize(max_num_blocks);
histograms_->resize(max_num_types * num_contexts);
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
}
// Adds the next symbol to the current block type and context. When the
// current block reaches the target size, decides on merging the block.
void AddSymbol(size_t symbol, size_t context) {
(*histograms_)[curr_histogram_ix_ + context].Add(symbol);
++block_size_;
if (block_size_ == target_block_size_) {
FinishBlock(/* is_final = */ false);
}
}
static void CleanupContextBlockSplitter(
MemoryManager* m, ContextBlockSplitter* self) {
BROTLI_FREE(m, self->last_entropy_);
}
/* Does either of three things:
(1) emits the current block with a new block type;
(2) emits the current block with the type of the second last block;
(3) merges the current block with the last block. */
void FinishBlock(bool is_final) {
if (block_size_ < min_block_size_) {
block_size_ = min_block_size_;
static void ContextBlockSplitterFinishBlock(
MemoryManager* m, ContextBlockSplitter* self, int is_final) {
BlockSplit* split = self->split_;
const size_t num_contexts = self->num_contexts_;
double* last_entropy = self->last_entropy_;
HistogramLiteral* histograms = self->histograms_;
if (self->block_size_ < self->min_block_size_) {
self->block_size_ = self->min_block_size_;
}
if (self->num_blocks_ == 0) {
size_t i;
/* Create first block. */
split->lengths[0] = (uint32_t)self->block_size_;
split->types[0] = 0;
for (i = 0; i < num_contexts; ++i) {
last_entropy[i] =
BitsEntropy(histograms[i].data_, self->alphabet_size_);
last_entropy[num_contexts + i] = last_entropy[i];
}
if (num_blocks_ == 0) {
// Create first block.
split_->lengths[0] = static_cast<uint32_t>(block_size_);
split_->types[0] = 0;
for (size_t i = 0; i < num_contexts_; ++i) {
last_entropy_[i] =
BitsEntropy(&(*histograms_)[i].data_[0], alphabet_size_);
last_entropy_[num_contexts_ + i] = last_entropy_[i];
}
++num_blocks_;
++split_->num_types;
curr_histogram_ix_ += num_contexts_;
block_size_ = 0;
} else if (block_size_ > 0) {
++self->num_blocks_;
++split->num_types;
self->curr_histogram_ix_ += num_contexts;
if (self->curr_histogram_ix_ < *self->histograms_size_) {
ClearHistogramsLiteral(
&self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
}
self->block_size_ = 0;
} else if (self->block_size_ > 0) {
/* Try merging the set of histograms for the current block type with the
respective set of histograms for the last and second last block types.
Decide over the split based on the total reduction of entropy across
all contexts. */
std::vector<double> entropy(num_contexts_);
std::vector<HistogramType> combined_histo(2 * num_contexts_);
std::vector<double> combined_entropy(2 * num_contexts_);
double diff[2] = { 0.0 };
for (size_t i = 0; i < num_contexts_; ++i) {
size_t curr_histo_ix = curr_histogram_ix_ + i;
entropy[i] = BitsEntropy(&(*histograms_)[curr_histo_ix].data_[0],
alphabet_size_);
for (size_t j = 0; j < 2; ++j) {
size_t jx = j * num_contexts_ + i;
size_t last_histogram_ix = last_histogram_ix_[j] + i;
combined_histo[jx] = (*histograms_)[curr_histo_ix];
combined_histo[jx].AddHistogram((*histograms_)[last_histogram_ix]);
combined_entropy[jx] = BitsEntropy(
&combined_histo[jx].data_[0], alphabet_size_);
diff[j] += combined_entropy[jx] - entropy[i] - last_entropy_[jx];
}
double* entropy = BROTLI_ALLOC(m, double, num_contexts);
HistogramLiteral* combined_histo =
BROTLI_ALLOC(m, HistogramLiteral, 2 * num_contexts);
double* combined_entropy = BROTLI_ALLOC(m, double, 2 * num_contexts);
double diff[2] = { 0.0 };
size_t i;
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < num_contexts; ++i) {
size_t curr_histo_ix = self->curr_histogram_ix_ + i;
size_t j;
entropy[i] = BitsEntropy(histograms[curr_histo_ix].data_,
self->alphabet_size_);
for (j = 0; j < 2; ++j) {
size_t jx = j * num_contexts + i;
size_t last_histogram_ix = self->last_histogram_ix_[j] + i;
combined_histo[jx] = histograms[curr_histo_ix];
HistogramAddHistogramLiteral(&combined_histo[jx],
&histograms[last_histogram_ix]);
combined_entropy[jx] = BitsEntropy(
&combined_histo[jx].data_[0], self->alphabet_size_);
diff[j] += combined_entropy[jx] - entropy[i] - last_entropy[jx];
}
}
if (split_->num_types < max_block_types_ &&
diff[0] > split_threshold_ &&
diff[1] > split_threshold_) {
// Create new block.
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
last_histogram_ix_[1] = last_histogram_ix_[0];
last_histogram_ix_[0] = split_->num_types * num_contexts_;
for (size_t i = 0; i < num_contexts_; ++i) {
last_entropy_[num_contexts_ + i] = last_entropy_[i];
last_entropy_[i] = entropy[i];
}
++num_blocks_;
++split_->num_types;
curr_histogram_ix_ += num_contexts_;
block_size_ = 0;
merge_last_count_ = 0;
target_block_size_ = min_block_size_;
} else if (diff[1] < diff[0] - 20.0) {
// Combine this block with second last block.
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
for (size_t i = 0; i < num_contexts_; ++i) {
(*histograms_)[last_histogram_ix_[0] + i] =
combined_histo[num_contexts_ + i];
last_entropy_[num_contexts_ + i] = last_entropy_[i];
last_entropy_[i] = combined_entropy[num_contexts_ + i];
(*histograms_)[curr_histogram_ix_ + i].Clear();
}
++num_blocks_;
block_size_ = 0;
merge_last_count_ = 0;
target_block_size_ = min_block_size_;
} else {
// Combine this block with last block.
split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
for (size_t i = 0; i < num_contexts_; ++i) {
(*histograms_)[last_histogram_ix_[0] + i] = combined_histo[i];
last_entropy_[i] = combined_entropy[i];
if (split_->num_types == 1) {
last_entropy_[num_contexts_ + i] = last_entropy_[i];
}
(*histograms_)[curr_histogram_ix_ + i].Clear();
}
block_size_ = 0;
if (++merge_last_count_ > 1) {
target_block_size_ += min_block_size_;
if (split->num_types < self->max_block_types_ &&
diff[0] > self->split_threshold_ &&
diff[1] > self->split_threshold_) {
/* Create new block. */
split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
split->types[self->num_blocks_] = (uint8_t)split->num_types;
self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
self->last_histogram_ix_[0] = split->num_types * num_contexts;
for (i = 0; i < num_contexts; ++i) {
last_entropy[num_contexts + i] = last_entropy[i];
last_entropy[i] = entropy[i];
}
++self->num_blocks_;
++split->num_types;
self->curr_histogram_ix_ += num_contexts;
if (self->curr_histogram_ix_ < *self->histograms_size_) {
ClearHistogramsLiteral(
&self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
}
self->block_size_ = 0;
self->merge_last_count_ = 0;
self->target_block_size_ = self->min_block_size_;
} else if (diff[1] < diff[0] - 20.0) {
/* Combine this block with second last block. */
split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
for (i = 0; i < num_contexts; ++i) {
histograms[self->last_histogram_ix_[0] + i] =
combined_histo[num_contexts + i];
last_entropy[num_contexts + i] = last_entropy[i];
last_entropy[i] = combined_entropy[num_contexts + i];
HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
}
++self->num_blocks_;
self->block_size_ = 0;
self->merge_last_count_ = 0;
self->target_block_size_ = self->min_block_size_;
} else {
/* Combine this block with last block. */
split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
for (i = 0; i < num_contexts; ++i) {
histograms[self->last_histogram_ix_[0] + i] = combined_histo[i];
last_entropy[i] = combined_entropy[i];
if (split->num_types == 1) {
last_entropy[num_contexts + i] = last_entropy[i];
}
HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
}
self->block_size_ = 0;
if (++self->merge_last_count_ > 1) {
self->target_block_size_ += self->min_block_size_;
}
}
if (is_final) {
(*histograms_).resize(split_->num_types * num_contexts_);
split_->types.resize(num_blocks_);
split_->lengths.resize(num_blocks_);
}
BROTLI_FREE(m, combined_entropy);
BROTLI_FREE(m, combined_histo);
BROTLI_FREE(m, entropy);
}
if (is_final) {
*self->histograms_size_ = split->num_types * num_contexts;
split->num_blocks = self->num_blocks_;
}
}
private:
static const int kMaxBlockTypes = 256;
/* Adds the next symbol to the current block type and context. When the
current block reaches the target size, decides on merging the block. */
static void ContextBlockSplitterAddSymbol(MemoryManager* m,
ContextBlockSplitter* self, size_t symbol, size_t context) {
HistogramAddLiteral(&self->histograms_[self->curr_histogram_ix_ + context],
symbol);
++self->block_size_;
if (self->block_size_ == self->target_block_size_) {
ContextBlockSplitterFinishBlock(m, self, /* is_final = */ 0);
if (BROTLI_IS_OOM(m)) return;
}
}
// Alphabet size of particular block category.
const size_t alphabet_size_;
const size_t num_contexts_;
const size_t max_block_types_;
// We collect at least this many symbols for each block.
const size_t min_block_size_;
// We merge histograms A and B if
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
// where A is the current histogram and B is the histogram of the last or the
// second last block type.
const double split_threshold_;
size_t num_blocks_;
BlockSplit* split_; // not owned
std::vector<HistogramType>* histograms_; // not owned
// The number of symbols that we want to collect before deciding on whether
// or not to merge the block with a previous one or emit a new block.
size_t target_block_size_;
// The number of symbols in the current histogram.
size_t block_size_;
// Offset of the current histogram.
size_t curr_histogram_ix_;
// Offset of the histograms of the previous two block types.
size_t last_histogram_ix_[2];
// Entropy of the previous two block types.
std::vector<double> last_entropy_;
// The number of times we merged the current block with the last one.
size_t merge_last_count_;
};
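The merge criterion documented in the fields above can be read as a standalone predicate. A minimal sketch in plain C (illustration only; BitsEntropy is the helper from bit_cost.h, and the histograms are reduced to bare count arrays here):

/* Merge blocks A and B iff
   entropy(A+B) < entropy(A) + entropy(B) + split_threshold. */
static int ShouldMergeSketch(const uint32_t* a, const uint32_t* b,
                             size_t alphabet_size, double split_threshold) {
  uint32_t combined[BROTLI_NUM_COMMAND_SYMBOLS];  /* largest alphabet used */
  size_t i;
  for (i = 0; i < alphabet_size; ++i) combined[i] = a[i] + b[i];
  return BitsEntropy(combined, alphabet_size) <
      BitsEntropy(a, alphabet_size) + BitsEntropy(b, alphabet_size) +
      split_threshold;
}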
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
size_t pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
ContextType literal_context_mode,
size_t num_contexts,
const uint32_t* static_context_map,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb) {
void BrotliBuildMetaBlockGreedyWithContexts(MemoryManager* m,
const uint8_t* ringbuffer,
size_t pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
ContextType literal_context_mode,
size_t num_contexts,
const uint32_t* static_context_map,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb) {
ContextBlockSplitter lit_blocks;
BlockSplitterCommand cmd_blocks;
BlockSplitterDistance dist_blocks;
size_t num_literals = 0;
for (size_t i = 0; i < n_commands; ++i) {
size_t i;
for (i = 0; i < n_commands; ++i) {
num_literals += commands[i].insert_len_;
}
ContextBlockSplitter<HistogramLiteral> lit_blocks(
256, num_contexts, 512, 400.0, num_literals,
&mb->literal_split, &mb->literal_histograms);
BlockSplitter<HistogramCommand> cmd_blocks(
kNumCommandPrefixes, 1024, 500.0, n_commands,
&mb->command_split, &mb->command_histograms);
BlockSplitter<HistogramDistance> dist_blocks(
64, 512, 100.0, n_commands,
&mb->distance_split, &mb->distance_histograms);
InitContextBlockSplitter(m, &lit_blocks, 256, num_contexts, 512, 400.0,
num_literals, &mb->literal_split, &mb->literal_histograms,
&mb->literal_histograms_size);
if (BROTLI_IS_OOM(m)) return;
InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024,
500.0, n_commands, &mb->command_split, &mb->command_histograms,
&mb->command_histograms_size);
if (BROTLI_IS_OOM(m)) return;
InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands,
&mb->distance_split, &mb->distance_histograms,
&mb->distance_histograms_size);
if (BROTLI_IS_OOM(m)) return;
for (size_t i = 0; i < n_commands; ++i) {
for (i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
for (size_t j = cmd.insert_len_; j != 0; --j) {
size_t j;
BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_);
for (j = cmd.insert_len_; j != 0; --j) {
size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
uint8_t literal = ringbuffer[pos & mask];
lit_blocks.AddSymbol(literal, static_context_map[context]);
ContextBlockSplitterAddSymbol(
m, &lit_blocks, literal, static_context_map[context]);
prev_byte2 = prev_byte;
if (BROTLI_IS_OOM(m)) return;
prev_byte = literal;
++pos;
}
pos += cmd.copy_len();
if (cmd.copy_len()) {
pos += CommandCopyLen(&cmd);
if (CommandCopyLen(&cmd)) {
prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
dist_blocks.AddSymbol(cmd.dist_prefix_);
BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_);
}
}
}
lit_blocks.FinishBlock(/* is_final = */ true);
cmd_blocks.FinishBlock(/* is_final = */ true);
dist_blocks.FinishBlock(/* is_final = */ true);
ContextBlockSplitterFinishBlock(m, &lit_blocks, /* is_final = */ 1);
if (BROTLI_IS_OOM(m)) return;
CleanupContextBlockSplitter(m, &lit_blocks);
BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ 1);
BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ 1);
mb->literal_context_map.resize(
mb->literal_split.num_types << kLiteralContextBits);
for (size_t i = 0; i < mb->literal_split.num_types; ++i) {
for (size_t j = 0; j < (1u << kLiteralContextBits); ++j) {
mb->literal_context_map[(i << kLiteralContextBits) + j] =
static_cast<uint32_t>(i * num_contexts) + static_context_map[j];
assert(mb->literal_context_map == 0);
mb->literal_context_map_size =
mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
mb->literal_context_map =
BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < mb->literal_split.num_types; ++i) {
size_t j;
for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS); ++j) {
mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
(uint32_t)(i * num_contexts) + static_context_map[j];
}
}
}
void OptimizeHistograms(size_t num_direct_distance_codes,
size_t distance_postfix_bits,
MetaBlockSplit* mb) {
uint8_t* good_for_rle = new uint8_t[kNumCommandPrefixes];
for (size_t i = 0; i < mb->literal_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0],
good_for_rle);
void BrotliOptimizeHistograms(size_t num_direct_distance_codes,
size_t distance_postfix_bits,
MetaBlockSplit* mb) {
uint8_t good_for_rle[BROTLI_NUM_COMMAND_SYMBOLS];
size_t num_distance_codes;
size_t i;
for (i = 0; i < mb->literal_histograms_size; ++i) {
BrotliOptimizeHuffmanCountsForRle(256, mb->literal_histograms[i].data_,
good_for_rle);
}
for (size_t i = 0; i < mb->command_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
&mb->command_histograms[i].data_[0],
good_for_rle);
for (i = 0; i < mb->command_histograms_size; ++i) {
BrotliOptimizeHuffmanCountsForRle(BROTLI_NUM_COMMAND_SYMBOLS,
mb->command_histograms[i].data_,
good_for_rle);
}
size_t num_distance_codes =
kNumDistanceShortCodes + num_direct_distance_codes +
(48u << distance_postfix_bits);
for (size_t i = 0; i < mb->distance_histograms.size(); ++i) {
OptimizeHuffmanCountsForRle(num_distance_codes,
&mb->distance_histograms[i].data_[0],
good_for_rle);
num_distance_codes = BROTLI_NUM_DISTANCE_SHORT_CODES +
num_direct_distance_codes + (48u << distance_postfix_bits);
for (i = 0; i < mb->distance_histograms_size; ++i) {
BrotliOptimizeHuffmanCountsForRle(num_distance_codes,
mb->distance_histograms[i].data_,
good_for_rle);
}
delete[] good_for_rle;
}
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@ -10,72 +10,100 @@
#ifndef BROTLI_ENC_METABLOCK_H_
#define BROTLI_ENC_METABLOCK_H_
#include <vector>
#include "../common/types.h"
#include "./block_splitter.h"
#include "./command.h"
#include "./context.h"
#include "./histogram.h"
#include "./memory.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
struct BlockSplit {
BlockSplit(void) : num_types(0) {}
size_t num_types;
std::vector<uint8_t> types;
std::vector<uint32_t> lengths;
};
struct MetaBlockSplit {
typedef struct MetaBlockSplit {
BlockSplit literal_split;
BlockSplit command_split;
BlockSplit distance_split;
std::vector<uint32_t> literal_context_map;
std::vector<uint32_t> distance_context_map;
std::vector<HistogramLiteral> literal_histograms;
std::vector<HistogramCommand> command_histograms;
std::vector<HistogramDistance> distance_histograms;
};
uint32_t* literal_context_map;
size_t literal_context_map_size;
uint32_t* distance_context_map;
size_t distance_context_map_size;
HistogramLiteral* literal_histograms;
size_t literal_histograms_size;
HistogramCommand* command_histograms;
size_t command_histograms_size;
HistogramDistance* distance_histograms;
size_t distance_histograms_size;
} MetaBlockSplit;
static BROTLI_INLINE void InitMetaBlockSplit(MetaBlockSplit* mb) {
BrotliInitBlockSplit(&mb->literal_split);
BrotliInitBlockSplit(&mb->command_split);
BrotliInitBlockSplit(&mb->distance_split);
mb->literal_context_map = 0;
mb->literal_context_map_size = 0;
mb->distance_context_map = 0;
mb->distance_context_map_size = 0;
mb->literal_histograms = 0;
mb->literal_histograms_size = 0;
mb->command_histograms = 0;
mb->command_histograms_size = 0;
mb->distance_histograms = 0;
mb->distance_histograms_size = 0;
}
static BROTLI_INLINE void DestroyMetaBlockSplit(
MemoryManager* m, MetaBlockSplit* mb) {
BrotliDestroyBlockSplit(m, &mb->literal_split);
BrotliDestroyBlockSplit(m, &mb->command_split);
BrotliDestroyBlockSplit(m, &mb->distance_split);
BROTLI_FREE(m, mb->literal_context_map);
BROTLI_FREE(m, mb->distance_context_map);
BROTLI_FREE(m, mb->literal_histograms);
BROTLI_FREE(m, mb->command_histograms);
BROTLI_FREE(m, mb->distance_histograms);
}
/* Uses the slow shortest-path block splitter and does context clustering. */
void BuildMetaBlock(const uint8_t* ringbuffer,
const size_t pos,
const size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
const Command* cmds,
size_t num_commands,
ContextType literal_context_mode,
MetaBlockSplit* mb);
BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
const uint8_t* ringbuffer,
const size_t pos,
const size_t mask,
const int quality,
uint8_t prev_byte,
uint8_t prev_byte2,
const Command* cmds,
size_t num_commands,
ContextType literal_context_mode,
MetaBlockSplit* mb);
/* Uses a fast greedy block splitter that tries to merge current block with the
last or the second last block and does not do any context modeling. */
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
size_t pos,
size_t mask,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb);
BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(MemoryManager* m,
const uint8_t* ringbuffer,
size_t pos,
size_t mask,
const Command* commands,
size_t n_commands,
MetaBlockSplit* mb);
/* Uses a fast greedy block splitter that tries to merge current block with the
last or the second last block and uses a static context clustering which
is the same for all block types. */
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
size_t pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
ContextType literal_context_mode,
size_t num_contexts,
const uint32_t* static_context_map,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb);
BROTLI_INTERNAL void BrotliBuildMetaBlockGreedyWithContexts(
MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode,
size_t num_contexts, const uint32_t* static_context_map,
const Command* commands, size_t n_commands, MetaBlockSplit* mb);
void OptimizeHistograms(size_t num_direct_distance_codes,
size_t distance_postfix_bits,
MetaBlockSplit* mb);
BROTLI_INTERNAL void BrotliOptimizeHistograms(size_t num_direct_distance_codes,
size_t distance_postfix_bits,
MetaBlockSplit* mb);
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_METABLOCK_H_ */
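A minimal usage sketch of the converted API (illustration only; assumes a MemoryManager* m already initialized via memory.h, plus the usual ring buffer and command array produced by encode.c):

static void BuildAndEmitSketch(MemoryManager* m, const uint8_t* ringbuffer,
                               size_t pos, size_t mask,
                               const Command* commands, size_t n_commands) {
  MetaBlockSplit mb;
  InitMetaBlockSplit(&mb);
  BrotliBuildMetaBlockGreedy(m, ringbuffer, pos, mask, commands, n_commands,
                             &mb);
  if (BROTLI_IS_OOM(m)) return;
  BrotliOptimizeHistograms(0, 0, &mb);  /* no direct codes, no postfix bits */
  /* ... write the meta-block out with the brotli_bit_stream routines ... */
  DestroyMetaBlockSplit(m, &mb);
}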

enc/metablock_inc.h (new file, 183 lines)
View File

@ -0,0 +1,183 @@
/* NOLINT(build/header_guard) */
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* template parameters: FN */
#define HistogramType FN(Histogram)
/* Greedy block splitter for one block category (literal, command or distance).
*/
typedef struct FN(BlockSplitter) {
/* Alphabet size of particular block category. */
size_t alphabet_size_;
/* We collect at least this many symbols for each block. */
size_t min_block_size_;
/* We merge histograms A and B if
entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
where A is the current histogram and B is the histogram of the last or the
second last block type. */
double split_threshold_;
size_t num_blocks_;
BlockSplit* split_; /* not owned */
HistogramType* histograms_; /* not owned */
size_t* histograms_size_; /* not owned */
/* The number of symbols that we want to collect before deciding on whether
or not to merge the block with a previous one or emit a new block. */
size_t target_block_size_;
/* The number of symbols in the current histogram. */
size_t block_size_;
/* Offset of the current histogram. */
size_t curr_histogram_ix_;
/* Offset of the histograms of the previous two block types. */
size_t last_histogram_ix_[2];
/* Entropy of the previous two block types. */
double last_entropy_[2];
/* The number of times we merged the current block with the last one. */
size_t merge_last_count_;
} FN(BlockSplitter);
static void FN(InitBlockSplitter)(
MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size,
size_t min_block_size, double split_threshold, size_t num_symbols,
BlockSplit* split, HistogramType** histograms, size_t* histograms_size) {
size_t max_num_blocks = num_symbols / min_block_size + 1;
/* We have to allocate one more histogram than the maximum number of block
types for the current histogram when the meta-block is too big. */
size_t max_num_types =
BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1);
self->alphabet_size_ = alphabet_size;
self->min_block_size_ = min_block_size;
self->split_threshold_ = split_threshold;
self->num_blocks_ = 0;
self->split_ = split;
self->histograms_size_ = histograms_size;
self->target_block_size_ = min_block_size;
self->block_size_ = 0;
self->curr_histogram_ix_ = 0;
self->merge_last_count_ = 0;
BROTLI_ENSURE_CAPACITY(m, uint8_t,
split->types, split->types_alloc_size, max_num_blocks);
BROTLI_ENSURE_CAPACITY(m, uint32_t,
split->lengths, split->lengths_alloc_size, max_num_blocks);
if (BROTLI_IS_OOM(m)) return;
self->split_->num_blocks = max_num_blocks;
assert(*histograms == 0);
*histograms_size = max_num_types;
*histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size);
self->histograms_ = *histograms;
if (BROTLI_IS_OOM(m)) return;
/* Clear only current histogram. */
FN(HistogramClear)(&self->histograms_[0]);
self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
}
/* Does one of three things:
(1) emits the current block with a new block type;
(2) emits the current block with the type of the second last block;
(3) merges the current block with the last block. */
static void FN(BlockSplitterFinishBlock)(FN(BlockSplitter)* self,
int is_final) {
BlockSplit* split = self->split_;
double* last_entropy = self->last_entropy_;
HistogramType* histograms = self->histograms_;
self->block_size_ =
BROTLI_MAX(size_t, self->block_size_, self->min_block_size_);
if (self->num_blocks_ == 0) {
/* Create first block. */
split->lengths[0] = (uint32_t)self->block_size_;
split->types[0] = 0;
last_entropy[0] =
BitsEntropy(histograms[0].data_, self->alphabet_size_);
last_entropy[1] = last_entropy[0];
++self->num_blocks_;
++split->num_types;
++self->curr_histogram_ix_;
if (self->curr_histogram_ix_ < *self->histograms_size_)
FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
self->block_size_ = 0;
} else if (self->block_size_ > 0) {
double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_,
self->alphabet_size_);
HistogramType combined_histo[2];
double combined_entropy[2];
double diff[2];
size_t j;
for (j = 0; j < 2; ++j) {
size_t last_histogram_ix = self->last_histogram_ix_[j];
combined_histo[j] = histograms[self->curr_histogram_ix_];
FN(HistogramAddHistogram)(&combined_histo[j],
&histograms[last_histogram_ix]);
combined_entropy[j] = BitsEntropy(
&combined_histo[j].data_[0], self->alphabet_size_);
diff[j] = combined_entropy[j] - entropy - last_entropy[j];
}
if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES &&
diff[0] > self->split_threshold_ &&
diff[1] > self->split_threshold_) {
/* Create new block. */
split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
split->types[self->num_blocks_] = (uint8_t)split->num_types;
self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
self->last_histogram_ix_[0] = (uint8_t)split->num_types;
last_entropy[1] = last_entropy[0];
last_entropy[0] = entropy;
++self->num_blocks_;
++split->num_types;
++self->curr_histogram_ix_;
if (self->curr_histogram_ix_ < *self->histograms_size_)
FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
self->block_size_ = 0;
self->merge_last_count_ = 0;
self->target_block_size_ = self->min_block_size_;
} else if (diff[1] < diff[0] - 20.0) {
/* Combine this block with second last block. */
split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
histograms[self->last_histogram_ix_[0]] = combined_histo[1];
last_entropy[1] = last_entropy[0];
last_entropy[0] = combined_entropy[1];
++self->num_blocks_;
self->block_size_ = 0;
FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
self->merge_last_count_ = 0;
self->target_block_size_ = self->min_block_size_;
} else {
/* Combine this block with last block. */
split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
histograms[self->last_histogram_ix_[0]] = combined_histo[0];
last_entropy[0] = combined_entropy[0];
if (split->num_types == 1) {
last_entropy[1] = last_entropy[0];
}
self->block_size_ = 0;
FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
if (++self->merge_last_count_ > 1) {
self->target_block_size_ += self->min_block_size_;
}
}
}
if (is_final) {
*self->histograms_size_ = split->num_types;
split->num_blocks = self->num_blocks_;
}
}
/* Adds the next symbol to the current histogram. When the current histogram
reaches the target size, decides on merging the block. */
static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) {
FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol);
++self->block_size_;
if (self->block_size_ == self->target_block_size_) {
FN(BlockSplitterFinishBlock)(self, /* is_final = */ 0);
}
}
#undef HistogramType
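This header replaces the C++ class template with a preprocessor template: it is included once per block category with FN defined to paste in the category name. Roughly (a sketch of the pattern, not a verbatim quote of metablock.c):

#define FN(X) X ## Literal
#include "./metablock_inc.h"  /* defines BlockSplitterLiteral, ... */
#undef FN

#define FN(X) X ## Command
#include "./metablock_inc.h"  /* defines BlockSplitterCommand, ... */
#undef FN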

View File

@ -62,13 +62,13 @@
but note: the FPU still sends unaligned loads and stores to a trap handler!
*/
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))
#define BROTLI_UNALIGNED_LOAD32(_p) (*(const uint32_t *)(_p))
#define BROTLI_UNALIGNED_LOAD64(_p) (*(const uint64_t *)(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
(*reinterpret_cast<uint32_t *>(_p) = (_val))
(*(uint32_t *)(_p) = (_val))
#define BROTLI_UNALIGNED_STORE64(_p, _val) \
(*reinterpret_cast<uint64_t *>(_p) = (_val))
(*(uint64_t *)(_p) = (_val))
#elif defined(__arm__) && \
!defined(__ARM_ARCH_5__) && \
@ -87,17 +87,17 @@
do an unaligned read and rotate the words around a bit, or do the reads very
slowly (trip through kernel mode). */
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_LOAD32(_p) (*(const uint32_t *)(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
(*reinterpret_cast<uint32_t *>(_p) = (_val))
(*(uint32_t *)(_p) = (_val))
static inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
static BROTLI_INLINE uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
uint64_t t;
memcpy(&t, p, sizeof t);
return t;
}
static inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
@ -106,26 +106,63 @@ static inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
/* These functions are provided for architectures that don't support */
/* unaligned loads and stores. */
static inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
static BROTLI_INLINE uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
uint32_t t;
memcpy(&t, p, sizeof t);
return t;
}
static inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
static BROTLI_INLINE uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
uint64_t t;
memcpy(&t, p, sizeof t);
return t;
}
static inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
static BROTLI_INLINE void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
memcpy(p, &v, sizeof v);
}
static inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
#endif
#if !defined(__cplusplus) && !defined(c_plusplus) && \
    defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define BROTLI_RESTRICT restrict
#elif BROTLI_GCC_VERSION > 295 || defined(__llvm__)
#define BROTLI_RESTRICT __restrict
#else
#define BROTLI_RESTRICT
#endif
#define _TEMPLATE(T) \
static BROTLI_INLINE T brotli_min_ ## T (T a, T b) { return a < b ? a : b; } \
static BROTLI_INLINE T brotli_max_ ## T (T a, T b) { return a > b ? a : b; }
_TEMPLATE(double) _TEMPLATE(float) _TEMPLATE(int)
_TEMPLATE(size_t) _TEMPLATE(uint32_t) _TEMPLATE(uint8_t)
#undef _TEMPLATE
#define BROTLI_MIN(T, A, B) (brotli_min_ ## T((A), (B)))
#define BROTLI_MAX(T, A, B) (brotli_max_ ## T((A), (B)))
#define BROTLI_SWAP(T, A, I, J) { \
T __brotli_swap_tmp = (A)[(I)]; \
(A)[(I)] = (A)[(J)]; \
(A)[(J)] = __brotli_swap_tmp; \
}
#define BROTLI_ENSURE_CAPACITY(M, T, A, C, R) { \
if (C < (R)) { \
size_t _new_size = (C == 0) ? (R) : C; \
T* new_array; \
while (_new_size < (R)) _new_size *= 2; \
new_array = BROTLI_ALLOC((M), T, _new_size); \
    if (!BROTLI_IS_OOM((M)))                 \
memcpy(new_array, A, C * sizeof(T)); \
BROTLI_FREE((M), A); \
A = new_array; \
C = _new_size; \
} \
}
#endif /* BROTLI_ENC_PORT_H_ */
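A quick sketch of how the new type-parameterized helpers expand (illustration only):

static void PortMacrosSketch(void) {
  uint32_t arr[2] = { 1, 2 };
  size_t lo = BROTLI_MIN(size_t, 3, 7);  /* brotli_min_size_t(3, 7) == 3 */
  BROTLI_SWAP(uint32_t, arr, 0, 1);      /* arr is now { 2, 1 } */
  (void)lo;
}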

View File

@ -10,70 +10,43 @@
#ifndef BROTLI_ENC_PREFIX_H_
#define BROTLI_ENC_PREFIX_H_
#include "../common/constants.h"
#include "../common/port.h"
#include "../common/types.h"
#include "./fast_log.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static const uint32_t kNumInsertLenPrefixes = 24;
static const uint32_t kNumCopyLenPrefixes = 24;
static const uint32_t kNumCommandPrefixes = 704;
static const uint32_t kNumBlockLenPrefixes = 26;
static const uint32_t kNumDistanceShortCodes = 16;
static const uint32_t kNumDistancePrefixes = 520;
// Represents the range of values belonging to a prefix code:
// [offset, offset + 2^nbits)
struct PrefixCodeRange {
uint32_t offset;
uint32_t nbits;
};
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
{ 1, 2}, { 5, 2}, { 9, 2}, { 13, 2},
{ 17, 3}, { 25, 3}, { 33, 3}, { 41, 3},
{ 49, 4}, { 65, 4}, { 81, 4}, { 97, 4},
{ 113, 5}, { 145, 5}, { 177, 5}, { 209, 5},
{ 241, 6}, { 305, 6}, { 369, 7}, { 497, 8},
{ 753, 9}, { 1265, 10}, {2289, 11}, {4337, 12},
{8433, 13}, {16625, 24}
};
inline void GetBlockLengthPrefixCode(uint32_t len, uint32_t* code,
uint32_t* n_extra, uint32_t* extra) {
*code = 0;
while (*code < 25 && len >= kBlockLengthPrefixCode[*code + 1].offset) {
++(*code);
}
*n_extra = kBlockLengthPrefixCode[*code].nbits;
*extra = len - kBlockLengthPrefixCode[*code].offset;
}
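A worked example of the lookup above (values traced by hand):

/* len = 100: the scan stops at code 11, whose entry is { 97, 4 },
   so *n_extra = 4 and *extra = 100 - 97 = 3. */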
inline void PrefixEncodeCopyDistance(size_t distance_code,
size_t num_direct_codes,
size_t postfix_bits,
uint16_t* code,
uint32_t* extra_bits) {
if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
*code = static_cast<uint16_t>(distance_code);
static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
size_t num_direct_codes,
size_t postfix_bits,
uint16_t* code,
uint32_t* extra_bits) {
if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes) {
*code = (uint16_t)distance_code;
*extra_bits = 0;
return;
} else {
size_t dist = (1u << (postfix_bits + 2u)) +
(distance_code - BROTLI_NUM_DISTANCE_SHORT_CODES - num_direct_codes);
size_t bucket = Log2FloorNonZero(dist) - 1;
size_t postfix_mask = (1u << postfix_bits) - 1;
size_t postfix = dist & postfix_mask;
size_t prefix = (dist >> bucket) & 1;
size_t offset = (2 + prefix) << bucket;
size_t nbits = bucket - postfix_bits;
*code = (uint16_t)(
(BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes +
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
*extra_bits = (uint32_t)(
(nbits << 24) | ((dist - offset) >> postfix_bits));
}
distance_code -= kNumDistanceShortCodes + num_direct_codes; /* >= 0 */
distance_code += (1u << (postfix_bits + 2u)); /* > 0 */
size_t bucket = Log2FloorNonZero(distance_code) - 1;
size_t postfix_mask = (1 << postfix_bits) - 1;
size_t postfix = distance_code & postfix_mask;
size_t prefix = (distance_code >> bucket) & 1;
size_t offset = (2 + prefix) << bucket;
size_t nbits = bucket - postfix_bits;
*code = static_cast<uint16_t>(
(kNumDistanceShortCodes + num_direct_codes +
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
*extra_bits = static_cast<uint32_t>(
(nbits << 24) | ((distance_code - offset) >> postfix_bits));
}
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_PREFIX_H_ */
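A worked example of the rewritten PrefixEncodeCopyDistance (values checked by hand; illustration only):

/* postfix_bits = 0, num_direct_codes = 0, distance_code = 20:
   dist   = (1 << 2) + (20 - 16 - 0) = 8
   bucket = Log2FloorNonZero(8) - 1  = 2
   prefix = (8 >> 2) & 1             = 0
   offset = (2 + 0) << 2             = 8
   nbits  = bucket - postfix_bits    = 2
   *code       = 16 + ((2 * (2 - 1) + 0) << 0) + 0 = 18
   *extra_bits = (2 << 24) | ((8 - 8) >> 0)        = 0x02000000,
   i.e. distance symbol 18 with 2 extra bits of value 0. */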

View File

@ -9,12 +9,15 @@
#ifndef BROTLI_ENC_RINGBUFFER_H_
#define BROTLI_ENC_RINGBUFFER_H_
#include <cstdlib> /* free, realloc */
#include <string.h> /* memcpy */
#include "../common/types.h"
#include "./memory.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
/* A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
data in a circular manner: writing a byte writes it to:
@ -25,121 +28,130 @@ namespace brotli {
and another copy of the last two bytes:
buffer_[-1] == buffer_[(1 << window_bits) - 1] and
buffer_[-2] == buffer_[(1 << window_bits) - 2]. */
class RingBuffer {
public:
RingBuffer(int window_bits, int tail_bits)
: size_(1u << window_bits),
mask_((1u << window_bits) - 1),
tail_size_(1u << tail_bits),
total_size_(size_ + tail_size_),
cur_size_(0),
pos_(0),
data_(0),
buffer_(0) {}
~RingBuffer(void) {
free(data_);
}
/* Allocates or re-allocates data_ to the given length + plus some slack
region before and after. Fills the slack regions with zeros. */
inline void InitBuffer(const uint32_t buflen) {
static const size_t kSlackForEightByteHashingEverywhere = 7;
cur_size_ = buflen;
data_ = static_cast<uint8_t*>(realloc(
data_, 2 + buflen + kSlackForEightByteHashingEverywhere));
buffer_ = data_ + 2;
buffer_[-2] = buffer_[-1] = 0;
for (size_t i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
buffer_[cur_size_ + i] = 0;
}
}
/* Push bytes into the ring buffer. */
void Write(const uint8_t *bytes, size_t n) {
if (pos_ == 0 && n < tail_size_) {
/* Special case for the first write: to process the first block, we don't
need to allocate the whole ringbuffer and we don't need the tail
either. However, we do this memory usage optimization only if the
first write is less than the tail size, which is also the input block
size, otherwise it is likely that other blocks will follow and we
will need to reallocate to the full size anyway. */
pos_ = static_cast<uint32_t>(n);
InitBuffer(pos_);
memcpy(buffer_, bytes, n);
return;
}
if (cur_size_ < total_size_) {
/* Lazily allocate the full buffer. */
InitBuffer(total_size_);
/* Initialize the last two bytes to zero, so that we don't have to worry
later when we copy the last two bytes to the first two positions. */
buffer_[size_ - 2] = 0;
buffer_[size_ - 1] = 0;
}
const size_t masked_pos = pos_ & mask_;
/* The length of the writes is limited so that we do not need to worry
    about a write wrapping around the ring buffer more than once. */
WriteTail(bytes, n);
if (PREDICT_TRUE(masked_pos + n <= size_)) {
/* A single write fits. */
memcpy(&buffer_[masked_pos], bytes, n);
} else {
/* Split into two writes.
Copy into the end of the buffer, including the tail buffer. */
memcpy(&buffer_[masked_pos], bytes,
std::min(n, total_size_ - masked_pos));
/* Copy into the beginning of the buffer */
memcpy(&buffer_[0], bytes + (size_ - masked_pos),
n - (size_ - masked_pos));
}
buffer_[-2] = buffer_[size_ - 2];
buffer_[-1] = buffer_[size_ - 1];
pos_ += static_cast<uint32_t>(n);
if (pos_ > (1u << 30)) { /* Wrap, but preserve not-a-first-lap feature. */
pos_ = (pos_ & ((1u << 30) - 1)) | (1u << 30);
}
}
void Reset(void) {
pos_ = 0;
}
// Logical cursor position in the ring buffer.
uint32_t position(void) const { return pos_; }
// Bit mask for getting the physical position for a logical position.
uint32_t mask(void) const { return mask_; }
uint8_t *start(void) { return &buffer_[0]; }
const uint8_t *start(void) const { return &buffer_[0]; }
private:
void WriteTail(const uint8_t *bytes, size_t n) {
const size_t masked_pos = pos_ & mask_;
if (PREDICT_FALSE(masked_pos < tail_size_)) {
// Just fill the tail buffer with the beginning data.
const size_t p = size_ + masked_pos;
memcpy(&buffer_[p], bytes, std::min(n, tail_size_ - masked_pos));
}
}
// Size of the ringbuffer is (1 << window_bits) + tail_size_.
typedef struct RingBuffer {
/* Size of the ringbuffer is (1 << window_bits) + tail_size_. */
const uint32_t size_;
const uint32_t mask_;
const uint32_t tail_size_;
const uint32_t total_size_;
uint32_t cur_size_;
// Position to write in the ring buffer.
/* Position to write in the ring buffer. */
uint32_t pos_;
// The actual ring buffer containing the copy of the last two bytes, the data,
// and the copy of the beginning as a tail.
/* The actual ring buffer containing the copy of the last two bytes, the data,
and the copy of the beginning as a tail. */
uint8_t *data_;
// The start of the ringbuffer.
/* The start of the ringbuffer. */
uint8_t *buffer_;
};
} RingBuffer;
} // namespace brotli
static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) {
rb->cur_size_ = 0;
rb->pos_ = 0;
rb->data_ = 0;
rb->buffer_ = 0;
}
static BROTLI_INLINE void RingBufferSetup(
int window_bits, int tail_bits, RingBuffer* rb) {
*(uint32_t*)&rb->size_ = 1u << window_bits;
*(uint32_t*)&rb->mask_ = (1u << window_bits) - 1;
*(uint32_t*)&rb->tail_size_ = 1u << tail_bits;
*(uint32_t*)&rb->total_size_ = rb->size_ + rb->tail_size_;
}
static BROTLI_INLINE void RingBufferFree(MemoryManager* m, RingBuffer* rb) {
BROTLI_FREE(m, rb->data_);
}
/* Allocates or re-allocates data_ to the given length + plus some slack
region before and after. Fills the slack regions with zeros. */
static BROTLI_INLINE void RingBufferInitBuffer(
MemoryManager* m, const uint32_t buflen, RingBuffer* rb) {
static const size_t kSlackForEightByteHashingEverywhere = 7;
uint8_t* new_data = BROTLI_ALLOC(
m, uint8_t, 2 + buflen + kSlackForEightByteHashingEverywhere);
size_t i;
if (BROTLI_IS_OOM(m)) return;
if (rb->data_) {
memcpy(new_data, rb->data_,
2 + rb->cur_size_ + kSlackForEightByteHashingEverywhere);
BROTLI_FREE(m, rb->data_);
}
rb->data_ = new_data;
rb->cur_size_ = buflen;
rb->buffer_ = rb->data_ + 2;
rb->buffer_[-2] = rb->buffer_[-1] = 0;
for (i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
rb->buffer_[rb->cur_size_ + i] = 0;
}
}
static BROTLI_INLINE void RingBufferWriteTail(
const uint8_t *bytes, size_t n, RingBuffer* rb) {
const size_t masked_pos = rb->pos_ & rb->mask_;
if (PREDICT_FALSE(masked_pos < rb->tail_size_)) {
/* Just fill the tail buffer with the beginning data. */
const size_t p = rb->size_ + masked_pos;
memcpy(&rb->buffer_[p], bytes,
BROTLI_MIN(size_t, n, rb->tail_size_ - masked_pos));
}
}
/* Push bytes into the ring buffer. */
static BROTLI_INLINE void RingBufferWrite(
MemoryManager* m, const uint8_t *bytes, size_t n, RingBuffer* rb) {
if (rb->pos_ == 0 && n < rb->tail_size_) {
/* Special case for the first write: to process the first block, we don't
need to allocate the whole ringbuffer and we don't need the tail
either. However, we do this memory usage optimization only if the
first write is less than the tail size, which is also the input block
size, otherwise it is likely that other blocks will follow and we
will need to reallocate to the full size anyway. */
rb->pos_ = (uint32_t)n;
RingBufferInitBuffer(m, rb->pos_, rb);
if (BROTLI_IS_OOM(m)) return;
memcpy(rb->buffer_, bytes, n);
return;
}
if (rb->cur_size_ < rb->total_size_) {
/* Lazily allocate the full buffer. */
RingBufferInitBuffer(m, rb->total_size_, rb);
if (BROTLI_IS_OOM(m)) return;
/* Initialize the last two bytes to zero, so that we don't have to worry
later when we copy the last two bytes to the first two positions. */
rb->buffer_[rb->size_ - 2] = 0;
rb->buffer_[rb->size_ - 1] = 0;
}
{
const size_t masked_pos = rb->pos_ & rb->mask_;
/* The length of the writes is limited so that we do not need to worry
    about a write wrapping around the ring buffer more than once. */
RingBufferWriteTail(bytes, n, rb);
if (PREDICT_TRUE(masked_pos + n <= rb->size_)) {
/* A single write fits. */
memcpy(&rb->buffer_[masked_pos], bytes, n);
} else {
/* Split into two writes.
Copy into the end of the buffer, including the tail buffer. */
memcpy(&rb->buffer_[masked_pos], bytes,
BROTLI_MIN(size_t, n, rb->total_size_ - masked_pos));
/* Copy into the beginning of the buffer */
memcpy(&rb->buffer_[0], bytes + (rb->size_ - masked_pos),
n - (rb->size_ - masked_pos));
}
}
rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];
rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
rb->pos_ += (uint32_t)n;
if (rb->pos_ > (1u << 30)) {
/* Wrap, but preserve not-a-first-lap feature. */
rb->pos_ = (rb->pos_ & ((1u << 30) - 1)) | (1u << 30);
}
}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_RINGBUFFER_H_ */
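The converted ring buffer is driven through explicit init/setup/write/free calls instead of the old constructor and destructor. A minimal sketch (assumes a MemoryManager* m initialized via memory.h; the window and tail sizes are arbitrary):

static void RingBufferSketch(MemoryManager* m,
                             const uint8_t* input, size_t input_size) {
  RingBuffer rb;
  RingBufferInit(&rb);
  RingBufferSetup(/* window_bits */ 22, /* tail_bits */ 16, &rb);
  RingBufferWrite(m, input, input_size, &rb);
  if (BROTLI_IS_OOM(m)) return;
  /* Bytes are addressed as rb.buffer_[logical_pos & rb.mask_]. */
  RingBufferFree(m, &rb);
}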

View File

@ -6,86 +6,102 @@
#include "./static_dict.h"
#include <algorithm>
#include "../common/dictionary.h"
#include "./find_match_length.h"
#include "./port.h"
#include "./static_dict_lut.h"
#include "./transform.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
inline uint32_t Hash(const uint8_t *data) {
static const uint8_t kUppercaseFirst = 10;
static const uint8_t kOmitLastNTransforms[10] = {
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
};
static BROTLI_INLINE uint32_t Hash(const uint8_t *data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
return h >> (32 - kDictNumBits);
}
inline void AddMatch(size_t distance, size_t len, size_t len_code,
uint32_t* matches) {
uint32_t match = static_cast<uint32_t>((distance << 5) + len_code);
matches[len] = std::min(matches[len], match);
static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
uint32_t* matches) {
uint32_t match = (uint32_t)((distance << 5) + len_code);
matches[len] = BROTLI_MIN(uint32_t, matches[len], match);
}
inline size_t DictMatchLength(const uint8_t* data,
size_t id,
size_t len,
size_t maxlen) {
static BROTLI_INLINE size_t DictMatchLength(const uint8_t* data,
size_t id,
size_t len,
size_t maxlen) {
const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,
std::min(len, maxlen));
BROTLI_MIN(size_t, len, maxlen));
}
inline bool IsMatch(DictWord w, const uint8_t* data, size_t max_length) {
if (w.len > max_length) return false;
const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;
const uint8_t* dict = &kBrotliDictionary[offset];
if (w.transform == 0) {
static BROTLI_INLINE int IsMatch(
DictWord w, const uint8_t* data, size_t max_length) {
if (w.len > max_length) {
return 0;
} else {
const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] +
(size_t)w.len * (size_t)w.idx;
const uint8_t* dict = &kBrotliDictionary[offset];
if (w.transform == 0) {
/* Match against base dictionary word. */
return FindMatchLengthWithLimit(dict, data, w.len) == w.len;
} else if (w.transform == 10) {
return FindMatchLengthWithLimit(dict, data, w.len) == w.len;
} else if (w.transform == 10) {
/* Match against uppercase first transform.
Note that there are only ASCII uppercase words in the lookup table. */
return (dict[0] >= 'a' && dict[0] <= 'z' &&
(dict[0] ^ 32) == data[0] &&
FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
w.len - 1u);
} else {
return (dict[0] >= 'a' && dict[0] <= 'z' &&
(dict[0] ^ 32) == data[0] &&
FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
w.len - 1u);
} else {
/* Match against uppercase all transform.
Note that there are only ASCII uppercase words in the lookup table. */
for (size_t i = 0; i < w.len; ++i) {
if (dict[i] >= 'a' && dict[i] <= 'z') {
if ((dict[i] ^ 32) != data[i]) return false;
} else {
if (dict[i] != data[i]) return false;
size_t i;
for (i = 0; i < w.len; ++i) {
if (dict[i] >= 'a' && dict[i] <= 'z') {
if ((dict[i] ^ 32) != data[i]) return 0;
} else {
if (dict[i] != data[i]) return 0;
}
}
return 1;
}
return true;
}
}
bool FindAllStaticDictionaryMatches(const uint8_t* data,
size_t min_length,
size_t max_length,
uint32_t* matches) {
bool found_match = false;
size_t key = Hash(data);
size_t bucket = kStaticDictionaryBuckets[key];
if (bucket != 0) {
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
int BrotliFindAllStaticDictionaryMatches(const uint8_t* data,
size_t min_length,
size_t max_length,
uint32_t* matches) {
int has_found_match = 0;
size_t key0 = Hash(data);
size_t bucket0 = kStaticDictionaryBuckets[key0];
if (bucket0 != 0) {
size_t num = bucket0 & 0xff;
size_t offset = bucket0 >> 8;
size_t i;
for (i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0) {
const size_t matchlen = DictMatchLength(data, id, l, max_length);
const uint8_t* s;
size_t minlen;
size_t maxlen;
size_t len;
/* Transform "" + kIdentity + "" */
if (matchlen == l) {
AddMatch(id, l, l, matches);
found_match = true;
has_found_match = 1;
}
/* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */
if (matchlen >= l - 1) {
@ -95,20 +111,20 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
data[l + 2] == ' ') {
AddMatch(id + 49 * n, l + 3, l, matches);
}
found_match = true;
has_found_match = 1;
}
/* Transform "" + kOmitLastN + "" (N = 2 .. 9) */
size_t minlen = min_length;
if (l > 9) minlen = std::max(minlen, l - 9);
size_t maxlen = std::min(matchlen, l - 2);
for (size_t len = minlen; len <= maxlen; ++len) {
minlen = min_length;
if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
for (len = minlen; len <= maxlen; ++len) {
AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
found_match = true;
has_found_match = 1;
}
if (matchlen < l || l + 6 >= max_length) {
continue;
}
const uint8_t* s = &data[l];
s = &data[l];
/* Transforms "" + kIdentity + <suffix> */
if (s[0] == ' ') {
AddMatch(id + n, l + 1, l, matches);
@ -258,44 +274,45 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
} else {
/* Set is_all_caps=0 for kUppercaseFirst and
is_all_caps=1 otherwise (kUppercaseAll) transform. */
const bool t = w.transform != kUppercaseFirst;
const int is_all_caps = (w.transform != kUppercaseFirst) ? 1 : 0;
const uint8_t* s;
if (!IsMatch(w, data, max_length)) {
continue;
}
/* Transform "" + kUppercase{First,All} + "" */
AddMatch(id + (t ? 44 : 9) * n, l, l, matches);
found_match = true;
AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
has_found_match = 1;
if (l + 1 >= max_length) {
continue;
}
/* Transforms "" + kUppercase{First,All} + <suffix> */
const uint8_t* s = &data[l];
s = &data[l];
if (s[0] == ' ') {
AddMatch(id + (t ? 68 : 4) * n, l + 1, l, matches);
AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
} else if (s[0] == '"') {
AddMatch(id + (t ? 87 : 66) * n, l + 1, l, matches);
AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
if (s[1] == '>') {
AddMatch(id + (t ? 97 : 69) * n, l + 2, l, matches);
AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + (t ? 101 : 79) * n, l + 1, l, matches);
AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + (t ? 114 : 88) * n, l + 2, l, matches);
AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
}
} else if (s[0] == ',') {
AddMatch(id + (t ? 112 : 99) * n, l + 1, l, matches);
AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + (t ? 107 : 58) * n, l + 2, l, matches);
AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
}
} else if (s[0] == '\'') {
AddMatch(id + (t ? 94 : 74) * n, l + 1, l, matches);
AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
} else if (s[0] == '(') {
AddMatch(id + (t ? 113 : 78) * n, l + 1, l, matches);
AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + (t ? 105 : 104) * n, l + 2, l, matches);
AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + (t ? 116 : 108) * n, l + 2, l, matches);
AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
}
}
}
@ -303,29 +320,31 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
}
/* Transforms with prefixes " " and "." */
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
bool is_space = (data[0] == ' ');
key = Hash(&data[1]);
bucket = kStaticDictionaryBuckets[key];
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
int is_space = (data[0] == ' ') ? 1 : 0;
size_t key1 = Hash(&data[1]);
size_t bucket1 = kStaticDictionaryBuckets[key1];
size_t num = bucket1 & 0xff;
size_t offset = bucket1 >> 8;
size_t i;
for (i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0) {
const uint8_t* s;
if (!IsMatch(w, &data[1], max_length - 1)) {
continue;
}
/* Transforms " " + kIdentity + "" and "." + kIdentity + "" */
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
found_match = true;
has_found_match = 1;
if (l + 2 >= max_length) {
continue;
}
/* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
*/
const uint8_t* s = &data[l + 1];
s = &data[l + 1];
if (s[0] == ' ') {
AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
} else if (s[0] == '(') {
@ -352,37 +371,38 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
} else if (is_space) {
/* Set is_all_caps=0 for kUppercaseFirst and
is_all_caps=1 otherwise (kUppercaseAll) transform. */
const bool t = w.transform != kUppercaseFirst;
const int is_all_caps = (w.transform != kUppercaseFirst) ? 1 : 0;
const uint8_t* s;
if (!IsMatch(w, &data[1], max_length - 1)) {
continue;
}
/* Transforms " " + kUppercase{First,All} + "" */
AddMatch(id + (t ? 85 : 30) * n, l + 1, l, matches);
found_match = true;
AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
has_found_match = 1;
if (l + 2 >= max_length) {
continue;
}
/* Transforms " " + kUppercase{First,All} + <suffix> */
const uint8_t* s = &data[l + 1];
s = &data[l + 1];
if (s[0] == ' ') {
AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches);
AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
} else if (s[0] == ',') {
if (!t) {
if (!is_all_caps) {
AddMatch(id + 109 * n, l + 2, l, matches);
}
}
if (s[1] == ' ') {
AddMatch(id + (t ? 111 : 65) * n, l + 3, l, matches);
AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + (t ? 115 : 96) * n, l + 2, l, matches);
AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
if (s[1] == ' ') {
AddMatch(id + (t ? 117 : 91) * n, l + 3, l, matches);
AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
}
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + (t ? 110 : 118) * n, l + 3, l, matches);
AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + (t ? 119 : 120) * n, l + 3, l, matches);
AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
}
}
}
@ -393,11 +413,12 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
if ((data[1] == ' ' &&
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
(data[0] == 0xc2 && data[1] == 0xa0)) {
key = Hash(&data[2]);
bucket = kStaticDictionaryBuckets[key];
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
size_t key2 = Hash(&data[2]);
size_t bucket2 = kStaticDictionaryBuckets[key2];
size_t num = bucket2 & 0xff;
size_t offset = bucket2 >> 8;
size_t i;
for (i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
@ -405,11 +426,11 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
if (data[0] == 0xc2) {
AddMatch(id + 102 * n, l + 2, l, matches);
found_match = true;
has_found_match = 1;
} else if (l + 2 < max_length && data[l + 2] == ' ') {
size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
AddMatch(id + t * n, l + 3, l, matches);
found_match = true;
has_found_match = 1;
}
}
}
@ -421,18 +442,19 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
data[3] == 'e' && data[4] == ' ') ||
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
data[3] == 'm' && data[4] == '/')) {
key = Hash(&data[5]);
bucket = kStaticDictionaryBuckets[key];
size_t num = bucket & 0xff;
size_t offset = bucket >> 8;
for (size_t i = 0; i < num; ++i) {
size_t key5 = Hash(&data[5]);
size_t bucket5 = kStaticDictionaryBuckets[key5];
size_t num = bucket5 & 0xff;
size_t offset = bucket5 >> 8;
size_t i;
for (i = 0; i < num; ++i) {
const DictWord w = kStaticDictionaryWords[offset + i];
const size_t l = w.len;
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
found_match = true;
has_found_match = 1;
if (l + 5 < max_length) {
const uint8_t* s = &data[l + 5];
if (data[0] == ' ') {
@ -450,7 +472,9 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
}
}
}
return found_match;
return has_found_match;
}
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@ -10,10 +10,13 @@
#define BROTLI_ENC_STATIC_DICT_H_
#include "../common/types.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static const size_t kMaxDictionaryMatchLen = 37;
#define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37
static const uint32_t kInvalidMatch = 0xfffffff;
/* Matches data against static dictionary words, and for each length l,
@ -23,11 +26,13 @@ static const uint32_t kInvalidMatch = 0xfffffff;
Prerequisites:
matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
all elements are initialized to kInvalidMatch */
bool FindAllStaticDictionaryMatches(const uint8_t* data,
size_t min_length,
size_t max_length,
uint32_t* matches);
BROTLI_INTERNAL int BrotliFindAllStaticDictionaryMatches(const uint8_t* data,
size_t min_length,
size_t max_length,
uint32_t* matches);
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_STATIC_DICT_H_ */
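The prerequisites spelled out above, made concrete. A minimal caller sketch (data, min_length and max_length stand in for the encoder's real arguments):

static int DictMatchSketch(const uint8_t* data, size_t min_length,
                           size_t max_length) {
  uint32_t matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
  size_t l;
  for (l = 0; l <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++l) {
    matches[l] = kInvalidMatch;
  }
  if (!BrotliFindAllStaticDictionaryMatches(data, min_length, max_length,
                                            matches)) {
    return 0;
  }
  /* matches[len] now packs (distance << 5) + length_code for each len with
     a dictionary match; untouched entries stay kInvalidMatch. */
  return 1;
}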

View File

@ -6,21 +6,23 @@
/* Lookup table for static dictionary and transforms. */
#ifndef BROTLI_ENC_DICTIONARY_LUT_H_
#define BROTLI_ENC_DICTIONARY_LUT_H_
#ifndef BROTLI_ENC_STATIC_DICT_LUT_H_
#define BROTLI_ENC_STATIC_DICT_LUT_H_
#include "../common/types.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static const int kDictNumBits = 15;
static const uint32_t kDictHashMul32 = 0x1e35a7bd;
struct DictWord {
typedef struct DictWord {
uint8_t len;
uint8_t transform;
uint16_t idx;
};
} DictWord;
static const uint32_t kStaticDictionaryBuckets[] = {
0x000002, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000,
@ -12050,6 +12052,8 @@ static const DictWord kStaticDictionaryWords[] = {
{ 12, 10, 542 }, { 14, 11, 410 }, { 9, 11, 660 }, { 10, 11, 347 },
};
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_STATIC_DICT_LUT_H_ */

View File

@ -13,7 +13,6 @@
#include <string>
#include "../common/types.h"
#include "./port.h"
namespace brotli {

View File

@ -1,248 +0,0 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Transformations on dictionary words.
#ifndef BROTLI_ENC_TRANSFORM_H_
#define BROTLI_ENC_TRANSFORM_H_
#include <string>
#include "../common/dictionary.h"
namespace brotli {
enum WordTransformType {
kIdentity = 0,
kOmitLast1 = 1,
kOmitLast2 = 2,
kOmitLast3 = 3,
kOmitLast4 = 4,
kOmitLast5 = 5,
kOmitLast6 = 6,
kOmitLast7 = 7,
kOmitLast8 = 8,
kOmitLast9 = 9,
kUppercaseFirst = 10,
kUppercaseAll = 11,
kOmitFirst1 = 12,
kOmitFirst2 = 13,
kOmitFirst3 = 14,
kOmitFirst4 = 15,
kOmitFirst5 = 16,
kOmitFirst6 = 17,
kOmitFirst7 = 18,
kOmitFirst8 = 19,
kOmitFirst9 = 20
};
struct Transform {
const char* prefix;
WordTransformType word_transform;
const char* suffix;
};
static const Transform kTransforms[] = {
{ "", kIdentity, "" },
{ "", kIdentity, " " },
{ " ", kIdentity, " " },
{ "", kOmitFirst1, "" },
{ "", kUppercaseFirst, " " },
{ "", kIdentity, " the " },
{ " ", kIdentity, "" },
{ "s ", kIdentity, " " },
{ "", kIdentity, " of " },
{ "", kUppercaseFirst, "" },
{ "", kIdentity, " and " },
{ "", kOmitFirst2, "" },
{ "", kOmitLast1, "" },
{ ", ", kIdentity, " " },
{ "", kIdentity, ", " },
{ " ", kUppercaseFirst, " " },
{ "", kIdentity, " in " },
{ "", kIdentity, " to " },
{ "e ", kIdentity, " " },
{ "", kIdentity, "\"" },
{ "", kIdentity, "." },
{ "", kIdentity, "\">" },
{ "", kIdentity, "\n" },
{ "", kOmitLast3, "" },
{ "", kIdentity, "]" },
{ "", kIdentity, " for " },
{ "", kOmitFirst3, "" },
{ "", kOmitLast2, "" },
{ "", kIdentity, " a " },
{ "", kIdentity, " that " },
{ " ", kUppercaseFirst, "" },
{ "", kIdentity, ". " },
{ ".", kIdentity, "" },
{ " ", kIdentity, ", " },
{ "", kOmitFirst4, "" },
{ "", kIdentity, " with " },
{ "", kIdentity, "'" },
{ "", kIdentity, " from " },
{ "", kIdentity, " by " },
{ "", kOmitFirst5, "" },
{ "", kOmitFirst6, "" },
{ " the ", kIdentity, "" },
{ "", kOmitLast4, "" },
{ "", kIdentity, ". The " },
{ "", kUppercaseAll, "" },
{ "", kIdentity, " on " },
{ "", kIdentity, " as " },
{ "", kIdentity, " is " },
{ "", kOmitLast7, "" },
{ "", kOmitLast1, "ing " },
{ "", kIdentity, "\n\t" },
{ "", kIdentity, ":" },
{ " ", kIdentity, ". " },
{ "", kIdentity, "ed " },
{ "", kOmitFirst9, "" },
{ "", kOmitFirst7, "" },
{ "", kOmitLast6, "" },
{ "", kIdentity, "(" },
{ "", kUppercaseFirst, ", " },
{ "", kOmitLast8, "" },
{ "", kIdentity, " at " },
{ "", kIdentity, "ly " },
{ " the ", kIdentity, " of " },
{ "", kOmitLast5, "" },
{ "", kOmitLast9, "" },
{ " ", kUppercaseFirst, ", " },
{ "", kUppercaseFirst, "\"" },
{ ".", kIdentity, "(" },
{ "", kUppercaseAll, " " },
{ "", kUppercaseFirst, "\">" },
{ "", kIdentity, "=\"" },
{ " ", kIdentity, "." },
{ ".com/", kIdentity, "" },
{ " the ", kIdentity, " of the " },
{ "", kUppercaseFirst, "'" },
{ "", kIdentity, ". This " },
{ "", kIdentity, "," },
{ ".", kIdentity, " " },
{ "", kUppercaseFirst, "(" },
{ "", kUppercaseFirst, "." },
{ "", kIdentity, " not " },
{ " ", kIdentity, "=\"" },
{ "", kIdentity, "er " },
{ " ", kUppercaseAll, " " },
{ "", kIdentity, "al " },
{ " ", kUppercaseAll, "" },
{ "", kIdentity, "='" },
{ "", kUppercaseAll, "\"" },
{ "", kUppercaseFirst, ". " },
{ " ", kIdentity, "(" },
{ "", kIdentity, "ful " },
{ " ", kUppercaseFirst, ". " },
{ "", kIdentity, "ive " },
{ "", kIdentity, "less " },
{ "", kUppercaseAll, "'" },
{ "", kIdentity, "est " },
{ " ", kUppercaseFirst, "." },
{ "", kUppercaseAll, "\">" },
{ " ", kIdentity, "='" },
{ "", kUppercaseFirst, "," },
{ "", kIdentity, "ize " },
{ "", kUppercaseAll, "." },
{ "\xc2\xa0", kIdentity, "" },
{ " ", kIdentity, "," },
{ "", kUppercaseFirst, "=\"" },
{ "", kUppercaseAll, "=\"" },
{ "", kIdentity, "ous " },
{ "", kUppercaseAll, ", " },
{ "", kUppercaseFirst, "='" },
{ " ", kUppercaseFirst, "," },
{ " ", kUppercaseAll, "=\"" },
{ " ", kUppercaseAll, ", " },
{ "", kUppercaseAll, "," },
{ "", kUppercaseAll, "(" },
{ "", kUppercaseAll, ". " },
{ " ", kUppercaseAll, "." },
{ "", kUppercaseAll, "='" },
{ " ", kUppercaseAll, ". " },
{ " ", kUppercaseFirst, "=\"" },
{ " ", kUppercaseAll, "='" },
{ " ", kUppercaseFirst, "='" },
};
static const size_t kNumTransforms =
sizeof(kTransforms) / sizeof(kTransforms[0]);
static const size_t kOmitLastNTransforms[10] = {
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
};
static size_t ToUpperCase(uint8_t *p, size_t len) {
if (len == 1 || p[0] < 0xc0) {
if (p[0] >= 'a' && p[0] <= 'z') {
p[0] ^= 32;
}
return 1;
}
if (p[0] < 0xe0) {
p[1] ^= 32;
return 2;
}
if (len == 2) {
return 2;
}
p[2] ^= 5;
return 3;
}
inline std::string TransformWord(
WordTransformType transform_type, const uint8_t* word, size_t len) {
if (transform_type <= kOmitLast9) {
if (len <= static_cast<size_t>(transform_type)) {
return std::string();
}
return std::string(word, word + len - transform_type);
}
if (transform_type >= kOmitFirst1) {
const size_t skip = transform_type - (kOmitFirst1 - 1);
if (len <= skip) {
return std::string();
}
return std::string(word + skip, word + len);
}
std::string ret = std::string(word, word + len);
uint8_t *uppercase = reinterpret_cast<uint8_t*>(&ret[0]);
if (transform_type == kUppercaseFirst) {
ToUpperCase(uppercase, len);
} else if (transform_type == kUppercaseAll) {
size_t position = 0;
while (position < len) {
size_t step = ToUpperCase(uppercase, len - position);
uppercase += step;
position += step;
}
}
return ret;
}
inline std::string ApplyTransform(
const Transform& t, const uint8_t* word, size_t len) {
return std::string(t.prefix) +
TransformWord(t.word_transform, word, len) + std::string(t.suffix);
}
inline std::string GetTransformedDictionaryWord(size_t len_code,
size_t word_id) {
size_t num_words = 1u << kBrotliDictionarySizeBitsByLength[len_code];
size_t offset = kBrotliDictionaryOffsetsByLength[len_code];
size_t t = word_id / num_words;
size_t word_idx = word_id % num_words;
offset += len_code * word_idx;
const uint8_t* word = &kBrotliDictionary[offset];
return ApplyTransform(kTransforms[t], word, len_code);
}
} // namespace brotli
#endif // BROTLI_ENC_TRANSFORM_H_

View File

@ -10,11 +10,12 @@
#include "../common/types.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
namespace {
size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
static size_t BrotliParseAsUTF8(
int* symbol, const uint8_t* input, size_t size) {
/* ASCII */
if ((input[0] & 0x80) == 0) {
*symbol = input[0];
@ -63,21 +64,21 @@ size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
return 1;
}
} // namespace
/* Returns 1 if at least min_fraction of the data is UTF8-encoded.*/
bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
const size_t length, const double min_fraction) {
int BrotliIsMostlyUTF8(const uint8_t* data, const size_t pos,
const size_t mask, const size_t length, const double min_fraction) {
size_t size_utf8 = 0;
size_t i = 0;
while (i < length) {
int symbol;
size_t bytes_read = ParseAsUTF8(
&symbol, &data[(pos + i) & mask], length - i);
size_t bytes_read =
BrotliParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);
i += bytes_read;
if (symbol < 0x110000) size_utf8 += bytes_read;
}
return size_utf8 > min_fraction * static_cast<double>(length);
return (size_utf8 > min_fraction * (double)length) ? 1 : 0;
}
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@ -10,17 +10,23 @@
#define BROTLI_ENC_UTF8_UTIL_H_
#include "../common/types.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static const double kMinUTF8Ratio = 0.75;
/* Returns 1 if at least min_fraction of the bytes between pos and
pos + length in the (data, mask) ringbuffer is UTF8-encoded, otherwise
returns 0. */
bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
const size_t length, const double min_fraction);
BROTLI_INTERNAL int BrotliIsMostlyUTF8(
const uint8_t* data, const size_t pos, const size_t mask,
const size_t length, const double min_fraction);
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_UTF8_UTIL_H_ */
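A minimal caller sketch, as the encoder uses this check to pick a literal cost model (illustration only):

static int PickLiteralModelSketch(const uint8_t* data, size_t pos,
                                  size_t mask, size_t length) {
  /* 1 -> cost literals as UTF-8 text, 0 -> cost them as raw binary. */
  return BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio);
}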

View File

@ -15,7 +15,9 @@
#include "../common/types.h"
#include "./port.h"
namespace brotli {
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
/*#define BIT_WRITER_DEBUG */
@ -34,15 +36,10 @@ namespace brotli {
For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
and locate the rest in BYTE+1, BYTE+2, etc. */
inline void WriteBits(size_t n_bits,
uint64_t bits,
size_t * __restrict pos,
uint8_t * __restrict array) {
#ifdef BIT_WRITER_DEBUG
printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos);
#endif
assert((bits >> n_bits) == 0);
assert(n_bits <= 56);
static BROTLI_INLINE void BrotliWriteBits(size_t n_bits,
uint64_t bits,
size_t * BROTLI_RESTRICT pos,
uint8_t * BROTLI_RESTRICT array) {
#ifdef IS_LITTLE_ENDIAN
/* This branch of the code can write up to 56 bits at a time,
7 bits are lost by being perhaps already in *p and at least
@ -51,6 +48,11 @@ inline void WriteBits(size_t n_bits,
access a byte that was never initialized). */
uint8_t *p = &array[*pos >> 3];
uint64_t v = *p;
#ifdef BIT_WRITER_DEBUG
printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos);
#endif
assert((bits >> n_bits) == 0);
assert(n_bits <= 56);
v |= bits << (*pos & 7);
BROTLI_UNALIGNED_STORE64(p, v); /* Set some bits. */
*pos += n_bits;
@ -59,19 +61,20 @@ inline void WriteBits(size_t n_bits,
uint8_t *array_pos = &array[*pos >> 3];
const size_t bits_reserved_in_first_byte = (*pos & 7);
bits <<= bits_reserved_in_first_byte;
*array_pos++ |= static_cast<uint8_t>(bits);
*array_pos++ |= (uint8_t)bits;
for (size_t bits_left_to_write = n_bits + bits_reserved_in_first_byte;
bits_left_to_write >= 9;
bits_left_to_write -= 8) {
bits >>= 8;
*array_pos++ = static_cast<uint8_t>(bits);
*array_pos++ = (uint8_t)bits;
}
*array_pos = 0;
*pos += n_bits;
#endif
}
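To make the LSB-first packing described in the comment above concrete, here is a toy call sequence (illustrative only, not from the commit) that writes 3 bits and then 5 bits into a zeroed buffer:

uint8_t storage[16] = { 0 };  /* zeroed, so the read-modify-write is defined */
size_t pos = 0;
BrotliWriteBits(3, 0x5, &pos, storage);   /* 101b fills bits 0..2 of byte 0 */
BrotliWriteBits(5, 0x1A, &pos, storage);  /* 11010b fills bits 3..7 */
/* Now storage[0] == (0x1A << 3) | 0x5 == 0xD5, and pos == 8. */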
inline void WriteBitsPrepareStorage(size_t pos, uint8_t *array) {
static BROTLI_INLINE void BrotliWriteBitsPrepareStorage(
size_t pos, uint8_t *array) {
#ifdef BIT_WRITER_DEBUG
printf("WriteBitsPrepareStorage %10d\n", pos);
#endif
@ -79,6 +82,8 @@ inline void WriteBitsPrepareStorage(size_t pos, uint8_t *array) {
array[pos >> 3] = 0;
}
} // namespace brotli
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_ENC_WRITE_BITS_H_ */

View File

@ -12,7 +12,7 @@ testdata/alice29.txt
testdata/asyoulik.txt
testdata/lcet10.txt
testdata/plrabn12.txt
../enc/encode.cc
../enc/encode.c
../common/dictionary.h
../dec/decode.c
%s

View File

@ -10,7 +10,7 @@ testdata/alice29.txt
testdata/asyoulik.txt
testdata/lcet10.txt
testdata/plrabn12.txt
../enc/encode.cc
../enc/encode.c
../common/dictionary.h
../dec/decode.c
$BRO

View File

@ -15,9 +15,10 @@
#include <cstring>
#include <ctime>
#include <string>
#include <vector>
#include "../dec/decode.h"
#include "../enc/compressor.h"
#include "../enc/encode.h"
#if !defined(_WIN32)
#include <unistd.h>
@ -52,7 +53,6 @@ static inline int ms_open(const char *filename, int oflag, int pmode) {
}
#endif /* WIN32 */
static bool ParseQuality(const char* s, int* quality) {
if (s[0] >= '0' && s[0] <= '9') {
*quality = s[0] - '0';
@ -68,6 +68,7 @@ static bool ParseQuality(const char* s, int* quality) {
static void ParseArgv(int argc, char **argv,
char **input_path,
char **output_path,
char **dictionary_path,
int *force,
int *quality,
int *decompress,
@ -125,6 +126,13 @@ static void ParseArgv(int argc, char **argv,
*output_path = argv[k + 1];
++k;
continue;
} else if (!strcmp("--custom-dictionary", argv[k])) {
if (*dictionary_path != 0) {
goto error;
}
*dictionary_path = argv[k + 1];
++k;
continue;
} else if (!strcmp("--quality", argv[k]) ||
!strcmp("-q", argv[k])) {
if (!ParseQuality(argv[k + 1], quality)) {
@ -158,7 +166,7 @@ error:
fprintf(stderr,
"Usage: %s [--force] [--quality n] [--decompress]"
" [--input filename] [--output filename] [--repeat iters]"
" [--verbose] [--window n]\n",
" [--verbose] [--window n] [--custom-dictionary filename]\n",
argv[0]);
exit(1);
}
@ -196,7 +204,7 @@ static FILE *OpenOutputFile(const char *output_path, const int force) {
return fdopen(fd, "wb");
}
static int64_t FileSize(char *path) {
static int64_t FileSize(const char *path) {
FILE *f = fopen(path, "rb");
if (f == NULL) {
return -1;
@ -212,13 +220,50 @@ static int64_t FileSize(char *path) {
return retval;
}
static std::vector<uint8_t> ReadDictionary(const char* path) {
FILE *f = fopen(path, "rb");
if (f == NULL) {
perror("fopen");
exit(1);
}
int64_t file_size = FileSize(path);
if (file_size == -1) {
fprintf(stderr, "could not get size of dictionary file");
exit(1);
}
static const int kMaxDictionarySize = (1 << 24) - 16;
if (file_size > kMaxDictionarySize) {
fprintf(stderr, "dictionary is larger than maximum allowed: %d\n",
kMaxDictionarySize);
exit(1);
}
std::vector<uint8_t> buffer;
buffer.resize(static_cast<size_t>(file_size));
size_t bytes_read = fread(buffer.data(), sizeof(uint8_t), buffer.size(), f);
if (bytes_read != buffer.size()) {
fprintf(stderr, "could not read dictionary\n");
exit(1);
}
fclose(f);
return buffer;
}
static const size_t kFileBufferSize = 65536;
static void Decompresss(FILE* fin, FILE* fout) {
static int Decompress(FILE* fin, FILE* fout, const char* dictionary_path) {
/* Dictionary should be kept during first rounds of decompression. */
std::vector<uint8_t> dictionary;
BrotliState* s = BrotliCreateState(NULL, NULL, NULL);
if (!s) {
fprintf(stderr, "out of memory\n");
exit(1);
return 0;
}
if (dictionary_path != NULL) {
dictionary = ReadDictionary(dictionary_path);
BrotliSetCustomDictionary(dictionary.size(), dictionary.data(), s);
}
uint8_t* input = new uint8_t[kFileBufferSize];
uint8_t* output = new uint8_t[kFileBufferSize];
@ -259,47 +304,109 @@ static void Decompresss(FILE* fin, FILE* fout) {
BrotliDestroyState(s);
if ((result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) || ferror(fout)) {
fprintf(stderr, "failed to write output\n");
exit(1);
return 0;
} else if (result != BROTLI_RESULT_SUCCESS) { /* Error or needs more input. */
fprintf(stderr, "corrupt input\n");
exit(1);
return 0;
}
return 1;
}
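The pump loop between these hunks is elided by the diff view; a condensed sketch of what such a body looks like, using the locals declared above and the BrotliDecompressStream signature of this era (an assumption, not a quote of the commit):

size_t available_in = 0, available_out = kFileBufferSize, total_out = 0;
const uint8_t* next_in = NULL;
uint8_t* next_out = output;
BrotliResult result = BROTLI_RESULT_NEEDS_MORE_INPUT;
while (1) {
  if (result == BROTLI_RESULT_NEEDS_MORE_INPUT) {
    if (feof(fin)) break;                      /* truncated stream */
    available_in = fread(input, 1, kFileBufferSize, fin);
    next_in = input;
  } else if (result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) {
    fwrite(output, 1, kFileBufferSize, fout);  /* drain the full buffer */
    available_out = kFileBufferSize;
    next_out = output;
  } else {
    break;                                     /* success or error */
  }
  result = BrotliDecompressStream(&available_in, &next_in,
                                  &available_out, &next_out, &total_out, s);
}
fwrite(output, 1, (size_t)(next_out - output), fout);  /* final partial block */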
static int Compress(int quality, int lgwin, FILE* fin, FILE* fout,
const char *dictionary_path) {
BrotliEncoderState* s = BrotliEncoderCreateInstance(0, 0, 0);
uint8_t* buffer = reinterpret_cast<uint8_t*>(malloc(kFileBufferSize << 1));
uint8_t* input = buffer;
uint8_t* output = buffer + kFileBufferSize;
size_t available_in = 0;
const uint8_t* next_in = NULL;
size_t available_out = kFileBufferSize;
uint8_t* next_out = output;
int is_eof = 0;
int is_ok = 1;
if (!s || !buffer) {
is_ok = 0;
goto finish;
}
BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
if (dictionary_path != NULL) {
std::vector<uint8_t> dictionary = ReadDictionary(dictionary_path);
BrotliEncoderSetCustomDictionary(s, dictionary.size(),
reinterpret_cast<const uint8_t*>(dictionary.data()));
}
while (1) {
if (available_in == 0 && !is_eof) {
available_in = fread(input, 1, kFileBufferSize, fin);
next_in = input;
if (ferror(fin)) break;
is_eof = feof(fin);
}
if (!BrotliEncoderCompressStream(s,
is_eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
&available_in, &next_in, &available_out, &next_out, NULL)) {
is_ok = 0;
break;
}
if (available_out != kFileBufferSize) {
size_t out_size = kFileBufferSize - available_out;
fwrite(output, 1, out_size, fout);
if (ferror(fout)) break;
available_out = kFileBufferSize;
next_out = output;
}
if (BrotliEncoderIsFinished(s)) break;
}
finish:
free(buffer);
BrotliEncoderDestroyInstance(s);
if (!is_ok) {
/* Should detect OOM? */
fprintf(stderr, "failed to compress data\n");
return 0;
} else if (ferror(fout)) {
fprintf(stderr, "failed to write output\n");
return 0;
} else if (ferror(fin)) {
fprintf(stderr, "failed to read input\n");
return 0;
}
return 1;
}
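The five encoder entry points used above are enough for a buffer-to-buffer variant as well. A minimal sketch (hypothetical helper, not part of the commit), assuming the caller supplies an output buffer large enough for the compressed data:

static int CompressBuffer(int quality, int lgwin,
                          const uint8_t* in, size_t in_size,
                          uint8_t* out, size_t* out_size) {
  BrotliEncoderState* s = BrotliEncoderCreateInstance(0, 0, 0);
  size_t available_in = in_size, available_out = *out_size;
  const uint8_t* next_in = in;
  uint8_t* next_out = out;
  int is_ok = 1;
  if (!s) return 0;
  BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
  BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
  while (!BrotliEncoderIsFinished(s)) {
    if (!BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
        &available_in, &next_in, &available_out, &next_out, NULL)) {
      is_ok = 0;  /* encoder error */
      break;
    }
    if (available_out == 0 && !BrotliEncoderIsFinished(s)) {
      is_ok = 0;  /* output buffer too small */
      break;
    }
  }
  *out_size -= available_out;  /* bytes actually written */
  BrotliEncoderDestroyInstance(s);
  return is_ok;
}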
int main(int argc, char** argv) {
char *input_path = 0;
char *output_path = 0;
char *dictionary_path = 0;
int force = 0;
int quality = 11;
int decompress = 0;
int repeat = 1;
int verbose = 0;
int lgwin = 0;
ParseArgv(argc, argv, &input_path, &output_path, &force,
ParseArgv(argc, argv, &input_path, &output_path, &dictionary_path, &force,
&quality, &decompress, &repeat, &verbose, &lgwin);
const clock_t clock_start = clock();
for (int i = 0; i < repeat; ++i) {
FILE* fin = OpenInputFile(input_path);
FILE* fout = OpenOutputFile(output_path, force);
int is_ok = false;
if (decompress) {
Decompresss(fin, fout);
is_ok = Decompress(fin, fout, dictionary_path);
} else {
brotli::BrotliParams params;
params.lgwin = lgwin;
params.quality = quality;
try {
brotli::BrotliFileIn in(fin, 1 << 16);
brotli::BrotliFileOut out(fout);
if (!BrotliCompress(params, &in, &out)) {
fprintf(stderr, "compression failed\n");
unlink(output_path);
exit(1);
}
} catch (std::bad_alloc&) {
fprintf(stderr, "not enough memory\n");
unlink(output_path);
exit(1);
}
is_ok = Compress(quality, lgwin, fin, fout, dictionary_path);
}
if (!is_ok) {
unlink(output_path);
exit(1);
}
if (fclose(fin) != 0) {
perror("fclose");