Add brotli compressor

This commit adds the encoder for the brotli compression format.
Brotli is a generic byte-level compression algorithm.
Zoltan Szabadka 2013-10-23 13:06:13 +02:00
parent 6455522172
commit c66e4e3e4f
24 changed files with 3904 additions and 0 deletions

3
enc/README Normal file

@@ -0,0 +1,3 @@
This directory holds the encoder for brotli compression format.
Brotli is proposed to be used at the byte-compression level in the WOFF 2.0 format.
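
As an aside (not part of this commit's files), a minimal usage sketch of the encoder's top-level entry point, BrotliCompressBuffer from enc/encode.cc. It assumes the example is built from the enc/ directory, that ./encode.h declares the function, that the output buffer sized below is large enough, and that a nonzero return value means success; none of these conventions are documented by this commit.

#include <cstdio>
#include <cstring>
#include <vector>
#include "./encode.h"

int main() {
  const char* text = "Text that repeats itself repeats itself repeats itself.";
  const size_t input_size = strlen(text);
  // Assumed to be enough room for the compressed output.
  std::vector<uint8_t> output(input_size + 1024);
  size_t encoded_size = output.size();
  int ok = brotli::BrotliCompressBuffer(
      input_size, reinterpret_cast<const uint8_t*>(text),
      &encoded_size, &output[0]);
  printf("ok=%d, %zu -> %zu bytes\n", ok, input_size, encoded_size);
  return ok ? 0 : 1;
}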

137
enc/backward_references.cc Normal file

@@ -0,0 +1,137 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Function to find backward reference copies.
#include "./backward_references.h"
#include <algorithm>
#include <vector>
#include "./command.h"
#include "./hash.h"
#include "./literal_cost.h"
namespace brotli {
void CreateBackwardReferences(const uint8_t* data,
int length,
std::vector<Command>* commands) {
HashLongestMatch<13,11> *hasher = new HashLongestMatch<13,11>;
float *literal_cost = new float[length];
EstimateBitCostsForLiterals(length, data, literal_cost);
hasher->SetLiteralCost(literal_cost);
// Length heuristic that seems to help, probably by allowing better selection
// between lazy matches of similar lengths.
int insert_length = 0;
size_t i = 0;
double average_cost = 0.0;
for (int i = 0; i < length; ++i) {
average_cost += literal_cost[i];
}
average_cost /= length;
hasher->set_average_cost(average_cost);
while (i + 2 < length) {
size_t best_len = 0;
size_t best_dist = 0;
double best_score = 0;
const size_t max_distance = std::min(i, 1UL << 24);
hasher->set_insert_length(insert_length);
bool match_found = hasher->FindLongestMatch(
data, i, length - i, max_distance,
&best_len, &best_dist, &best_score);
if (match_found) {
// Found a match. Let's look for something even better ahead.
int delayed_backward_references_in_row = 0;
while (i + 4 < length &&
delayed_backward_references_in_row < 4) {
size_t best_len_2 = 0;
size_t best_dist_2 = 0;
double best_score_2 = 0;
hasher->Store(data + i, i);
match_found = hasher->FindLongestMatch(
data, i + 1, length - i - 1, max_distance,
&best_len_2, &best_dist_2, &best_score_2);
double cost_diff_lazy = 0;
if (best_len >= 4) {
cost_diff_lazy += hasher->literal_cost(i + 4) - average_cost;
}
{
const int tail_length = best_len_2 - best_len + 1;
for (int k = 0; k < tail_length; ++k) {
cost_diff_lazy -= hasher->literal_cost(i + best_len + k) -
average_cost;
}
}
// If we are not already inserting any symbols, starting an insertion here is
// more expensive than extending an insertion that is already in progress.
if (insert_length < 1) {
cost_diff_lazy += 1.0;
}
// Add bias to slightly avoid lazy matching.
cost_diff_lazy += 2.0 + delayed_backward_references_in_row * 0.2;
cost_diff_lazy += 0.04 * hasher->literal_cost(i);
if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
// Ok, let's just write one byte for now and start a match from the
// next byte.
++insert_length;
++delayed_backward_references_in_row;
best_len = best_len_2;
best_dist = best_dist_2;
best_score = best_score_2;
i++;
} else {
break;
}
}
Command cmd;
cmd.insert_length_ = insert_length;
cmd.copy_length_ = best_len;
cmd.copy_distance_ = best_dist;
commands->push_back(cmd);
hasher->set_last_distance(best_dist);
insert_length = 0;
++i;
for (int j = 1; j < best_len; ++j) {
if (i + 2 < length) {
hasher->Store(data + i, i);
}
++i;
}
} else {
++insert_length;
hasher->Store(data + i, i);
++i;
}
}
insert_length += (length - i);
if (insert_length > 0) {
Command cmd;
cmd.insert_length_ = insert_length;
cmd.copy_length_ = 0;
cmd.copy_distance_ = 0;
commands->push_back(cmd);
}
delete[] literal_cost;
delete hasher;
}
} // namespace brotli
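
As an illustration (not part of the commit), a toy, self-contained version of the lazy-matching decision made in the loop above: a match found one byte later is taken only if its score beats the current match by more than the cost of emitting one extra literal. The scores and costs below are invented and are not brotli's actual metric.

#include <cstdio>

struct ToyMatch { int len; double score; };

// Delay the match by one byte only if the later match pays for the extra
// literal that delaying forces us to insert.
static bool PreferLazyMatch(const ToyMatch& now, const ToyMatch& next,
                            double extra_literal_cost) {
  return next.score >= now.score + extra_literal_cost;
}

int main() {
  ToyMatch at_i  = {5, 30.0};             // match starting at the current byte
  ToyMatch at_i1 = {7, 38.0};             // longer match starting one byte later
  const double extra_literal_cost = 6.0;  // made-up per-literal cost
  if (PreferLazyMatch(at_i, at_i1, extra_literal_cost)) {
    printf("emit one literal, then take the length-%d match\n", at_i1.len);
  } else {
    printf("take the length-%d match now\n", at_i.len);
  }
  return 0;
}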

33
enc/backward_references.h Normal file

@@ -0,0 +1,33 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Function to find backward reference copies.
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
#include <stdint.h>
#include <vector>
#include "./command.h"
namespace brotli {
void CreateBackwardReferences(const uint8_t* data,
int length,
std::vector<Command>* commands);
} // namespace brotli
#endif // BROTLI_ENC_BACKWARD_REFERENCES_H_

150
enc/bit_cost.h Normal file

@@ -0,0 +1,150 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions to estimate the bit cost of Huffman trees.
#ifndef BROTLI_ENC_BIT_COST_H_
#define BROTLI_ENC_BIT_COST_H_
#include <stdint.h>
#include "./entropy_encode.h"
#include "./fast_log.h"
namespace brotli {
static const int kHuffmanExtraBits[kCodeLengthCodes] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7,
};
static inline int HuffmanTreeBitCost(const int* counts, const uint8_t* depth) {
int nbits = 0;
for (int i = 0; i < kCodeLengthCodes; ++i) {
nbits += counts[i] * (depth[i] + kHuffmanExtraBits[i]);
}
return nbits;
}
static inline int HuffmanTreeBitCost(
const Histogram<kCodeLengthCodes>& histogram,
const EntropyCode<kCodeLengthCodes>& entropy) {
return HuffmanTreeBitCost(&histogram.data_[0], &entropy.depth_[0]);
}
static inline int HuffmanBitCost(const uint8_t* depth, int length) {
int max_depth = 1;
int histogram[kCodeLengthCodes] = { 0 };
int tail_start = 0;
// compute histogram of compacted huffman tree
for (int i = 0; i < length;) {
const int value = depth[i];
if (value > max_depth) {
max_depth = value;
}
int reps = 1;
for (int k = i + 1; k < length && depth[k] == value; ++k) {
++reps;
}
i += reps;
if (value == 0) {
while (reps > 10) {
++histogram[18];
reps -= 138;
}
if (reps > 2) {
++histogram[17];
} else if (reps > 0) {
histogram[0] += reps;
}
} else {
tail_start = i;
++histogram[value];
--reps;
while (reps > 2) {
++histogram[16];
reps -= 6;
}
if (reps > 0) {
histogram[value] += reps;
}
}
}
// create huffman tree of huffman tree
uint8_t cost[kCodeLengthCodes] = { 0 };
CreateHuffmanTree(histogram, kCodeLengthCodes, 7, cost);
// account for rle extra bits
cost[16] += 2;
cost[17] += 3;
cost[18] += 7;
int tree_size = 0;
int bits = 6 + 3 * max_depth; // huffman tree of huffman tree cost
for (int i = 0; i < kCodeLengthCodes; ++i) {
bits += histogram[i] * cost[i]; // huffman tree bit cost
tree_size += histogram[i];
}
// bit cost adjustment for long trailing zero sequence
int tail_size = length - tail_start;
int tail_bits = 0;
while (tail_size >= 1) {
if (tail_size < 3) {
tail_bits += tail_size * cost[0];
tree_size -= tail_size;
break;
} else if (tail_size < 11) {
tail_bits += cost[17];
--tree_size;
break;
} else {
tail_bits += cost[18];
tail_size -= 138;
--tree_size;
}
}
if (tail_bits > 12) {
bits += ((Log2Ceiling(tree_size - 1) + 1) & ~1) + 3 - tail_bits;
}
return bits;
}
template<int kSize>
double PopulationCost(const Histogram<kSize>& histogram) {
if (histogram.total_count_ == 0) {
return 4;
}
int symbols[2] = { 0 };
int count = 0;
for (int i = 0; i < kSize && count < 3; ++i) {
if (histogram.data_[i] > 0) {
if (count < 2) symbols[count] = i;
++count;
}
}
if (count <= 2 && symbols[0] < 256 && symbols[1] < 256) {
return ((symbols[0] <= 1 ? 4 : 11) +
(count == 2 ? 8 + histogram.total_count_ : 0));
}
uint8_t depth[kSize] = { 0 };
CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth);
int bits = HuffmanBitCost(depth, kSize);
for (int i = 0; i < kSize; ++i) {
bits += histogram.data_[i] * depth[i];
}
return bits;
}
} // namespace brotli
#endif // BROTLI_ENC_BIT_COST_H_
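
For intuition (illustration only, not part of the commit): PopulationCost above charges every symbol its integer Huffman depth and adds the cost of serializing the code-length tree; the order-0 entropy computed below is the corresponding lower bound for the same histogram.

#include <cmath>
#include <cstdio>
#include <vector>

// Shannon entropy of the counts, in bits: a lower bound on what any prefix
// code (and hence PopulationCost) can achieve for this histogram.
static double EntropyBitCost(const std::vector<int>& histogram) {
  double total = 0.0;
  for (size_t i = 0; i < histogram.size(); ++i) total += histogram[i];
  double bits = 0.0;
  for (size_t i = 0; i < histogram.size(); ++i) {
    if (histogram[i] > 0) {
      bits += histogram[i] * (log(total / histogram[i]) / log(2.0));
    }
  }
  return bits;
}

int main() {
  std::vector<int> histogram(256, 0);
  histogram['a'] = 90;
  histogram['b'] = 9;
  histogram['c'] = 1;
  printf("entropy lower bound: %.1f bits for 100 symbols\n",
         EntropyBitCost(histogram));
  return 0;
}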

411
enc/block_splitter.cc Normal file

@@ -0,0 +1,411 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Block split point selection utilities.
#include "./block_splitter.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <map>
#include "./cluster.h"
#include "./command.h"
#include "./fast_log.h"
#include "./histogram.h"
namespace brotli {
static const int kMaxLiteralHistograms = 48;
static const int kMaxCommandHistograms = 50;
static const double kLiteralBlockSwitchCost = 26;
static const double kCommandBlockSwitchCost = 13.5;
static const double kDistanceBlockSwitchCost = 14.6;
static const int kLiteralStrideLength = 70;
static const int kCommandStrideLength = 40;
static const int kSymbolsPerLiteralHistogram = 550;
static const int kSymbolsPerCommandHistogram = 530;
static const int kSymbolsPerDistanceHistogram = 550;
static const int kMinLengthForBlockSplitting = 128;
static const int kIterMulForRefining = 2;
static const int kMinItersForRefining = 100;
void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
const uint8_t* data,
std::vector<uint8_t>* literals) {
// Count how many we have.
size_t total_length = 0;
for (int i = 0; i < cmds.size(); ++i) {
total_length += cmds[i].insert_length_;
}
if (total_length == 0) {
return;
}
// Allocate.
literals->resize(total_length);
// Loop again, and copy this time.
size_t pos = 0;
size_t from_pos = 0;
for (int i = 0; i < cmds.size() && pos < total_length; ++i) {
memcpy(&(*literals)[pos], data + from_pos, cmds[i].insert_length_);
pos += cmds[i].insert_length_;
from_pos += cmds[i].insert_length_ + cmds[i].copy_length_;
}
}
void CopyCommandsToByteArray(const std::vector<Command>& cmds,
std::vector<uint16_t>* insert_and_copy_codes,
std::vector<uint8_t>* distance_prefixes) {
for (int i = 0; i < cmds.size(); ++i) {
const Command& cmd = cmds[i];
insert_and_copy_codes->push_back(cmd.command_prefix_);
if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
distance_prefixes->push_back(cmd.distance_prefix_);
}
}
}
template<int kSize>
double HistogramAddEval(const Histogram<kSize>& a,
const Histogram<kSize>& b) {
int total = a.total_count_ + b.total_count_;
double retval = total * FastLog2(total);
for (int i = 0; i < kSize; ++i) {
int count = a.data_[i] + b.data_[i];
retval -= count * FastLog2(count);
}
return retval;
}
template<typename HistogramType, typename DataType>
void InitialEntropyCodes(const DataType* data, size_t length,
int literals_per_histogram,
int max_histograms,
size_t stride,
std::vector<HistogramType>* vec) {
int total_histograms = length / literals_per_histogram + 1;
if (total_histograms > max_histograms) {
total_histograms = max_histograms;
}
unsigned int seed = 7;
int block_length = length / total_histograms;
for (int i = 0; i < total_histograms; ++i) {
int pos = length * i / total_histograms;
if (i != 0) {
pos += rand_r(&seed) % block_length;
}
if (pos + stride >= length) {
pos = length - stride - 1;
}
HistogramType histo;
histo.Add(data + pos, stride);
vec->push_back(histo);
}
}
template<typename HistogramType>
int FindClosest(const HistogramType& sample,
const std::vector<HistogramType>& vec) {
double best_distance = 1e99;
int best_ix = 0;
for (int i = 0; i < vec.size(); ++i) {
double distance = HistogramAddEval(sample, vec[i]);
if (distance < best_distance) {
best_ix = i;
best_distance = distance;
}
}
return best_ix;
}
template<typename HistogramType, typename DataType>
void RandomSample(unsigned int* seed,
const DataType* data,
size_t length,
size_t stride,
HistogramType* sample) {
size_t pos = rand_r(seed) % (length - stride);
sample->Add(data + pos, stride);
}
template<typename HistogramType, typename DataType>
void RefineEntropyCodes(const DataType* data, size_t length,
size_t stride,
std::vector<HistogramType>* vec) {
const int iters =
kIterMulForRefining * length / stride + kMinItersForRefining;
unsigned int seed = 7;
for (int iter = 0; iter < iters; ++iter) {
HistogramType sample;
RandomSample(&seed, data, length, stride, &sample);
int ix = FindClosest(sample, *vec);
(*vec)[ix].AddHistogram(sample);
}
}
inline static float BitCost(int total, int count) {
return count == 0 ? FastLog2(total) + 2 : FastLog2(total) - FastLog2(count);
}
template<typename DataType, int kSize>
void FindBlocks(const DataType* data, const size_t length,
const double block_switch_bitcost,
const std::vector<Histogram<kSize> > &vec,
uint8_t *block_id) {
if (vec.size() <= 1) {
for (int i = 0; i < length; ++i) {
block_id[i] = 0;
}
return;
}
int vecsize = vec.size();
double* insert_cost = new double[kSize * vecsize];
memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * vecsize);
for (int i = 0; i < kSize; ++i) {
for (int j = 0; j < vecsize; ++j) {
insert_cost[i * vecsize + j] =
BitCost(vec[j].total_count_, vec[j].data_[i]);
}
}
double *cost = new double[vecsize];
memset(cost, 0, sizeof(cost[0]) * vecsize);
bool* switch_signal = new bool[length * vecsize];
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * vecsize);
// After each iteration of this loop, cost[k] will contain the difference
// between the minimum cost of arriving at the current byte position using
// entropy code k, and the minimum cost of arriving at the current byte
// position. This difference is capped at the block switch cost, and if it
// reaches block switch cost, it means that when we trace back from the last
// position, we need to switch here.
for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
int ix = byte_ix * vecsize;
int insert_cost_ix = data[byte_ix] * vecsize;
double min_cost = 1e99;
for (int k = 0; k < vecsize; ++k) {
// We are coding the symbol in data[byte_ix] with entropy code k.
cost[k] += insert_cost[insert_cost_ix + k];
if (cost[k] < min_cost) {
min_cost = cost[k];
block_id[byte_ix] = k;
}
}
double block_switch_cost = block_switch_bitcost;
// More blocks for the beginning.
if (byte_ix < 2000) {
block_switch_cost *= 0.77 + 0.07 * byte_ix / 2000;
}
for (int k = 0; k < vecsize; ++k) {
cost[k] -= min_cost;
if (cost[k] >= block_switch_cost) {
cost[k] = block_switch_cost;
switch_signal[ix + k] = true;
}
}
}
// Now trace back from the last position and switch at the marked places.
int byte_ix = length - 1;
int ix = byte_ix * vecsize;
int cur_id = block_id[byte_ix];
while (byte_ix > 0) {
--byte_ix;
ix -= vecsize;
if (switch_signal[ix + cur_id]) {
cur_id = block_id[byte_ix];
}
block_id[byte_ix] = cur_id;
}
delete[] insert_cost;
delete[] cost;
delete[] switch_signal;
}
int RemapBlockIds(uint8_t* block_ids, const size_t length) {
std::map<uint8_t, uint8_t> new_id;
int next_id = 0;
for (int i = 0; i < length; ++i) {
if (new_id.find(block_ids[i]) == new_id.end()) {
new_id[block_ids[i]] = next_id;
++next_id;
}
}
for (int i = 0; i < length; ++i) {
block_ids[i] = new_id[block_ids[i]];
}
return next_id;
}
template<typename HistogramType, typename DataType>
void BuildBlockHistograms(const DataType* data, const size_t length,
uint8_t* block_ids,
std::vector<HistogramType>* histograms) {
int num_types = RemapBlockIds(block_ids, length);
histograms->clear();
histograms->resize(num_types);
for (int i = 0; i < length; ++i) {
(*histograms)[block_ids[i]].Add(data[i]);
}
}
template<typename HistogramType, typename DataType>
void ClusterBlocks(const DataType* data, const size_t length,
uint8_t* block_ids) {
std::vector<HistogramType> histograms;
std::vector<int> block_index(length);
int cur_idx = 0;
HistogramType cur_histogram;
for (int i = 0; i < length; ++i) {
bool block_boundary = (i + 1 == length || block_ids[i] != block_ids[i + 1]);
block_index[i] = cur_idx;
cur_histogram.Add(data[i]);
if (block_boundary) {
histograms.push_back(cur_histogram);
cur_histogram.Clear();
++cur_idx;
}
}
std::vector<HistogramType> clustered_histograms;
std::vector<int> histogram_symbols;
// Block ids need to fit in one byte and there are two ids reserved for
// indicating 'same as last' and 'last plus one'.
static const int kMaxNumberOfBlockTypes = 254;
ClusterHistograms(histograms, 1, histograms.size(),
kMaxNumberOfBlockTypes,
&clustered_histograms,
&histogram_symbols);
for (int i = 0; i < length; ++i) {
block_ids[i] = histogram_symbols[block_index[i]];
}
}
void BuildBlockSplit(const std::vector<uint8_t>& block_ids, BlockSplit* split) {
int cur_id = block_ids[0];
int cur_length = 1;
split->num_types_ = -1;
for (int i = 1; i < block_ids.size(); ++i) {
if (block_ids[i] != cur_id) {
split->types_.push_back(cur_id);
split->lengths_.push_back(cur_length);
split->num_types_ = std::max(split->num_types_, cur_id);
cur_id = block_ids[i];
cur_length = 0;
}
++cur_length;
}
split->types_.push_back(cur_id);
split->lengths_.push_back(cur_length);
split->num_types_ = std::max(split->num_types_, cur_id);
++split->num_types_;
}
template<typename HistogramType, typename DataType>
void SplitByteVector(const std::vector<DataType>& data,
const int literals_per_histogram,
const int max_histograms,
const int sampling_stride_length,
const double block_switch_cost,
BlockSplit* split) {
if (data.empty()) {
split->num_types_ = 0;
return;
} else if (data.size() < kMinLengthForBlockSplitting) {
split->num_types_ = 1;
split->types_.push_back(0);
split->lengths_.push_back(data.size());
return;
}
std::vector<HistogramType> histograms;
// Find good entropy codes.
InitialEntropyCodes(data.data(), data.size(),
literals_per_histogram,
max_histograms,
sampling_stride_length,
&histograms);
RefineEntropyCodes(data.data(), data.size(),
sampling_stride_length,
&histograms);
// Find a good path through literals with the good entropy codes.
std::vector<uint8_t> block_ids(data.size());
for (int i = 0; i < 10; ++i) {
FindBlocks(data.data(), data.size(),
block_switch_cost,
histograms,
&block_ids[0]);
BuildBlockHistograms(data.data(), data.size(), &block_ids[0], &histograms);
}
ClusterBlocks<HistogramType>(data.data(), data.size(), &block_ids[0]);
BuildBlockSplit(block_ids, split);
}
void SplitBlock(const std::vector<Command>& cmds,
const uint8_t* data,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split) {
// Create a continuous array of literals.
std::vector<uint8_t> literals;
CopyLiteralsToByteArray(cmds, data, &literals);
// Compute prefix codes for commands.
std::vector<uint16_t> insert_and_copy_codes;
std::vector<uint8_t> distance_prefixes;
CopyCommandsToByteArray(cmds,
&insert_and_copy_codes,
&distance_prefixes);
SplitByteVector<HistogramLiteral>(
literals,
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
kLiteralStrideLength, kLiteralBlockSwitchCost,
literal_split);
SplitByteVector<HistogramCommand>(
insert_and_copy_codes,
kSymbolsPerCommandHistogram, kMaxCommandHistograms,
kCommandStrideLength, kCommandBlockSwitchCost,
insert_and_copy_split);
SplitByteVector<HistogramDistance>(
distance_prefixes,
kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
kCommandStrideLength, kDistanceBlockSwitchCost,
dist_split);
}
void SplitBlockByTotalLength(const std::vector<Command>& all_commands,
int input_size,
int target_length,
std::vector<std::vector<Command> >* blocks) {
int num_blocks = input_size / target_length + 1;
int length_limit = input_size / num_blocks + 1;
int total_length = 0;
std::vector<Command> cur_block;
for (int i = 0; i < all_commands.size(); ++i) {
const Command& cmd = all_commands[i];
int cmd_length = cmd.insert_length_ + cmd.copy_length_;
if (total_length > length_limit) {
blocks->push_back(cur_block);
cur_block.clear();
total_length = 0;
}
cur_block.push_back(cmd);
total_length += cmd_length;
}
blocks->push_back(cur_block);
}
} // namespace brotli
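
As an illustration (not part of the commit), a toy standalone version of the dynamic program described in the comment inside FindBlocks: two made-up entropy codes, a flat block switch cost, the same cap-the-cost-difference bookkeeping, and the same traceback.

#include <cstdio>
#include <vector>

int main() {
  // Two hypothetical entropy codes: code 0 is cheap for symbol 0 and
  // code 1 is cheap for symbol 1. All costs are invented.
  const double insert_cost[2][2] = { {1.0, 4.0}, {4.0, 1.0} };
  const double switch_cost = 5.0;
  const int data[] = {0,0,0,0, 1,1,1,1, 0,0,0,0};
  const int n = sizeof(data) / sizeof(data[0]);
  double cost[2] = {0.0, 0.0};
  std::vector<int> block_id(n);
  std::vector<bool> switch_signal(n * 2, false);
  // Forward pass: cost[k] holds the capped difference between arriving at the
  // current position with code k and the cheapest way of arriving here.
  for (int i = 0; i < n; ++i) {
    double min_cost = 1e99;
    for (int k = 0; k < 2; ++k) {
      cost[k] += insert_cost[k][data[i]];
      if (cost[k] < min_cost) { min_cost = cost[k]; block_id[i] = k; }
    }
    for (int k = 0; k < 2; ++k) {
      cost[k] -= min_cost;
      if (cost[k] >= switch_cost) {
        cost[k] = switch_cost;
        switch_signal[i * 2 + k] = true;  // a traceback through k switches here
      }
    }
  }
  // Trace back from the end, switching entropy codes where the signal is set.
  int cur_id = block_id[n - 1];
  for (int i = n - 2; i >= 0; --i) {
    if (switch_signal[i * 2 + cur_id]) cur_id = block_id[i];
    block_id[i] = cur_id;
  }
  for (int i = 0; i < n; ++i) printf("%d", block_id[i]);  // prints 000011110000
  printf("\n");
  return 0;
}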

77
enc/block_splitter.h Normal file

@@ -0,0 +1,77 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Block split point selection utilities.
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
#define BROTLI_ENC_BLOCK_SPLITTER_H_
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <vector>
#include <utility>
#include "./command.h"
namespace brotli {
struct BlockSplit {
int num_types_;
std::vector<uint8_t> types_;
std::vector<uint8_t> type_codes_;
std::vector<int> lengths_;
};
struct BlockSplitIterator {
explicit BlockSplitIterator(const BlockSplit& split)
: split_(split), idx_(0), type_(0), length_(0) {
if (!split.lengths_.empty()) {
length_ = split.lengths_[0];
}
}
void Next() {
if (length_ == 0) {
++idx_;
type_ = split_.types_[idx_];
length_ = split_.lengths_[idx_];
}
--length_;
}
const BlockSplit& split_;
int idx_;
int type_;
int length_;
};
void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
const uint8_t* data,
std::vector<uint8_t>* literals);
void SplitBlock(const std::vector<Command>& cmds,
const uint8_t* data,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split);
void SplitBlockByTotalLength(const std::vector<Command>& all_commands,
int input_size,
int target_length,
std::vector<std::vector<Command> >* blocks);
} // namespace brotli
#endif // BROTLI_ENC_BLOCK_SPLITTER_H_
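
A usage sketch (not part of the commit, assuming it is compiled from the enc/ directory): advance a BlockSplitIterator once per symbol and read type_ to find the block type that symbol belongs to. The split below is built by hand purely for illustration.

#include <cstdio>
#include "./block_splitter.h"

int main() {
  brotli::BlockSplit split;
  split.num_types_ = 2;
  split.types_.push_back(0);
  split.lengths_.push_back(3);   // first three symbols use block type 0
  split.types_.push_back(1);
  split.lengths_.push_back(2);   // next two symbols use block type 1
  brotli::BlockSplitIterator it(split);
  for (int i = 0; i < 5; ++i) {
    it.Next();
    printf("symbol %d -> block type %d\n", i, it.type_);
  }
  return 0;
}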

288
enc/cluster.h Normal file

@@ -0,0 +1,288 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions for clustering similar histograms together.
#ifndef BROTLI_ENC_CLUSTER_H_
#define BROTLI_ENC_CLUSTER_H_
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <complex>
#include <map>
#include <set>
#include <utility>
#include <vector>
#include "./bit_cost.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./histogram.h"
namespace brotli {
struct HistogramPair {
int idx1;
int idx2;
bool valid;
double cost_combo;
double cost_diff;
};
struct HistogramPairComparator {
bool operator()(const HistogramPair& p1, const HistogramPair& p2) {
if (p1.cost_diff != p2.cost_diff) {
return p1.cost_diff > p2.cost_diff;
}
return abs(p1.idx1 - p1.idx2) > abs(p2.idx1 - p2.idx2);
}
};
// Returns entropy reduction of the context map when we combine two clusters.
inline double ClusterCostDiff(int size_a, int size_b) {
int size_c = size_a + size_b;
return size_a * FastLog2(size_a) + size_b * FastLog2(size_b) -
size_c * FastLog2(size_c);
}
// Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
// it is below a threshold, stores the pair (idx1, idx2) in the *pairs heap.
template<int kSize>
void CompareAndPushToHeap(const Histogram<kSize>* out,
const int* cluster_size,
int idx1, int idx2,
std::vector<HistogramPair>* pairs) {
if (idx1 == idx2) {
return;
}
if (idx2 < idx1) {
int t = idx2;
idx2 = idx1;
idx1 = t;
}
bool store_pair = false;
HistogramPair p;
p.idx1 = idx1;
p.idx2 = idx2;
p.valid = true;
p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
p.cost_diff -= out[idx1].bit_cost_;
p.cost_diff -= out[idx2].bit_cost_;
if (out[idx1].total_count_ == 0) {
p.cost_combo = out[idx2].bit_cost_;
store_pair = true;
} else if (out[idx2].total_count_ == 0) {
p.cost_combo = out[idx1].bit_cost_;
store_pair = true;
} else {
double threshold = pairs->empty() ? 1e99 :
std::max(0.0, (*pairs)[0].cost_diff);
Histogram<kSize> combo = out[idx1];
combo.AddHistogram(out[idx2]);
double cost_combo = PopulationCost(combo);
if (cost_combo < threshold - p.cost_diff) {
p.cost_combo = cost_combo;
store_pair = true;
}
}
if (store_pair) {
p.cost_diff += p.cost_combo;
pairs->push_back(p);
push_heap(pairs->begin(), pairs->end(), HistogramPairComparator());
}
}
template<int kSize>
void HistogramCombine(Histogram<kSize>* out,
int* cluster_size,
int* symbols,
int symbols_size,
int max_clusters) {
double cost_diff_threshold = 0.0;
int min_cluster_size = 1;
std::set<int> all_symbols;
std::vector<int> clusters;
for (int i = 0; i < symbols_size; ++i) {
if (all_symbols.find(symbols[i]) == all_symbols.end()) {
all_symbols.insert(symbols[i]);
clusters.push_back(symbols[i]);
}
}
// We maintain a heap of histogram pairs, ordered by the bit cost reduction.
std::vector<HistogramPair> pairs;
for (int idx1 = 0; idx1 < clusters.size(); ++idx1) {
for (int idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) {
CompareAndPushToHeap(out, cluster_size, clusters[idx1], clusters[idx2],
&pairs);
}
}
while (clusters.size() > min_cluster_size) {
if (pairs[0].cost_diff >= cost_diff_threshold) {
cost_diff_threshold = 1e99;
min_cluster_size = max_clusters;
continue;
}
// Take the best pair from the top of heap.
int best_idx1 = pairs[0].idx1;
int best_idx2 = pairs[0].idx2;
out[best_idx1].AddHistogram(out[best_idx2]);
out[best_idx1].bit_cost_ = pairs[0].cost_combo;
cluster_size[best_idx1] += cluster_size[best_idx2];
for (int i = 0; i < symbols_size; ++i) {
if (symbols[i] == best_idx2) {
symbols[i] = best_idx1;
}
}
for (int i = 0; i + 1 < clusters.size(); ++i) {
if (clusters[i] >= best_idx2) {
clusters[i] = clusters[i + 1];
}
}
clusters.pop_back();
// Invalidate pairs intersecting the just combined best pair.
for (int i = 0; i < pairs.size(); ++i) {
HistogramPair& p = pairs[i];
if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
p.idx1 == best_idx2 || p.idx2 == best_idx2) {
p.valid = false;
}
}
// Pop invalid pairs from the top of the heap.
while (!pairs.empty() && !pairs[0].valid) {
pop_heap(pairs.begin(), pairs.end(), HistogramPairComparator());
pairs.pop_back();
}
// Push new pairs formed with the combined histogram to the heap.
for (int i = 0; i < clusters.size(); ++i) {
CompareAndPushToHeap(out, cluster_size, best_idx1, clusters[i], &pairs);
}
}
}
// -----------------------------------------------------------------------------
// Histogram refinement
// The increase in bit cost when 'histogram' is merged into the cluster
// represented by 'candidate'.
template<int kSize>
double HistogramBitCostDistance(const Histogram<kSize>& histogram,
const Histogram<kSize>& candidate) {
if (histogram.total_count_ == 0) {
return 0.0;
}
Histogram<kSize> tmp = histogram;
tmp.AddHistogram(candidate);
return PopulationCost(tmp) - candidate.bit_cost_;
}
// Find the best 'out' histogram for each of the 'in' histograms.
// Note: we assume that out[]->bit_cost_ is already up-to-date.
template<int kSize>
void HistogramRemap(const Histogram<kSize>* in, int in_size,
Histogram<kSize>* out, int* symbols) {
std::set<int> all_symbols;
for (int i = 0; i < in_size; ++i) {
all_symbols.insert(symbols[i]);
}
for (int i = 0; i < in_size; ++i) {
int best_out = i == 0 ? symbols[0] : symbols[i - 1];
double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
for (std::set<int>::const_iterator k = all_symbols.begin();
k != all_symbols.end(); ++k) {
const double cur_bits = HistogramBitCostDistance(in[i], out[*k]);
if (cur_bits < best_bits) {
best_bits = cur_bits;
best_out = *k;
}
}
symbols[i] = best_out;
}
// Recompute each out based on raw and symbols.
for (std::set<int>::const_iterator k = all_symbols.begin();
k != all_symbols.end(); ++k) {
out[*k].Clear();
}
for (int i = 0; i < in_size; ++i) {
out[symbols[i]].AddHistogram(in[i]);
}
}
// Reorder histograms in *out so that the new symbols in *symbols come in
// increasing order.
template<int kSize>
void HistogramReindex(std::vector<Histogram<kSize> >* out,
std::vector<int>* symbols) {
std::vector<Histogram<kSize> > tmp(*out);
std::map<int, int> new_index;
int next_index = 0;
for (int i = 0; i < symbols->size(); ++i) {
if (new_index.find((*symbols)[i]) == new_index.end()) {
new_index[(*symbols)[i]] = next_index;
(*out)[next_index] = tmp[(*symbols)[i]];
++next_index;
}
}
out->resize(next_index);
for (int i = 0; i < symbols->size(); ++i) {
(*symbols)[i] = new_index[(*symbols)[i]];
}
}
// Clusters similar histograms in 'in' together, the selected histograms are
// placed in 'out', and for each index in 'in', *histogram_symbols will
// indicate which of the 'out' histograms is the best approximation.
template<int kSize>
void ClusterHistograms(const std::vector<Histogram<kSize> >& in,
int num_contexts, int num_blocks,
int max_histograms,
std::vector<Histogram<kSize> >* out,
std::vector<int>* histogram_symbols) {
const int in_size = num_contexts * num_blocks;
std::vector<int> cluster_size(in_size, 1);
out->resize(in_size);
histogram_symbols->resize(in_size);
for (int i = 0; i < in_size; ++i) {
(*out)[i] = in[i];
(*out)[i].bit_cost_ = PopulationCost(in[i]);
(*histogram_symbols)[i] = i;
}
// Collapse similar histograms within a block type.
if (num_contexts > 1) {
for (int i = 0; i < num_blocks; ++i) {
HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[i * num_contexts], num_contexts,
max_histograms);
}
}
// Collapse similar histograms.
HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[0], in_size,
max_histograms);
// Find the optimal map from original histograms to the final ones.
HistogramRemap(&in[0], in_size, &(*out)[0], &(*histogram_symbols)[0]);
// Convert the context map to a canonical form.
HistogramReindex(out, histogram_symbols);
}
} // namespace brotli
#endif // BROTLI_ENC_CLUSTER_H_
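
For intuition (illustration only): ClusterCostDiff measures how the entropy-coded size of the context map changes when two clusters with size_a and size_b map entries are merged, size_a*log2(size_a) + size_b*log2(size_b) - (size_a+size_b)*log2(size_a+size_b), which is never positive. A standalone evaluation:

#include <cmath>
#include <cstdio>

// Same formula as ClusterCostDiff above, with log2 spelled out explicitly.
static double ToyClusterCostDiff(int size_a, int size_b) {
  const int size_c = size_a + size_b;
  return size_a * (log(static_cast<double>(size_a)) / log(2.0)) +
         size_b * (log(static_cast<double>(size_b)) / log(2.0)) -
         size_c * (log(static_cast<double>(size_c)) / log(2.0));
}

int main() {
  // Merging two clusters of 8 entries each saves one bit per entry: -16 bits.
  printf("8 + 8 entries: %.1f bits\n", ToyClusterCostDiff(8, 8));
  printf("15 + 1 entries: %.1f bits\n", ToyClusterCostDiff(15, 1));
  return 0;
}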

45
enc/command.h Normal file

@@ -0,0 +1,45 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This class models a sequence of literals and a backward reference copy.
#ifndef BROTLI_ENC_COMMAND_H_
#define BROTLI_ENC_COMMAND_H_
#include <stdint.h>
namespace brotli {
// Command holds a sequence of literals and a backward reference copy.
class Command {
public:
Command() : insert_length_(0), copy_length_(0),
copy_distance_(0), distance_code_(0),
distance_prefix_(0), command_prefix_(0),
distance_extra_bits_(0), distance_extra_bits_value_(0) {}
uint32_t insert_length_;
uint32_t copy_length_;
uint32_t copy_distance_;
// Values <= 16 are short codes, values > 16 are distances shifted by 16.
uint32_t distance_code_;
uint16_t distance_prefix_;
uint16_t command_prefix_;
int distance_extra_bits_;
uint32_t distance_extra_bits_value_;
};
} // namespace brotli
#endif // BROTLI_ENC_COMMAND_H_
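
A small sketch (illustration only) of the distance_code_ convention noted in the struct: codes 1..16 refer to recently used distances (see ComputeDistanceShortCodes in enc/encode.cc later in this commit), and any other distance d is stored as d + 16.

#include <cstdio>
#include <stdint.h>

static uint32_t PlainDistanceToCode(uint32_t distance) {
  return distance + 16;        // values > 16: the distance shifted by 16
}

static bool IsShortCode(uint32_t distance_code) {
  return distance_code <= 16;  // values <= 16: refer to recent distances
}

int main() {
  const uint32_t code = PlainDistanceToCode(1000);
  printf("distance 1000 -> code %u, short=%d\n", code, IsShortCode(code) ? 1 : 0);
  printf("code 3 is a short code: %d\n", IsShortCode(3) ? 1 : 0);
  return 0;
}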

130
enc/context.h Normal file

@@ -0,0 +1,130 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions to map previous bytes into a context id.
#ifndef BROTLI_ENC_CONTEXT_H_
#define BROTLI_ENC_CONTEXT_H_
#include <stdint.h>
namespace brotli {
static const int kSigned2BitContextLookup[] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
};
static const int kSigned3BitContextLookup[] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
};
static const int kSigned4BitContextLookup[] = {
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 14, 15,
};
enum ContextType {
CONTEXT_NONE = 0,
CONTEXT_FULL = 1,
CONTEXT_MSB7 = 2,
CONTEXT_MSB6 = 3,
CONTEXT_MSB5 = 4,
CONTEXT_MSB4 = 5,
CONTEXT_MSB3 = 6,
CONTEXT_MSB2 = 7,
CONTEXT_MSB1 = 8,
CONTEXT_IS_ZERO = 9,
CONTEXT_SIGNED_2BIT = 10,
CONTEXT_SIGNED_3BIT = 11,
CONTEXT_SIGNED_4BIT = 12,
CONTEXT_SIGNED_MIXED_3BYTE = 13,
};
static const int kContextSize[] = {
1, 256, 128, 64, 32, 16, 8, 4, 2, 2, 4, 8, 16, 64,
};
static inline int NumContexts(int mode) {
return kContextSize[mode];
}
static inline uint8_t Context(uint8_t prev_byte, uint8_t prev_byte2,
uint8_t prev_byte3, int mode) {
switch (mode) {
case CONTEXT_NONE:
return 0;
case CONTEXT_IS_ZERO:
return prev_byte == 0 ? 0 : 1;
case CONTEXT_SIGNED_2BIT:
return kSigned2BitContextLookup[prev_byte];
case CONTEXT_SIGNED_3BIT:
return kSigned3BitContextLookup[prev_byte];
case CONTEXT_SIGNED_4BIT:
return kSigned4BitContextLookup[prev_byte];
case CONTEXT_SIGNED_MIXED_3BYTE:
return ((kSigned3BitContextLookup[prev_byte] << 3) +
(kSigned2BitContextLookup[prev_byte2] << 1) +
(prev_byte3 == 0 ? 0 : 1));
default:
return prev_byte >> (mode - 1);
}
}
} // namespace brotli
#endif // BROTLI_ENC_CONTEXT_H_
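
A usage sketch (illustration only, assuming it is compiled from the enc/ directory): the id returned by Context(), together with the block type, selects which literal histogram a byte is coded with (see literal_context_map in enc/encode.cc).

#include <cstdio>
#include "./context.h"

int main() {
  using namespace brotli;
  printf("MSB6 mode has %d contexts\n", NumContexts(CONTEXT_MSB6));
  // The MSB modes keep only the high bits of the previous byte.
  printf("context after 'e' in MSB6 mode: %d\n",
         Context('e', 0, 0, CONTEXT_MSB6));
  printf("context after a zero byte in IS_ZERO mode: %d\n",
         Context(0, 0, 0, CONTEXT_IS_ZERO));
  // The mixed mode combines lookups of the last three bytes.
  printf("mixed 3-byte context after \"abc\": %d\n",
         Context('c', 'b', 'a', CONTEXT_SIGNED_MIXED_3BYTE));
  return 0;
}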

778
enc/encode.cc Normal file

@@ -0,0 +1,778 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Implementation of Brotli compressor.
#include "./encode.h"
#include <algorithm>
#include <limits>
#include "./backward_references.h"
#include "./bit_cost.h"
#include "./block_splitter.h"
#include "./cluster.h"
#include "./context.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./histogram.h"
#include "./prefix.h"
#include "./write_bits.h"
namespace brotli {
template<int kSize>
double Entropy(const std::vector<Histogram<kSize> >& histograms) {
double retval = 0;
for (int i = 0; i < histograms.size(); ++i) {
retval += histograms[i].EntropyBitCost();
}
return retval;
}
void EncodeSize(size_t len, int* storage_ix, uint8_t* storage) {
std::vector<uint8_t> len_bytes;
while (len > 0) {
len_bytes.push_back(len & 0xff);
len >>= 8;
};
WriteBits(3, len_bytes.size(), storage_ix, storage);
for (int i = 0; i < len_bytes.size(); ++i) {
WriteBits(8, len_bytes[i], storage_ix, storage);
}
}
void EncodeMetaBlockLength(int input_size_bits,
size_t meta_block_size,
bool is_last_meta_block,
int* storage_ix, uint8_t* storage) {
WriteBits(1, is_last_meta_block, storage_ix, storage);
if (is_last_meta_block) return;
while (input_size_bits > 0) {
WriteBits(8, meta_block_size & 0xff, storage_ix, storage);
meta_block_size >>= 8;
input_size_bits -= 8;
}
if (input_size_bits > 0) {
WriteBits(input_size_bits, meta_block_size, storage_ix, storage);
}
}
template<int kSize>
void EntropyEncode(int val, const EntropyCode<kSize>& code,
int* storage_ix, uint8_t* storage) {
if (code.count_ <= 1) {
return;
};
WriteBits(code.depth_[val], code.bits_[val], storage_ix, storage);
}
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
const uint8_t* code_length_bitdepth,
int* storage_ix, uint8_t* storage) {
static const uint8_t kStorageOrder[kCodeLengthCodes] = {
17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
// Throw away trailing zeros:
int codes_to_store = kCodeLengthCodes;
for (; codes_to_store > 4; --codes_to_store) {
if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
break;
}
}
WriteBits(4, codes_to_store - 4, storage_ix, storage);
for (int i = 0; i < codes_to_store; ++i) {
WriteBits(3, code_length_bitdepth[kStorageOrder[i]], storage_ix, storage);
}
}
void StoreHuffmanTreeToBitMask(
const uint8_t* huffman_tree,
const uint8_t* huffman_tree_extra_bits,
const int huffman_tree_size,
const EntropyCode<kCodeLengthCodes>& entropy,
int* storage_ix, uint8_t* storage) {
for (int i = 0; i < huffman_tree_size; ++i) {
const int ix = huffman_tree[i];
const int extra_bits = huffman_tree_extra_bits[i];
EntropyEncode(ix, entropy, storage_ix, storage);
switch (ix) {
case 16:
WriteBits(2, extra_bits, storage_ix, storage);
break;
case 17:
WriteBits(3, extra_bits, storage_ix, storage);
break;
case 18:
WriteBits(7, extra_bits, storage_ix, storage);
break;
}
}
}
template<int kSize>
void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
int* storage_ix, uint8_t* storage) {
const int kMaxBits = 8;
const int kMaxSymbol = 1 << kMaxBits;
if (code.count_ == 0) { // emit minimal tree for empty cases
// bits: small tree marker: 1, count-1: 0, large 8-bit code: 0, code: 0
WriteBits(4, 0x01, storage_ix, storage);
return;
}
if (code.count_ <= 2 &&
code.symbols_[0] < kMaxSymbol &&
code.symbols_[1] < kMaxSymbol) {
// Small tree marker to encode 1 or 2 symbols.
WriteBits(1, 1, storage_ix, storage);
WriteBits(1, code.count_ - 1, storage_ix, storage);
if (code.symbols_[0] <= 1) {
// Code bit for small (1 bit) symbol value.
WriteBits(1, 0, storage_ix, storage);
WriteBits(1, code.symbols_[0], storage_ix, storage);
} else {
WriteBits(1, 1, storage_ix, storage);
WriteBits(8, code.symbols_[0], storage_ix, storage);
}
if (code.count_ == 2) {
WriteBits(8, code.symbols_[1], storage_ix, storage);
}
return;
}
WriteBits(1, 0, storage_ix, storage);
uint8_t huffman_tree[kSize];
uint8_t huffman_tree_extra_bits[kSize];
int huffman_tree_size = 0;
WriteHuffmanTree(&code.depth_[0],
alphabet_size,
&huffman_tree[0],
&huffman_tree_extra_bits[0],
&huffman_tree_size);
Histogram<kCodeLengthCodes> huffman_tree_histogram;
memset(huffman_tree_histogram.data_, 0, sizeof(huffman_tree_histogram.data_));
for (int i = 0; i < huffman_tree_size; ++i) {
huffman_tree_histogram.Add(huffman_tree[i]);
}
EntropyCode<kCodeLengthCodes> huffman_tree_entropy;
BuildEntropyCode(huffman_tree_histogram, 7, kCodeLengthCodes,
&huffman_tree_entropy);
Histogram<kCodeLengthCodes> trimmed_histogram = huffman_tree_histogram;
uint8_t* last_code = &huffman_tree[huffman_tree_size - 1];
while (*last_code == 0 || *last_code >= 17) {
trimmed_histogram.Remove(*last_code--);
}
int trimmed_size = trimmed_histogram.total_count_;
bool write_length = false;
if (trimmed_size > 1 && trimmed_size < huffman_tree_size) {
EntropyCode<kCodeLengthCodes> trimmed_entropy;
BuildEntropyCode(trimmed_histogram, 7, kCodeLengthCodes, &trimmed_entropy);
int huffman_bit_cost = HuffmanTreeBitCost(huffman_tree_histogram,
huffman_tree_entropy);
int trimmed_bit_cost = HuffmanTreeBitCost(trimmed_histogram,
trimmed_entropy);
const int nbits = Log2Ceiling(trimmed_size - 1);
const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
if (trimmed_bit_cost + 3 + 2 * nbitpairs < huffman_bit_cost) {
write_length = true;
huffman_tree_size = trimmed_size;
huffman_tree_entropy = trimmed_entropy;
}
}
StoreHuffmanTreeOfHuffmanTreeToBitMask(
&huffman_tree_entropy.depth_[0], storage_ix, storage);
WriteBits(1, write_length, storage_ix, storage);
if (write_length) {
const int nbits = Log2Ceiling(huffman_tree_size - 1);
const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
WriteBits(3, nbitpairs - 1, storage_ix, storage);
WriteBits(nbitpairs * 2, huffman_tree_size - 2, storage_ix, storage);
}
StoreHuffmanTreeToBitMask(&huffman_tree[0], &huffman_tree_extra_bits[0],
huffman_tree_size, huffman_tree_entropy,
storage_ix, storage);
}
template<int kSize>
void StoreHuffmanCodes(const std::vector<EntropyCode<kSize> >& codes,
int alphabet_size,
int* storage_ix, uint8_t* storage) {
for (int i = 0; i < codes.size(); ++i) {
StoreHuffmanCode(codes[i], alphabet_size, storage_ix, storage);
}
}
void EncodeCommand(const Command& cmd,
const EntropyCodeCommand& entropy,
int* storage_ix, uint8_t* storage) {
int code = cmd.command_prefix_;
EntropyEncode(code, entropy, storage_ix, storage);
if (code >= 128) {
code -= 128;
}
int insert_extra_bits = InsertLengthExtraBits(code);
uint64_t insert_extra_bits_val =
cmd.insert_length_ - InsertLengthOffset(code);
int copy_extra_bits = CopyLengthExtraBits(code);
uint64_t copy_extra_bits_val = cmd.copy_length_ - CopyLengthOffset(code);
if (insert_extra_bits > 0) {
WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage);
}
if (copy_extra_bits > 0) {
WriteBits(copy_extra_bits, copy_extra_bits_val, storage_ix, storage);
}
}
void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
int* storage_ix, uint8_t* storage) {
int code = cmd.distance_prefix_;
int extra_bits = cmd.distance_extra_bits_;
uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
EntropyEncode(code, entropy, storage_ix, storage);
if (extra_bits > 0) {
WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
}
}
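// Assigns a distance code to each command: codes 1..16 refer to small
// variations of the last four distances kept in dist_ringbuffer (the
// combination is picked via kIndexOffset/kValueOffset below), and any other
// distance d is coded as d + 16.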
void ComputeDistanceShortCodes(std::vector<Command>* cmds) {
static const int kIndexOffset[16] = {
3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
};
static const int kValueOffset[16] = {
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
};
int dist_ringbuffer[4] = { 4, 11, 15, 16 };
int ringbuffer_idx = 0;
for (int i = 0; i < cmds->size(); ++i) {
int cur_dist = (*cmds)[i].copy_distance_;
if (cur_dist == 0) break;
int dist_code = cur_dist + 16;
for (int k = 0; k < 16; ++k) {
// Only accept more popular choices.
if (cur_dist < 11 && ((k >= 2 && k < 4) || k >= 6)) {
// Typically unpopular ranges, don't replace a short distance
// with them.
continue;
}
int comp = (dist_ringbuffer[(ringbuffer_idx + kIndexOffset[k]) & 3] +
kValueOffset[k]);
if (cur_dist == comp) {
dist_code = k + 1;
break;
}
}
if (dist_code > 1) {
dist_ringbuffer[ringbuffer_idx & 3] = cur_dist;
++ringbuffer_idx;
}
(*cmds)[i].distance_code_ = dist_code;
}
}
void ComputeCommandPrefixes(std::vector<Command>* cmds,
int num_direct_distance_codes,
int distance_postfix_bits) {
for (int i = 0; i < cmds->size(); ++i) {
Command* cmd = &(*cmds)[i];
cmd->command_prefix_ = CommandPrefix(cmd->insert_length_,
cmd->copy_length_);
if (cmd->copy_length_ > 0) {
PrefixEncodeCopyDistance(cmd->distance_code_,
num_direct_distance_codes,
distance_postfix_bits,
&cmd->distance_prefix_,
&cmd->distance_extra_bits_,
&cmd->distance_extra_bits_value_);
}
if (cmd->command_prefix_ < 128 && cmd->distance_prefix_ == 0) {
cmd->distance_prefix_ = 0xffff;
} else {
cmd->command_prefix_ += 128;
}
}
}
int IndexOf(const std::vector<int>& v, int value) {
for (int i = 0; i < v.size(); ++i) {
if (v[i] == value) return i;
}
return -1;
}
void MoveToFront(std::vector<int>* v, int index) {
int value = (*v)[index];
for (int i = index; i > 0; --i) {
(*v)[i] = (*v)[i - 1];
}
(*v)[0] = value;
}
std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
if (v.empty()) return v;
std::vector<int> mtf(*max_element(v.begin(), v.end()) + 1);
for (int i = 0; i < mtf.size(); ++i) mtf[i] = i;
std::vector<int> result(v.size());
for (int i = 0; i < v.size(); ++i) {
int index = IndexOf(mtf, v[i]);
result[i] = index;
MoveToFront(&mtf, index);
}
return result;
}
// Finds runs of zeros in v_in and replaces them with a prefix code of the run
// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
// shifted by *max_run_length_prefix. Will not create prefix codes bigger than the
// initial value of *max_run_length_prefix. The prefix code of run length L is
// simply Log2Floor(L) and the number of extra bits is the same as the prefix
// code.
void RunLengthCodeZeros(const std::vector<int>& v_in,
int* max_run_length_prefix,
std::vector<int>* v_out,
std::vector<int>* extra_bits) {
int max_reps = 0;
for (int i = 0; i < v_in.size();) {
for (; i < v_in.size() && v_in[i] != 0; ++i) ;
int reps = 0;
for (; i < v_in.size() && v_in[i] == 0; ++i) {
++reps;
}
max_reps = std::max(reps, max_reps);
}
int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
*max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
for (int i = 0; i < v_in.size();) {
if (v_in[i] != 0) {
v_out->push_back(v_in[i] + *max_run_length_prefix);
extra_bits->push_back(0);
++i;
} else {
int reps = 1;
for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
++reps;
}
i += reps;
while (reps) {
if (reps < (2 << *max_run_length_prefix)) {
int run_length_prefix = Log2Floor(reps);
v_out->push_back(run_length_prefix);
extra_bits->push_back(reps - (1 << run_length_prefix));
break;
} else {
v_out->push_back(*max_run_length_prefix);
extra_bits->push_back((1 << *max_run_length_prefix) - 1);
reps -= (2 << *max_run_length_prefix) - 1;
}
}
}
}
}
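// Worked example (illustration only): with *max_run_length_prefix passed in
// as 6 and v_in = {5, 0,0,0,0,0,0,0, 9} (one run of 7 zeros), the longest run
// caps the prefix at Log2Floor(7) = 2, so on return
//   *max_run_length_prefix = 2
//   *v_out      = {5 + 2, 2, 9 + 2}  // non-zeros shifted by the final prefix
//   *extra_bits = {0, 3, 0}          // 7 zeros -> prefix 2 plus 7 - 4 = 3 extra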
// Returns a maximum zero-run-length-prefix value such that run-length coding
// zeros in v with this maximum prefix value and then encoding the resulting
// histogram and entropy-coding v produces the least amount of bits.
int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
int min_cost = std::numeric_limits<int>::max();
int best_max_prefix = 0;
for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) {
std::vector<int> rle_symbols;
std::vector<int> extra_bits;
int max_run_length_prefix = max_prefix;
RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits);
if (max_run_length_prefix < max_prefix) break;
HistogramLiteral histogram;
for (int i = 0; i < rle_symbols.size(); ++i) {
histogram.Add(rle_symbols[i]);
}
int bit_cost = PopulationCost(histogram);
if (max_prefix > 0) {
bit_cost += 4;
}
for (int i = 1; i <= max_prefix; ++i) {
bit_cost += histogram.data_[i] * i; // extra bits
}
if (bit_cost < min_cost) {
min_cost = bit_cost;
best_max_prefix = max_prefix;
}
}
return best_max_prefix;
}
void EncodeContextMap(const std::vector<int>& context_map,
int context_mode,
int context_mode_bits,
int num_clusters,
int* storage_ix, uint8_t* storage) {
if (context_mode == 0) {
WriteBits(1, 0, storage_ix, storage); // no context
return;
}
WriteBits(1, 1, storage_ix, storage); // have context
if (context_mode_bits > 0) {
WriteBits(context_mode_bits, context_mode - 1, storage_ix, storage);
}
WriteBits(8, num_clusters - 1, storage_ix, storage);
if (num_clusters == 1 || num_clusters == context_map.size()) {
return;
}
std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
std::vector<int> rle_symbols;
std::vector<int> extra_bits;
int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
&rle_symbols, &extra_bits);
HistogramLiteral symbol_histogram;
for (int i = 0; i < rle_symbols.size(); ++i) {
symbol_histogram.Add(rle_symbols[i]);
}
EntropyCodeLiteral symbol_code;
BuildEntropyCode(symbol_histogram, 15, num_clusters + max_run_length_prefix,
&symbol_code);
bool use_rle = max_run_length_prefix > 0;
WriteBits(1, use_rle, storage_ix, storage);
if (use_rle) {
WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
}
StoreHuffmanCode(symbol_code, num_clusters + max_run_length_prefix,
storage_ix, storage);
for (int i = 0; i < rle_symbols.size(); ++i) {
EntropyEncode(rle_symbols[i], symbol_code, storage_ix, storage);
if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
}
}
WriteBits(1, 1, storage_ix, storage); // use move-to-front
}
template<int kSize>
void BuildEntropyCodes(const std::vector<Histogram<kSize> >& histograms,
int alphabet_size,
std::vector<EntropyCode<kSize> >* entropy_codes) {
entropy_codes->resize(histograms.size());
for (int i = 0; i < histograms.size(); ++i) {
BuildEntropyCode(histograms[i], 15, alphabet_size, &(*entropy_codes)[i]);
}
}
struct BlockSplitCode {
EntropyCodeLiteral block_type_code;
EntropyCodeBlockLength block_len_code;
};
void EncodeBlockLength(const EntropyCodeBlockLength& entropy,
int length,
int* storage_ix, uint8_t* storage) {
int len_code = BlockLengthPrefix(length);
int extra_bits = BlockLengthExtraBits(len_code);
int extra_bits_value = length - BlockLengthOffset(len_code);
EntropyEncode(len_code, entropy, storage_ix, storage);
if (extra_bits > 0) {
WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
}
}
void ComputeBlockTypeShortCodes(BlockSplit* split) {
if (split->num_types_ <= 1) {
split->num_types_ = 1;
return;
}
int ringbuffer[2] = { 0, 1 };
size_t index = 0;
for (int i = 0; i < split->types_.size(); ++i) {
int type = split->types_[i];
int type_code;
if (type == ringbuffer[index & 1]) {
type_code = 0;
} else if (type == ringbuffer[(index - 1) & 1] + 1) {
type_code = 1;
} else {
type_code = type + 2;
}
ringbuffer[index & 1] = type;
++index;
split->type_codes_.push_back(type_code);
}
}
void BuildAndEncodeBlockSplitCode(const BlockSplit& split,
BlockSplitCode* code,
int* storage_ix, uint8_t* storage) {
if (split.num_types_ <= 1) {
WriteBits(1, 0, storage_ix, storage);
return;
}
WriteBits(1, 1, storage_ix, storage);
HistogramLiteral type_histo;
for (int i = 0; i < split.type_codes_.size(); ++i) {
type_histo.Add(split.type_codes_[i]);
}
BuildEntropyCode(type_histo, 15, split.num_types_ + 2,
&code->block_type_code);
HistogramBlockLength length_histo;
for (int i = 0; i < split.lengths_.size(); ++i) {
length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
}
BuildEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
&code->block_len_code);
WriteBits(8, split.num_types_ - 1, storage_ix, storage);
StoreHuffmanCode(code->block_type_code, split.num_types_ + 2,
storage_ix, storage);
StoreHuffmanCode(code->block_len_code, kNumBlockLenPrefixes,
storage_ix, storage);
EncodeBlockLength(code->block_len_code, split.lengths_[0],
storage_ix, storage);
}
void MoveAndEncode(const BlockSplitCode& code,
BlockSplitIterator* it,
int* storage_ix, uint8_t* storage) {
if (it->length_ == 0) {
++it->idx_;
it->type_ = it->split_.types_[it->idx_];
it->length_ = it->split_.lengths_[it->idx_];
uint8_t type_code = it->split_.type_codes_[it->idx_];
EntropyEncode(type_code, code.block_type_code, storage_ix, storage);
EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
}
--it->length_;
}
struct EncodingParams {
int num_direct_distance_codes;
int distance_postfix_bits;
int literal_context_mode;
int distance_context_mode;
};
struct MetaBlock {
std::vector<Command> cmds;
EncodingParams params;
BlockSplit literal_split;
BlockSplit command_split;
BlockSplit distance_split;
std::vector<int> literal_context_map;
std::vector<int> distance_context_map;
std::vector<HistogramLiteral> literal_histograms;
std::vector<HistogramCommand> command_histograms;
std::vector<HistogramDistance> distance_histograms;
};
void BuildMetaBlock(const EncodingParams& params,
const std::vector<Command>& cmds,
const uint8_t* input_buffer,
size_t pos,
MetaBlock* mb) {
mb->cmds = cmds;
mb->params = params;
ComputeCommandPrefixes(&mb->cmds,
mb->params.num_direct_distance_codes,
mb->params.distance_postfix_bits);
SplitBlock(mb->cmds,
input_buffer + pos,
&mb->literal_split,
&mb->command_split,
&mb->distance_split);
ComputeBlockTypeShortCodes(&mb->literal_split);
ComputeBlockTypeShortCodes(&mb->command_split);
ComputeBlockTypeShortCodes(&mb->distance_split);
int num_literal_contexts_per_block_type =
NumContexts(mb->params.literal_context_mode);
int num_literal_contexts =
mb->literal_split.num_types_ *
num_literal_contexts_per_block_type;
int num_distance_contexts_per_block_type =
(mb->params.distance_context_mode > 0 ? 4 : 1);
int num_distance_contexts =
mb->distance_split.num_types_ *
num_distance_contexts_per_block_type;
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
mb->command_histograms.resize(mb->command_split.num_types_);
std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
BuildHistograms(mb->cmds,
mb->literal_split,
mb->command_split,
mb->distance_split,
input_buffer,
pos,
mb->params.literal_context_mode,
mb->params.distance_context_mode,
&literal_histograms,
&mb->command_histograms,
&distance_histograms);
// Histogram ids need to fit in one byte and there are 16 ids reserved for
// run length codes, which leaves a maximum number of 240 histograms.
static const int kMaxNumberOfHistograms = 240;
mb->literal_histograms = literal_histograms;
if (mb->params.literal_context_mode > 0) {
ClusterHistograms(literal_histograms,
num_literal_contexts_per_block_type,
mb->literal_split.num_types_,
kMaxNumberOfHistograms,
&mb->literal_histograms,
&mb->literal_context_map);
}
mb->distance_histograms = distance_histograms;
if (mb->params.distance_context_mode > 0) {
ClusterHistograms(distance_histograms,
num_distance_contexts_per_block_type,
mb->distance_split.num_types_,
kMaxNumberOfHistograms,
&mb->distance_histograms,
&mb->distance_context_map);
}
}
size_t MetaBlockLength(const std::vector<Command>& cmds) {
size_t length = 0;
for (int i = 0; i < cmds.size(); ++i) {
const Command& cmd = cmds[i];
length += cmd.insert_length_ + cmd.copy_length_;
}
return length;
}
void StoreMetaBlock(const MetaBlock& mb,
const uint8_t* input_buffer,
int input_size_bits,
bool is_last,
size_t* pos,
int* storage_ix, uint8_t* storage) {
size_t length = MetaBlockLength(mb.cmds);
const size_t end_pos = *pos + length;
EncodeMetaBlockLength(input_size_bits, length - 1, is_last,
storage_ix, storage);
BlockSplitCode literal_split_code;
BlockSplitCode command_split_code;
BlockSplitCode distance_split_code;
BuildAndEncodeBlockSplitCode(mb.literal_split, &literal_split_code,
storage_ix, storage);
BuildAndEncodeBlockSplitCode(mb.command_split, &command_split_code,
storage_ix, storage);
BuildAndEncodeBlockSplitCode(mb.distance_split, &distance_split_code,
storage_ix, storage);
WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
WriteBits(4,
mb.params.num_direct_distance_codes >>
mb.params.distance_postfix_bits, storage_ix, storage);
int num_distance_codes =
kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
(48 << mb.params.distance_postfix_bits);
EncodeContextMap(mb.literal_context_map, mb.params.literal_context_mode, 4,
mb.literal_histograms.size(), storage_ix, storage);
EncodeContextMap(mb.distance_context_map, mb.params.distance_context_mode, 0,
mb.distance_histograms.size(), storage_ix, storage);
std::vector<EntropyCodeLiteral> literal_codes;
std::vector<EntropyCodeCommand> command_codes;
std::vector<EntropyCodeDistance> distance_codes;
BuildEntropyCodes(mb.literal_histograms, 256, &literal_codes);
BuildEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
&command_codes);
BuildEntropyCodes(mb.distance_histograms, num_distance_codes,
&distance_codes);
StoreHuffmanCodes(literal_codes, 256, storage_ix, storage);
StoreHuffmanCodes(command_codes, kNumCommandPrefixes, storage_ix, storage);
StoreHuffmanCodes(distance_codes, num_distance_codes, storage_ix, storage);
BlockSplitIterator literal_it(mb.literal_split);
BlockSplitIterator command_it(mb.command_split);
BlockSplitIterator distance_it(mb.distance_split);
for (int i = 0; i < mb.cmds.size(); ++i) {
const Command& cmd = mb.cmds[i];
MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
for (int j = 0; j < cmd.insert_length_; ++j) {
MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
int histogram_idx = literal_it.type_;
if (mb.params.literal_context_mode > 0) {
uint8_t prev_byte = *pos > 0 ? input_buffer[*pos - 1] : 0;
uint8_t prev_byte2 = *pos > 1 ? input_buffer[*pos - 2] : 0;
uint8_t prev_byte3 = *pos > 2 ? input_buffer[*pos - 3] : 0;
int context = (literal_it.type_ *
NumContexts(mb.params.literal_context_mode) +
Context(prev_byte, prev_byte2, prev_byte3,
mb.params.literal_context_mode));
histogram_idx = mb.literal_context_map[context];
}
EntropyEncode(input_buffer[(*pos)++],
literal_codes[histogram_idx], storage_ix, storage);
}
if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
int histogram_index = distance_it.type_;
if (mb.params.distance_context_mode > 0) {
int context = distance_it.type_ << 2;
context += (cmd.copy_length_ > 4) ? 3 : cmd.copy_length_ - 2;
histogram_index = mb.distance_context_map[context];
}
EncodeCopyDistance(cmd, distance_codes[histogram_index],
storage_ix, storage);
}
*pos += cmd.copy_length_;
}
}
int BrotliCompressBuffer(size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer) {
int storage_ix = 0;
uint8_t* storage = encoded_buffer;
WriteBitsPrepareStorage(storage_ix, storage);
EncodeSize(input_size, &storage_ix, storage);
if (input_size == 0) {
*encoded_size = (storage_ix + 7) >> 3;
return 1;
}
int input_size_bits = Log2Ceiling(input_size);
std::vector<Command> all_commands;
CreateBackwardReferences(input_buffer, input_size, &all_commands);
ComputeDistanceShortCodes(&all_commands);
std::vector<std::vector<Command> > meta_block_commands;
SplitBlockByTotalLength(all_commands, input_size, 2 << 20,
&meta_block_commands);
size_t pos = 0;
for (int block_idx = 0; block_idx < meta_block_commands.size(); ++block_idx) {
const std::vector<Command>& commands = meta_block_commands[block_idx];
bool is_last_meta_block = (block_idx + 1 == meta_block_commands.size());
EncodingParams params;
params.num_direct_distance_codes = 12;
params.distance_postfix_bits = 1;
params.literal_context_mode = CONTEXT_SIGNED_MIXED_3BYTE;
params.distance_context_mode = 1;
MetaBlock mb;
BuildMetaBlock(params, commands, input_buffer, pos, &mb);
StoreMetaBlock(mb, input_buffer, input_size_bits, is_last_meta_block,
&pos, &storage_ix, storage);
}
*encoded_size = (storage_ix + 7) >> 3;
return 1;
}
} // namespace brotli

37
enc/encode.h Normal file
View File

@ -0,0 +1,37 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// API for Brotli compression
#ifndef BROTLI_ENC_ENCODE_H_
#define BROTLI_ENC_ENCODE_H_
#include <stddef.h>
#include <stdint.h>
#include <string>
namespace brotli {
// Compresses the data in input_buffer into encoded_buffer, and sets
// *encoded_size to the compressed length.
// Returns 0 if there was an error and 1 otherwise.
int BrotliCompressBuffer(size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer);
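//
// A hypothetical usage sketch (not part of this API's contract): the caller
// owns the output buffer, and the generous sizing below is only an assumed
// margin for the header plus poorly compressible data, not a documented
// worst-case bound.
//
//   std::string input = LoadInput();  // hypothetical helper
//   std::vector<uint8_t> encoded(input.size() * 2 + 1024);
//   size_t encoded_size = 0;
//   int ok = brotli::BrotliCompressBuffer(
//       input.size(), reinterpret_cast<const uint8_t*>(input.data()),
//       &encoded_size, &encoded[0]);
//   // On success (ok == 1), encoded[0 .. encoded_size) holds the stream.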
} // namespace brotli
#endif // BROTLI_ENC_ENCODE_H_

397
enc/entropy_encode.cc Normal file
View File

@ -0,0 +1,397 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Entropy encoding (Huffman) utilities.
#include "./entropy_encode.h"
#include <stdint.h>
#include <stdlib.h>
#include <algorithm>
#include <limits>
#include <vector>
#include "./histogram.h"
namespace brotli {
namespace {
struct HuffmanTree {
HuffmanTree();
HuffmanTree(int count, int16_t left, int16_t right)
: total_count_(count),
index_left_(left),
index_right_or_value_(right) {
}
int total_count_;
int16_t index_left_;
int16_t index_right_or_value_;
};
HuffmanTree::HuffmanTree() {}
// Sort the root nodes, least popular first.
bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) {
return v0.total_count_ < v1.total_count_;
}
void SetDepth(const HuffmanTree &p,
HuffmanTree *pool,
uint8_t *depth,
int level) {
if (p.index_left_ >= 0) {
++level;
SetDepth(pool[p.index_left_], pool, depth, level);
SetDepth(pool[p.index_right_or_value_], pool, depth, level);
} else {
depth[p.index_right_or_value_] = level;
}
}
} // namespace
// This function will create a Huffman tree.
//
// The catch here is that the tree cannot be arbitrarily deep.
// Brotli specifies a maximum depth of 15 bits for "code trees"
// and 7 bits for "code length code trees."
//
// count_limit is the value that is faked as the minimum population count,
// and this minimum is raised until the resulting tree meets the maximum
// depth requirement.
//
// This algorithm does not perform particularly well for very long data
// blocks, especially when population counts exceed 2**tree_limit, but we
// are not planning to use it with extremely long blocks.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const int *data,
const int length,
const int tree_limit,
uint8_t *depth) {
// For block sizes below 64 kB, we never need to do a second iteration
// of this loop. Probably all of our block sizes will be smaller than
// that, so this loop is mostly of academic interest. If we actually
// would need this, we would be better off with the Katajainen algorithm.
for (int count_limit = 1; ; count_limit *= 2) {
std::vector<HuffmanTree> tree;
tree.reserve(2 * length + 1);
for (int i = 0; i < length; ++i) {
if (data[i]) {
const int count = std::max(data[i], count_limit);
tree.push_back(HuffmanTree(count, -1, i));
}
}
const int n = tree.size();
if (n == 1) {
depth[tree[0].index_right_or_value_] = 1; // Only one element.
break;
}
std::sort(tree.begin(), tree.end(), SortHuffmanTree);
// The nodes are:
// [0, n): the sorted leaf nodes that we start with.
// [n]: we add a sentinel here.
// [n + 1, 2n): new parent nodes are added here, starting from
// (n+1). These are naturally in ascending order.
// [2n]: we add a sentinel at the end as well.
// There will be (2n+1) elements at the end.
const HuffmanTree sentinel(std::numeric_limits<int>::max(), -1, -1);
tree.push_back(sentinel);
tree.push_back(sentinel);
int i = 0; // Points to the next leaf node.
int j = n + 1; // Points to the next non-leaf node.
for (int k = n - 1; k > 0; --k) {
int left, right;
if (tree[i].total_count_ <= tree[j].total_count_) {
left = i;
++i;
} else {
left = j;
++j;
}
if (tree[i].total_count_ <= tree[j].total_count_) {
right = i;
++i;
} else {
right = j;
++j;
}
// The sentinel node becomes the parent node.
int j_end = tree.size() - 1;
tree[j_end].total_count_ =
tree[left].total_count_ + tree[right].total_count_;
tree[j_end].index_left_ = left;
tree[j_end].index_right_or_value_ = right;
// Add back the last sentinel node.
tree.push_back(sentinel);
}
SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
// We need to pack the Huffman tree in tree_limit bits.
// If this was not successful, add fake entities to the lowest values
// and retry.
if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
break;
}
}
}
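// Illustrative sketch only (not used by the encoder): builds a depth-limited
// code for a small, made-up histogram and converts the depths into canonical
// code words with ConvertBitDepthsToSymbols() from the header. With an
// unconstrained tree these counts would need a 5-bit code, so the 4-bit
// limit exercises the count_limit doubling above.
void ExampleDepthLimitedCode() {
  int counts[6] = { 50, 20, 10, 5, 1, 1 };      // hypothetical population counts
  uint8_t depth[6] = { 0 };
  uint16_t bits[6] = { 0 };
  CreateHuffmanTree(counts, 6, 4, depth);       // no code longer than 4 bits
  ConvertBitDepthsToSymbols(depth, 6, bits);    // canonical, bit-reversed codes
}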
void WriteHuffmanTreeRepetitions(
const int previous_value,
const int value,
int repetitions,
uint8_t* tree,
uint8_t* extra_bits,
int* tree_size) {
if (previous_value != value) {
tree[*tree_size] = value;
extra_bits[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
while (repetitions >= 1) {
if (repetitions < 3) {
for (int i = 0; i < repetitions; ++i) {
tree[*tree_size] = value;
extra_bits[*tree_size] = 0;
++(*tree_size);
}
return;
} else if (repetitions < 7) {
// 3 to 6 left.
tree[*tree_size] = 16;
extra_bits[*tree_size] = repetitions - 3;
++(*tree_size);
return;
} else {
tree[*tree_size] = 16;
extra_bits[*tree_size] = 3;
++(*tree_size);
repetitions -= 6;
}
}
}
void WriteHuffmanTreeRepetitionsZeros(
int repetitions,
uint8_t* tree,
uint8_t* extra_bits,
int* tree_size) {
while (repetitions >= 1) {
if (repetitions < 3) {
for (int i = 0; i < repetitions; ++i) {
tree[*tree_size] = 0;
extra_bits[*tree_size] = 0;
++(*tree_size);
}
return;
} else if (repetitions < 11) {
tree[*tree_size] = 17;
extra_bits[*tree_size] = repetitions - 3;
++(*tree_size);
return;
} else if (repetitions < 139) {
tree[*tree_size] = 18;
extra_bits[*tree_size] = repetitions - 11;
++(*tree_size);
return;
} else {
tree[*tree_size] = 18;
extra_bits[*tree_size] = 0x7f; // 138 repeated 0s
++(*tree_size);
repetitions -= 138;
}
}
}
// Heuristics for selecting the stride ranges to collapse.
int ValuesShouldBeCollapsedToStrideAverage(int a, int b) {
return abs(a - b) < 4;
}
int OptimizeHuffmanCountsForRle(int length, int* counts) {
int stride;
int limit;
int sum;
uint8_t* good_for_rle;
// Let's make the Huffman code more compatible with rle encoding.
int i;
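// 1) Drop trailing zeros by shrinking the effective length.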
for (; length >= 0; --length) {
if (length == 0) {
return 1; // All zeros.
}
if (counts[length - 1] != 0) {
// Now counts[0..length - 1] does not have trailing zeros.
break;
}
}
// 2) Let's mark all population counts that already can be encoded
// with an rle code.
good_for_rle = (uint8_t*)calloc(length, 1);
if (good_for_rle == NULL) {
return 0;
}
{
// Let's not spoil any of the existing good rle codes.
// Mark any sequence of 0's of length at least 5 as good_for_rle.
// Mark any sequence of non-0's of length at least 7 as good_for_rle.
int symbol = counts[0];
int stride = 0;
for (i = 0; i < length + 1; ++i) {
if (i == length || counts[i] != symbol) {
if ((symbol == 0 && stride >= 5) ||
(symbol != 0 && stride >= 7)) {
int k;
for (k = 0; k < stride; ++k) {
good_for_rle[i - k - 1] = 1;
}
}
stride = 1;
if (i != length) {
symbol = counts[i];
}
} else {
++stride;
}
}
}
// 3) Let's replace those population counts that lead to more rle codes.
stride = 0;
limit = counts[0];
sum = 0;
for (i = 0; i < length + 1; ++i) {
if (i == length || good_for_rle[i] ||
(i != 0 && good_for_rle[i - 1]) ||
!ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) {
if (stride >= 4 || (stride >= 3 && sum == 0)) {
int k;
// The stride must end, collapse what we have, if we have enough (4).
int count = (sum + stride / 2) / stride;
if (count < 1) {
count = 1;
}
if (sum == 0) {
// Don't make an all zeros stride to be upgraded to ones.
count = 0;
}
for (k = 0; k < stride; ++k) {
// We don't want to change the value at counts[i],
// which already belongs to the next stride. Thus the - 1.
counts[i - k - 1] = count;
}
}
stride = 0;
sum = 0;
if (i < length - 3) {
// All interesting strides have a count of at least 4,
// at least when non-zeros.
limit = (counts[i] + counts[i + 1] +
counts[i + 2] + counts[i + 3] + 2) / 4;
} else if (i < length) {
limit = counts[i];
} else {
limit = 0;
}
}
++stride;
if (i != length) {
sum += counts[i];
if (stride >= 4) {
limit = (sum + stride / 2) / stride;
}
}
}
free(good_for_rle);
return 1;
}
void WriteHuffmanTree(const uint8_t* depth, const int length,
uint8_t* tree,
uint8_t* extra_bits_data,
int* huffman_tree_size) {
int previous_value = 0;
for (uint32_t i = 0; i < length;) {
const int value = depth[i];
int reps = 1;
for (uint32_t k = i + 1; k < length && depth[k] == value; ++k) {
++reps;
}
if (value == 0) {
WriteHuffmanTreeRepetitionsZeros(reps, tree, extra_bits_data,
huffman_tree_size);
} else {
WriteHuffmanTreeRepetitions(previous_value, value, reps, tree,
extra_bits_data, huffman_tree_size);
previous_value = value;
}
i += reps;
}
}
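// Illustrative sketch only (not used by the encoder): serializes a made-up
// depth sequence with the run-length codes above. Here the two runs of zeros
// collapse into single code-length symbols (17 and 18) with their repeat
// counts stored in extra_bits.
void ExampleWriteHuffmanTree() {
  uint8_t depth[32] = { 3, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 };  // rest is 0
  uint8_t tree[32] = { 0 };
  uint8_t extra_bits[32] = { 0 };
  int tree_size = 0;
  WriteHuffmanTree(depth, 32, tree, extra_bits, &tree_size);
  // tree[0 .. tree_size) now holds code-length symbols in the range 0-18.
}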
namespace {
uint16_t ReverseBits(int num_bits, uint16_t bits) {
static const size_t kLut[16] = { // Pre-reversed 4-bit values.
0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
};
size_t retval = kLut[bits & 0xf];
for (int i = 4; i < num_bits; i += 4) {
retval <<= 4;
bits >>= 4;
retval |= kLut[bits & 0xf];
}
retval >>= (-num_bits & 0x3);
return retval;
}
} // namespace
void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits) {
// In Brotli, all bit depths are [1..15]
// 0 bit depth means that the symbol does not exist.
const int kMaxBits = 16; // 0..15 are values for bits
uint16_t bl_count[kMaxBits] = { 0 };
{
for (int i = 0; i < len; ++i) {
++bl_count[depth[i]];
}
bl_count[0] = 0;
}
uint16_t next_code[kMaxBits];
next_code[0] = 0;
{
int code = 0;
for (int bits = 1; bits < kMaxBits; ++bits) {
code = (code + bl_count[bits - 1]) << 1;
next_code[bits] = code;
}
}
for (int i = 0; i < len; ++i) {
if (depth[i]) {
bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
}
}
}
} // namespace brotli

112
enc/entropy_encode.h Normal file
View File

@ -0,0 +1,112 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Entropy encoding (Huffman) utilities.
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
#define BROTLI_ENC_ENTROPY_ENCODE_H_
#include <stdint.h>
#include <string.h>
#include "./histogram.h"
#include "./prefix.h"
namespace brotli {
// This function will create a Huffman tree.
//
// The (data,length) contains the population counts.
// The tree_limit is the maximum bit depth of the Huffman codes.
//
// The depth contains the tree, i.e., how many bits are used for
// the symbol.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const int *data,
const int length,
const int tree_limit,
uint8_t *depth);
// Changes the population counts in a way that makes the subsequent
// Huffman tree compression, especially its RLE part, more likely to
// compress this data efficiently.
//
// length contains the size of the histogram.
// counts contains the population counts.
int OptimizeHuffmanCountsForRle(int length, int* counts);
// Writes a Huffman tree from bit depths into the bitstream representation
// of a Huffman tree. The generated representation is then compressed once
// more, using a Huffman code over the code-length symbols.
void WriteHuffmanTree(const uint8_t* depth, const int length,
uint8_t* tree,
uint8_t* extra_bits_data,
int* huffman_tree_size);
// Get the actual bit values for a tree of bit depths.
void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits);
template<int kSize>
struct EntropyCode {
// How many bits for symbol.
uint8_t depth_[kSize];
// Actual bits used to represent the symbol.
uint16_t bits_[kSize];
// How many non-zero depth.
int count_;
// First two symbols with non-zero depth.
int symbols_[2];
};
template<int kSize>
void BuildEntropyCode(const Histogram<kSize>& histogram,
const int tree_limit,
const int alphabet_size,
EntropyCode<kSize>* code) {
memset(code->depth_, 0, sizeof(code->depth_));
memset(code->bits_, 0, sizeof(code->bits_));
memset(code->symbols_, 0, sizeof(code->symbols_));
code->count_ = 0;
if (histogram.total_count_ == 0) return;
for (int i = 0; i < kSize; ++i) {
if (histogram.data_[i] > 0) {
if (code->count_ < 2) code->symbols_[code->count_] = i;
++code->count_;
}
}
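// Only histograms with many (>= 64) used symbols are massaged with
// OptimizeHuffmanCountsForRle; small alphabets are built directly from the
// raw counts.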
if (code->count_ >= 64) {
int counts[kSize];
memcpy(counts, &histogram.data_[0], sizeof(counts[0]) * kSize);
OptimizeHuffmanCountsForRle(alphabet_size, counts);
CreateHuffmanTree(counts, alphabet_size, tree_limit, &code->depth_[0]);
} else {
CreateHuffmanTree(&histogram.data_[0], alphabet_size, tree_limit,
&code->depth_[0]);
}
ConvertBitDepthsToSymbols(&code->depth_[0], alphabet_size, &code->bits_[0]);
}
static const int kCodeLengthCodes = 19;
// Literal entropy code.
typedef EntropyCode<256> EntropyCodeLiteral;
// Prefix entropy codes.
typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
} // namespace brotli
#endif // BROTLI_ENC_ENTROPY_ENCODE_H_

161
enc/fast_log.h Normal file
View File

@ -0,0 +1,161 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Utilities for fast computation of logarithms.
#ifndef BROTLI_ENC_FAST_LOG_H_
#define BROTLI_ENC_FAST_LOG_H_
#include <assert.h>
#include <math.h>
#include <stdint.h>
namespace brotli {
// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
inline int Log2Floor(uint32_t n) {
#if defined(__clang__) || \
(defined(__GNUC__) && \
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4))
return n == 0 ? -1 : 31 ^ __builtin_clz(n);
#else
if (n == 0)
return -1;
int log = 0;
uint32_t value = n;
for (int i = 4; i >= 0; --i) {
int shift = (1 << i);
uint32_t x = value >> shift;
if (x != 0) {
value = x;
log += shift;
}
}
assert(value == 1);
return log;
#endif
}
// Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0.
inline int Log2Ceiling(uint32_t n) {
int floor = Log2Floor(n);
if (n == (n &~ (n - 1))) // zero or a power of two
return floor;
else
return floor + 1;
}
// A lookup table for small values of log2(int) to be used in entropy
// computation.
//
// ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
static const float kLog2Table[] = {
0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
7.9943534368588578f
};
// Faster logarithm for small integers, with the property of log2(0) == 0.
static inline double FastLog2(int v) {
if (v < (int)(sizeof(kLog2Table) / sizeof(kLog2Table[0]))) {
return kLog2Table[v];
}
return log2(v);
}
} // namespace brotli
#endif // BROTLI_ENC_FAST_LOG_H_

85
enc/find_match_length.h Normal file
View File

@ -0,0 +1,85 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Function to find maximal matching prefixes of strings.
#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
#include <stdint.h>
#include "./port.h"
namespace brotli {
// Separate implementation for x86_64, for speed.
#if defined(__GNUC__) && defined(ARCH_K8)
static inline int FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
int matched = 0;
size_t limit2 = (limit >> 3) + 1; // + 1 is for pre-decrement in while
while (PREDICT_TRUE(--limit2)) {
if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
s2 += 8;
matched += 8;
} else {
uint64_t x =
BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
int matching_bits = __builtin_ctzll(x);
matched += matching_bits >> 3;
return matched;
}
}
limit = (limit & 7) + 1; // + 1 is for pre-decrement in while
while (--limit) {
if (PREDICT_TRUE(s1[matched] == *s2)) {
++s2;
++matched;
} else {
return matched;
}
}
return matched;
}
#else
static inline int FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
int matched = 0;
const uint8_t* s2_limit = s2 + limit;
const uint8_t* s2_ptr = s2;
// Find out how long the match is. We loop over the data 32 bits at a
// time until we find a 32-bit block that doesn't match; then we find
// the first non-matching bit and use that to calculate the total
// length of the match.
while (s2_ptr <= s2_limit - 4 &&
BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
s2_ptr += 4;
matched += 4;
}
while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
++s2_ptr;
++matched;
}
return matched;
}
#endif
} // namespace brotli
#endif // BROTLI_ENC_FIND_MATCH_LENGTH_H_

354
enc/hash.h Normal file
View File

@ -0,0 +1,354 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// A (forgetful) hash table to the data seen by the compressor, to
// help create backward references to previous data.
#ifndef BROTLI_ENC_HASH_H_
#define BROTLI_ENC_HASH_H_
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <algorithm>
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./port.h"
namespace brotli {
// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime, the oddity is enough
// for this use.
// * The number has been tuned heuristically against compression benchmarks.
static const uint32_t kHashMul32 = 0x1e35a7bd;
inline uint32_t Hash3Bytes(const uint8_t *data, const int bits) {
uint32_t h = (BROTLI_UNALIGNED_LOAD32(data) & 0xffffff) * kHashMul32;
// The higher bits contain more mixture from the multiplication,
// so we take our results from there.
return h >> (32 - bits);
}
// Usually we choose the longest backward reference. This function allows
// for exceptions to that rule.
//
// If we choose a backward reference that is further away, it will
// usually be coded with more bits. We approximate this by assuming
// log2(distance). If the distance can be expressed in terms of the
// last four distances, we use some heuristic constants to estimate
// the bits cost. For the first up to four literals we use the bit
// cost of the literals from the literal cost model, after that we
// use the average bit cost of the cost model.
//
// This function is used to sometimes discard a longer backward reference
// when it is not much longer and the bit cost for encoding it is more
// than the saved literals.
inline double BackwardReferenceScore(double average_cost,
double start_cost4,
double start_cost3,
double start_cost2,
int copy_length,
int backward_reference_offset,
int last_distance1,
int last_distance2,
int last_distance3,
int last_distance4) {
double retval = 0;
switch (copy_length) {
case 2: retval = start_cost2; break;
case 3: retval = start_cost3; break;
default: retval = start_cost4 + (copy_length - 4) * average_cost; break;
}
int diff_last1 = abs(backward_reference_offset - last_distance1);
int diff_last2 = abs(backward_reference_offset - last_distance2);
if (diff_last1 == 0) {
retval += 0.6;
} else if (diff_last1 < 4) {
retval -= 0.9 + 0.03 * diff_last1;
} else if (diff_last2 < 4) {
retval -= 0.95 + 0.1 * diff_last2;
} else if (backward_reference_offset == last_distance3) {
retval -= 1.17;
} else if (backward_reference_offset == last_distance4) {
retval -= 1.27;
} else {
retval -= 1.20 * Log2Floor(backward_reference_offset);
}
return retval;
}
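// Illustrative sketch only (not used by the encoder): compares the score of
// a long match far away against a shorter match that reuses the most recent
// distance. Every length, distance and cost below is made up; with these
// numbers the short repeat-distance match wins.
inline bool ExamplePreferShortRepeatMatch() {
  const double kAverageCost = 5.4;                    // hypothetical
  const double kStartCost4 = 4 * kAverageCost;        // hypothetical
  const double kStartCost3 = 3 * kAverageCost + 0.3;
  const double kStartCost2 = 2 * kAverageCost + 1.2;
  // Candidate A: copy 7 bytes from 100000 bytes back.
  const double far_score = BackwardReferenceScore(
      kAverageCost, kStartCost4, kStartCost3, kStartCost2,
      7, 100000, 17, 40, 60, 80);
  // Candidate B: copy 5 bytes, reusing the last distance (17) exactly.
  const double repeat_score = BackwardReferenceScore(
      kAverageCost, kStartCost4, kStartCost3, kStartCost2,
      5, 17, 17, 40, 60, 80);
  return repeat_score > far_score;  // true for these hypothetical inputs
}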
// A (forgetful) hash table to the data seen by the compressor, to
// help create backward references to previous data.
//
// This is a hash map of fixed size (kBucketSize) to a ring buffer of
// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
// index positions of the given hash key in the compressed data.
template <int kBucketBits, int kBlockBits>
class HashLongestMatch {
public:
HashLongestMatch()
: literal_cost_(NULL),
last_distance1_(4),
last_distance2_(11),
last_distance3_(15),
last_distance4_(16),
insert_length_(0),
average_cost_(5.4) {
Reset();
}
void Reset() {
std::fill(&num_[0], &num_[sizeof(num_) / sizeof(num_[0])], 0);
}
void SetLiteralCost(float *cost) {
literal_cost_ = cost;
}
double literal_cost(int i) const { return literal_cost_[i]; }
// Look at 3 bytes at data.
// Compute a hash from these, and store the value of ix at that position.
inline void Store(const uint8_t *data, const int ix) {
const uint32_t key = Hash3Bytes(data, kBucketBits);
const int minor_ix = num_[key] & kBlockMask;
buckets_[key][minor_ix] = ix;
++num_[key];
}
// Store hashes for a range of data.
void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
for (int p = 0; p < len; ++p) {
Store(&data[p & mask], startix + p);
}
}
// Find a longest backward match of &data[cur_ix] up to the length of
// max_length.
//
// Does not look for matches longer than max_length.
// Does not look for matches further away than max_backward.
// Writes the best found match length into best_len_out.
// Writes the backward distance of the best match (cur_ix minus the matched
// position) into best_distance_out.
// Writes the score of the best match into best_score_out.
bool FindLongestMatch(const uint8_t * __restrict data,
const uint32_t cur_ix,
uint32_t max_length,
const uint32_t max_backward,
size_t * __restrict best_len_out,
size_t * __restrict best_distance_out,
double * __restrict best_score_out) {
const double start_cost4 = literal_cost_ == NULL ? 20 :
literal_cost_[cur_ix] +
literal_cost_[cur_ix + 1] +
literal_cost_[cur_ix + 2] +
literal_cost_[cur_ix + 3];
const double start_cost3 = literal_cost_ == NULL ? 15 :
literal_cost_[cur_ix] +
literal_cost_[cur_ix + 1] +
literal_cost_[cur_ix + 2] + 0.3;
double start_cost2 = literal_cost_ == NULL ? 10 :
literal_cost_[cur_ix] +
literal_cost_[cur_ix + 1] + 1.2;
bool match_found = false;
// Don't accept a short copy from far away.
double best_score = 8.25;
if (insert_length_ < 4) {
double cost_diff[4] = { 0.20, 0.09, 0.05, 0.03 };
best_score += cost_diff[insert_length_];
}
size_t best_len = *best_len_out;
*best_len_out = 0;
size_t best_ix = 1;
// Try last distance first.
for (int i = 0; i < 16; ++i) {
int prev_ix = cur_ix;
switch(i) {
case 0: prev_ix -= last_distance1_; break;
case 1: prev_ix -= last_distance2_; break;
case 2: prev_ix -= last_distance3_; break;
case 3: prev_ix -= last_distance4_; break;
case 4: prev_ix -= last_distance1_ - 1; break;
case 5: prev_ix -= last_distance1_ + 1; break;
case 6: prev_ix -= last_distance1_ - 2; break;
case 7: prev_ix -= last_distance1_ + 2; break;
case 8: prev_ix -= last_distance1_ - 3; break;
case 9: prev_ix -= last_distance1_ + 3; break;
case 10: prev_ix -= last_distance2_ - 1; break;
case 11: prev_ix -= last_distance2_ + 1; break;
case 12: prev_ix -= last_distance2_ - 2; break;
case 13: prev_ix -= last_distance2_ + 2; break;
case 14: prev_ix -= last_distance2_ - 3; break;
case 15: prev_ix -= last_distance2_ + 3; break;
}
if (prev_ix >= cur_ix) {
continue;
}
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
continue;
}
if (data[cur_ix + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix], max_length);
if (len >= 3 || (len == 2 && i < 2)) {
// Requiring a minimum length here does not change the semantics; it just
// saves a few unnecessary binary logarithms in the backward reference
// score, since we are not interested in such short matches.
const double score = BackwardReferenceScore(average_cost_,
start_cost4,
start_cost3,
start_cost2,
len, backward,
last_distance1_,
last_distance2_,
last_distance3_,
last_distance4_);
if (best_score < score) {
best_score = score;
best_len = len;
best_ix = backward;
*best_len_out = best_len;
*best_distance_out = best_ix;
*best_score_out = best_score;
match_found = true;
}
}
}
const uint32_t key = Hash3Bytes(&data[cur_ix], kBucketBits);
const uint32_t * __restrict const bucket = &buckets_[key][0];
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
int stop = int(cur_ix) - 64;
if (stop < 0) { stop = 0; }
start_cost2 -= 1.0;
for (int i = cur_ix - 1; i > stop; --i) {
size_t prev_ix = i;
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
break;
}
if (data[cur_ix] != data[prev_ix] ||
data[cur_ix + 1] != data[prev_ix + 1]) {
continue;
}
int len = 2;
const double score = start_cost2 - 1.70 * Log2Floor(backward);
if (best_score < score) {
best_score = score;
best_len = len;
best_ix = backward;
*best_len_out = best_len;
*best_distance_out = best_ix;
// Keep the caller's best score in sync, as in the other accept paths.
*best_score_out = best_score;
match_found = true;
}
}
for (int i = num_[key] - 1; i >= down; --i) {
size_t prev_ix = bucket[i & kBlockMask];
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
break;
}
if (data[cur_ix + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix], max_length);
if (len >= 3) {
// Requiring length >= 3 does not change the semantics; it just saves a
// few unnecessary binary logarithms in the backward reference score,
// since we are not interested in such short matches.
const double score = BackwardReferenceScore(average_cost_,
start_cost4,
start_cost3,
start_cost2,
len, backward,
last_distance1_,
last_distance2_,
last_distance3_,
last_distance4_);
if (best_score < score) {
best_score = score;
best_len = len;
best_ix = backward;
*best_len_out = best_len;
*best_distance_out = best_ix;
*best_score_out = best_score;
match_found = true;
}
}
}
return match_found;
}
void set_last_distance(int v) {
if (last_distance1_ != v) {
last_distance4_ = last_distance3_;
last_distance3_ = last_distance2_;
last_distance2_ = last_distance1_;
last_distance1_ = v;
}
}
int last_distance() const { return last_distance1_; }
void set_insert_length(int v) { insert_length_ = v; }
void set_average_cost(double v) { average_cost_ = v; }
private:
// Number of hash buckets.
static const uint32_t kBucketSize = 1 << kBucketBits;
// Only kBlockSize newest backward references are kept,
// and the older are forgotten.
static const uint32_t kBlockSize = 1 << kBlockBits;
// Mask for accessing entries in a block (in a ringbuffer manner).
static const uint32_t kBlockMask = (1 << kBlockBits) - 1;
// Number of entries in a particular bucket.
uint16_t num_[kBucketSize];
// Buckets containing kBlockSize of backward references.
uint32_t buckets_[kBucketSize][kBlockSize];
// Model of how much the ith literal costs to encode using
// the entropy model.
float *literal_cost_;
int last_distance1_;
int last_distance2_;
int last_distance3_;
int last_distance4_;
// Cost adjustment for how many literals we are planning to insert
// anyway.
int insert_length_;
double average_cost_;
};
} // namespace brotli
#endif // BROTLI_ENC_HASH_H_

72
enc/histogram.cc Normal file
View File

@ -0,0 +1,72 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Build per-context histograms of literals, commands and distance codes.
#include "./histogram.h"
#include <stdint.h>
#include <cmath>
#include "./block_splitter.h"
#include "./command.h"
#include "./context.h"
#include "./prefix.h"
namespace brotli {
void BuildHistograms(
const std::vector<Command>& cmds,
const BlockSplit& literal_split,
const BlockSplit& insert_and_copy_split,
const BlockSplit& dist_split,
const uint8_t* input_buffer,
size_t pos,
int context_mode,
int distance_context_mode,
std::vector<HistogramLiteral>* literal_histograms,
std::vector<HistogramCommand>* insert_and_copy_histograms,
std::vector<HistogramDistance>* copy_dist_histograms) {
BlockSplitIterator literal_it(literal_split);
BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
BlockSplitIterator dist_it(dist_split);
for (int i = 0; i < cmds.size(); ++i) {
const Command &cmd = cmds[i];
insert_and_copy_it.Next();
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
cmd.command_prefix_);
for (int j = 0; j < cmd.insert_length_; ++j) {
literal_it.Next();
uint8_t prev_byte = pos > 0 ? input_buffer[pos - 1] : 0;
uint8_t prev_byte2 = pos > 1 ? input_buffer[pos - 2] : 0;
uint8_t prev_byte3 = pos > 2 ? input_buffer[pos - 3] : 0;
int context = (literal_it.type_ * NumContexts(context_mode) +
Context(prev_byte, prev_byte2, prev_byte3, context_mode));
(*literal_histograms)[context].Add(input_buffer[pos]);
++pos;
}
pos += cmd.copy_length_;
if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
dist_it.Next();
int context = dist_it.type_;
if (distance_context_mode > 0) {
context <<= 2;
context += (cmd.copy_length_ > 4) ? 3 : cmd.copy_length_ - 2;
}
(*copy_dist_histograms)[context].Add(cmd.distance_prefix_);
}
}
}
} // namespace brotli

97
enc/histogram.h Normal file
View File

@ -0,0 +1,97 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Models the histograms of literals, commands and distance codes.
#ifndef BROTLI_ENC_HISTOGRAM_H_
#define BROTLI_ENC_HISTOGRAM_H_
#include <stdint.h>
#include <string.h>
#include <vector>
#include <utility>
#include "./command.h"
#include "./fast_log.h"
#include "./prefix.h"
namespace brotli {
class BlockSplit;
// A simple container for histograms of data in blocks.
template<int kDataSize>
struct Histogram {
Histogram() {
Clear();
}
void Clear() {
memset(data_, 0, sizeof(data_));
total_count_ = 0;
}
void Add(int val) {
++data_[val];
++total_count_;
}
void Remove(int val) {
--data_[val];
--total_count_;
}
template<typename DataType>
void Add(const DataType *p, size_t n) {
total_count_ += n;
n += 1;
while(--n) ++data_[*p++];
}
void AddHistogram(const Histogram& v) {
total_count_ += v.total_count_;
for (int i = 0; i < kDataSize; ++i) {
data_[i] += v.data_[i];
}
}
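// Shannon bit cost of coding this histogram with an ideal entropy coder:
// sum over all symbols of data_[i] * log2(total_count_ / data_[i]).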
double EntropyBitCost() const {
double retval = total_count_ * FastLog2(total_count_);
for (int i = 0; i < kDataSize; ++i) {
retval -= data_[i] * FastLog2(data_[i]);
}
return retval;
}
int data_[kDataSize];
int total_count_;
double bit_cost_;
};
// Literal histogram.
typedef Histogram<256> HistogramLiteral;
// Prefix histograms.
typedef Histogram<kNumCommandPrefixes> HistogramCommand;
typedef Histogram<kNumDistancePrefixes> HistogramDistance;
typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
void BuildHistograms(
const std::vector<Command>& cmds,
const BlockSplit& literal_split,
const BlockSplit& insert_and_copy_split,
const BlockSplit& dist_split,
const uint8_t* input_buffer,
size_t pos,
int context_mode,
int distance_context_mode,
std::vector<HistogramLiteral>* literal_histograms,
std::vector<HistogramCommand>* insert_and_copy_histograms,
std::vector<HistogramDistance>* copy_dist_histograms);
} // namespace brotli
#endif // BROTLI_ENC_HISTOGRAM_H_

60
enc/literal_cost.cc Normal file
View File

@ -0,0 +1,60 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Literal cost model to allow backward reference replacement to be efficient.
#include "./literal_cost.h"
#include <math.h>
#include <stdint.h>
#include <algorithm>
namespace brotli {
void EstimateBitCostsForLiterals(size_t len, const uint8_t *data, float *cost) {
int histogram[256] = { 0 };
int window_half = 2000;
int in_window = std::min(static_cast<size_t>(window_half), len);
// Bootstrap histogram.
for (int i = 0; i < in_window; ++i) {
++histogram[data[i]];
}
// Compute bit costs with sliding window.
for (int i = 0; i < len; ++i) {
if (i - window_half >= 0) {
// Remove a byte in the past.
--histogram[data[i - window_half]];
--in_window;
}
if (i + window_half < len) {
// Add a byte in the future.
++histogram[data[i + window_half]];
++in_window;
}
int histo = histogram[data[i]];
if (histo == 0) {
histo = 1;
}
cost[i] = log2(static_cast<double>(in_window) / histo);
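// Add a small bias, and pull estimates below one bit halfway towards
// one bit, so that no literal is estimated as nearly free.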
cost[i] += 0.03;
if (cost[i] < 1.0) {
cost[i] *= 0.5;
cost[i] += 0.5;
}
}
}
} // namespace brotli

31
enc/literal_cost.h Normal file
View File

@ -0,0 +1,31 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Literal cost model to allow backward reference replacement to be efficient.
#ifndef BROTLI_ENC_LITERAL_COST_H_
#define BROTLI_ENC_LITERAL_COST_H_
#include <stddef.h>
#include <stdint.h>
namespace brotli {
// Input: length of data, and the bytes.
// Output: an estimate of how many bits each literal will take when
// entropy coded.
void EstimateBitCostsForLiterals(size_t len, const uint8_t *data, float *cost);
} // namespace brotli
#endif // BROTLI_ENC_LITERAL_COST_H_

138
enc/port.h Normal file
View File

@ -0,0 +1,138 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Macros for endianness, branch prediction and unaligned loads and stores.
#ifndef BROTLI_ENC_PORT_H_
#define BROTLI_ENC_PORT_H_
#if defined OS_LINUX || defined OS_CYGWIN
#include <endian.h>
#elif defined OS_FREEBSD
#include <machine/endian.h>
#elif defined OS_MACOSX
#include <machine/endian.h>
/* Let's try and follow the Linux convention */
#define __BYTE_ORDER BYTE_ORDER
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#define __BIG_ENDIAN BIG_ENDIAN
#endif
#include <stdint.h>  // fixed-width types used by the unaligned helpers below
#include <string.h>  // memcpy used by the unaligned load/store fallbacks
// define the macros IS_LITTLE_ENDIAN or IS_BIG_ENDIAN
// using the above endian definitions from endian.h if
// endian.h was included
#ifdef __BYTE_ORDER
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define IS_LITTLE_ENDIAN
#endif
#if __BYTE_ORDER == __BIG_ENDIAN
#define IS_BIG_ENDIAN
#endif
#else
#if defined(__LITTLE_ENDIAN__)
#define IS_LITTLE_ENDIAN
#elif defined(__BIG_ENDIAN__)
#define IS_BIG_ENDIAN
#endif
#endif // __BYTE_ORDER
#if defined(COMPILER_GCC3)
#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define PREDICT_FALSE(x) x
#define PREDICT_TRUE(x) x
#endif
// Portable handling of unaligned loads, stores, and copies.
// On some platforms, like ARM, the copy functions can be more efficient
// than a load and a store.
#if defined(ARCH_PIII) || defined(ARCH_ATHLON) || \
defined(ARCH_K8) || defined(_ARCH_PPC)
// x86 and x86-64 can perform unaligned loads/stores directly;
// modern PowerPC hardware can also do unaligned integer loads and stores;
// but note: the FPU still sends unaligned loads and stores to a trap handler!
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
(*reinterpret_cast<uint32_t *>(_p) = (_val))
#define BROTLI_UNALIGNED_STORE64(_p, _val) \
(*reinterpret_cast<uint64_t *>(_p) = (_val))
#elif defined(__arm__) && \
!defined(__ARM_ARCH_5__) && \
!defined(__ARM_ARCH_5T__) && \
!defined(__ARM_ARCH_5TE__) && \
!defined(__ARM_ARCH_5TEJ__) && \
!defined(__ARM_ARCH_6__) && \
!defined(__ARM_ARCH_6J__) && \
!defined(__ARM_ARCH_6K__) && \
!defined(__ARM_ARCH_6Z__) && \
!defined(__ARM_ARCH_6ZK__) && \
!defined(__ARM_ARCH_6T2__)
// ARMv7 and newer support native unaligned accesses, but only of 16-bit
// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
// do an unaligned read and rotate the words around a bit, or do the reads very
// slowly (trip through kernel mode).
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
(*reinterpret_cast<uint32_t *>(_p) = (_val))
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
uint64_t t;
memcpy(&t, p, sizeof t);
return t;
}
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
#else
// These functions are provided for architectures that don't support
// unaligned loads and stores.
inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
uint32_t t;
memcpy(&t, p, sizeof t);
return t;
}
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
uint64_t t;
memcpy(&t, p, sizeof t);
return t;
}
inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
memcpy(p, &v, sizeof v);
}
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
#endif
#endif // BROTLI_ENC_PORT_H_

166
enc/prefix.cc Normal file
View File

@ -0,0 +1,166 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions for encoding integers into prefix codes, together with the
// number of extra bits and the actual values of those extra bits.
#include "./prefix.h"
#include "./fast_log.h"
namespace brotli {
// Represents the range of values belonging to a prefix code:
// [offset, offset + 2^nbits)
struct PrefixCodeRange {
int offset;
int nbits;
};
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
{ 1, 2}, { 5, 2}, { 9, 2}, { 13, 2},
{ 17, 3}, { 25, 3}, { 33, 3}, { 41, 3},
{ 49, 4}, { 65, 4}, { 81, 4}, { 97, 4},
{ 113, 5}, { 145, 5}, { 177, 5}, { 209, 5},
{ 241, 6}, { 305, 6}, { 369, 7}, { 497, 8},
{ 753, 9}, { 1265, 10}, {2289, 11}, {4337, 12},
{8433, 13}, {16625, 24}
};
static const PrefixCodeRange kInsertLengthPrefixCode[kNumInsertLenPrefixes] = {
{ 0, 0}, { 1, 0}, { 2, 0}, { 3, 0},
{ 4, 0}, { 5, 0}, { 6, 1}, { 8, 1},
{ 10, 2}, { 14, 2}, { 18, 3}, { 26, 3},
{ 34, 4}, { 50, 4}, { 66, 5}, { 98, 5},
{ 130, 6}, { 194, 7}, { 322, 8}, { 578, 9},
{1090, 10}, {2114, 12}, {6210, 14}, {22594, 24},
};
static const PrefixCodeRange kCopyLengthPrefixCode[kNumCopyLenPrefixes] = {
{ 2, 0}, { 3, 0}, { 4, 0}, { 5, 0},
{ 6, 0}, { 7, 0}, { 8, 0}, { 9, 0},
{ 10, 1}, { 12, 1}, { 14, 2}, { 18, 2},
{ 22, 3}, { 30, 3}, { 38, 4}, { 54, 4},
{ 70, 5}, { 102, 5}, { 134, 6}, { 198, 7},
{326, 8}, { 582, 9}, {1094, 10}, {2118, 24},
};
static const int kInsertAndCopyRangeLut[9] = {
0, 1, 4, 2, 3, 6, 5, 7, 8,
};
static const int kInsertRangeLut[9] = {
0, 0, 1, 1, 0, 2, 1, 2, 2,
};
static const int kCopyRangeLut[9] = {
0, 1, 0, 1, 2, 0, 2, 1, 2,
};
int InsertLengthPrefix(int length) {
for (int i = 0; i < kNumInsertLenPrefixes; ++i) {
const PrefixCodeRange& range = kInsertLengthPrefixCode[i];
if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
return i;
}
}
return -1;
}
int CopyLengthPrefix(int length) {
for (int i = 0; i < kNumCopyLenPrefixes; ++i) {
const PrefixCodeRange& range = kCopyLengthPrefixCode[i];
if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
return i;
}
}
return -1;
}
int CommandPrefix(int insert_length, int copy_length) {
if (copy_length == 0) {
copy_length = 3;
}
int insert_prefix = InsertLengthPrefix(insert_length);
int copy_prefix = CopyLengthPrefix(copy_length);
int range_idx = 3 * (insert_prefix >> 3) + (copy_prefix >> 3);
return ((kInsertAndCopyRangeLut[range_idx] << 6) +
((insert_prefix & 7) << 3) + (copy_prefix & 7));
}
int InsertLengthExtraBits(int code) {
int insert_code = (kInsertRangeLut[code >> 6] << 3) + ((code >> 3) & 7);
return kInsertLengthPrefixCode[insert_code].nbits;
}
int InsertLengthOffset(int code) {
int insert_code = (kInsertRangeLut[code >> 6] << 3) + ((code >> 3) & 7);
return kInsertLengthPrefixCode[insert_code].offset;
}
int CopyLengthExtraBits(int code) {
int copy_code = (kCopyRangeLut[code >> 6] << 3) + (code & 7);
return kCopyLengthPrefixCode[copy_code].nbits;
}
int CopyLengthOffset(int code) {
int copy_code = (kCopyRangeLut[code >> 6] << 3) + (code & 7);
return kCopyLengthPrefixCode[copy_code].offset;
}
void PrefixEncodeCopyDistance(int distance_code,
int num_direct_codes,
int postfix_bits,
uint16_t* code,
int* nbits,
uint32_t* extra_bits) {
distance_code -= 1;
if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
*code = distance_code;
*nbits = 0;
*extra_bits = 0;
return;
}
distance_code -= kNumDistanceShortCodes + num_direct_codes;
distance_code += (1 << (postfix_bits + 2));
int bucket = Log2Floor(distance_code) - 1;
int postfix_mask = (1 << postfix_bits) - 1;
int postfix = distance_code & postfix_mask;
int prefix = (distance_code >> bucket) & 1;
int offset = (2 + prefix) << bucket;
*nbits = bucket - postfix_bits;
*code = kNumDistanceShortCodes + num_direct_codes +
((2 * (*nbits - 1) + prefix) << postfix_bits) + postfix;
*extra_bits = (distance_code - offset) >> postfix_bits;
}
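// Illustrative sketch only (not called by the encoder): encodes one distance
// code with the 12 direct distance codes and 1 postfix bit that
// BrotliCompressBuffer currently configures; the input value is made up.
void ExamplePrefixEncodeDistance() {
  uint16_t code = 0;
  int nbits = 0;
  uint32_t extra = 0;
  PrefixEncodeCopyDistance(1234, 12, 1, &code, &nbits, &extra);
  // code is the symbol for the distance entropy coder; the low nbits of
  // extra are then emitted as verbatim extra bits.
}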
int BlockLengthPrefix(int length) {
for (int i = 0; i < kNumBlockLenPrefixes; ++i) {
const PrefixCodeRange& range = kBlockLengthPrefixCode[i];
if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
return i;
}
}
return -1;
}
int BlockLengthExtraBits(int length_code) {
return kBlockLengthPrefixCode[length_code].nbits;
}
int BlockLengthOffset(int length_code) {
return kBlockLengthPrefixCode[length_code].offset;
}
} // namespace brotli

51
enc/prefix.h Normal file
View File

@ -0,0 +1,51 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions for encoding integers into prefix codes, the number of extra
// bits, and the actual values of the extra bits.
#ifndef BROTLI_ENC_PREFIX_H_
#define BROTLI_ENC_PREFIX_H_
#include <stdint.h>
namespace brotli {
static const int kNumInsertLenPrefixes = 24;
static const int kNumCopyLenPrefixes = 24;
static const int kNumCommandPrefixes = 704;
static const int kNumBlockLenPrefixes = 26;
static const int kNumDistanceShortCodes = 16;
static const int kNumDistancePrefixes = 520;
int CommandPrefix(int insert_length, int copy_length);
int InsertLengthExtraBits(int prefix);
int InsertLengthOffset(int prefix);
int CopyLengthExtraBits(int prefix);
int CopyLengthOffset(int prefix);
void PrefixEncodeCopyDistance(int distance_code,
int num_direct_codes,
int shift_bits,
uint16_t* prefix,
int* nbits,
uint32_t* extra_bits);
int BlockLengthPrefix(int length);
int BlockLengthExtraBits(int prefix);
int BlockLengthOffset(int prefix);
} // namespace brotli
#endif // BROTLI_ENC_PREFIX_H_

91
enc/write_bits.h Normal file
View File

@ -0,0 +1,91 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Write bits into a byte array.
#ifndef BROTLI_ENC_WRITE_BITS_H_
#define BROTLI_ENC_WRITE_BITS_H_
#include <assert.h>
#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include "./port.h"
namespace brotli {
//#define BIT_WRITER_DEBUG
// This function writes bits into bytes in increasing addresses, and within
// a byte least-significant-bit first.
//
// The function can write up to 56 bits in one go with WriteBits.
// Example: let's assume that 3 bits (Rs below) have been written already:
//
// BYTE-0 BYTE+1 BYTE+2
//
// 0000 0RRR 0000 0000 0000 0000
//
// Now, we could write 5 or fewer bits into the MSB part of BYTE-0 by just
// shifting by 3 and OR'ing them into BYTE-0.
//
// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
// and locate the rest in BYTE+1, BYTE+2, etc.
inline void WriteBits(int n_bits,
uint64_t bits,
int * __restrict pos,
uint8_t * __restrict array) {
#ifdef BIT_WRITER_DEBUG
printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos);
#endif
#ifdef IS_LITTLE_ENDIAN
// This branch of the code can write up to 56 bits at a time: up to 7 bits
// may already be occupied in *p, and at least 1 bit is needed to
// initialize the bit-stream ahead (i.e. if 7 bits are in *p and we write
// 57 bits, then the next write will access a byte that was never
// initialized).
uint8_t *p = &array[*pos >> 3];
uint64_t v = *p;
v |= bits << (*pos & 7);
BROTLI_UNALIGNED_STORE64(p, v); // Set some bits.
*pos += n_bits;
#else
// An implicit '& 0xff' is assumed for uint8_t arithmetic.
uint8_t *array_pos = &array[*pos >> 3];
const int bits_reserved_in_first_byte = (*pos & 7);
bits <<= bits_reserved_in_first_byte;
*array_pos++ |= bits;
for (int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte;
bits_left_to_write >= 1;
bits_left_to_write -= 8) {
bits >>= 8;
*array_pos++ = bits;
}
*array_pos = 0;
*pos += n_bits;
#endif
}
inline void WriteBitsPrepareStorage(int pos, uint8_t *array) {
#ifdef BIT_WRITER_DEBUG
printf("WriteBitsPrepareStorage %10d\n", pos);
#endif
assert((pos & 7) == 0);
array[pos >> 3] = 0;
}
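// Usage sketch (illustrative): writing a 3-bit code followed by a 12-bit
// value.  The byte at the starting position must be cleared once before the
// first write, and the buffer needs slack beyond the written bits because
// the little-endian branch stores 64 bits at a time.
//
//   uint8_t storage[16] = { 0 };
//   int pos = 0;
//   WriteBitsPrepareStorage(pos, storage);
//   WriteBits(3, 5, &pos, storage);      // pos is now 3
//   WriteBits(12, 0xabc, &pos, storage); // pos is now 15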
} // namespace brotli
#endif // BROTLI_ENC_WRITE_BITS_H_