Add brotli compressor

This commit adds the encoder for the brotli compression format.
Brotli is a generic byte-level compression algorithm.
Zoltan Szabadka 2013-10-23 13:06:13 +02:00
parent 6455522172
commit c66e4e3e4f
24 changed files with 3904 additions and 0 deletions

3
enc/README Normal file

@@ -0,0 +1,3 @@
This directory holds the encoder for brotli compression format.
Brotli is proposed to be used at the byte-compression level in the WOFF 2.0 format.
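
As an aside (not part of this commit's files), a minimal usage sketch of the encoder's top-level entry point, BrotliCompressBuffer from enc/encode.cc. It assumes the example is built from the enc/ directory, that ./encode.h declares the function, that the output buffer sized below is large enough, and that a nonzero return value means success; none of these conventions are documented by this commit.

#include <cstdio>
#include <cstring>
#include <vector>
#include "./encode.h"

int main() {
  const char* text = "Text that repeats itself repeats itself repeats itself.";
  const size_t input_size = strlen(text);
  // Assumed to be enough room for the compressed output.
  std::vector<uint8_t> output(input_size + 1024);
  size_t encoded_size = output.size();
  int ok = brotli::BrotliCompressBuffer(
      input_size, reinterpret_cast<const uint8_t*>(text),
      &encoded_size, &output[0]);
  printf("ok=%d, %zu -> %zu bytes\n", ok, input_size, encoded_size);
  return ok ? 0 : 1;
}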

137
enc/backward_references.cc Normal file

@@ -0,0 +1,137 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Function to find backward reference copies.
#include "./backward_references.h"
#include <algorithm>
#include <vector>
#include "./command.h"
#include "./hash.h"
#include "./literal_cost.h"
namespace brotli {
void CreateBackwardReferences(const uint8_t* data,
int length,
std::vector<Command>* commands) {
HashLongestMatch<13,11> *hasher = new HashLongestMatch<13,11>;
float *literal_cost = new float[length];
EstimateBitCostsForLiterals(length, data, literal_cost);
hasher->SetLiteralCost(literal_cost);
// Length heuristic that seems to help, probably by allowing better selection
// between lazy matches of similar lengths.
int insert_length = 0;
size_t i = 0;
double average_cost = 0.0;
for (int i = 0; i < length; ++i) {
average_cost += literal_cost[i];
}
average_cost /= length;
hasher->set_average_cost(average_cost);
while (i + 2 < length) {
size_t best_len = 0;
size_t best_dist = 0;
double best_score = 0;
const size_t max_distance = std::min(i, 1UL << 24);
hasher->set_insert_length(insert_length);
bool match_found = hasher->FindLongestMatch(
data, i, length - i, max_distance,
&best_len, &best_dist, &best_score);
if (match_found) {
// Found a match. Let's look for something even better ahead.
int delayed_backward_references_in_row = 0;
while (i + 4 < length &&
delayed_backward_references_in_row < 4) {
size_t best_len_2 = 0;
size_t best_dist_2 = 0;
double best_score_2 = 0;
hasher->Store(data + i, i);
match_found = hasher->FindLongestMatch(
data, i + 1, length - i - 1, max_distance,
&best_len_2, &best_dist_2, &best_score_2);
double cost_diff_lazy = 0;
if (best_len >= 4) {
cost_diff_lazy += hasher->literal_cost(i + 4) - average_cost;
}
{
const int tail_length = best_len_2 - best_len + 1;
for (int k = 0; k < tail_length; ++k) {
cost_diff_lazy -= hasher->literal_cost(i + best_len + k) -
average_cost;
}
}
// If we are not already inserting any symbols, starting an insertion here is
// more expensive than extending an insertion that is already in progress.
if (insert_length < 1) {
cost_diff_lazy += 1.0;
}
// Add bias to slightly avoid lazy matching.
cost_diff_lazy += 2.0 + delayed_backward_references_in_row * 0.2;
cost_diff_lazy += 0.04 * hasher->literal_cost(i);
if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
// Ok, let's just write one byte for now and start a match from the
// next byte.
++insert_length;
++delayed_backward_references_in_row;
best_len = best_len_2;
best_dist = best_dist_2;
best_score = best_score_2;
i++;
} else {
break;
}
}
Command cmd;
cmd.insert_length_ = insert_length;
cmd.copy_length_ = best_len;
cmd.copy_distance_ = best_dist;
commands->push_back(cmd);
hasher->set_last_distance(best_dist);
insert_length = 0;
++i;
for (int j = 1; j < best_len; ++j) {
if (i + 2 < length) {
hasher->Store(data + i, i);
}
++i;
}
} else {
++insert_length;
hasher->Store(data + i, i);
++i;
}
}
insert_length += (length - i);
if (insert_length > 0) {
Command cmd;
cmd.insert_length_ = insert_length;
cmd.copy_length_ = 0;
cmd.copy_distance_ = 0;
commands->push_back(cmd);
}
delete[] literal_cost;
delete hasher;
}
} // namespace brotli
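
As an illustration (not part of the commit), a toy, self-contained version of the lazy-matching decision made in the loop above: a match found one byte later is taken only if its score beats the current match by more than the cost of emitting one extra literal. The scores and costs below are invented and are not brotli's actual metric.

#include <cstdio>

struct ToyMatch { int len; double score; };

// Delay the match by one byte only if the later match pays for the extra
// literal that delaying forces us to insert.
static bool PreferLazyMatch(const ToyMatch& now, const ToyMatch& next,
                            double extra_literal_cost) {
  return next.score >= now.score + extra_literal_cost;
}

int main() {
  ToyMatch at_i  = {5, 30.0};             // match starting at the current byte
  ToyMatch at_i1 = {7, 38.0};             // longer match starting one byte later
  const double extra_literal_cost = 6.0;  // made-up per-literal cost
  if (PreferLazyMatch(at_i, at_i1, extra_literal_cost)) {
    printf("emit one literal, then take the length-%d match\n", at_i1.len);
  } else {
    printf("take the length-%d match now\n", at_i.len);
  }
  return 0;
}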

33
enc/backward_references.h Normal file

@@ -0,0 +1,33 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Function to find backward reference copies.
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
#include <stdint.h>
#include <vector>
#include "./command.h"
namespace brotli {
void CreateBackwardReferences(const uint8_t* data,
int length,
std::vector<Command>* commands);
} // namespace brotli
#endif // BROTLI_ENC_BACKWARD_REFERENCES_H_

150
enc/bit_cost.h Normal file

@@ -0,0 +1,150 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions to estimate the bit cost of Huffman trees.
#ifndef BROTLI_ENC_BIT_COST_H_
#define BROTLI_ENC_BIT_COST_H_
#include <stdint.h>
#include "./entropy_encode.h"
#include "./fast_log.h"
namespace brotli {
static const int kHuffmanExtraBits[kCodeLengthCodes] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7,
};
static inline int HuffmanTreeBitCost(const int* counts, const uint8_t* depth) {
int nbits = 0;
for (int i = 0; i < kCodeLengthCodes; ++i) {
nbits += counts[i] * (depth[i] + kHuffmanExtraBits[i]);
}
return nbits;
}
static inline int HuffmanTreeBitCost(
const Histogram<kCodeLengthCodes>& histogram,
const EntropyCode<kCodeLengthCodes>& entropy) {
return HuffmanTreeBitCost(&histogram.data_[0], &entropy.depth_[0]);
}
static inline int HuffmanBitCost(const uint8_t* depth, int length) {
int max_depth = 1;
int histogram[kCodeLengthCodes] = { 0 };
int tail_start = 0;
// compute histogram of compacted huffman tree
for (int i = 0; i < length;) {
const int value = depth[i];
if (value > max_depth) {
max_depth = value;
}
int reps = 1;
for (int k = i + 1; k < length && depth[k] == value; ++k) {
++reps;
}
i += reps;
if (value == 0) {
while (reps > 10) {
++histogram[18];
reps -= 138;
}
if (reps > 2) {
++histogram[17];
} else if (reps > 0) {
histogram[0] += reps;
}
} else {
tail_start = i;
++histogram[value];
--reps;
while (reps > 2) {
++histogram[16];
reps -= 6;
}
if (reps > 0) {
histogram[value] += reps;
}
}
}
// create huffman tree of huffman tree
uint8_t cost[kCodeLengthCodes] = { 0 };
CreateHuffmanTree(histogram, kCodeLengthCodes, 7, cost);
// account for rle extra bits
cost[16] += 2;
cost[17] += 3;
cost[18] += 7;
int tree_size = 0;
int bits = 6 + 3 * max_depth; // huffman tree of huffman tree cost
for (int i = 0; i < kCodeLengthCodes; ++i) {
bits += histogram[i] * cost[i]; // huffman tree bit cost
tree_size += histogram[i];
}
// bit cost adjustment for long trailing zero sequence
int tail_size = length - tail_start;
int tail_bits = 0;
while (tail_size >= 1) {
if (tail_size < 3) {
tail_bits += tail_size * cost[0];
tree_size -= tail_size;
break;
} else if (tail_size < 11) {
tail_bits += cost[17];
--tree_size;
break;
} else {
tail_bits += cost[18];
tail_size -= 138;
--tree_size;
}
}
if (tail_bits > 12) {
bits += ((Log2Ceiling(tree_size - 1) + 1) & ~1) + 3 - tail_bits;
}
return bits;
}
template<int kSize>
double PopulationCost(const Histogram<kSize>& histogram) {
if (histogram.total_count_ == 0) {
return 4;
}
int symbols[2] = { 0 };
int count = 0;
for (int i = 0; i < kSize && count < 3; ++i) {
if (histogram.data_[i] > 0) {
if (count < 2) symbols[count] = i;
++count;
}
}
if (count <= 2 && symbols[0] < 256 && symbols[1] < 256) {
return ((symbols[0] <= 1 ? 4 : 11) +
(count == 2 ? 8 + histogram.total_count_ : 0));
}
uint8_t depth[kSize] = { 0 };
CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth);
int bits = HuffmanBitCost(depth, kSize);
for (int i = 0; i < kSize; ++i) {
bits += histogram.data_[i] * depth[i];
}
return bits;
}
} // namespace brotli
#endif // BROTLI_ENC_BIT_COST_H_
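
For intuition (illustration only, not part of the commit): PopulationCost above charges every symbol its integer Huffman depth and adds the cost of serializing the code-length tree; the order-0 entropy computed below is the corresponding lower bound for the same histogram.

#include <cmath>
#include <cstdio>
#include <vector>

// Shannon entropy of the counts, in bits: a lower bound on what any prefix
// code (and hence PopulationCost) can achieve for this histogram.
static double EntropyBitCost(const std::vector<int>& histogram) {
  double total = 0.0;
  for (size_t i = 0; i < histogram.size(); ++i) total += histogram[i];
  double bits = 0.0;
  for (size_t i = 0; i < histogram.size(); ++i) {
    if (histogram[i] > 0) {
      bits += histogram[i] * (log(total / histogram[i]) / log(2.0));
    }
  }
  return bits;
}

int main() {
  std::vector<int> histogram(256, 0);
  histogram['a'] = 90;
  histogram['b'] = 9;
  histogram['c'] = 1;
  printf("entropy lower bound: %.1f bits for 100 symbols\n",
         EntropyBitCost(histogram));
  return 0;
}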

411
enc/block_splitter.cc Normal file

@@ -0,0 +1,411 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Block split point selection utilities.
#include "./block_splitter.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <map>
#include "./cluster.h"
#include "./command.h"
#include "./fast_log.h"
#include "./histogram.h"
namespace brotli {
static const int kMaxLiteralHistograms = 48;
static const int kMaxCommandHistograms = 50;
static const double kLiteralBlockSwitchCost = 26;
static const double kCommandBlockSwitchCost = 13.5;
static const double kDistanceBlockSwitchCost = 14.6;
static const int kLiteralStrideLength = 70;
static const int kCommandStrideLength = 40;
static const int kSymbolsPerLiteralHistogram = 550;
static const int kSymbolsPerCommandHistogram = 530;
static const int kSymbolsPerDistanceHistogram = 550;
static const int kMinLengthForBlockSplitting = 128;
static const int kIterMulForRefining = 2;
static const int kMinItersForRefining = 100;
void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
const uint8_t* data,
std::vector<uint8_t>* literals) {
// Count how many we have.
size_t total_length = 0;
for (int i = 0; i < cmds.size(); ++i) {
total_length += cmds[i].insert_length_;
}
if (total_length == 0) {
return;
}
// Allocate.
literals->resize(total_length);
// Loop again, and copy this time.
size_t pos = 0;
size_t from_pos = 0;
for (int i = 0; i < cmds.size() && pos < total_length; ++i) {
memcpy(&(*literals)[pos], data + from_pos, cmds[i].insert_length_);
pos += cmds[i].insert_length_;
from_pos += cmds[i].insert_length_ + cmds[i].copy_length_;
}
}
void CopyCommandsToByteArray(const std::vector<Command>& cmds,
std::vector<uint16_t>* insert_and_copy_codes,
std::vector<uint8_t>* distance_prefixes) {
for (int i = 0; i < cmds.size(); ++i) {
const Command& cmd = cmds[i];
insert_and_copy_codes->push_back(cmd.command_prefix_);
if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
distance_prefixes->push_back(cmd.distance_prefix_);
}
}
}
template<int kSize>
double HistogramAddEval(const Histogram<kSize>& a,
const Histogram<kSize>& b) {
int total = a.total_count_ + b.total_count_;
double retval = total * FastLog2(total);
for (int i = 0; i < kSize; ++i) {
int count = a.data_[i] + b.data_[i];
retval -= count * FastLog2(count);
}
return retval;
}
template<typename HistogramType, typename DataType>
void InitialEntropyCodes(const DataType* data, size_t length,
int literals_per_histogram,
int max_histograms,
size_t stride,
std::vector<HistogramType>* vec) {
int total_histograms = length / literals_per_histogram + 1;
if (total_histograms > max_histograms) {
total_histograms = max_histograms;
}
unsigned int seed = 7;
int block_length = length / total_histograms;
for (int i = 0; i < total_histograms; ++i) {
int pos = length * i / total_histograms;
if (i != 0) {
pos += rand_r(&seed) % block_length;
}
if (pos + stride >= length) {
pos = length - stride - 1;
}
HistogramType histo;
histo.Add(data + pos, stride);
vec->push_back(histo);
}
}
template<typename HistogramType>
int FindClosest(const HistogramType& sample,
const std::vector<HistogramType>& vec) {
double best_distance = 1e99;
int best_ix = 0;
for (int i = 0; i < vec.size(); ++i) {
double distance = HistogramAddEval(sample, vec[i]);
if (distance < best_distance) {
best_ix = i;
best_distance = distance;
}
}
return best_ix;
}
template<typename HistogramType, typename DataType>
void RandomSample(unsigned int* seed,
const DataType* data,
size_t length,
size_t stride,
HistogramType* sample) {
size_t pos = rand_r(seed) % (length - stride);
sample->Add(data + pos, stride);
}
template<typename HistogramType, typename DataType>
void RefineEntropyCodes(const DataType* data, size_t length,
size_t stride,
std::vector<HistogramType>* vec) {
const int iters =
kIterMulForRefining * length / stride + kMinItersForRefining;
unsigned int seed = 7;
for (int iter = 0; iter < iters; ++iter) {
HistogramType sample;
RandomSample(&seed, data, length, stride, &sample);
int ix = FindClosest(sample, *vec);
(*vec)[ix].AddHistogram(sample);
}
}
inline static float BitCost(int total, int count) {
return count == 0 ? FastLog2(total) + 2 : FastLog2(total) - FastLog2(count);
}
template<typename DataType, int kSize>
void FindBlocks(const DataType* data, const size_t length,
const double block_switch_bitcost,
const std::vector<Histogram<kSize> > &vec,
uint8_t *block_id) {
if (vec.size() <= 1) {
for (int i = 0; i < length; ++i) {
block_id[i] = 0;
}
return;
}
int vecsize = vec.size();
double* insert_cost = new double[kSize * vecsize];
memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * vecsize);
for (int i = 0; i < kSize; ++i) {
for (int j = 0; j < vecsize; ++j) {
insert_cost[i * vecsize + j] =
BitCost(vec[j].total_count_, vec[j].data_[i]);
}
}
double *cost = new double[vecsize];
memset(cost, 0, sizeof(cost[0]) * vecsize);
bool* switch_signal = new bool[length * vecsize];
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * vecsize);
// After each iteration of this loop, cost[k] will contain the difference
// between the minimum cost of arriving at the current byte position using
// entropy code k, and the minimum cost of arriving at the current byte
// position. This difference is capped at the block switch cost, and if it
// reaches block switch cost, it means that when we trace back from the last
// position, we need to switch here.
for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
int ix = byte_ix * vecsize;
int insert_cost_ix = data[byte_ix] * vecsize;
double min_cost = 1e99;
for (int k = 0; k < vecsize; ++k) {
// We are coding the symbol in data[byte_ix] with entropy code k.
cost[k] += insert_cost[insert_cost_ix + k];
if (cost[k] < min_cost) {
min_cost = cost[k];
block_id[byte_ix] = k;
}
}
double block_switch_cost = block_switch_bitcost;
// More blocks for the beginning.
if (byte_ix < 2000) {
block_switch_cost *= 0.77 + 0.07 * byte_ix / 2000;
}
for (int k = 0; k < vecsize; ++k) {
cost[k] -= min_cost;
if (cost[k] >= block_switch_cost) {
cost[k] = block_switch_cost;
switch_signal[ix + k] = true;
}
}
}
// Now trace back from the last position and switch at the marked places.
int byte_ix = length - 1;
int ix = byte_ix * vecsize;
int cur_id = block_id[byte_ix];
while (byte_ix > 0) {
--byte_ix;
ix -= vecsize;
if (switch_signal[ix + cur_id]) {
cur_id = block_id[byte_ix];
}
block_id[byte_ix] = cur_id;
}
delete[] insert_cost;
delete[] cost;
delete[] switch_signal;
}
int RemapBlockIds(uint8_t* block_ids, const size_t length) {
std::map<uint8_t, uint8_t> new_id;
int next_id = 0;
for (int i = 0; i < length; ++i) {
if (new_id.find(block_ids[i]) == new_id.end()) {
new_id[block_ids[i]] = next_id;
++next_id;
}
}
for (int i = 0; i < length; ++i) {
block_ids[i] = new_id[block_ids[i]];
}
return next_id;
}
template<typename HistogramType, typename DataType>
void BuildBlockHistograms(const DataType* data, const size_t length,
uint8_t* block_ids,
std::vector<HistogramType>* histograms) {
int num_types = RemapBlockIds(block_ids, length);
histograms->clear();
histograms->resize(num_types);
for (int i = 0; i < length; ++i) {
(*histograms)[block_ids[i]].Add(data[i]);
}
}
template<typename HistogramType, typename DataType>
void ClusterBlocks(const DataType* data, const size_t length,
uint8_t* block_ids) {
std::vector<HistogramType> histograms;
std::vector<int> block_index(length);
int cur_idx = 0;
HistogramType cur_histogram;
for (int i = 0; i < length; ++i) {
bool block_boundary = (i + 1 == length || block_ids[i] != block_ids[i + 1]);
block_index[i] = cur_idx;
cur_histogram.Add(data[i]);
if (block_boundary) {
histograms.push_back(cur_histogram);
cur_histogram.Clear();
++cur_idx;
}
}
std::vector<HistogramType> clustered_histograms;
std::vector<int> histogram_symbols;
// Block ids need to fit in one byte and there are two ids reserved for
// indicating 'same as last' and 'last plus one'.
static const int kMaxNumberOfBlockTypes = 254;
ClusterHistograms(histograms, 1, histograms.size(),
kMaxNumberOfBlockTypes,
&clustered_histograms,
&histogram_symbols);
for (int i = 0; i < length; ++i) {
block_ids[i] = histogram_symbols[block_index[i]];
}
}
void BuildBlockSplit(const std::vector<uint8_t>& block_ids, BlockSplit* split) {
int cur_id = block_ids[0];
int cur_length = 1;
split->num_types_ = -1;
for (int i = 1; i < block_ids.size(); ++i) {
if (block_ids[i] != cur_id) {
split->types_.push_back(cur_id);
split->lengths_.push_back(cur_length);
split->num_types_ = std::max(split->num_types_, cur_id);
cur_id = block_ids[i];
cur_length = 0;
}
++cur_length;
}
split->types_.push_back(cur_id);
split->lengths_.push_back(cur_length);
split->num_types_ = std::max(split->num_types_, cur_id);
++split->num_types_;
}
template<typename HistogramType, typename DataType>
void SplitByteVector(const std::vector<DataType>& data,
const int literals_per_histogram,
const int max_histograms,
const int sampling_stride_length,
const double block_switch_cost,
BlockSplit* split) {
if (data.empty()) {
split->num_types_ = 0;
return;
} else if (data.size() < kMinLengthForBlockSplitting) {
split->num_types_ = 1;
split->types_.push_back(0);
split->lengths_.push_back(data.size());
return;
}
std::vector<HistogramType> histograms;
// Find good entropy codes.
InitialEntropyCodes(data.data(), data.size(),
literals_per_histogram,
max_histograms,
sampling_stride_length,
&histograms);
RefineEntropyCodes(data.data(), data.size(),
sampling_stride_length,
&histograms);
// Find a good path through literals with the good entropy codes.
std::vector<uint8_t> block_ids(data.size());
for (int i = 0; i < 10; ++i) {
FindBlocks(data.data(), data.size(),
block_switch_cost,
histograms,
&block_ids[0]);
BuildBlockHistograms(data.data(), data.size(), &block_ids[0], &histograms);
}
ClusterBlocks<HistogramType>(data.data(), data.size(), &block_ids[0]);
BuildBlockSplit(block_ids, split);
}
void SplitBlock(const std::vector<Command>& cmds,
const uint8_t* data,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split) {
// Create a continuous array of literals.
std::vector<uint8_t> literals;
CopyLiteralsToByteArray(cmds, data, &literals);
// Compute prefix codes for commands.
std::vector<uint16_t> insert_and_copy_codes;
std::vector<uint8_t> distance_prefixes;
CopyCommandsToByteArray(cmds,
&insert_and_copy_codes,
&distance_prefixes);
SplitByteVector<HistogramLiteral>(
literals,
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
kLiteralStrideLength, kLiteralBlockSwitchCost,
literal_split);
SplitByteVector<HistogramCommand>(
insert_and_copy_codes,
kSymbolsPerCommandHistogram, kMaxCommandHistograms,
kCommandStrideLength, kCommandBlockSwitchCost,
insert_and_copy_split);
SplitByteVector<HistogramDistance>(
distance_prefixes,
kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
kCommandStrideLength, kDistanceBlockSwitchCost,
dist_split);
}
void SplitBlockByTotalLength(const std::vector<Command>& all_commands,
int input_size,
int target_length,
std::vector<std::vector<Command> >* blocks) {
int num_blocks = input_size / target_length + 1;
int length_limit = input_size / num_blocks + 1;
int total_length = 0;
std::vector<Command> cur_block;
for (int i = 0; i < all_commands.size(); ++i) {
const Command& cmd = all_commands[i];
int cmd_length = cmd.insert_length_ + cmd.copy_length_;
if (total_length > length_limit) {
blocks->push_back(cur_block);
cur_block.clear();
total_length = 0;
}
cur_block.push_back(cmd);
total_length += cmd_length;
}
blocks->push_back(cur_block);
}
} // namespace brotli
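
As an illustration (not part of the commit), a toy standalone version of the dynamic program described in the comment inside FindBlocks: two made-up entropy codes, a flat block switch cost, the same cap-the-cost-difference bookkeeping, and the same traceback.

#include <cstdio>
#include <vector>

int main() {
  // Two hypothetical entropy codes: code 0 is cheap for symbol 0 and
  // code 1 is cheap for symbol 1. All costs are invented.
  const double insert_cost[2][2] = { {1.0, 4.0}, {4.0, 1.0} };
  const double switch_cost = 5.0;
  const int data[] = {0,0,0,0, 1,1,1,1, 0,0,0,0};
  const int n = sizeof(data) / sizeof(data[0]);
  double cost[2] = {0.0, 0.0};
  std::vector<int> block_id(n);
  std::vector<bool> switch_signal(n * 2, false);
  // Forward pass: cost[k] holds the capped difference between arriving at the
  // current position with code k and the cheapest way of arriving here.
  for (int i = 0; i < n; ++i) {
    double min_cost = 1e99;
    for (int k = 0; k < 2; ++k) {
      cost[k] += insert_cost[k][data[i]];
      if (cost[k] < min_cost) { min_cost = cost[k]; block_id[i] = k; }
    }
    for (int k = 0; k < 2; ++k) {
      cost[k] -= min_cost;
      if (cost[k] >= switch_cost) {
        cost[k] = switch_cost;
        switch_signal[i * 2 + k] = true;  // a traceback through k switches here
      }
    }
  }
  // Trace back from the end, switching entropy codes where the signal is set.
  int cur_id = block_id[n - 1];
  for (int i = n - 2; i >= 0; --i) {
    if (switch_signal[i * 2 + cur_id]) cur_id = block_id[i];
    block_id[i] = cur_id;
  }
  for (int i = 0; i < n; ++i) printf("%d", block_id[i]);  // prints 000011110000
  printf("\n");
  return 0;
}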

77
enc/block_splitter.h Normal file

@@ -0,0 +1,77 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Block split point selection utilities.
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
#define BROTLI_ENC_BLOCK_SPLITTER_H_
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <vector>
#include <utility>
#include "./command.h"
namespace brotli {
struct BlockSplit {
int num_types_;
std::vector<uint8_t> types_;
std::vector<uint8_t> type_codes_;
std::vector<int> lengths_;
};
struct BlockSplitIterator {
explicit BlockSplitIterator(const BlockSplit& split)
: split_(split), idx_(0), type_(0), length_(0) {
if (!split.lengths_.empty()) {
length_ = split.lengths_[0];
}
}
void Next() {
if (length_ == 0) {
++idx_;
type_ = split_.types_[idx_];
length_ = split_.lengths_[idx_];
}
--length_;
}
const BlockSplit& split_;
int idx_;
int type_;
int length_;
};
void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
const uint8_t* data,
std::vector<uint8_t>* literals);
void SplitBlock(const std::vector<Command>& cmds,
const uint8_t* data,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split);
void SplitBlockByTotalLength(const std::vector<Command>& all_commands,
int input_size,
int target_length,
std::vector<std::vector<Command> >* blocks);
} // namespace brotli
#endif // BROTLI_ENC_BLOCK_SPLITTER_H_
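
A usage sketch (not part of the commit, assuming it is compiled from the enc/ directory): advance a BlockSplitIterator once per symbol and read type_ to find the block type that symbol belongs to. The split below is built by hand purely for illustration.

#include <cstdio>
#include "./block_splitter.h"

int main() {
  brotli::BlockSplit split;
  split.num_types_ = 2;
  split.types_.push_back(0);
  split.lengths_.push_back(3);   // first three symbols use block type 0
  split.types_.push_back(1);
  split.lengths_.push_back(2);   // next two symbols use block type 1
  brotli::BlockSplitIterator it(split);
  for (int i = 0; i < 5; ++i) {
    it.Next();
    printf("symbol %d -> block type %d\n", i, it.type_);
  }
  return 0;
}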

288
enc/cluster.h Normal file

@@ -0,0 +1,288 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions for clustering similar histograms together.
#ifndef BROTLI_ENC_CLUSTER_H_
#define BROTLI_ENC_CLUSTER_H_
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <complex>
#include <map>
#include <set>
#include <utility>
#include <vector>
#include "./bit_cost.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./histogram.h"
namespace brotli {
struct HistogramPair {
int idx1;
int idx2;
bool valid;
double cost_combo;
double cost_diff;
};
struct HistogramPairComparator {
bool operator()(const HistogramPair& p1, const HistogramPair& p2) {
if (p1.cost_diff != p2.cost_diff) {
return p1.cost_diff > p2.cost_diff;
}
return abs(p1.idx1 - p1.idx2) > abs(p2.idx1 - p2.idx2);
}
};
// Returns entropy reduction of the context map when we combine two clusters.
inline double ClusterCostDiff(int size_a, int size_b) {
int size_c = size_a + size_b;
return size_a * FastLog2(size_a) + size_b * FastLog2(size_b) -
size_c * FastLog2(size_c);
}
// Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
// it is below a threshold, stores the pair (idx1, idx2) in the *pairs heap.
template<int kSize>
void CompareAndPushToHeap(const Histogram<kSize>* out,
const int* cluster_size,
int idx1, int idx2,
std::vector<HistogramPair>* pairs) {
if (idx1 == idx2) {
return;
}
if (idx2 < idx1) {
int t = idx2;
idx2 = idx1;
idx1 = t;
}
bool store_pair = false;
HistogramPair p;
p.idx1 = idx1;
p.idx2 = idx2;
p.valid = true;
p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
p.cost_diff -= out[idx1].bit_cost_;
p.cost_diff -= out[idx2].bit_cost_;
if (out[idx1].total_count_ == 0) {
p.cost_combo = out[idx2].bit_cost_;
store_pair = true;
} else if (out[idx2].total_count_ == 0) {
p.cost_combo = out[idx1].bit_cost_;
store_pair = true;
} else {
double threshold = pairs->empty() ? 1e99 :
std::max(0.0, (*pairs)[0].cost_diff);
Histogram<kSize> combo = out[idx1];
combo.AddHistogram(out[idx2]);
double cost_combo = PopulationCost(combo);
if (cost_combo < threshold - p.cost_diff) {
p.cost_combo = cost_combo;
store_pair = true;
}
}
if (store_pair) {
p.cost_diff += p.cost_combo;
pairs->push_back(p);
push_heap(pairs->begin(), pairs->end(), HistogramPairComparator());
}
}
template<int kSize>
void HistogramCombine(Histogram<kSize>* out,
int* cluster_size,
int* symbols,
int symbols_size,
int max_clusters) {
double cost_diff_threshold = 0.0;
int min_cluster_size = 1;
std::set<int> all_symbols;
std::vector<int> clusters;
for (int i = 0; i < symbols_size; ++i) {
if (all_symbols.find(symbols[i]) == all_symbols.end()) {
all_symbols.insert(symbols[i]);
clusters.push_back(symbols[i]);
}
}
// We maintain a heap of histogram pairs, ordered by the bit cost reduction.
std::vector<HistogramPair> pairs;
for (int idx1 = 0; idx1 < clusters.size(); ++idx1) {
for (int idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) {
CompareAndPushToHeap(out, cluster_size, clusters[idx1], clusters[idx2],
&pairs);
}
}
while (clusters.size() > min_cluster_size) {
if (pairs[0].cost_diff >= cost_diff_threshold) {
cost_diff_threshold = 1e99;
min_cluster_size = max_clusters;
continue;
}
// Take the best pair from the top of heap.
int best_idx1 = pairs[0].idx1;
int best_idx2 = pairs[0].idx2;
out[best_idx1].AddHistogram(out[best_idx2]);
out[best_idx1].bit_cost_ = pairs[0].cost_combo;
cluster_size[best_idx1] += cluster_size[best_idx2];
for (int i = 0; i < symbols_size; ++i) {
if (symbols[i] == best_idx2) {
symbols[i] = best_idx1;
}
}
for (int i = 0; i + 1 < clusters.size(); ++i) {
if (clusters[i] >= best_idx2) {
clusters[i] = clusters[i + 1];
}
}
clusters.pop_back();
// Invalidate pairs intersecting the just combined best pair.
for (int i = 0; i < pairs.size(); ++i) {
HistogramPair& p = pairs[i];
if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
p.idx1 == best_idx2 || p.idx2 == best_idx2) {
p.valid = false;
}
}
// Pop invalid pairs from the top of the heap.
while (!pairs.empty() && !pairs[0].valid) {
pop_heap(pairs.begin(), pairs.end(), HistogramPairComparator());
pairs.pop_back();
}
// Push new pairs formed with the combined histogram to the heap.
for (int i = 0; i < clusters.size(); ++i) {
CompareAndPushToHeap(out, cluster_size, best_idx1, clusters[i], &pairs);
}
}
}
// -----------------------------------------------------------------------------
// Histogram refinement
// The increase in bit cost when 'histogram' is merged into the cluster
// represented by 'candidate'.
template<int kSize>
double HistogramBitCostDistance(const Histogram<kSize>& histogram,
const Histogram<kSize>& candidate) {
if (histogram.total_count_ == 0) {
return 0.0;
}
Histogram<kSize> tmp = histogram;
tmp.AddHistogram(candidate);
return PopulationCost(tmp) - candidate.bit_cost_;
}
// Find the best 'out' histogram for each of the 'in' histograms.
// Note: we assume that out[]->bit_cost_ is already up-to-date.
template<int kSize>
void HistogramRemap(const Histogram<kSize>* in, int in_size,
Histogram<kSize>* out, int* symbols) {
std::set<int> all_symbols;
for (int i = 0; i < in_size; ++i) {
all_symbols.insert(symbols[i]);
}
for (int i = 0; i < in_size; ++i) {
int best_out = i == 0 ? symbols[0] : symbols[i - 1];
double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
for (std::set<int>::const_iterator k = all_symbols.begin();
k != all_symbols.end(); ++k) {
const double cur_bits = HistogramBitCostDistance(in[i], out[*k]);
if (cur_bits < best_bits) {
best_bits = cur_bits;
best_out = *k;
}
}
symbols[i] = best_out;
}
// Recompute each out based on raw and symbols.
for (std::set<int>::const_iterator k = all_symbols.begin();
k != all_symbols.end(); ++k) {
out[*k].Clear();
}
for (int i = 0; i < in_size; ++i) {
out[symbols[i]].AddHistogram(in[i]);
}
}
// Reorder histograms in *out so that the new symbols in *symbols come in
// increasing order.
template<int kSize>
void HistogramReindex(std::vector<Histogram<kSize> >* out,
std::vector<int>* symbols) {
std::vector<Histogram<kSize> > tmp(*out);
std::map<int, int> new_index;
int next_index = 0;
for (int i = 0; i < symbols->size(); ++i) {
if (new_index.find((*symbols)[i]) == new_index.end()) {
new_index[(*symbols)[i]] = next_index;
(*out)[next_index] = tmp[(*symbols)[i]];
++next_index;
}
}
out->resize(next_index);
for (int i = 0; i < symbols->size(); ++i) {
(*symbols)[i] = new_index[(*symbols)[i]];
}
}
// Clusters similar histograms in 'in' together, the selected histograms are
// placed in 'out', and for each index in 'in', *histogram_symbols will
// indicate which of the 'out' histograms is the best approximation.
template<int kSize>
void ClusterHistograms(const std::vector<Histogram<kSize> >& in,
int num_contexts, int num_blocks,
int max_histograms,
std::vector<Histogram<kSize> >* out,
std::vector<int>* histogram_symbols) {
const int in_size = num_contexts * num_blocks;
std::vector<int> cluster_size(in_size, 1);
out->resize(in_size);
histogram_symbols->resize(in_size);
for (int i = 0; i < in_size; ++i) {
(*out)[i] = in[i];
(*out)[i].bit_cost_ = PopulationCost(in[i]);
(*histogram_symbols)[i] = i;
}
// Collapse similar histograms within a block type.
if (num_contexts > 1) {
for (int i = 0; i < num_blocks; ++i) {
HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[i * num_contexts], num_contexts,
max_histograms);
}
}
// Collapse similar histograms.
HistogramCombine(&(*out)[0], &cluster_size[0],
&(*histogram_symbols)[0], in_size,
max_histograms);
// Find the optimal map from original histograms to the final ones.
HistogramRemap(&in[0], in_size, &(*out)[0], &(*histogram_symbols)[0]);
// Convert the context map to a canonical form.
HistogramReindex(out, histogram_symbols);
}
} // namespace brotli
#endif // BROTLI_ENC_CLUSTER_H_
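
For intuition (illustration only): ClusterCostDiff measures how the entropy-coded size of the context map changes when two clusters with size_a and size_b map entries are merged, size_a*log2(size_a) + size_b*log2(size_b) - (size_a+size_b)*log2(size_a+size_b), which is never positive. A standalone evaluation:

#include <cmath>
#include <cstdio>

// Same formula as ClusterCostDiff above, with log2 spelled out explicitly.
static double ToyClusterCostDiff(int size_a, int size_b) {
  const int size_c = size_a + size_b;
  return size_a * (log(static_cast<double>(size_a)) / log(2.0)) +
         size_b * (log(static_cast<double>(size_b)) / log(2.0)) -
         size_c * (log(static_cast<double>(size_c)) / log(2.0));
}

int main() {
  // Merging two clusters of 8 entries each saves one bit per entry: -16 bits.
  printf("8 + 8 entries: %.1f bits\n", ToyClusterCostDiff(8, 8));
  printf("15 + 1 entries: %.1f bits\n", ToyClusterCostDiff(15, 1));
  return 0;
}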

45
enc/command.h Normal file

@@ -0,0 +1,45 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This class models a sequence of literals and a backward reference copy.
#ifndef BROTLI_ENC_COMMAND_H_
#define BROTLI_ENC_COMMAND_H_
#include <stdint.h>
namespace brotli {
// Command holds a sequence of literals and a backward reference copy.
class Command {
public:
Command() : insert_length_(0), copy_length_(0),
copy_distance_(0), distance_code_(0),
distance_prefix_(0), command_prefix_(0),
distance_extra_bits_(0), distance_extra_bits_value_(0) {}
uint32_t insert_length_;
uint32_t copy_length_;
uint32_t copy_distance_;
// Values <= 16 are short codes, values > 16 are distances shifted by 16.
uint32_t distance_code_;
uint16_t distance_prefix_;
uint16_t command_prefix_;
int distance_extra_bits_;
uint32_t distance_extra_bits_value_;
};
} // namespace brotli
#endif // BROTLI_ENC_COMMAND_H_
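
A small sketch (illustration only) of the distance_code_ convention noted in the struct: codes 1..16 refer to recently used distances (see ComputeDistanceShortCodes in enc/encode.cc later in this commit), and any other distance d is stored as d + 16.

#include <cstdio>
#include <stdint.h>

static uint32_t PlainDistanceToCode(uint32_t distance) {
  return distance + 16;        // values > 16: the distance shifted by 16
}

static bool IsShortCode(uint32_t distance_code) {
  return distance_code <= 16;  // values <= 16: refer to recent distances
}

int main() {
  const uint32_t code = PlainDistanceToCode(1000);
  printf("distance 1000 -> code %u, short=%d\n", code, IsShortCode(code) ? 1 : 0);
  printf("code 3 is a short code: %d\n", IsShortCode(3) ? 1 : 0);
  return 0;
}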

130
enc/context.h Normal file

@@ -0,0 +1,130 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions to map previous bytes into a context id.
#ifndef BROTLI_ENC_CONTEXT_H_
#define BROTLI_ENC_CONTEXT_H_
#include <stdint.h>
namespace brotli {
static const int kSigned2BitContextLookup[] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
};
static const int kSigned3BitContextLookup[] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
};
static const int kSigned4BitContextLookup[] = {
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 14, 15,
};
enum ContextType {
CONTEXT_NONE = 0,
CONTEXT_FULL = 1,
CONTEXT_MSB7 = 2,
CONTEXT_MSB6 = 3,
CONTEXT_MSB5 = 4,
CONTEXT_MSB4 = 5,
CONTEXT_MSB3 = 6,
CONTEXT_MSB2 = 7,
CONTEXT_MSB1 = 8,
CONTEXT_IS_ZERO = 9,
CONTEXT_SIGNED_2BIT = 10,
CONTEXT_SIGNED_3BIT = 11,
CONTEXT_SIGNED_4BIT = 12,
CONTEXT_SIGNED_MIXED_3BYTE = 13,
};
static const int kContextSize[] = {
1, 256, 128, 64, 32, 16, 8, 4, 2, 2, 4, 8, 16, 64,
};
static inline int NumContexts(int mode) {
return kContextSize[mode];
}
static inline uint8_t Context(uint8_t prev_byte, uint8_t prev_byte2,
uint8_t prev_byte3, int mode) {
switch (mode) {
case CONTEXT_NONE:
return 0;
case CONTEXT_IS_ZERO:
return prev_byte == 0 ? 0 : 1;
case CONTEXT_SIGNED_2BIT:
return kSigned2BitContextLookup[prev_byte];
case CONTEXT_SIGNED_3BIT:
return kSigned3BitContextLookup[prev_byte];
case CONTEXT_SIGNED_4BIT:
return kSigned4BitContextLookup[prev_byte];
case CONTEXT_SIGNED_MIXED_3BYTE:
return ((kSigned3BitContextLookup[prev_byte] << 3) +
(kSigned2BitContextLookup[prev_byte2] << 1) +
(prev_byte3 == 0 ? 0 : 1));
default:
return prev_byte >> (mode - 1);
}
}
} // namespace brotli
#endif // BROTLI_ENC_CONTEXT_H_
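
A usage sketch (illustration only, assuming it is compiled from the enc/ directory): the id returned by Context(), together with the block type, selects which literal histogram a byte is coded with (see literal_context_map in enc/encode.cc).

#include <cstdio>
#include "./context.h"

int main() {
  using namespace brotli;
  printf("MSB6 mode has %d contexts\n", NumContexts(CONTEXT_MSB6));
  // The MSB modes keep only the high bits of the previous byte.
  printf("context after 'e' in MSB6 mode: %d\n",
         Context('e', 0, 0, CONTEXT_MSB6));
  printf("context after a zero byte in IS_ZERO mode: %d\n",
         Context(0, 0, 0, CONTEXT_IS_ZERO));
  // The mixed mode combines lookups of the last three bytes.
  printf("mixed 3-byte context after \"abc\": %d\n",
         Context('c', 'b', 'a', CONTEXT_SIGNED_MIXED_3BYTE));
  return 0;
}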

778
enc/encode.cc Normal file

@@ -0,0 +1,778 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Implementation of Brotli compressor.
#include "./encode.h"
#include <algorithm>
#include <limits>
#include "./backward_references.h"
#include "./bit_cost.h"
#include "./block_splitter.h"
#include "./cluster.h"
#include "./context.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./histogram.h"
#include "./prefix.h"
#include "./write_bits.h"
namespace brotli {
template<int kSize>
double Entropy(const std::vector<Histogram<kSize> >& histograms) {
double retval = 0;
for (int i = 0; i < histograms.size(); ++i) {
retval += histograms[i].EntropyBitCost();
}
return retval;
}
void EncodeSize(size_t len, int* storage_ix, uint8_t* storage) {
std::vector<uint8_t> len_bytes;
while (len > 0) {
len_bytes.push_back(len & 0xff);
len >>= 8;
};
WriteBits(3, len_bytes.size(), storage_ix, storage);
for (int i = 0; i < len_bytes.size(); ++i) {
WriteBits(8, len_bytes[i], storage_ix, storage);
}
}
void EncodeMetaBlockLength(int input_size_bits,
size_t meta_block_size,
bool is_last_meta_block,
int* storage_ix, uint8_t* storage) {
WriteBits(1, is_last_meta_block, storage_ix, storage);
if (is_last_meta_block) return;
while (input_size_bits > 0) {
WriteBits(8, meta_block_size & 0xff, storage_ix, storage);
meta_block_size >>= 8;
input_size_bits -= 8;
}
if (input_size_bits > 0) {
WriteBits(input_size_bits, meta_block_size, storage_ix, storage);
}
}
template<int kSize>
void EntropyEncode(int val, const EntropyCode<kSize>& code,
int* storage_ix, uint8_t* storage) {
if (code.count_ <= 1) {
return;
};
WriteBits(code.depth_[val], code.bits_[val], storage_ix, storage);
}
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
const uint8_t* code_length_bitdepth,
int* storage_ix, uint8_t* storage) {
static const uint8_t kStorageOrder[kCodeLengthCodes] = {
17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
// Throw away trailing zeros:
int codes_to_store = kCodeLengthCodes;
for (; codes_to_store > 4; --codes_to_store) {
if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
break;
}
}
WriteBits(4, codes_to_store - 4, storage_ix, storage);
for (int i = 0; i < codes_to_store; ++i) {
WriteBits(3, code_length_bitdepth[kStorageOrder[i]], storage_ix, storage);
}
}
void StoreHuffmanTreeToBitMask(
const uint8_t* huffman_tree,
const uint8_t* huffman_tree_extra_bits,
const int huffman_tree_size,
const EntropyCode<kCodeLengthCodes>& entropy,
int* storage_ix, uint8_t* storage) {
for (int i = 0; i < huffman_tree_size; ++i) {
const int ix = huffman_tree[i];
const int extra_bits = huffman_tree_extra_bits[i];
EntropyEncode(ix, entropy, storage_ix, storage);
switch (ix) {
case 16:
WriteBits(2, extra_bits, storage_ix, storage);
break;
case 17:
WriteBits(3, extra_bits, storage_ix, storage);
break;
case 18:
WriteBits(7, extra_bits, storage_ix, storage);
break;
}
}
}
template<int kSize>
void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
int* storage_ix, uint8_t* storage) {
const int kMaxBits = 8;
const int kMaxSymbol = 1 << kMaxBits;
if (code.count_ == 0) { // emit minimal tree for empty cases
// bits: small tree marker: 1, count-1: 0, large 8-bit code: 0, code: 0
WriteBits(4, 0x01, storage_ix, storage);
return;
}
if (code.count_ <= 2 &&
code.symbols_[0] < kMaxSymbol &&
code.symbols_[1] < kMaxSymbol) {
// Small tree marker to encode 1 or 2 symbols.
WriteBits(1, 1, storage_ix, storage);
WriteBits(1, code.count_ - 1, storage_ix, storage);
if (code.symbols_[0] <= 1) {
// Code bit for small (1 bit) symbol value.
WriteBits(1, 0, storage_ix, storage);
WriteBits(1, code.symbols_[0], storage_ix, storage);
} else {
WriteBits(1, 1, storage_ix, storage);
WriteBits(8, code.symbols_[0], storage_ix, storage);
}
if (code.count_ == 2) {
WriteBits(8, code.symbols_[1], storage_ix, storage);
}
return;
}
WriteBits(1, 0, storage_ix, storage);
uint8_t huffman_tree[kSize];
uint8_t huffman_tree_extra_bits[kSize];
int huffman_tree_size = 0;
WriteHuffmanTree(&code.depth_[0],
alphabet_size,
&huffman_tree[0],
&huffman_tree_extra_bits[0],
&huffman_tree_size);
Histogram<kCodeLengthCodes> huffman_tree_histogram;
memset(huffman_tree_histogram.data_, 0, sizeof(huffman_tree_histogram.data_));
for (int i = 0; i < huffman_tree_size; ++i) {
huffman_tree_histogram.Add(huffman_tree[i]);
}
EntropyCode<kCodeLengthCodes> huffman_tree_entropy;
BuildEntropyCode(huffman_tree_histogram, 7, kCodeLengthCodes,
&huffman_tree_entropy);
Histogram<kCodeLengthCodes> trimmed_histogram = huffman_tree_histogram;
uint8_t* last_code = &huffman_tree[huffman_tree_size - 1];
while (*last_code == 0 || *last_code >= 17) {
trimmed_histogram.Remove(*last_code--);
}
int trimmed_size = trimmed_histogram.total_count_;
bool write_length = false;
if (trimmed_size > 1 && trimmed_size < huffman_tree_size) {
EntropyCode<kCodeLengthCodes> trimmed_entropy;
BuildEntropyCode(trimmed_histogram, 7, kCodeLengthCodes, &trimmed_entropy);
int huffman_bit_cost = HuffmanTreeBitCost(huffman_tree_histogram,
huffman_tree_entropy);
int trimmed_bit_cost = HuffmanTreeBitCost(trimmed_histogram,
trimmed_entropy);
const int nbits = Log2Ceiling(trimmed_size - 1);
const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
if (trimmed_bit_cost + 3 + 2 * nbitpairs < huffman_bit_cost) {
write_length = true;
huffman_tree_size = trimmed_size;
huffman_tree_entropy = trimmed_entropy;
}
}
StoreHuffmanTreeOfHuffmanTreeToBitMask(
&huffman_tree_entropy.depth_[0], storage_ix, storage);
WriteBits(1, write_length, storage_ix, storage);
if (write_length) {
const int nbits = Log2Ceiling(huffman_tree_size - 1);
const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
WriteBits(3, nbitpairs - 1, storage_ix, storage);
WriteBits(nbitpairs * 2, huffman_tree_size - 2, storage_ix, storage);
}
StoreHuffmanTreeToBitMask(&huffman_tree[0], &huffman_tree_extra_bits[0],
huffman_tree_size, huffman_tree_entropy,
storage_ix, storage);
}
template<int kSize>
void StoreHuffmanCodes(const std::vector<EntropyCode<kSize> >& codes,
int alphabet_size,
int* storage_ix, uint8_t* storage) {
for (int i = 0; i < codes.size(); ++i) {
StoreHuffmanCode(codes[i], alphabet_size, storage_ix, storage);
}
}
void EncodeCommand(const Command& cmd,
const EntropyCodeCommand& entropy,
int* storage_ix, uint8_t* storage) {
int code = cmd.command_prefix_;
EntropyEncode(code, entropy, storage_ix, storage);
if (code >= 128) {
code -= 128;
}
int insert_extra_bits = InsertLengthExtraBits(code);
uint64_t insert_extra_bits_val =
cmd.insert_length_ - InsertLengthOffset(code);
int copy_extra_bits = CopyLengthExtraBits(code);
uint64_t copy_extra_bits_val = cmd.copy_length_ - CopyLengthOffset(code);
if (insert_extra_bits > 0) {
WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage);
}
if (copy_extra_bits > 0) {
WriteBits(copy_extra_bits, copy_extra_bits_val, storage_ix, storage);
}
}
void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
int* storage_ix, uint8_t* storage) {
int code = cmd.distance_prefix_;
int extra_bits = cmd.distance_extra_bits_;
uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
EntropyEncode(code, entropy, storage_ix, storage);
if (extra_bits > 0) {
WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
}
}
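// Assigns a distance code to each command: codes 1..16 refer to small
// variations of the last four distances kept in dist_ringbuffer (the
// combination is picked via kIndexOffset/kValueOffset below), and any other
// distance d is coded as d + 16.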
void ComputeDistanceShortCodes(std::vector<Command>* cmds) {
static const int kIndexOffset[16] = {
3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
};
static const int kValueOffset[16] = {
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
};
int dist_ringbuffer[4] = { 4, 11, 15, 16 };
int ringbuffer_idx = 0;
for (int i = 0; i < cmds->size(); ++i) {
int cur_dist = (*cmds)[i].copy_distance_;
if (cur_dist == 0) break;
int dist_code = cur_dist + 16;
for (int k = 0; k < 16; ++k) {
// Only accept more popular choices.
if (cur_dist < 11 && ((k >= 2 && k < 4) || k >= 6)) {
// Typically unpopular ranges, don't replace a short distance
// with them.
continue;
}
int comp = (dist_ringbuffer[(ringbuffer_idx + kIndexOffset[k]) & 3] +
kValueOffset[k]);
if (cur_dist == comp) {
dist_code = k + 1;
break;
}
}
if (dist_code > 1) {
dist_ringbuffer[ringbuffer_idx & 3] = cur_dist;
++ringbuffer_idx;
}
(*cmds)[i].distance_code_ = dist_code;
}
}
void ComputeCommandPrefixes(std::vector<Command>* cmds,
int num_direct_distance_codes,
int distance_postfix_bits) {
for (int i = 0; i < cmds->size(); ++i) {
Command* cmd = &(*cmds)[i];
cmd->command_prefix_ = CommandPrefix(cmd->insert_length_,
cmd->copy_length_);
if (cmd->copy_length_ > 0) {
PrefixEncodeCopyDistance(cmd->distance_code_,
num_direct_distance_codes,
distance_postfix_bits,
&cmd->distance_prefix_,
&cmd->distance_extra_bits_,
&cmd->distance_extra_bits_value_);
}
if (cmd->command_prefix_ < 128 && cmd->distance_prefix_ == 0) {
cmd->distance_prefix_ = 0xffff;
} else {
cmd->command_prefix_ += 128;
}
}
}
int IndexOf(const std::vector<int>& v, int value) {
for (int i = 0; i < v.size(); ++i) {
if (v[i] == value) return i;
}
return -1;
}
void MoveToFront(std::vector<int>* v, int index) {
int value = (*v)[index];
for (int i = index; i > 0; --i) {
(*v)[i] = (*v)[i - 1];
}
(*v)[0] = value;
}
std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
if (v.empty()) return v;
std::vector<int> mtf(*max_element(v.begin(), v.end()) + 1);
for (int i = 0; i < mtf.size(); ++i) mtf[i] = i;
std::vector<int> result(v.size());
for (int i = 0; i < v.size(); ++i) {
int index = IndexOf(mtf, v[i]);
result[i] = index;
MoveToFront(&mtf, index);
}
return result;
}
// Finds runs of zeros in v_in and replaces them with a prefix code of the run
// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
// shifted by *max_run_length_prefix. Will not create prefix codes bigger than the
// initial value of *max_run_length_prefix. The prefix code of run length L is
// simply Log2Floor(L) and the number of extra bits is the same as the prefix
// code.
void RunLengthCodeZeros(const std::vector<int>& v_in,
int* max_run_length_prefix,
std::vector<int>* v_out,
std::vector<int>* extra_bits) {
int max_reps = 0;
for (int i = 0; i < v_in.size();) {
for (; i < v_in.size() && v_in[i] != 0; ++i) ;
int reps = 0;
for (; i < v_in.size() && v_in[i] == 0; ++i) {
++reps;
}
max_reps = std::max(reps, max_reps);
}
int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
*max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
for (int i = 0; i < v_in.size();) {
if (v_in[i] != 0) {
v_out->push_back(v_in[i] + *max_run_length_prefix);
extra_bits->push_back(0);
++i;
} else {
int reps = 1;
for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
++reps;
}
i += reps;
while (reps) {
if (reps < (2 << *max_run_length_prefix)) {
int run_length_prefix = Log2Floor(reps);
v_out->push_back(run_length_prefix);
extra_bits->push_back(reps - (1 << run_length_prefix));
break;
} else {
v_out->push_back(*max_run_length_prefix);
extra_bits->push_back((1 << *max_run_length_prefix) - 1);
reps -= (2 << *max_run_length_prefix) - 1;
}
}
}
}
}
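// Worked example (illustration only): with *max_run_length_prefix passed in
// as 6 and v_in = {5, 0,0,0,0,0,0,0, 9} (one run of 7 zeros), the longest run
// caps the prefix at Log2Floor(7) = 2, so on return
//   *max_run_length_prefix = 2
//   *v_out      = {5 + 2, 2, 9 + 2}  // non-zeros shifted by the final prefix
//   *extra_bits = {0, 3, 0}          // 7 zeros -> prefix 2 plus 7 - 4 = 3 extra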
// Returns a maximum zero-run-length-prefix value such that run-length coding
// zeros in v with this maximum prefix value and then encoding the resulting
// histogram and entropy-coding v produces the least amount of bits.
int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
int min_cost = std::numeric_limits<int>::max();
int best_max_prefix = 0;
for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) {
std::vector<int> rle_symbols;
std::vector<int> extra_bits;
int max_run_length_prefix = max_prefix;
RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits);
if (max_run_length_prefix < max_prefix) break;
HistogramLiteral histogram;
for (int i = 0; i < rle_symbols.size(); ++i) {
histogram.Add(rle_symbols[i]);
}
int bit_cost = PopulationCost(histogram);
if (max_prefix > 0) {
bit_cost += 4;
}
for (int i = 1; i <= max_prefix; ++i) {
bit_cost += histogram.data_[i] * i; // extra bits
}
if (bit_cost < min_cost) {
min_cost = bit_cost;
best_max_prefix = max_prefix;
}
}
return best_max_prefix;
}
void EncodeContextMap(const std::vector<int>& context_map,
int context_mode,
int context_mode_bits,
int num_clusters,
int* storage_ix, uint8_t* storage) {
if (context_mode == 0) {
WriteBits(1, 0, storage_ix, storage); // no context
return;
}
WriteBits(1, 1, storage_ix, storage); // have context
if (context_mode_bits > 0) {
WriteBits(context_mode_bits, context_mode - 1, storage_ix, storage);
}
WriteBits(8, num_clusters - 1, storage_ix, storage);
if (num_clusters == 1 || num_clusters == context_map.size()) {
return;
}
std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
std::vector<int> rle_symbols;
std::vector<int> extra_bits;
int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
&rle_symbols, &extra_bits);
HistogramLiteral symbol_histogram;
for (int i = 0; i < rle_symbols.size(); ++i) {
symbol_histogram.Add(rle_symbols[i]);
}
EntropyCodeLiteral symbol_code;
BuildEntropyCode(symbol_histogram, 15, num_clusters + max_run_length_prefix,
&symbol_code);
bool use_rle = max_run_length_prefix > 0;
WriteBits(1, use_rle, storage_ix, storage);
if (use_rle) {
WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
}
StoreHuffmanCode(symbol_code, num_clusters + max_run_length_prefix,
storage_ix, storage);
for (int i = 0; i < rle_symbols.size(); ++i) {
EntropyEncode(rle_symbols[i], symbol_code, storage_ix, storage);
if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
}
}
WriteBits(1, 1, storage_ix, storage); // use move-to-front
}
template<int kSize>
void BuildEntropyCodes(const std::vector<Histogram<kSize> >& histograms,
int alphabet_size,
std::vector<EntropyCode<kSize> >* entropy_codes) {
entropy_codes->resize(histograms.size());
for (int i = 0; i < histograms.size(); ++i) {
BuildEntropyCode(histograms[i], 15, alphabet_size, &(*entropy_codes)[i]);
}
}
struct BlockSplitCode {
EntropyCodeLiteral block_type_code;
EntropyCodeBlockLength block_len_code;
};
void EncodeBlockLength(const EntropyCodeBlockLength& entropy,
int length,
int* storage_ix, uint8_t* storage) {
int len_code = BlockLengthPrefix(length);
int extra_bits = BlockLengthExtraBits(len_code);
int extra_bits_value = length - BlockLengthOffset(len_code);
EntropyEncode(len_code, entropy, storage_ix, storage);
if (extra_bits > 0) {
WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
}
}
void ComputeBlockTypeShortCodes(BlockSplit* split) {
if (split->num_types_ <= 1) {
split->num_types_ = 1;
return;
}
int ringbuffer[2] = { 0, 1 };
size_t index = 0;
for (int i = 0; i < split->types_.size(); ++i) {
int type = split->types_[i];
int type_code;
if (type == ringbuffer[index & 1]) {
type_code = 0;
} else if (type == ringbuffer[(index - 1) & 1] + 1) {
type_code = 1;
} else {
type_code = type + 2;
}
ringbuffer[index & 1] = type;
++index;
split->type_codes_.push_back(type_code);
}
}
void BuildAndEncodeBlockSplitCode(const BlockSplit& split,
BlockSplitCode* code,
int* storage_ix, uint8_t* storage) {
if (split.num_types_ <= 1) {
WriteBits(1, 0, storage_ix, storage);
return;
}
WriteBits(1, 1, storage_ix, storage);
HistogramLiteral type_histo;
for (int i = 0; i < split.type_codes_.size(); ++i) {
type_histo.Add(split.type_codes_[i]);
}
BuildEntropyCode(type_histo, 15, split.num_types_ + 2,
&code->block_type_code);
HistogramBlockLength length_histo;
for (int i = 0; i < split.lengths_.size(); ++i) {
length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
}
BuildEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
&code->block_len_code);
WriteBits(8, split.num_types_ - 1, storage_ix, storage);
StoreHuffmanCode(code->block_type_code, split.num_types_ + 2,
storage_ix, storage);
StoreHuffmanCode(code->block_len_code, kNumBlockLenPrefixes,
storage_ix, storage);
EncodeBlockLength(code->block_len_code, split.lengths_[0],
storage_ix, storage);
}
void MoveAndEncode(const BlockSplitCode& code,
BlockSplitIterator* it,
int* storage_ix, uint8_t* storage) {
if (it->length_ == 0) {
++it->idx_;
it->type_ = it->split_.types_[it->idx_];
it->length_ = it->split_.lengths_[it->idx_];
uint8_t type_code = it->split_.type_codes_[it->idx_];
EntropyEncode(type_code, code.block_type_code, storage_ix, storage);
EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
}
--it->length_;
}
struct EncodingParams {
int num_direct_distance_codes;
int distance_postfix_bits;
int literal_context_mode;
int distance_context_mode;
};
struct MetaBlock {
std::vector<Command> cmds;
EncodingParams params;
BlockSplit literal_split;
BlockSplit command_split;
BlockSplit distance_split;
std::vector<int> literal_context_map;
std::vector<int> distance_context_map;
std::vector<HistogramLiteral> literal_histograms;
std::vector<HistogramCommand> command_histograms;
std::vector<HistogramDistance> distance_histograms;
};
void BuildMetaBlock(const EncodingParams& params,
const std::vector<Command>& cmds,
const uint8_t* input_buffer,
size_t pos,
MetaBlock* mb) {
mb->cmds = cmds;
mb->params = params;
ComputeCommandPrefixes(&mb->cmds,
mb->params.num_direct_distance_codes,
mb->params.distance_postfix_bits);
SplitBlock(mb->cmds,
input_buffer + pos,
&mb->literal_split,
&mb->command_split,
&mb->distance_split);
ComputeBlockTypeShortCodes(&mb->literal_split);
ComputeBlockTypeShortCodes(&mb->command_split);
ComputeBlockTypeShortCodes(&mb->distance_split);
int num_literal_contexts_per_block_type =
NumContexts(mb->params.literal_context_mode);
int num_literal_contexts =
mb->literal_split.num_types_ *
num_literal_contexts_per_block_type;
int num_distance_contexts_per_block_type =
(mb->params.distance_context_mode > 0 ? 4 : 1);
int num_distance_contexts =
mb->distance_split.num_types_ *
num_distance_contexts_per_block_type;
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
mb->command_histograms.resize(mb->command_split.num_types_);
std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
BuildHistograms(mb->cmds,
mb->literal_split,
mb->command_split,
mb->distance_split,
input_buffer,
pos,
mb->params.literal_context_mode,
mb->params.distance_context_mode,
&literal_histograms,
&mb->command_histograms,
&distance_histograms);
// Histogram ids need to fit in one byte and there are 16 ids reserved for
// run length codes, which leaves a maximum number of 240 histograms.
static const int kMaxNumberOfHistograms = 240;
mb->literal_histograms = literal_histograms;
if (mb->params.literal_context_mode > 0) {
ClusterHistograms(literal_histograms,
num_literal_contexts_per_block_type,
mb->literal_split.num_types_,
kMaxNumberOfHistograms,
&mb->literal_histograms,
&mb->literal_context_map);
}
mb->distance_histograms = distance_histograms;
if (mb->params.distance_context_mode > 0) {
ClusterHistograms(distance_histograms,
num_distance_contexts_per_block_type,
mb->distance_split.num_types_,
kMaxNumberOfHistograms,
&mb->distance_histograms,
&mb->distance_context_map);
}
}
size_t MetaBlockLength(const std::vector<Command>& cmds) {
size_t length = 0;
for (int i = 0; i < cmds.size(); ++i) {
const Command& cmd = cmds[i];
length += cmd.insert_length_ + cmd.copy_length_;
}
return length;
}
void StoreMetaBlock(const MetaBlock& mb,
const uint8_t* input_buffer,
int input_size_bits,
bool is_last,
size_t* pos,
int* storage_ix, uint8_t* storage) {
size_t length = MetaBlockLength(mb.cmds);
const size_t end_pos = *pos + length;
EncodeMetaBlockLength(input_size_bits, length - 1, is_last,
storage_ix, storage);
BlockSplitCode literal_split_code;
BlockSplitCode command_split_code;
BlockSplitCode distance_split_code;
BuildAndEncodeBlockSplitCode(mb.literal_split, &literal_split_code,
storage_ix, storage);
BuildAndEncodeBlockSplitCode(mb.command_split, &command_split_code,
storage_ix, storage);
BuildAndEncodeBlockSplitCode(mb.distance_split, &distance_split_code,
storage_ix, storage);
WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
WriteBits(4,
mb.params.num_direct_distance_codes >>
mb.params.distance_postfix_bits, storage_ix, storage);
int num_distance_codes =
kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
(48 << mb.params.distance_postfix_bits);
EncodeContextMap(mb.literal_context_map, mb.params.literal_context_mode, 4,
mb.literal_histograms.size(), storage_ix, storage);
EncodeContextMap(mb.distance_context_map, mb.params.distance_context_mode, 0,
mb.distance_histograms.size(), storage_ix, storage);
std::vector<EntropyCodeLiteral> literal_codes;
std::vector<EntropyCodeCommand> command_codes;
std::vector<EntropyCodeDistance> distance_codes;
BuildEntropyCodes(mb.literal_histograms, 256, &literal_codes);
BuildEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
&command_codes);
BuildEntropyCodes(mb.distance_histograms, num_distance_codes,
&distance_codes);
StoreHuffmanCodes(literal_codes, 256, storage_ix, storage);
StoreHuffmanCodes(command_codes, kNumCommandPrefixes, storage_ix, storage);
StoreHuffmanCodes(distance_codes, num_distance_codes, storage_ix, storage);
BlockSplitIterator literal_it(mb.literal_split);
BlockSplitIterator command_it(mb.command_split);
BlockSplitIterator distance_it(mb.distance_split);
for (int i = 0; i < mb.cmds.size(); ++i) {
const Command& cmd = mb.cmds[i];
MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
for (int j = 0; j < cmd.insert_length_; ++j) {
MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
int histogram_idx = literal_it.type_;
if (mb.params.literal_context_mode > 0) {
uint8_t prev_byte = *pos > 0 ? input_buffer[*pos - 1] : 0;
uint8_t prev_byte2 = *pos > 1 ? input_buffer[*pos - 2] : 0;
uint8_t prev_byte3 = *pos > 2 ? input_buffer[*pos - 3] : 0;
int context = (literal_it.type_ *
NumContexts(mb.params.literal_context_mode) +
Context(prev_byte, prev_byte2, prev_byte3,
mb.params.literal_context_mode));
histogram_idx = mb.literal_context_map[context];
}
EntropyEncode(input_buffer[(*pos)++],
literal_codes[histogram_idx], storage_ix, storage);
}
if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
int histogram_index = distance_it.type_;
if (mb.params.distance_context_mode > 0) {
int context = distance_it.type_ << 2;
context += (cmd.copy_length_ > 4) ? 3 : cmd.copy_length_ - 2;
histogram_index = mb.distance_context_map[context];
}
EncodeCopyDistance(cmd, distance_codes[histogram_index],
storage_ix, storage);
}
*pos += cmd.copy_length_;
}
}
int BrotliCompressBuffer(size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer) {
int storage_ix = 0;
uint8_t* storage = encoded_buffer;
WriteBitsPrepareStorage(storage_ix, storage);
EncodeSize(input_size, &storage_ix, storage);
if (input_size == 0) {
*encoded_size = (storage_ix + 7) >> 3;
return 1;
}
int input_size_bits = Log2Ceiling(input_size);
std::vector<Command> all_commands;
CreateBackwardReferences(input_buffer, input_size, &all_commands);
ComputeDistanceShortCodes(&all_commands);
std::vector<std::vector<Command> > meta_block_commands;
SplitBlockByTotalLength(all_commands, input_size, 2 << 20,
&meta_block_commands);
size_t pos = 0;
for (int block_idx = 0; block_idx < meta_block_commands.size(); ++block_idx) {
const std::vector<Command>& commands = meta_block_commands[block_idx];
bool is_last_meta_block = (block_idx + 1 == meta_block_commands.size());
EncodingParams params;
params.num_direct_distance_codes = 12;
params.distance_postfix_bits = 1;
params.literal_context_mode = CONTEXT_SIGNED_MIXED_3BYTE;
params.distance_context_mode = 1;
MetaBlock mb;
BuildMetaBlock(params, commands, input_buffer, pos, &mb);
StoreMetaBlock(mb, input_buffer, input_size_bits, is_last_meta_block,
&pos, &storage_ix, storage);
}
*encoded_size = (storage_ix + 7) >> 3;
return 1;
}
} // namespace brotli

37
enc/encode.h Normal file
View File

@ -0,0 +1,37 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// API for Brotli compression
#ifndef BROTLI_ENC_ENCODE_H_
#define BROTLI_ENC_ENCODE_H_
#include <stddef.h>
#include <stdint.h>
#include <string>
namespace brotli {
// Compresses the data in input_buffer into encoded_buffer, and sets
// *encoded_size to the compressed length.
// Returns 0 if there was an error and 1 otherwise.
int BrotliCompressBuffer(size_t input_size,
const uint8_t* input_buffer,
size_t* encoded_size,
uint8_t* encoded_buffer);
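//
// A hypothetical usage sketch (not part of this API's contract): the caller
// owns the output buffer, and the generous sizing below is only an assumed
// margin for the header plus poorly compressible data, not a documented
// worst-case bound.
//
//   std::string input = LoadInput();  // hypothetical helper
//   std::vector<uint8_t> encoded(input.size() * 2 + 1024);
//   size_t encoded_size = 0;
//   int ok = brotli::BrotliCompressBuffer(
//       input.size(), reinterpret_cast<const uint8_t*>(input.data()),
//       &encoded_size, &encoded[0]);
//   // On success (ok == 1), encoded[0 .. encoded_size) holds the stream.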
} // namespace brotli
#endif // BROTLI_ENC_ENCODE_H_

397
enc/entropy_encode.cc Normal file
View File

@ -0,0 +1,397 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Entropy encoding (Huffman) utilities.
#include "./entropy_encode.h"
#include <stdint.h>
#include <stdlib.h>
#include <algorithm>
#include <limits>
#include <vector>
#include "./histogram.h"
namespace brotli {
namespace {
struct HuffmanTree {
HuffmanTree();
HuffmanTree(int count, int16_t left, int16_t right)
: total_count_(count),
index_left_(left),
index_right_or_value_(right) {
}
int total_count_;
int16_t index_left_;
int16_t index_right_or_value_;
};
HuffmanTree::HuffmanTree() {}
// Sort the root nodes, least popular first.
bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) {
return v0.total_count_ < v1.total_count_;
}
void SetDepth(const HuffmanTree &p,
HuffmanTree *pool,
uint8_t *depth,
int level) {
if (p.index_left_ >= 0) {
++level;
SetDepth(pool[p.index_left_], pool, depth, level);
SetDepth(pool[p.index_right_or_value_], pool, depth, level);
} else {
depth[p.index_right_or_value_] = level;
}
}
} // namespace
// This function will create a Huffman tree.
//
// The catch here is that the tree cannot be arbitrarily deep.
// Brotli specifies a maximum depth of 15 bits for "code trees"
// and 7 bits for "code length code trees."
//
// count_limit is the value that is faked as the minimum population count,
// and this minimum is raised until the resulting tree meets the maximum
// depth requirement.
//
// This algorithm does not perform particularly well for very long data
// blocks, especially when population counts exceed 2**tree_limit, but we
// are not planning to use it with extremely long blocks.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const int *data,
const int length,
const int tree_limit,
uint8_t *depth) {
// For block sizes below 64 kB, we never need to do a second iteration
// of this loop. Probably all of our block sizes will be smaller than
// that, so this loop is mostly of academic interest. If we actually
// would need this, we would be better off with the Katajainen algorithm.
for (int count_limit = 1; ; count_limit *= 2) {
std::vector<HuffmanTree> tree;
tree.reserve(2 * length + 1);
for (int i = 0; i < length; ++i) {
if (data[i]) {
const int count = std::max(data[i], count_limit);
tree.push_back(HuffmanTree(count, -1, i));
}
}
const int n = tree.size();
if (n == 1) {
depth[tree[0].index_right_or_value_] = 1; // Only one element.
break;
}
std::sort(tree.begin(), tree.end(), SortHuffmanTree);
// The nodes are:
// [0, n): the sorted leaf nodes that we start with.
// [n]: we add a sentinel here.
// [n + 1, 2n): new parent nodes are added here, starting from
// (n+1). These are naturally in ascending order.
// [2n]: we add a sentinel at the end as well.
// There will be (2n+1) elements at the end.
const HuffmanTree sentinel(std::numeric_limits<int>::max(), -1, -1);
tree.push_back(sentinel);
tree.push_back(sentinel);
int i = 0; // Points to the next leaf node.
int j = n + 1; // Points to the next non-leaf node.
for (int k = n - 1; k > 0; --k) {
int left, right;
if (tree[i].total_count_ <= tree[j].total_count_) {
left = i;
++i;
} else {
left = j;
++j;
}
if (tree[i].total_count_ <= tree[j].total_count_) {
right = i;
++i;
} else {
right = j;
++j;
}
// The sentinel node becomes the parent node.
int j_end = tree.size() - 1;
tree[j_end].total_count_ =
tree[left].total_count_ + tree[right].total_count_;
tree[j_end].index_left_ = left;
tree[j_end].index_right_or_value_ = right;
// Add back the last sentinel node.
tree.push_back(sentinel);
}
SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
// We need to pack the Huffman tree in tree_limit bits.
// If this was not successful, add fake entities to the lowest values
// and retry.
if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
break;
}
}
}
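// Illustrative sketch only (not used by the encoder): builds a depth-limited
// code for a small, made-up histogram and converts the depths into canonical
// code words with ConvertBitDepthsToSymbols() from the header. With an
// unconstrained tree these counts would need a 5-bit code, so the 4-bit
// limit exercises the count_limit doubling above.
void ExampleDepthLimitedCode() {
  int counts[6] = { 50, 20, 10, 5, 1, 1 };      // hypothetical population counts
  uint8_t depth[6] = { 0 };
  uint16_t bits[6] = { 0 };
  CreateHuffmanTree(counts, 6, 4, depth);       // no code longer than 4 bits
  ConvertBitDepthsToSymbols(depth, 6, bits);    // canonical, bit-reversed codes
}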
void WriteHuffmanTreeRepetitions(
const int previous_value,
const int value,
int repetitions,
uint8_t* tree,
uint8_t* extra_bits,
int* tree_size) {
if (previous_value != value) {
tree[*tree_size] = value;
extra_bits[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
while (repetitions >= 1) {
if (repetitions < 3) {
for (int i = 0; i < repetitions; ++i) {
tree[*tree_size] = value;
extra_bits[*tree_size] = 0;
++(*tree_size);
}
return;
} else if (repetitions < 7) {
// 3 to 6 left.
tree[*tree_size] = 16;
extra_bits[*tree_size] = repetitions - 3;
++(*tree_size);
return;
} else {
tree[*tree_size] = 16;
extra_bits[*tree_size] = 3;
++(*tree_size);
repetitions -= 6;
}
}
}
void WriteHuffmanTreeRepetitionsZeros(
int repetitions,
uint8_t* tree,
uint8_t* extra_bits,
int* tree_size) {
while (repetitions >= 1) {
if (repetitions < 3) {
for (int i = 0; i < repetitions; ++i) {
tree[*tree_size] = 0;
extra_bits[*tree_size] = 0;
++(*tree_size);
}
return;
} else if (repetitions < 11) {
tree[*tree_size] = 17;
extra_bits[*tree_size] = repetitions - 3;
++(*tree_size);
return;
} else if (repetitions < 139) {
tree[*tree_size] = 18;
extra_bits[*tree_size] = repetitions - 11;
++(*tree_size);
return;
} else {
tree[*tree_size] = 18;
extra_bits[*tree_size] = 0x7f; // 138 repeated 0s
++(*tree_size);
repetitions -= 138;
}
}
}
// Heuristics for selecting the stride ranges to collapse.
int ValuesShouldBeCollapsedToStrideAverage(int a, int b) {
return abs(a - b) < 4;
}
int OptimizeHuffmanCountsForRle(int length, int* counts) {
int stride;
int limit;
int sum;
uint8_t* good_for_rle;
// Let's make the Huffman code more compatible with rle encoding.
int i;
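// 1) Drop trailing zeros by shrinking the effective length.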
for (; length >= 0; --length) {
if (length == 0) {
return 1; // All zeros.
}
if (counts[length - 1] != 0) {
// Now counts[0..length - 1] does not have trailing zeros.
break;
}
}
// 2) Let's mark all population counts that already can be encoded
// with an rle code.
good_for_rle = (uint8_t*)calloc(length, 1);
if (good_for_rle == NULL) {
return 0;
}
{
// Let's not spoil any of the existing good rle codes.
// Mark any sequence of 0's of length at least 5 as good_for_rle.
// Mark any sequence of non-0's of length at least 7 as good_for_rle.
int symbol = counts[0];
int stride = 0;
for (i = 0; i < length + 1; ++i) {
if (i == length || counts[i] != symbol) {
if ((symbol == 0 && stride >= 5) ||
(symbol != 0 && stride >= 7)) {
int k;
for (k = 0; k < stride; ++k) {
good_for_rle[i - k - 1] = 1;
}
}
stride = 1;
if (i != length) {
symbol = counts[i];
}
} else {
++stride;
}
}
}
// 3) Let's replace those population counts that lead to more rle codes.
stride = 0;
limit = counts[0];
sum = 0;
for (i = 0; i < length + 1; ++i) {
if (i == length || good_for_rle[i] ||
(i != 0 && good_for_rle[i - 1]) ||
!ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) {
if (stride >= 4 || (stride >= 3 && sum == 0)) {
int k;
// The stride must end, collapse what we have, if we have enough (4).
int count = (sum + stride / 2) / stride;
if (count < 1) {
count = 1;
}
if (sum == 0) {
// Don't make an all zeros stride to be upgraded to ones.
count = 0;
}
for (k = 0; k < stride; ++k) {
// We don't want to change the value at counts[i],
// which already belongs to the next stride. Thus the - 1.
counts[i - k - 1] = count;
}
}
stride = 0;
sum = 0;
if (i < length - 3) {
// All interesting strides have a count of at least 4,
// at least when non-zeros.
limit = (counts[i] + counts[i + 1] +
counts[i + 2] + counts[i + 3] + 2) / 4;
} else if (i < length) {
limit = counts[i];
} else {
limit = 0;
}
}
++stride;
if (i != length) {
sum += counts[i];
if (stride >= 4) {
limit = (sum + stride / 2) / stride;
}
}
}
free(good_for_rle);
return 1;
}
void WriteHuffmanTree(const uint8_t* depth, const int length,
uint8_t* tree,
uint8_t* extra_bits_data,
int* huffman_tree_size) {
int previous_value = 0;
for (uint32_t i = 0; i < length;) {
const int value = depth[i];
int reps = 1;
for (uint32_t k = i + 1; k < length && depth[k] == value; ++k) {
++reps;
}
if (value == 0) {
WriteHuffmanTreeRepetitionsZeros(reps, tree, extra_bits_data,
huffman_tree_size);
} else {
WriteHuffmanTreeRepetitions(previous_value, value, reps, tree,
extra_bits_data, huffman_tree_size);
previous_value = value;
}
i += reps;
}
}
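// Illustrative sketch only (not used by the encoder): serializes a made-up
// depth sequence with the run-length codes above. Here the two runs of zeros
// collapse into single code-length symbols (17 and 18) with their repeat
// counts stored in extra_bits.
void ExampleWriteHuffmanTree() {
  uint8_t depth[32] = { 3, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 };  // rest is 0
  uint8_t tree[32] = { 0 };
  uint8_t extra_bits[32] = { 0 };
  int tree_size = 0;
  WriteHuffmanTree(depth, 32, tree, extra_bits, &tree_size);
  // tree[0 .. tree_size) now holds code-length symbols in the range 0-18.
}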
namespace {
uint16_t ReverseBits(int num_bits, uint16_t bits) {
static const size_t kLut[16] = { // Pre-reversed 4-bit values.
0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
};
size_t retval = kLut[bits & 0xf];
for (int i = 4; i < num_bits; i += 4) {
retval <<= 4;
bits >>= 4;
retval |= kLut[bits & 0xf];
}
retval >>= (-num_bits & 0x3);
return retval;
}
} // namespace
void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits) {
// In Brotli, all bit depths are [1..15]
// 0 bit depth means that the symbol does not exist.
const int kMaxBits = 16; // 0..15 are values for bits
uint16_t bl_count[kMaxBits] = { 0 };
{
for (int i = 0; i < len; ++i) {
++bl_count[depth[i]];
}
bl_count[0] = 0;
}
uint16_t next_code[kMaxBits];
next_code[0] = 0;
{
int code = 0;
for (int bits = 1; bits < kMaxBits; ++bits) {
code = (code + bl_count[bits - 1]) << 1;
next_code[bits] = code;
}
}
for (int i = 0; i < len; ++i) {
if (depth[i]) {
bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
}
}
}
} // namespace brotli

112
enc/entropy_encode.h Normal file
View File

@ -0,0 +1,112 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Entropy encoding (Huffman) utilities.
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
#define BROTLI_ENC_ENTROPY_ENCODE_H_
#include <stdint.h>
#include <string.h>
#include "./histogram.h"
#include "./prefix.h"
namespace brotli {
// This function will create a Huffman tree.
//
// The (data,length) contains the population counts.
// The tree_limit is the maximum bit depth of the Huffman codes.
//
// The depth contains the tree, i.e., how many bits are used for
// the symbol.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
void CreateHuffmanTree(const int *data,
const int length,
const int tree_limit,
uint8_t *depth);
// Changes the population counts in a way that makes the subsequent
// Huffman tree compression, especially its RLE part, more likely to
// compress this data efficiently.
//
// length contains the size of the histogram.
// counts contains the population counts.
int OptimizeHuffmanCountsForRle(int length, int* counts);
// Writes a Huffman tree from bit depths into the bitstream representation
// of a Huffman tree. The generated representation is then compressed once
// more, using a Huffman code over the code-length symbols.
void WriteHuffmanTree(const uint8_t* depth, const int length,
uint8_t* tree,
uint8_t* extra_bits_data,
int* huffman_tree_size);
// Get the actual bit values for a tree of bit depths.
void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits);
template<int kSize>
struct EntropyCode {
// How many bits for symbol.
uint8_t depth_[kSize];
// Actual bits used to represent the symbol.
uint16_t bits_[kSize];
// How many non-zero depth.
int count_;
// First two symbols with non-zero depth.
int symbols_[2];
};
template<int kSize>
void BuildEntropyCode(const Histogram<kSize>& histogram,
const int tree_limit,
const int alphabet_size,
EntropyCode<kSize>* code) {
memset(code->depth_, 0, sizeof(code->depth_));
memset(code->bits_, 0, sizeof(code->bits_));
memset(code->symbols_, 0, sizeof(code->symbols_));
code->count_ = 0;
if (histogram.total_count_ == 0) return;
for (int i = 0; i < kSize; ++i) {
if (histogram.data_[i] > 0) {
if (code->count_ < 2) code->symbols_[code->count_] = i;
++code->count_;
}
}
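// Only histograms with many (>= 64) used symbols are massaged with
// OptimizeHuffmanCountsForRle; small alphabets are built directly from the
// raw counts.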
if (code->count_ >= 64) {
int counts[kSize];
memcpy(counts, &histogram.data_[0], sizeof(counts[0]) * kSize);
OptimizeHuffmanCountsForRle(alphabet_size, counts);
CreateHuffmanTree(counts, alphabet_size, tree_limit, &code->depth_[0]);
} else {
CreateHuffmanTree(&histogram.data_[0], alphabet_size, tree_limit,
&code->depth_[0]);
}
ConvertBitDepthsToSymbols(&code->depth_[0], alphabet_size, &code->bits_[0]);
}
static const int kCodeLengthCodes = 19;
// Literal entropy code.
typedef EntropyCode<256> EntropyCodeLiteral;
// Prefix entropy codes.
typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
} // namespace brotli
#endif // BROTLI_ENC_ENTROPY_ENCODE_H_

161
enc/fast_log.h Normal file
View File

@ -0,0 +1,161 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Utilities for fast computation of logarithms.
#ifndef BROTLI_ENC_FAST_LOG_H_
#define BROTLI_ENC_FAST_LOG_H_
#include <assert.h>
#include <math.h>
#include <stdint.h>
namespace brotli {
// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
inline int Log2Floor(uint32_t n) {
#if defined(__clang__) || \
(defined(__GNUC__) && \
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4))
return n == 0 ? -1 : 31 ^ __builtin_clz(n);
#else
if (n == 0)
return -1;
int log = 0;
uint32_t value = n;
for (int i = 4; i >= 0; --i) {
int shift = (1 << i);
uint32_t x = value >> shift;
if (x != 0) {
value = x;
log += shift;
}
}
assert(value == 1);
return log;
#endif
}
// Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0.
inline int Log2Ceiling(uint32_t n) {
int floor = Log2Floor(n);
if (n == (n &~ (n - 1))) // zero or a power of two
return floor;
else
return floor + 1;
}
// A lookup table for small values of log2(int) to be used in entropy
// computation.
//
// ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
static const float kLog2Table[] = {
0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
7.9943534368588578f
};
// Faster logarithm for small integers, with the property of log2(0) == 0.
static inline double FastLog2(int v) {
if (v < (int)(sizeof(kLog2Table) / sizeof(kLog2Table[0]))) {
return kLog2Table[v];
}
return log2(v);
}
} // namespace brotli
#endif // BROTLI_ENC_FAST_LOG_H_

85
enc/find_match_length.h Normal file
View File

@ -0,0 +1,85 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Function to find maximal matching prefixes of strings.
#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
#include <stdint.h>
#include "./port.h"
namespace brotli {
// Separate implementation for x86_64, for speed.
#if defined(__GNUC__) && defined(ARCH_K8)
static inline int FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
int matched = 0;
size_t limit2 = (limit >> 3) + 1; // + 1 is for pre-decrement in while
while (PREDICT_TRUE(--limit2)) {
if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
s2 += 8;
matched += 8;
} else {
uint64_t x =
BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
int matching_bits = __builtin_ctzll(x);
matched += matching_bits >> 3;
return matched;
}
}
limit = (limit & 7) + 1; // + 1 is for pre-decrement in while
while (--limit) {
if (PREDICT_TRUE(s1[matched] == *s2)) {
++s2;
++matched;
} else {
return matched;
}
}
return matched;
}
#else
static inline int FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
int matched = 0;
const uint8_t* s2_limit = s2 + limit;
const uint8_t* s2_ptr = s2;
// Find out how long the match is. We loop over the data 32 bits at a
// time until we find a 32-bit block that doesn't match; then we find
// the first non-matching bit and use that to calculate the total
// length of the match.
while (s2_ptr <= s2_limit - 4 &&
BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
s2_ptr += 4;
matched += 4;
}
while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
++s2_ptr;
++matched;
}
return matched;
}
#endif
} // namespace brotli
#endif // BROTLI_ENC_FIND_MATCH_LENGTH_H_

354
enc/hash.h Normal file
View File

@ -0,0 +1,354 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// A (forgetful) hash table to the data seen by the compressor, to
// help create backward references to previous data.
#ifndef BROTLI_ENC_HASH_H_
#define BROTLI_ENC_HASH_H_
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <algorithm>
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./port.h"
namespace brotli {
// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime, the oddity is enough
// for this use.
// * The number has been tuned heuristically against compression benchmarks.
static const uint32_t kHashMul32 = 0x1e35a7bd;
inline uint32_t Hash3Bytes(const uint8_t *data, const int bits) {
uint32_t h = (BROTLI_UNALIGNED_LOAD32(data) & 0xffffff) * kHashMul32;
// The higher bits contain more mixture from the multiplication,
// so we take our results from there.
return h >> (32 - bits);
}
// Usually we choose the longest backward reference. This function allows
// for exceptions to that rule.
//
// If we choose a backward reference that is further away, it will
// usually be coded with more bits. We approximate this by assuming
// log2(distance). If the distance can be expressed in terms of the
// last four distances, we use some heuristic constants to estimate
// the bits cost. For the first up to four literals we use the bit
// cost of the literals from the literal cost model, after that we
// use the average bit cost of the cost model.
//
// This function is used to sometimes discard a longer backward reference
// when it is not much longer and the bit cost for encoding it is more
// than the saved literals.
inline double BackwardReferenceScore(double average_cost,
double start_cost4,
double start_cost3,
double start_cost2,
int copy_length,
int backward_reference_offset,
int last_distance1,
int last_distance2,
int last_distance3,
int last_distance4) {
double retval = 0;
switch (copy_length) {
case 2: retval = start_cost2; break;
case 3: retval = start_cost3; break;
default: retval = start_cost4 + (copy_length - 4) * average_cost; break;
}
int diff_last1 = abs(backward_reference_offset - last_distance1);
int diff_last2 = abs(backward_reference_offset - last_distance2);
if (diff_last1 == 0) {
retval += 0.6;
} else if (diff_last1 < 4) {
retval -= 0.9 + 0.03 * diff_last1;
} else if (diff_last2 < 4) {
retval -= 0.95 + 0.1 * diff_last2;
} else if (backward_reference_offset == last_distance3) {
retval -= 1.17;
} else if (backward_reference_offset == last_distance4) {
retval -= 1.27;
} else {
retval -= 1.20 * Log2Floor(backward_reference_offset);
}
return retval;
}
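// Illustrative sketch only (not used by the encoder): compares the score of
// a long match far away against a shorter match that reuses the most recent
// distance. Every length, distance and cost below is made up; with these
// numbers the short repeat-distance match wins.
inline bool ExamplePreferShortRepeatMatch() {
  const double kAverageCost = 5.4;                    // hypothetical
  const double kStartCost4 = 4 * kAverageCost;        // hypothetical
  const double kStartCost3 = 3 * kAverageCost + 0.3;
  const double kStartCost2 = 2 * kAverageCost + 1.2;
  // Candidate A: copy 7 bytes from 100000 bytes back.
  const double far_score = BackwardReferenceScore(
      kAverageCost, kStartCost4, kStartCost3, kStartCost2,
      7, 100000, 17, 40, 60, 80);
  // Candidate B: copy 5 bytes, reusing the last distance (17) exactly.
  const double repeat_score = BackwardReferenceScore(
      kAverageCost, kStartCost4, kStartCost3, kStartCost2,
      5, 17, 17, 40, 60, 80);
  return repeat_score > far_score;  // true for these hypothetical inputs
}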
// A (forgetful) hash table to the data seen by the compressor, to
// help create backward references to previous data.
//
// This is a hash map of fixed size (kBucketSize) to a ring buffer of
// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
// index positions of the given hash key in the compressed data.
template <int kBucketBits, int kBlockBits>
class HashLongestMatch {
public:
HashLongestMatch()
: literal_cost_(NULL),
last_distance1_(4),
last_distance2_(11),
last_distance3_(15),
last_distance4_(16),
insert_length_(0),
average_cost_(5.4) {
Reset();
}
void Reset() {
std::fill(&num_[0], &num_[sizeof(num_) / sizeof(num_[0])], 0);
}
void SetLiteralCost(float *cost) {
literal_cost_ = cost;
}
double literal_cost(int i) const { return literal_cost_[i]; }
// Look at 3 bytes at data.
// Compute a hash from these, and store the value of ix at that position.
inline void Store(const uint8_t *data, const int ix) {
const uint32_t key = Hash3Bytes(data, kBucketBits);
const int minor_ix = num_[key] & kBlockMask;
buckets_[key][minor_ix] = ix;
++num_[key];
}
// Store hashes for a range of data.
void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
for (int p = 0; p < len; ++p) {
Store(&data[p & mask], startix + p);
}
}
// Find a longest backward match of &data[cur_ix] up to the length of
// max_length.
//
// Does not look for matches longer than max_length.
// Does not look for matches further away than max_backward.
// Writes the best found match length into best_len_out.
// Writes the backward distance of the best match (cur_ix minus the matched
// position) into best_distance_out.
// Writes the score of the best match into best_score_out.
bool FindLongestMatch(const uint8_t * __restrict data,
const uint32_t cur_ix,
uint32_t max_length,
const uint32_t max_backward,
size_t * __restrict best_len_out,
size_t * __restrict best_distance_out,
double * __restrict best_score_out) {
const double start_cost4 = literal_cost_ == NULL ? 20 :
literal_cost_[cur_ix] +
literal_cost_[cur_ix + 1] +
literal_cost_[cur_ix + 2] +
literal_cost_[cur_ix + 3];
const double start_cost3 = literal_cost_ == NULL ? 15 :
literal_cost_[cur_ix] +
literal_cost_[cur_ix + 1] +
literal_cost_[cur_ix + 2] + 0.3;
double start_cost2 = literal_cost_ == NULL ? 10 :
literal_cost_[cur_ix] +
literal_cost_[cur_ix + 1] + 1.2;
bool match_found = false;
// Don't accept a short copy from far away.
double best_score = 8.25;
if (insert_length_ < 4) {
double cost_diff[4] = { 0.20, 0.09, 0.05, 0.03 };
best_score += cost_diff[insert_length_];
}
size_t best_len = *best_len_out;
*best_len_out = 0;
size_t best_ix = 1;
// Try last distance first.
for (int i = 0; i < 16; ++i) {
int prev_ix = cur_ix;
switch(i) {
case 0: prev_ix -= last_distance1_; break;
case 1: prev_ix -= last_distance2_; break;
case 2: prev_ix -= last_distance3_; break;
case 3: prev_ix -= last_distance4_; break;
case 4: prev_ix -= last_distance1_ - 1; break;
case 5: prev_ix -= last_distance1_ + 1; break;
case 6: prev_ix -= last_distance1_ - 2; break;
case 7: prev_ix -= last_distance1_ + 2; break;
case 8: prev_ix -= last_distance1_ - 3; break;
case 9: prev_ix -= last_distance1_ + 3; break;
case 10: prev_ix -= last_distance2_ - 1; break;
case 11: prev_ix -= last_distance2_ + 1; break;
case 12: prev_ix -= last_distance2_ - 2; break;
case 13: prev_ix -= last_distance2_ + 2; break;
case 14: prev_ix -= last_distance2_ - 3; break;
case 15: prev_ix -= last_distance2_ + 3; break;
}
if (prev_ix >= cur_ix) {
continue;
}
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
continue;
}
if (data[cur_ix + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix], max_length);
if (len >= 3 || (len == 2 && i < 2)) {
// Requiring a minimum length here does not change the semantics; it just
// saves a few unnecessary binary logarithms in the backward reference
// score, since we are not interested in such short matches.
const double score = BackwardReferenceScore(average_cost_,
start_cost4,
start_cost3,
start_cost2,
len, backward,
last_distance1_,
last_distance2_,
last_distance3_,
last_distance4_);
if (best_score < score) {
best_score = score;
best_len = len;
best_ix = backward;
*best_len_out = best_len;
*best_distance_out = best_ix;
*best_score_out = best_score;
match_found = true;
}
}
}
const uint32_t key = Hash3Bytes(&data[cur_ix], kBucketBits);
const uint32_t * __restrict const bucket = &buckets_[key][0];
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
int stop = int(cur_ix) - 64;
if (stop < 0) { stop = 0; }
start_cost2 -= 1.0;
for (int i = cur_ix - 1; i > stop; --i) {
size_t prev_ix = i;
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
break;
}
if (data[cur_ix] != data[prev_ix] ||
data[cur_ix + 1] != data[prev_ix + 1]) {
continue;
}
int len = 2;
const double score = start_cost2 - 1.70 * Log2Floor(backward);
if (best_score < score) {
best_score = score;
best_len = len;
best_ix = backward;
*best_len_out = best_len;
*best_distance_out = best_ix;
// Keep the caller's best score in sync, as in the other accept paths.
*best_score_out = best_score;
match_found = true;
}
}
for (int i = num_[key] - 1; i >= down; --i) {
size_t prev_ix = bucket[i & kBlockMask];
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
break;
}
if (data[cur_ix + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len =
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix], max_length);
if (len >= 3) {
// Requiring length >= 3 does not change the semantics; it just saves a
// few unnecessary binary logarithms in the backward reference score,
// since we are not interested in such short matches.
const double score = BackwardReferenceScore(average_cost_,
start_cost4,
start_cost3,
start_cost2,
len, backward,
last_distance1_,
last_distance2_,
last_distance3_,
last_distance4_);
if (best_score < score) {
best_score = score;
best_len = len;
best_ix = backward;
*best_len_out = best_len;
*best_distance_out = best_ix;
*best_score_out = best_score;
match_found = true;
}
}
}
return match_found;
}
void set_last_distance(int v) {
if (last_distance1_ != v) {
last_distance4_ = last_distance3_;
last_distance3_ = last_distance2_;
last_distance2_ = last_distance1_;
last_distance1_ = v;
}
}
int last_distance() const { return last_distance1_; }
void set_insert_length(int v) { insert_length_ = v; }
void set_average_cost(double v) { average_cost_ = v; }
private:
// Number of hash buckets.
static const uint32_t kBucketSize = 1 << kBucketBits;
// Only kBlockSize newest backward references are kept,
// and the older are forgotten.
static const uint32_t kBlockSize = 1 << kBlockBits;
// Mask for accessing entries in a block (in a ringbuffer manner).
static const uint32_t kBlockMask = (1 << kBlockBits) - 1;
// Number of entries in a particular bucket.
uint16_t num_[kBucketSize];
// Buckets containing kBlockSize of backward references.
uint32_t buckets_[kBucketSize][kBlockSize];
// Model of how much the ith literal costs to encode using
// the entropy model.
float *literal_cost_;
int last_distance1_;
int last_distance2_;
int last_distance3_;
int last_distance4_;
// Cost adjustment for how many literals we are planning to insert
// anyway.
int insert_length_;
double average_cost_;
};
} // namespace brotli
#endif // BROTLI_ENC_HASH_H_

72
enc/histogram.cc Normal file
View File

@ -0,0 +1,72 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Build per-context histograms of literals, commands and distance codes.
#include "./histogram.h"
#include <stdint.h>
#include <cmath>
#include "./block_splitter.h"
#include "./command.h"
#include "./context.h"
#include "./prefix.h"
namespace brotli {
void BuildHistograms(
const std::vector<Command>& cmds,
const BlockSplit& literal_split,
const BlockSplit& insert_and_copy_split,
const BlockSplit& dist_split,
const uint8_t* input_buffer,
size_t pos,
int context_mode,
int distance_context_mode,
std::vector<HistogramLiteral>* literal_histograms,
std::vector<HistogramCommand>* insert_and_copy_histograms,
std::vector<HistogramDistance>* copy_dist_histograms) {
BlockSplitIterator literal_it(literal_split);
BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
BlockSplitIterator dist_it(dist_split);
for (int i = 0; i < cmds.size(); ++i) {
const Command &cmd = cmds[i];
insert_and_copy_it.Next();
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
cmd.command_prefix_);
for (int j = 0; j < cmd.insert_length_; ++j) {
literal_it.Next();
uint8_t prev_byte = pos > 0 ? input_buffer[pos - 1] : 0;
uint8_t prev_byte2 = pos > 1 ? input_buffer[pos - 2] : 0;
uint8_t prev_byte3 = pos > 2 ? input_buffer[pos - 3] : 0;
int context = (literal_it.type_ * NumContexts(context_mode) +
Context(prev_byte, prev_byte2, prev_byte3, context_mode));
(*literal_histograms)[context].Add(input_buffer[pos]);
++pos;
}
pos += cmd.copy_length_;
if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
dist_it.Next();
int context = dist_it.type_;
if (distance_context_mode > 0) {
context <<= 2;
context += (cmd.copy_length_ > 4) ? 3 : cmd.copy_length_ - 2;
}
(*copy_dist_histograms)[context].Add(cmd.distance_prefix_);
}
}
}
} // namespace brotli

97
enc/histogram.h Normal file
View File

@ -0,0 +1,97 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Models the histograms of literals, commands and distance codes.
#ifndef BROTLI_ENC_HISTOGRAM_H_
#define BROTLI_ENC_HISTOGRAM_H_
#include <stdint.h>
#include <string.h>
#include <vector>
#include <utility>
#include "./command.h"
#include "./fast_log.h"
#include "./prefix.h"
namespace brotli {
class BlockSplit;
// A simple container for histograms of data in blocks.
template<int kDataSize>
struct Histogram {
Histogram() {
Clear();
}
void Clear() {
memset(data_, 0, sizeof(data_));
total_count_ = 0;
}
void Add(int val) {
++data_[val];
++total_count_;
}
void Remove(int val) {
--data_[val];
--total_count_;
}
template<typename DataType>
void Add(const DataType *p, size_t n) {
total_count_ += n;
n += 1;
while(--n) ++data_[*p++];
}
void AddHistogram(const Histogram& v) {
total_count_ += v.total_count_;
for (int i = 0; i < kDataSize; ++i) {
data_[i] += v.data_[i];
}
}
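// Shannon bit cost of coding this histogram with an ideal entropy coder:
// sum over all symbols of data_[i] * log2(total_count_ / data_[i]).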
double EntropyBitCost() const {
double retval = total_count_ * FastLog2(total_count_);
for (int i = 0; i < kDataSize; ++i) {
retval -= data_[i] * FastLog2(data_[i]);
}
return retval;
}
int data_[kDataSize];
int total_count_;
double bit_cost_;
};
// Literal histogram.
typedef Histogram<256> HistogramLiteral;
// Prefix histograms.
typedef Histogram<kNumCommandPrefixes> HistogramCommand;
typedef Histogram<kNumDistancePrefixes> HistogramDistance;
typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
void BuildHistograms(
const std::vector<Command>& cmds,
const BlockSplit& literal_split,
const BlockSplit& insert_and_copy_split,
const BlockSplit& dist_split,
const uint8_t* input_buffer,
size_t pos,
int context_mode,
int distance_context_mode,
std::vector<HistogramLiteral>* literal_histograms,
std::vector<HistogramCommand>* insert_and_copy_histograms,
std::vector<HistogramDistance>* copy_dist_histograms);
} // namespace brotli
#endif // BROTLI_ENC_HISTOGRAM_H_

60
enc/literal_cost.cc Normal file
View File

@ -0,0 +1,60 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Literal cost model to allow backward reference replacement to be efficient.
#include "./literal_cost.h"
#include <math.h>
#include <stdint.h>
#include <algorithm>
namespace brotli {
void EstimateBitCostsForLiterals(size_t len, const uint8_t *data, float *cost) {
int histogram[256] = { 0 };
int window_half = 2000;
int in_window = std::min(static_cast<size_t>(window_half), len);
// Bootstrap histogram.
for (int i = 0; i < in_window; ++i) {
++histogram[data[i]];
}
// Compute bit costs with sliding window.
for (int i = 0; i < len; ++i) {
if (i - window_half >= 0) {
// Remove a byte in the past.
--histogram[data[i - window_half]];
--in_window;
}
if (i + window_half < len) {
// Add a byte in the future.
++histogram[data[i + window_half]];
++in_window;
}
int histo = histogram[data[i]];
if (histo == 0) {
histo = 1;
}
cost[i] = log2(static_cast<double>(in_window) / histo);
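// Add a small bias, and pull estimates below one bit halfway towards
// one bit, so that no literal is estimated as nearly free.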
cost[i] += 0.03;
if (cost[i] < 1.0) {
cost[i] *= 0.5;
cost[i] += 0.5;
}
}
}
} // namespace brotli

31
enc/literal_cost.h Normal file
View File

@ -0,0 +1,31 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Literal cost model to allow backward reference replacement to be efficient.
#ifndef BROTLI_ENC_LITERAL_COST_H_
#define BROTLI_ENC_LITERAL_COST_H_
#include <stddef.h>
#include <stdint.h>
namespace brotli {
// Input: length of data, and the bytes.
// Output: an estimate of how many bits each literal will take when
// entropy coded.
void EstimateBitCostsForLiterals(size_t len, const uint8_t *data, float *cost);
} // namespace brotli
#endif // BROTLI_ENC_LITERAL_COST_H_

138
enc/port.h Normal file
View File

@ -0,0 +1,138 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Macros for endianness, branch prediction and unaligned loads and stores.
#ifndef BROTLI_ENC_PORT_H_
#define BROTLI_ENC_PORT_H_
#if defined OS_LINUX || defined OS_CYGWIN
#include <endian.h>
#elif defined OS_FREEBSD
#include <machine/endian.h>
#elif defined OS_MACOSX
#include <machine/endian.h>
/* Let's try and follow the Linux convention */
#define __BYTE_ORDER BYTE_ORDER
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#define __BIG_ENDIAN BIG_ENDIAN
#endif
#include <stdint.h>  // fixed-width types used by the unaligned helpers below
#include <string.h>  // memcpy used by the unaligned load/store fallbacks
// define the macros IS_LITTLE_ENDIAN or IS_BIG_ENDIAN
// using the above endian definitions from endian.h if
// endian.h was included
#ifdef __BYTE_ORDER
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define IS_LITTLE_ENDIAN
#endif
#if __BYTE_ORDER == __BIG_ENDIAN
#define IS_BIG_ENDIAN
#endif
#else
#if defined(__LITTLE_ENDIAN__)
#define IS_LITTLE_ENDIAN
#elif defined(__BIG_ENDIAN__)
#define IS_BIG_ENDIAN
#endif
#endif // __BYTE_ORDER
#if defined(COMPILER_GCC3)
#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define PREDICT_FALSE(x) x
#define PREDICT_TRUE(x) x
#endif
// Portable handling of unaligned loads, stores, and copies.
// On some platforms, like ARM, the copy functions can be more efficient
// than a load and a store.
#if defined(ARCH_PIII) || defined(ARCH_ATHLON) || \
defined(ARCH_K8) || defined(_ARCH_PPC)
// x86 and x86-64 can perform unaligned loads/stores directly;
// modern PowerPC hardware can also do unaligned integer loads and stores;
// but note: the FPU still sends unaligned loads and stores to a trap handler!
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
(*reinterpret_cast<uint32_t *>(_p) = (_val))
#define BROTLI_UNALIGNED_STORE64(_p, _val) \
(*reinterpret_cast<uint64_t *>(_p) = (_val))
#elif defined(__arm__) && \
!defined(__ARM_ARCH_5__) && \
!defined(__ARM_ARCH_5T__) && \
!defined(__ARM_ARCH_5TE__) && \
!defined(__ARM_ARCH_5TEJ__) && \
!defined(__ARM_ARCH_6__) && \
!defined(__ARM_ARCH_6J__) && \
!defined(__ARM_ARCH_6K__) && \
!defined(__ARM_ARCH_6Z__) && \
!defined(__ARM_ARCH_6ZK__) && \
!defined(__ARM_ARCH_6T2__)
// ARMv7 and newer support native unaligned accesses, but only of 16-bit
// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
// do an unaligned read and rotate the words around a bit, or do the reads very
// slowly (trip through kernel mode).
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
(*reinterpret_cast<uint32_t *>(_p) = (_val))
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
uint64_t t;
memcpy(&t, p, sizeof t);
return t;
}
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
#else
// These functions are provided for architectures that don't support
// unaligned loads and stores.
inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
uint32_t t;
memcpy(&t, p, sizeof t);
return t;
}
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
uint64_t t;
memcpy(&t, p, sizeof t);
return t;
}
inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
memcpy(p, &v, sizeof v);
}
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
#endif
#endif // BROTLI_ENC_PORT_H_

166
enc/prefix.cc Normal file
View File

@ -0,0 +1,166 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions for encoding integers into prefix codes, together with the
// number of extra bits and the actual values of those extra bits.
#include "./prefix.h"
#include "./fast_log.h"
namespace brotli {
// Represents the range of values belonging to a prefix code:
// [offset, offset + 2^nbits)
struct PrefixCodeRange {
int offset;
int nbits;
};
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
{ 1, 2}, { 5, 2}, { 9, 2}, { 13, 2},
{ 17, 3}, { 25, 3}, { 33, 3}, { 41, 3},
{ 49, 4}, { 65, 4}, { 81, 4}, { 97, 4},
{ 113, 5}, { 145, 5}, { 177, 5}, { 209, 5},
{ 241, 6}, { 305, 6}, { 369, 7}, { 497, 8},
{ 753, 9}, { 1265, 10}, {2289, 11}, {4337, 12},
{8433, 13}, {16625, 24}
};
static const PrefixCodeRange kInsertLengthPrefixCode[kNumInsertLenPrefixes] = {
{ 0, 0}, { 1, 0}, { 2, 0}, { 3, 0},
{ 4, 0}, { 5, 0}, { 6, 1}, { 8, 1},
{ 10, 2}, { 14, 2}, { 18, 3}, { 26, 3},
{ 34, 4}, { 50, 4}, { 66, 5}, { 98, 5},
{ 130, 6}, { 194, 7}, { 322, 8}, { 578, 9},
{1090, 10}, {2114, 12}, {6210, 14}, {22594, 24},
};
static const PrefixCodeRange kCopyLengthPrefixCode[kNumCopyLenPrefixes] = {
{ 2, 0}, { 3, 0}, { 4, 0}, { 5, 0},
{ 6, 0}, { 7, 0}, { 8, 0}, { 9, 0},
{ 10, 1}, { 12, 1}, { 14, 2}, { 18, 2},
{ 22, 3}, { 30, 3}, { 38, 4}, { 54, 4},
{ 70, 5}, { 102, 5}, { 134, 6}, { 198, 7},
{326, 8}, { 582, 9}, {1094, 10}, {2118, 24},
};
static const int kInsertAndCopyRangeLut[9] = {
0, 1, 4, 2, 3, 6, 5, 7, 8,
};
static const int kInsertRangeLut[9] = {
0, 0, 1, 1, 0, 2, 1, 2, 2,
};
static const int kCopyRangeLut[9] = {
0, 1, 0, 1, 2, 0, 2, 1, 2,
};
int InsertLengthPrefix(int length) {
for (int i = 0; i < kNumInsertLenPrefixes; ++i) {
const PrefixCodeRange& range = kInsertLengthPrefixCode[i];
if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
return i;
}
}
return -1;
}
int CopyLengthPrefix(int length) {
for (int i = 0; i < kNumCopyLenPrefixes; ++i) {
const PrefixCodeRange& range = kCopyLengthPrefixCode[i];
if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
return i;
}
}
return -1;
}
int CommandPrefix(int insert_length, int copy_length) {
if (copy_length == 0) {
copy_length = 3;
}
int insert_prefix = InsertLengthPrefix(insert_length);
int copy_prefix = CopyLengthPrefix(copy_length);
int range_idx = 3 * (insert_prefix >> 3) + (copy_prefix >> 3);
return ((kInsertAndCopyRangeLut[range_idx] << 6) +
((insert_prefix & 7) << 3) + (copy_prefix & 7));
}
int InsertLengthExtraBits(int code) {
int insert_code = (kInsertRangeLut[code >> 6] << 3) + ((code >> 3) & 7);
return kInsertLengthPrefixCode[insert_code].nbits;
}
int InsertLengthOffset(int code) {
int insert_code = (kInsertRangeLut[code >> 6] << 3) + ((code >> 3) & 7);
return kInsertLengthPrefixCode[insert_code].offset;
}
int CopyLengthExtraBits(int code) {
int copy_code = (kCopyRangeLut[code >> 6] << 3) + (code & 7);
return kCopyLengthPrefixCode[copy_code].nbits;
}
int CopyLengthOffset(int code) {
int copy_code = (kCopyRangeLut[code >> 6] << 3) + (code & 7);
return kCopyLengthPrefixCode[copy_code].offset;
}
void PrefixEncodeCopyDistance(int distance_code,
int num_direct_codes,
int postfix_bits,
uint16_t* code,
int* nbits,
uint32_t* extra_bits) {
distance_code -= 1;
if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
*code = distance_code;
*nbits = 0;
*extra_bits = 0;
return;
}
distance_code -= kNumDistanceShortCodes + num_direct_codes;
distance_code += (1 << (postfix_bits + 2));
int bucket = Log2Floor(distance_code) - 1;
int postfix_mask = (1 << postfix_bits) - 1;
int postfix = distance_code & postfix_mask;
int prefix = (distance_code >> bucket) & 1;
int offset = (2 + prefix) << bucket;
*nbits = bucket - postfix_bits;
*code = kNumDistanceShortCodes + num_direct_codes +
((2 * (*nbits - 1) + prefix) << postfix_bits) + postfix;
*extra_bits = (distance_code - offset) >> postfix_bits;
}
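// Illustrative sketch only (not called by the encoder): encodes one distance
// code with the 12 direct distance codes and 1 postfix bit that
// BrotliCompressBuffer currently configures; the input value is made up.
void ExamplePrefixEncodeDistance() {
  uint16_t code = 0;
  int nbits = 0;
  uint32_t extra = 0;
  PrefixEncodeCopyDistance(1234, 12, 1, &code, &nbits, &extra);
  // code is the symbol for the distance entropy coder; the low nbits of
  // extra are then emitted as verbatim extra bits.
}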
int BlockLengthPrefix(int length) {
for (int i = 0; i < kNumBlockLenPrefixes; ++i) {
const PrefixCodeRange& range = kBlockLengthPrefixCode[i];
if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
return i;
}
}
return -1;
}
int BlockLengthExtraBits(int length_code) {
return kBlockLengthPrefixCode[length_code].nbits;
}
int BlockLengthOffset(int length_code) {
return kBlockLengthPrefixCode[length_code].offset;
}
} // namespace brotli

51
enc/prefix.h Normal file
View File

@ -0,0 +1,51 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Functions for encoding integers into prefix codes, the number of extra
// bits, and the actual values of the extra bits.
#ifndef BROTLI_ENC_PREFIX_H_
#define BROTLI_ENC_PREFIX_H_
#include <stdint.h>
namespace brotli {
static const int kNumInsertLenPrefixes = 24;
static const int kNumCopyLenPrefixes = 24;
static const int kNumCommandPrefixes = 704;
static const int kNumBlockLenPrefixes = 26;
static const int kNumDistanceShortCodes = 16;
static const int kNumDistancePrefixes = 520;
int CommandPrefix(int insert_length, int copy_length);
int InsertLengthExtraBits(int prefix);
int InsertLengthOffset(int prefix);
int CopyLengthExtraBits(int prefix);
int CopyLengthOffset(int prefix);
void PrefixEncodeCopyDistance(int distance_code,
int num_direct_codes,
int shift_bits,
uint16_t* prefix,
int* nbits,
uint32_t* extra_bits);
int BlockLengthPrefix(int length);
int BlockLengthExtraBits(int prefix);
int BlockLengthOffset(int prefix);
} // namespace brotli
#endif // BROTLI_ENC_PREFIX_H_

91
enc/write_bits.h Normal file
View File

@ -0,0 +1,91 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Write bits into a byte array.
#ifndef BROTLI_ENC_WRITE_BITS_H_
#define BROTLI_ENC_WRITE_BITS_H_
#include <assert.h>
#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include "./port.h"
namespace brotli {
//#define BIT_WRITER_DEBUG
// This function writes bits into bytes in increasing addresses, and within
// a byte least-significant-bit first.
//
// The function can write up to 56 bits in one go with WriteBits.
// Example: let's assume that 3 bits (Rs below) have been written already:
//
// BYTE-0 BYTE+1 BYTE+2
//
// 0000 0RRR 0000 0000 0000 0000
//
// Now, we could write 5 or fewer bits into the MSB part of BYTE-0 by just
// shifting by 3 and OR'ing them into BYTE-0.
//
// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
// and locate the rest in BYTE+1, BYTE+2, etc.
inline void WriteBits(int n_bits,
uint64_t bits,
int * __restrict pos,
uint8_t * __restrict array) {
#ifdef BIT_WRITER_DEBUG
printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos);
#endif
#ifdef IS_LITTLE_ENDIAN
// This branch of the code can write up to 56 bits at a time: up to 7 bits
// may already be occupied in *p, and at least 1 bit is needed to
// initialize the bit-stream ahead (i.e. if 7 bits are in *p and we write
// 57 bits, then the next write will access a byte that was never
// initialized).
uint8_t *p = &array[*pos >> 3];
uint64_t v = *p;
v |= bits << (*pos & 7);
BROTLI_UNALIGNED_STORE64(p, v); // Set some bits.
*pos += n_bits;
#else
// An implicit '& 0xff' is assumed for uint8_t arithmetic.
uint8_t *array_pos = &array[*pos >> 3];
const int bits_reserved_in_first_byte = (*pos & 7);
bits <<= bits_reserved_in_first_byte;
*array_pos++ |= bits;
for (int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte;
bits_left_to_write >= 1;
bits_left_to_write -= 8) {
bits >>= 8;
*array_pos++ = bits;
}
*array_pos = 0;
*pos += n_bits;
#endif
}
inline void WriteBitsPrepareStorage(int pos, uint8_t *array) {
#ifdef BIT_WRITER_DEBUG
printf("WriteBitsPrepareStorage %10d\n", pos);
#endif
assert((pos & 7) == 0);
array[pos >> 3] = 0;
}
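// Usage sketch (illustrative): writing a 3-bit code followed by a 12-bit
// value.  The byte at the starting position must be cleared once before the
// first write, and the buffer needs slack beyond the written bits because
// the little-endian branch stores 64 bits at a time.
//
//   uint8_t storage[16] = { 0 };
//   int pos = 0;
//   WriteBitsPrepareStorage(pos, storage);
//   WriteBits(3, 5, &pos, storage);      // pos is now 3
//   WriteBits(12, 0xabc, &pos, storage); // pos is now 15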
} // namespace brotli
#endif // BROTLI_ENC_WRITE_BITS_H_