mirror of
https://github.com/google/brotli.git
synced 2024-11-21 19:20:09 +00:00
Add brotli compressor
This commit adds the encoder for the Brotli compression format. Brotli is a generic byte-level compression algorithm.
This commit is contained in:
parent
6455522172
commit
c66e4e3e4f
3
enc/README
Normal file
3
enc/README
Normal file
@ -0,0 +1,3 @@
|
||||
This directory holds the encoder for the brotli compression format.
|
||||
|
||||
Brotli is proposed to be used at the byte-compression level in the WOFF 2.0 format.
|
137
enc/backward_references.cc
Normal file
137
enc/backward_references.cc
Normal file
@ -0,0 +1,137 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Function to find backward reference copies.
|
||||
|
||||
#include "./backward_references.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "./command.h"
|
||||
#include "./hash.h"
|
||||
#include "./literal_cost.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
void CreateBackwardReferences(const uint8_t* data,
|
||||
int length,
|
||||
std::vector<Command>* commands) {
|
||||
HashLongestMatch<13,11> *hasher = new HashLongestMatch<13,11>;
|
||||
float *literal_cost = new float[length];
|
||||
EstimateBitCostsForLiterals(length, data, literal_cost);
|
||||
hasher->SetLiteralCost(literal_cost);
|
||||
|
||||
// Length heuristic that seems to help probably by better selection
|
||||
// of lazy matches of similar lengths.
|
||||
int insert_length = 0;
|
||||
size_t i = 0;
|
||||
|
||||
double average_cost = 0.0;
|
||||
for (int i = 0; i < length; ++i) {
|
||||
average_cost += literal_cost[i];
|
||||
}
|
||||
average_cost /= length;
|
||||
hasher->set_average_cost(average_cost);
|
||||
|
||||
while (i + 2 < length) {
|
||||
size_t best_len = 0;
|
||||
size_t best_dist = 0;
|
||||
double best_score = 0;
|
||||
const size_t max_distance = std::min(i, 1UL << 24);
|
||||
hasher->set_insert_length(insert_length);
|
||||
bool match_found = hasher->FindLongestMatch(
|
||||
data, i, length - i, max_distance,
|
||||
&best_len, &best_dist, &best_score);
|
||||
if (match_found) {
|
||||
// Found a match. Let's look for something even better ahead.
|
||||
int delayed_backward_references_in_row = 0;
|
||||
while (i + 4 < length &&
|
||||
delayed_backward_references_in_row < 4) {
|
||||
size_t best_len_2 = 0;
|
||||
size_t best_dist_2 = 0;
|
||||
double best_score_2 = 0;
|
||||
hasher->Store(data + i, i);
|
||||
match_found = hasher->FindLongestMatch(
|
||||
data, i + 1, length - i - 1, max_distance,
|
||||
&best_len_2, &best_dist_2, &best_score_2);
|
||||
double cost_diff_lazy = 0;
|
||||
if (best_len >= 4) {
|
||||
cost_diff_lazy += hasher->literal_cost(i + 4) - average_cost;
|
||||
}
|
||||
{
|
||||
const int tail_length = best_len_2 - best_len + 1;
|
||||
for (int k = 0; k < tail_length; ++k) {
|
||||
cost_diff_lazy -= hasher->literal_cost(i + best_len + k) -
|
||||
average_cost;
|
||||
}
|
||||
}
|
||||
// If we are not inserting any symbols, inserting one is more
|
||||
// expensive than if we were inserting symbols anyways.
|
||||
if (insert_length < 1) {
|
||||
cost_diff_lazy += 1.0;
|
||||
}
|
||||
// Add bias to slightly avoid lazy matching.
|
||||
cost_diff_lazy += 2.0 + delayed_backward_references_in_row * 0.2;
|
||||
cost_diff_lazy += 0.04 * hasher->literal_cost(i);
|
||||
|
||||
if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
|
||||
// Ok, let's just write one byte for now and start a match from the
|
||||
// next byte.
|
||||
++insert_length;
|
||||
++delayed_backward_references_in_row;
|
||||
best_len = best_len_2;
|
||||
best_dist = best_dist_2;
|
||||
best_score = best_score_2;
|
||||
i++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Command cmd;
|
||||
cmd.insert_length_ = insert_length;
|
||||
cmd.copy_length_ = best_len;
|
||||
cmd.copy_distance_ = best_dist;
|
||||
commands->push_back(cmd);
|
||||
hasher->set_last_distance(best_dist);
|
||||
|
||||
insert_length = 0;
|
||||
++i;
|
||||
for (int j = 1; j < best_len; ++j) {
|
||||
if (i + 2 < length) {
|
||||
hasher->Store(data + i, i);
|
||||
}
|
||||
++i;
|
||||
}
|
||||
} else {
|
||||
++insert_length;
|
||||
hasher->Store(data + i, i);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
insert_length += (length - i);
|
||||
|
||||
if (insert_length > 0) {
|
||||
Command cmd;
|
||||
cmd.insert_length_ = insert_length;
|
||||
cmd.copy_length_ = 0;
|
||||
cmd.copy_distance_ = 0;
|
||||
commands->push_back(cmd);
|
||||
}
|
||||
|
||||
delete[] literal_cost;
|
||||
delete hasher;
|
||||
}
|
||||
|
||||
} // namespace brotli
|
33
enc/backward_references.h
Normal file
33
enc/backward_references.h
Normal file
@ -0,0 +1,33 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Function to find backward reference copies.
|
||||
|
||||
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
|
||||
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
|
||||
#include "./command.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
void CreateBackwardReferences(const uint8_t* data,
|
||||
int length,
|
||||
std::vector<Command>* commands);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_BACKWARD_REFERENCES_H_
|
150
enc/bit_cost.h
Normal file
150
enc/bit_cost.h
Normal file
@ -0,0 +1,150 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Functions to estimate the bit cost of Huffman trees.
|
||||
|
||||
#ifndef BROTLI_ENC_BIT_COST_H_
|
||||
#define BROTLI_ENC_BIT_COST_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "./entropy_encode.h"
|
||||
#include "./fast_log.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Extra bits charged per code-length symbol when costing a Huffman tree:
// symbols 0..15 carry none, symbols 16, 17 and 18 carry 2, 3 and 7 bits.
static const int kHuffmanExtraBits[kCodeLengthCodes] = {
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  2, 3, 7,
};
|
||||
|
||||
static inline int HuffmanTreeBitCost(const int* counts, const uint8_t* depth) {
|
||||
int nbits = 0;
|
||||
for (int i = 0; i < kCodeLengthCodes; ++i) {
|
||||
nbits += counts[i] * (depth[i] + kHuffmanExtraBits[i]);
|
||||
}
|
||||
return nbits;
|
||||
}
|
||||
|
||||
static inline int HuffmanTreeBitCost(
|
||||
const Histogram<kCodeLengthCodes>& histogram,
|
||||
const EntropyCode<kCodeLengthCodes>& entropy) {
|
||||
return HuffmanTreeBitCost(&histogram.data_[0], &entropy.depth_[0]);
|
||||
}
|
||||
|
||||
static inline int HuffmanBitCost(const uint8_t* depth, int length) {
|
||||
int max_depth = 1;
|
||||
int histogram[kCodeLengthCodes] = { 0 };
|
||||
int tail_start = 0;
|
||||
// compute histogram of compacted huffman tree
|
||||
for (int i = 0; i < length;) {
|
||||
const int value = depth[i];
|
||||
if (value > max_depth) {
|
||||
max_depth = value;
|
||||
}
|
||||
int reps = 1;
|
||||
for (int k = i + 1; k < length && depth[k] == value; ++k) {
|
||||
++reps;
|
||||
}
|
||||
i += reps;
|
||||
if (value == 0) {
|
||||
while (reps > 10) {
|
||||
++histogram[18];
|
||||
reps -= 138;
|
||||
}
|
||||
if (reps > 2) {
|
||||
++histogram[17];
|
||||
} else if (reps > 0) {
|
||||
histogram[0] += reps;
|
||||
}
|
||||
} else {
|
||||
tail_start = i;
|
||||
++histogram[value];
|
||||
--reps;
|
||||
while (reps > 2) {
|
||||
++histogram[16];
|
||||
reps -= 6;
|
||||
}
|
||||
if (reps > 0) {
|
||||
histogram[value] += reps;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// create huffman tree of huffman tree
|
||||
uint8_t cost[kCodeLengthCodes] = { 0 };
|
||||
CreateHuffmanTree(histogram, kCodeLengthCodes, 7, cost);
|
||||
// account for rle extra bits
|
||||
cost[16] += 2;
|
||||
cost[17] += 3;
|
||||
cost[18] += 7;
|
||||
|
||||
int tree_size = 0;
|
||||
int bits = 6 + 3 * max_depth; // huffman tree of huffman tree cost
|
||||
for (int i = 0; i < kCodeLengthCodes; ++i) {
|
||||
bits += histogram[i] * cost[i]; // huffman tree bit cost
|
||||
tree_size += histogram[i];
|
||||
}
|
||||
// bit cost adjustment for long trailing zero sequence
|
||||
int tail_size = length - tail_start;
|
||||
int tail_bits = 0;
|
||||
while (tail_size >= 1) {
|
||||
if (tail_size < 3) {
|
||||
tail_bits += tail_size * cost[0];
|
||||
tree_size -= tail_size;
|
||||
break;
|
||||
} else if (tail_size < 11) {
|
||||
tail_bits += cost[17];
|
||||
--tree_size;
|
||||
break;
|
||||
} else {
|
||||
tail_bits += cost[18];
|
||||
tail_size -= 138;
|
||||
--tree_size;
|
||||
}
|
||||
}
|
||||
if (tail_bits > 12) {
|
||||
bits += ((Log2Ceiling(tree_size - 1) + 1) & ~1) + 3 - tail_bits;
|
||||
}
|
||||
return bits;
|
||||
}
|
||||
|
||||
template<int kSize>
|
||||
double PopulationCost(const Histogram<kSize>& histogram) {
|
||||
if (histogram.total_count_ == 0) {
|
||||
return 4;
|
||||
}
|
||||
int symbols[2] = { 0 };
|
||||
int count = 0;
|
||||
for (int i = 0; i < kSize && count < 3; ++i) {
|
||||
if (histogram.data_[i] > 0) {
|
||||
if (count < 2) symbols[count] = i;
|
||||
++count;
|
||||
}
|
||||
}
|
||||
if (count <= 2 && symbols[0] < 256 && symbols[1] < 256) {
|
||||
return ((symbols[0] <= 1 ? 4 : 11) +
|
||||
(count == 2 ? 8 + histogram.total_count_ : 0));
|
||||
}
|
||||
uint8_t depth[kSize] = { 0 };
|
||||
CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth);
|
||||
int bits = HuffmanBitCost(depth, kSize);
|
||||
for (int i = 0; i < kSize; ++i) {
|
||||
bits += histogram.data_[i] * depth[i];
|
||||
}
|
||||
return bits;
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_BIT_COST_H_
|
411
enc/block_splitter.cc
Normal file
411
enc/block_splitter.cc
Normal file
@ -0,0 +1,411 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Block split point selection utilities.
|
||||
|
||||
#include "./block_splitter.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
|
||||
#include "./cluster.h"
|
||||
#include "./command.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./histogram.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Tuning parameters for block splitting.

// Upper bounds on the number of initial histograms.
static const int kMaxLiteralHistograms = 48;
static const int kMaxCommandHistograms = 50;

// Cost, in bits, charged for switching block type.
static const double kLiteralBlockSwitchCost = 26;
static const double kCommandBlockSwitchCost = 13.5;
static const double kDistanceBlockSwitchCost = 14.6;

// Number of consecutive symbols taken per histogram sample.
static const int kLiteralStrideLength = 70;
static const int kCommandStrideLength = 40;

// Symbols per initial histogram; the initial histogram count is the input
// length divided by this value, plus one.
static const int kSymbolsPerLiteralHistogram = 550;
static const int kSymbolsPerCommandHistogram = 530;
static const int kSymbolsPerDistanceHistogram = 550;

// Inputs shorter than this are kept as a single block.
static const int kMinLengthForBlockSplitting = 128;

// Refinement runs kIterMulForRefining * length / stride +
// kMinItersForRefining sampling iterations.
static const int kIterMulForRefining = 2;
static const int kMinItersForRefining = 100;
|
||||
|
||||
void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
|
||||
const uint8_t* data,
|
||||
std::vector<uint8_t>* literals) {
|
||||
// Count how many we have.
|
||||
size_t total_length = 0;
|
||||
for (int i = 0; i < cmds.size(); ++i) {
|
||||
total_length += cmds[i].insert_length_;
|
||||
}
|
||||
if (total_length == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Allocate.
|
||||
literals->resize(total_length);
|
||||
|
||||
// Loop again, and copy this time.
|
||||
size_t pos = 0;
|
||||
size_t from_pos = 0;
|
||||
for (int i = 0; i < cmds.size() && pos < total_length; ++i) {
|
||||
memcpy(&(*literals)[pos], data + from_pos, cmds[i].insert_length_);
|
||||
pos += cmds[i].insert_length_;
|
||||
from_pos += cmds[i].insert_length_ + cmds[i].copy_length_;
|
||||
}
|
||||
}
|
||||
|
||||
void CopyCommandsToByteArray(const std::vector<Command>& cmds,
|
||||
std::vector<uint16_t>* insert_and_copy_codes,
|
||||
std::vector<uint8_t>* distance_prefixes) {
|
||||
for (int i = 0; i < cmds.size(); ++i) {
|
||||
const Command& cmd = cmds[i];
|
||||
insert_and_copy_codes->push_back(cmd.command_prefix_);
|
||||
if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
|
||||
distance_prefixes->push_back(cmd.distance_prefix_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int kSize>
|
||||
double HistogramAddEval(const Histogram<kSize>& a,
|
||||
const Histogram<kSize>& b) {
|
||||
int total = a.total_count_ + b.total_count_;
|
||||
double retval = total * FastLog2(total);
|
||||
for (int i = 0; i < kSize; ++i) {
|
||||
int count = a.data_[i] + b.data_[i];
|
||||
retval -= count * FastLog2(count);
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
// Seeds *vec with initial histograms sampled from data[0..length).
//
// The number of histograms is length / literals_per_histogram + 1, capped at
// max_histograms. Histogram i is built from `stride` consecutive symbols
// starting near position length * i / total_histograms; every histogram but
// the first is jittered by a pseudo-random offset (fixed seed, so results are
// reproducible).
//
// Degenerate inputs are handled explicitly: if max_histograms is not
// positive nothing is appended, and if the input is not longer than the
// stride each sample covers the whole input instead of reading out of
// bounds.
template<typename HistogramType, typename DataType>
void InitialEntropyCodes(const DataType* data, size_t length,
                         int literals_per_histogram,
                         int max_histograms,
                         size_t stride,
                         std::vector<HistogramType>* vec) {
  int total_histograms = length / literals_per_histogram + 1;
  if (total_histograms > max_histograms) {
    total_histograms = max_histograms;
  }
  if (total_histograms <= 0) {
    return;
  }
  unsigned int seed = 7;
  int block_length = length / total_histograms;
  // Never sample more symbols than the input holds.
  const size_t sample_length = stride < length ? stride : length;
  for (int i = 0; i < total_histograms; ++i) {
    int pos = length * i / total_histograms;
    // block_length is zero when there are more histograms than symbols; skip
    // the jitter then rather than taking a remainder by zero.
    if (i != 0 && block_length > 0) {
      pos += rand_r(&seed) % block_length;
    }
    if (length <= stride) {
      pos = 0;
    } else if (pos + stride >= length) {
      // Pull the window back so that it ends inside the input.
      pos = length - stride - 1;
    }
    HistogramType histo;
    histo.Add(data + pos, sample_length);
    vec->push_back(histo);
  }
}
|
||||
|
||||
// Returns the index of the histogram in `vec` with the smallest
// HistogramAddEval(sample, vec[i]) value. The first minimum wins on ties;
// 0 is returned for an empty vector.
template<typename HistogramType>
int FindClosest(const HistogramType& sample,
                const std::vector<HistogramType>& vec) {
  const int num_candidates = vec.size();
  int closest = 0;
  double closest_distance = 1e99;
  for (int candidate = 0; candidate < num_candidates; ++candidate) {
    const double distance = HistogramAddEval(sample, vec[candidate]);
    if (distance < closest_distance) {
      closest_distance = distance;
      closest = candidate;
    }
  }
  return closest;
}
|
||||
|
||||
// Adds `stride` consecutive symbols, starting at a pseudo-random position of
// data[0..length), to *sample. *seed is advanced via rand_r.
//
// If the input is not longer than the stride there is no room to pick a
// start position, so the whole input is added instead (this avoids a
// remainder by zero and an out-of-bounds read).
template<typename HistogramType, typename DataType>
void RandomSample(unsigned int* seed,
                  const DataType* data,
                  size_t length,
                  size_t stride,
                  HistogramType* sample) {
  if (length <= stride) {
    sample->Add(data, length);
    return;
  }
  size_t pos = rand_r(seed) % (length - stride);
  sample->Add(data + pos, stride);
}
|
||||
|
||||
template<typename HistogramType, typename DataType>
|
||||
void RefineEntropyCodes(const DataType* data, size_t length,
|
||||
size_t stride,
|
||||
std::vector<HistogramType>* vec) {
|
||||
const int iters =
|
||||
kIterMulForRefining * length / stride + kMinItersForRefining;
|
||||
unsigned int seed = 7;
|
||||
for (int iter = 0; iter < iters; ++iter) {
|
||||
HistogramType sample;
|
||||
RandomSample(&seed, data, length, stride, &sample);
|
||||
int ix = FindClosest(sample, *vec);
|
||||
(*vec)[ix].AddHistogram(sample);
|
||||
}
|
||||
}
|
||||
|
||||
inline static float BitCost(int total, int count) {
|
||||
return count == 0 ? FastLog2(total) + 2 : FastLog2(total) - FastLog2(count);
|
||||
}
|
||||
|
||||
template<typename DataType, int kSize>
|
||||
void FindBlocks(const DataType* data, const size_t length,
|
||||
const double block_switch_bitcost,
|
||||
const std::vector<Histogram<kSize> > &vec,
|
||||
uint8_t *block_id) {
|
||||
if (vec.size() <= 1) {
|
||||
for (int i = 0; i < length; ++i) {
|
||||
block_id[i] = 0;
|
||||
}
|
||||
return;
|
||||
}
|
||||
int vecsize = vec.size();
|
||||
double* insert_cost = new double[kSize * vecsize];
|
||||
memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * vecsize);
|
||||
for (int i = 0; i < kSize; ++i) {
|
||||
for (int j = 0; j < vecsize; ++j) {
|
||||
insert_cost[i * vecsize + j] =
|
||||
BitCost(vec[j].total_count_, vec[j].data_[i]);
|
||||
}
|
||||
}
|
||||
double *cost = new double[vecsize];
|
||||
memset(cost, 0, sizeof(cost[0]) * vecsize);
|
||||
bool* switch_signal = new bool[length * vecsize];
|
||||
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * vecsize);
|
||||
// After each iteration of this loop, cost[k] will contain the difference
|
||||
// between the minimum cost of arriving at the current byte position using
|
||||
// entropy code k, and the minimum cost of arriving at the current byte
|
||||
// position. This difference is capped at the block switch cost, and if it
|
||||
// reaches block switch cost, it means that when we trace back from the last
|
||||
// position, we need to switch here.
|
||||
for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
|
||||
int ix = byte_ix * vecsize;
|
||||
int insert_cost_ix = data[byte_ix] * vecsize;
|
||||
double min_cost = 1e99;
|
||||
for (int k = 0; k < vecsize; ++k) {
|
||||
// We are coding the symbol in data[byte_ix] with entropy code k.
|
||||
cost[k] += insert_cost[insert_cost_ix + k];
|
||||
if (cost[k] < min_cost) {
|
||||
min_cost = cost[k];
|
||||
block_id[byte_ix] = k;
|
||||
}
|
||||
}
|
||||
double block_switch_cost = block_switch_bitcost;
|
||||
// More blocks for the beginning.
|
||||
if (byte_ix < 2000) {
|
||||
block_switch_cost *= 0.77 + 0.07 * byte_ix / 2000;
|
||||
}
|
||||
for (int k = 0; k < vecsize; ++k) {
|
||||
cost[k] -= min_cost;
|
||||
if (cost[k] >= block_switch_cost) {
|
||||
cost[k] = block_switch_cost;
|
||||
switch_signal[ix + k] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Now trace back from the last position and switch at the marked places.
|
||||
int byte_ix = length - 1;
|
||||
int ix = byte_ix * vecsize;
|
||||
int cur_id = block_id[byte_ix];
|
||||
while (byte_ix > 0) {
|
||||
--byte_ix;
|
||||
ix -= vecsize;
|
||||
if (switch_signal[ix + cur_id]) {
|
||||
cur_id = block_id[byte_ix];
|
||||
}
|
||||
block_id[byte_ix] = cur_id;
|
||||
}
|
||||
delete[] insert_cost;
|
||||
delete[] cost;
|
||||
delete[] switch_signal;
|
||||
}
|
||||
|
||||
// Renumbers block_ids[0..length) in place so that ids are dense and assigned
// in order of first appearance (the first distinct id becomes 0, the next
// new one 1, and so on). Returns the number of distinct ids.
int RemapBlockIds(uint8_t* block_ids, const size_t length) {
  // Ids are single bytes, so a 256-entry table covers every possible value.
  const int kUnassigned = -1;
  int new_id[256];
  for (int v = 0; v < 256; ++v) {
    new_id[v] = kUnassigned;
  }
  int next_id = 0;
  for (size_t i = 0; i < length; ++i) {
    int& mapped = new_id[block_ids[i]];
    if (mapped == kUnassigned) {
      mapped = next_id;
      ++next_id;
    }
    block_ids[i] = static_cast<uint8_t>(mapped);
  }
  return next_id;
}
|
||||
|
||||
template<typename HistogramType, typename DataType>
|
||||
void BuildBlockHistograms(const DataType* data, const size_t length,
|
||||
uint8_t* block_ids,
|
||||
std::vector<HistogramType>* histograms) {
|
||||
int num_types = RemapBlockIds(block_ids, length);
|
||||
histograms->clear();
|
||||
histograms->resize(num_types);
|
||||
for (int i = 0; i < length; ++i) {
|
||||
(*histograms)[block_ids[i]].Add(data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Merges similar blocks. Each maximal run of equal ids in block_ids becomes
// one histogram; the histograms are clustered with ClusterHistograms and
// block_ids is overwritten with the cluster symbol of each position's run.
template<typename HistogramType, typename DataType>
void ClusterBlocks(const DataType* data, const size_t length,
                   uint8_t* block_ids) {
  std::vector<HistogramType> histograms;
  // block_index[i] is the index of the run that position i belongs to.
  std::vector<int> block_index(length);
  int run_index = 0;
  HistogramType run_histogram;
  for (size_t pos = 0; pos < length; ++pos) {
    block_index[pos] = run_index;
    run_histogram.Add(data[pos]);
    const bool is_last = (pos + 1 == length);
    if (is_last || block_ids[pos] != block_ids[pos + 1]) {
      // End of a run: store its histogram and start a fresh one.
      histograms.push_back(run_histogram);
      run_histogram.Clear();
      ++run_index;
    }
  }

  std::vector<HistogramType> clustered_histograms;
  std::vector<int> histogram_symbols;
  // Block ids need to fit in one byte and there are two ids reserved for
  // indicating 'same as last' and 'last plus one'.
  static const int kMaxNumberOfBlockTypes = 254;
  ClusterHistograms(histograms, 1, histograms.size(),
                    kMaxNumberOfBlockTypes,
                    &clustered_histograms,
                    &histogram_symbols);

  for (size_t pos = 0; pos < length; ++pos) {
    block_ids[pos] = histogram_symbols[block_index[pos]];
  }
}
|
||||
|
||||
// Converts a per-position id array into run-length form: for every maximal
// run of equal ids, the id is appended to split->types_ and the run length to
// split->lengths_. split->num_types_ is set to the largest id plus one.
//
// For an empty id array num_types_ is set to 0 and nothing is appended
// (previously this read block_ids[0] out of bounds).
void BuildBlockSplit(const std::vector<uint8_t>& block_ids, BlockSplit* split) {
  if (block_ids.empty()) {
    split->num_types_ = 0;
    return;
  }
  int cur_id = block_ids[0];
  int cur_length = 1;
  int max_id = -1;
  for (size_t i = 1; i < block_ids.size(); ++i) {
    if (block_ids[i] != cur_id) {
      // The run ended just before position i; flush it.
      split->types_.push_back(cur_id);
      split->lengths_.push_back(cur_length);
      max_id = std::max(max_id, cur_id);
      cur_id = block_ids[i];
      cur_length = 0;
    }
    ++cur_length;
  }
  // Flush the final run.
  split->types_.push_back(cur_id);
  split->lengths_.push_back(cur_length);
  max_id = std::max(max_id, cur_id);
  split->num_types_ = max_id + 1;
}
|
||||
|
||||
template<typename HistogramType, typename DataType>
|
||||
void SplitByteVector(const std::vector<DataType>& data,
|
||||
const int literals_per_histogram,
|
||||
const int max_histograms,
|
||||
const int sampling_stride_length,
|
||||
const double block_switch_cost,
|
||||
BlockSplit* split) {
|
||||
if (data.empty()) {
|
||||
split->num_types_ = 0;
|
||||
return;
|
||||
} else if (data.size() < kMinLengthForBlockSplitting) {
|
||||
split->num_types_ = 1;
|
||||
split->types_.push_back(0);
|
||||
split->lengths_.push_back(data.size());
|
||||
return;
|
||||
}
|
||||
std::vector<HistogramType> histograms;
|
||||
// Find good entropy codes.
|
||||
InitialEntropyCodes(data.data(), data.size(),
|
||||
literals_per_histogram,
|
||||
max_histograms,
|
||||
sampling_stride_length,
|
||||
&histograms);
|
||||
RefineEntropyCodes(data.data(), data.size(),
|
||||
sampling_stride_length,
|
||||
&histograms);
|
||||
// Find a good path through literals with the good entropy codes.
|
||||
std::vector<uint8_t> block_ids(data.size());
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
FindBlocks(data.data(), data.size(),
|
||||
block_switch_cost,
|
||||
histograms,
|
||||
&block_ids[0]);
|
||||
BuildBlockHistograms(data.data(), data.size(), &block_ids[0], &histograms);
|
||||
}
|
||||
ClusterBlocks<HistogramType>(data.data(), data.size(), &block_ids[0]);
|
||||
BuildBlockSplit(block_ids, split);
|
||||
}
|
||||
|
||||
void SplitBlock(const std::vector<Command>& cmds,
|
||||
const uint8_t* data,
|
||||
BlockSplit* literal_split,
|
||||
BlockSplit* insert_and_copy_split,
|
||||
BlockSplit* dist_split) {
|
||||
// Create a continuous array of literals.
|
||||
std::vector<uint8_t> literals;
|
||||
CopyLiteralsToByteArray(cmds, data, &literals);
|
||||
|
||||
// Compute prefix codes for commands.
|
||||
std::vector<uint16_t> insert_and_copy_codes;
|
||||
std::vector<uint8_t> distance_prefixes;
|
||||
CopyCommandsToByteArray(cmds,
|
||||
&insert_and_copy_codes,
|
||||
&distance_prefixes);
|
||||
|
||||
SplitByteVector<HistogramLiteral>(
|
||||
literals,
|
||||
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
|
||||
kLiteralStrideLength, kLiteralBlockSwitchCost,
|
||||
literal_split);
|
||||
SplitByteVector<HistogramCommand>(
|
||||
insert_and_copy_codes,
|
||||
kSymbolsPerCommandHistogram, kMaxCommandHistograms,
|
||||
kCommandStrideLength, kCommandBlockSwitchCost,
|
||||
insert_and_copy_split);
|
||||
SplitByteVector<HistogramDistance>(
|
||||
distance_prefixes,
|
||||
kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
|
||||
kCommandStrideLength, kDistanceBlockSwitchCost,
|
||||
dist_split);
|
||||
}
|
||||
|
||||
void SplitBlockByTotalLength(const std::vector<Command>& all_commands,
|
||||
int input_size,
|
||||
int target_length,
|
||||
std::vector<std::vector<Command> >* blocks) {
|
||||
int num_blocks = input_size / target_length + 1;
|
||||
int length_limit = input_size / num_blocks + 1;
|
||||
int total_length = 0;
|
||||
std::vector<Command> cur_block;
|
||||
for (int i = 0; i < all_commands.size(); ++i) {
|
||||
const Command& cmd = all_commands[i];
|
||||
int cmd_length = cmd.insert_length_ + cmd.copy_length_;
|
||||
if (total_length > length_limit) {
|
||||
blocks->push_back(cur_block);
|
||||
cur_block.clear();
|
||||
total_length = 0;
|
||||
}
|
||||
cur_block.push_back(cmd);
|
||||
total_length += cmd_length;
|
||||
}
|
||||
blocks->push_back(cur_block);
|
||||
}
|
||||
|
||||
} // namespace brotli
|
77
enc/block_splitter.h
Normal file
77
enc/block_splitter.h
Normal file
@ -0,0 +1,77 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Block split point selection utilities.
|
||||
|
||||
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
|
||||
#define BROTLI_ENC_BLOCK_SPLITTER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
||||
#include "./command.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Run-length description of how a symbol stream is divided into blocks.
struct BlockSplit {
  // Number of distinct block types.
  int num_types_;
  // Type of each block, in stream order.
  std::vector<uint8_t> types_;
  // NOTE(review): not written by the code visible here; presumably the
  // encoded form of types_ - confirm against the users of this struct.
  std::vector<uint8_t> type_codes_;
  // Number of symbols in each block; parallel to types_.
  std::vector<int> lengths_;
};

// Walks a BlockSplit one symbol at a time, tracking the type of the block the
// current symbol belongs to. Holds a reference to the split, which must
// outlive the iterator.
struct BlockSplitIterator {
  // Starts at the first block with type_ == 0; length_ is the first block's
  // length, or 0 if the split has no blocks.
  explicit BlockSplitIterator(const BlockSplit& split)
      : split_(split), idx_(0), type_(0), length_(0) {
    if (!split.lengths_.empty()) {
      length_ = split.lengths_[0];
    }
  }

  // Consumes one symbol. If the current block is exhausted, first moves on to
  // the next block. No bounds check is made on the block index.
  void Next() {
    if (length_ != 0) {
      --length_;
      return;
    }
    ++idx_;
    type_ = split_.types_[idx_];
    length_ = split_.lengths_[idx_] - 1;
  }

  const BlockSplit& split_;
  // Index of the current block.
  int idx_;
  // Type of the current block.
  int type_;
  // Symbols remaining in the current block.
  int length_;
};
|
||||
|
||||
void CopyLiteralsToByteArray(const std::vector<Command>& cmds,
|
||||
const uint8_t* data,
|
||||
std::vector<uint8_t>* literals);
|
||||
|
||||
void SplitBlock(const std::vector<Command>& cmds,
|
||||
const uint8_t* data,
|
||||
BlockSplit* literal_split,
|
||||
BlockSplit* insert_and_copy_split,
|
||||
BlockSplit* dist_split);
|
||||
|
||||
void SplitBlockByTotalLength(const std::vector<Command>& all_commands,
|
||||
int input_size,
|
||||
int target_length,
|
||||
std::vector<std::vector<Command> >* blocks);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_BLOCK_SPLITTER_H_
|
288
enc/cluster.h
Normal file
288
enc/cluster.h
Normal file
@ -0,0 +1,288 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Functions for clustering similar histograms together.
|
||||
|
||||
#ifndef BROTLI_ENC_CLUSTER_H_
|
||||
#define BROTLI_ENC_CLUSTER_H_
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <complex>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "./bit_cost.h"
|
||||
#include "./entropy_encode.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./histogram.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// A candidate merge of two histograms.
struct HistogramPair {
  // Indices of the two histograms.
  int idx1;
  int idx2;
  bool valid;
  // Cost of the combined histogram.
  double cost_combo;
  // Change in total cost if the pair is merged.
  double cost_diff;
};

// Heap ordering for HistogramPair: p1 orders before p2 when p1 has the larger
// cost_diff, so the pair with the smallest cost_diff ends up at the top of a
// std heap. Ties are broken by index distance, the larger distance ordering
// first.
struct HistogramPairComparator {
  // const-qualified so the comparator can be invoked through const objects
  // and references, as standard algorithms are allowed to do.
  bool operator()(const HistogramPair& p1, const HistogramPair& p2) const {
    if (p1.cost_diff != p2.cost_diff) {
      return p1.cost_diff > p2.cost_diff;
    }
    return abs(p1.idx1 - p1.idx2) > abs(p2.idx1 - p2.idx2);
  }
};
|
||||
|
||||
// Returns entropy reduction of the context map when we combine two clusters.
|
||||
inline double ClusterCostDiff(int size_a, int size_b) {
|
||||
int size_c = size_a + size_b;
|
||||
return size_a * FastLog2(size_a) + size_b * FastLog2(size_b) -
|
||||
size_c * FastLog2(size_c);
|
||||
}
|
||||
|
||||
// Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
|
||||
// it is below a threshold, stores the pair (idx1, idx2) in the *pairs heap.
|
||||
template<int kSize>
|
||||
void CompareAndPushToHeap(const Histogram<kSize>* out,
|
||||
const int* cluster_size,
|
||||
int idx1, int idx2,
|
||||
std::vector<HistogramPair>* pairs) {
|
||||
if (idx1 == idx2) {
|
||||
return;
|
||||
}
|
||||
if (idx2 < idx1) {
|
||||
int t = idx2;
|
||||
idx2 = idx1;
|
||||
idx1 = t;
|
||||
}
|
||||
bool store_pair = false;
|
||||
HistogramPair p;
|
||||
p.idx1 = idx1;
|
||||
p.idx2 = idx2;
|
||||
p.valid = true;
|
||||
p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
|
||||
p.cost_diff -= out[idx1].bit_cost_;
|
||||
p.cost_diff -= out[idx2].bit_cost_;
|
||||
|
||||
if (out[idx1].total_count_ == 0) {
|
||||
p.cost_combo = out[idx2].bit_cost_;
|
||||
store_pair = true;
|
||||
} else if (out[idx2].total_count_ == 0) {
|
||||
p.cost_combo = out[idx1].bit_cost_;
|
||||
store_pair = true;
|
||||
} else {
|
||||
double threshold = pairs->empty() ? 1e99 :
|
||||
std::max(0.0, (*pairs)[0].cost_diff);
|
||||
Histogram<kSize> combo = out[idx1];
|
||||
combo.AddHistogram(out[idx2]);
|
||||
double cost_combo = PopulationCost(combo);
|
||||
if (cost_combo < threshold - p.cost_diff) {
|
||||
p.cost_combo = cost_combo;
|
||||
store_pair = true;
|
||||
}
|
||||
}
|
||||
if (store_pair) {
|
||||
p.cost_diff += p.cost_combo;
|
||||
pairs->push_back(p);
|
||||
push_heap(pairs->begin(), pairs->end(), HistogramPairComparator());
|
||||
}
|
||||
}
|
||||
|
||||
template<int kSize>
|
||||
void HistogramCombine(Histogram<kSize>* out,
|
||||
int* cluster_size,
|
||||
int* symbols,
|
||||
int symbols_size,
|
||||
int max_clusters) {
|
||||
double cost_diff_threshold = 0.0;
|
||||
int min_cluster_size = 1;
|
||||
std::set<int> all_symbols;
|
||||
std::vector<int> clusters;
|
||||
for (int i = 0; i < symbols_size; ++i) {
|
||||
if (all_symbols.find(symbols[i]) == all_symbols.end()) {
|
||||
all_symbols.insert(symbols[i]);
|
||||
clusters.push_back(symbols[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// We maintain a heap of histogram pairs, ordered by the bit cost reduction.
|
||||
std::vector<HistogramPair> pairs;
|
||||
for (int idx1 = 0; idx1 < clusters.size(); ++idx1) {
|
||||
for (int idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) {
|
||||
CompareAndPushToHeap(out, cluster_size, clusters[idx1], clusters[idx2],
|
||||
&pairs);
|
||||
}
|
||||
}
|
||||
|
||||
while (clusters.size() > min_cluster_size) {
|
||||
if (pairs[0].cost_diff >= cost_diff_threshold) {
|
||||
cost_diff_threshold = 1e99;
|
||||
min_cluster_size = max_clusters;
|
||||
continue;
|
||||
}
|
||||
// Take the best pair from the top of heap.
|
||||
int best_idx1 = pairs[0].idx1;
|
||||
int best_idx2 = pairs[0].idx2;
|
||||
out[best_idx1].AddHistogram(out[best_idx2]);
|
||||
out[best_idx1].bit_cost_ = pairs[0].cost_combo;
|
||||
cluster_size[best_idx1] += cluster_size[best_idx2];
|
||||
for (int i = 0; i < symbols_size; ++i) {
|
||||
if (symbols[i] == best_idx2) {
|
||||
symbols[i] = best_idx1;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i + 1 < clusters.size(); ++i) {
|
||||
if (clusters[i] >= best_idx2) {
|
||||
clusters[i] = clusters[i + 1];
|
||||
}
|
||||
}
|
||||
clusters.pop_back();
|
||||
// Invalidate pairs intersecting the just combined best pair.
|
||||
for (int i = 0; i < pairs.size(); ++i) {
|
||||
HistogramPair& p = pairs[i];
|
||||
if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
|
||||
p.idx1 == best_idx2 || p.idx2 == best_idx2) {
|
||||
p.valid = false;
|
||||
}
|
||||
}
|
||||
// Pop invalid pairs from the top of the heap.
|
||||
while (!pairs.empty() && !pairs[0].valid) {
|
||||
pop_heap(pairs.begin(), pairs.end(), HistogramPairComparator());
|
||||
pairs.pop_back();
|
||||
}
|
||||
// Push new pairs formed with the combined histogram to the heap.
|
||||
for (int i = 0; i < clusters.size(); ++i) {
|
||||
CompareAndPushToHeap(out, cluster_size, best_idx1, clusters[i], &pairs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Histogram refinement
|
||||
|
||||
// What is the bit cost of moving histogram from cur_symbol to candidate.
|
||||
template<int kSize>
|
||||
double HistogramBitCostDistance(const Histogram<kSize>& histogram,
|
||||
const Histogram<kSize>& candidate) {
|
||||
if (histogram.total_count_ == 0) {
|
||||
return 0.0;
|
||||
}
|
||||
Histogram<kSize> tmp = histogram;
|
||||
tmp.AddHistogram(candidate);
|
||||
return PopulationCost(tmp) - candidate.bit_cost_;
|
||||
}
|
||||
|
||||
// Find the best 'out' histogram for each of the 'in' histograms.
|
||||
// Note: we assume that out[]->bit_cost_ is already up-to-date.
|
||||
template<int kSize>
|
||||
void HistogramRemap(const Histogram<kSize>* in, int in_size,
|
||||
Histogram<kSize>* out, int* symbols) {
|
||||
std::set<int> all_symbols;
|
||||
for (int i = 0; i < in_size; ++i) {
|
||||
all_symbols.insert(symbols[i]);
|
||||
}
|
||||
for (int i = 0; i < in_size; ++i) {
|
||||
int best_out = i == 0 ? symbols[0] : symbols[i - 1];
|
||||
double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
|
||||
for (std::set<int>::const_iterator k = all_symbols.begin();
|
||||
k != all_symbols.end(); ++k) {
|
||||
const double cur_bits = HistogramBitCostDistance(in[i], out[*k]);
|
||||
if (cur_bits < best_bits) {
|
||||
best_bits = cur_bits;
|
||||
best_out = *k;
|
||||
}
|
||||
}
|
||||
symbols[i] = best_out;
|
||||
}
|
||||
|
||||
// Recompute each out based on raw and symbols.
|
||||
for (std::set<int>::const_iterator k = all_symbols.begin();
|
||||
k != all_symbols.end(); ++k) {
|
||||
out[*k].Clear();
|
||||
}
|
||||
for (int i = 0; i < in_size; ++i) {
|
||||
out[symbols[i]].AddHistogram(in[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Reorder histograms in *out so that the new symbols in *symbols come in
|
||||
// increasing order.
|
||||
template<int kSize>
|
||||
void HistogramReindex(std::vector<Histogram<kSize> >* out,
|
||||
std::vector<int>* symbols) {
|
||||
std::vector<Histogram<kSize> > tmp(*out);
|
||||
std::map<int, int> new_index;
|
||||
int next_index = 0;
|
||||
for (int i = 0; i < symbols->size(); ++i) {
|
||||
if (new_index.find((*symbols)[i]) == new_index.end()) {
|
||||
new_index[(*symbols)[i]] = next_index;
|
||||
(*out)[next_index] = tmp[(*symbols)[i]];
|
||||
++next_index;
|
||||
}
|
||||
}
|
||||
out->resize(next_index);
|
||||
for (int i = 0; i < symbols->size(); ++i) {
|
||||
(*symbols)[i] = new_index[(*symbols)[i]];
|
||||
}
|
||||
}
|
||||
|
||||
// Clusters similar histograms in 'in' together, the selected histograms are
|
||||
// placed in 'out', and for each index in 'in', *histogram_symbols will
|
||||
// indicate which of the 'out' histograms is the best approximation.
|
||||
template<int kSize>
|
||||
void ClusterHistograms(const std::vector<Histogram<kSize> >& in,
|
||||
int num_contexts, int num_blocks,
|
||||
int max_histograms,
|
||||
std::vector<Histogram<kSize> >* out,
|
||||
std::vector<int>* histogram_symbols) {
|
||||
const int in_size = num_contexts * num_blocks;
|
||||
std::vector<int> cluster_size(in_size, 1);
|
||||
out->resize(in_size);
|
||||
histogram_symbols->resize(in_size);
|
||||
for (int i = 0; i < in_size; ++i) {
|
||||
(*out)[i] = in[i];
|
||||
(*out)[i].bit_cost_ = PopulationCost(in[i]);
|
||||
(*histogram_symbols)[i] = i;
|
||||
}
|
||||
|
||||
// Collapse similar histograms within a block type.
|
||||
if (num_contexts > 1) {
|
||||
for (int i = 0; i < num_blocks; ++i) {
|
||||
HistogramCombine(&(*out)[0], &cluster_size[0],
|
||||
&(*histogram_symbols)[i * num_contexts], num_contexts,
|
||||
max_histograms);
|
||||
}
|
||||
}
|
||||
|
||||
// Collapse similar histograms.
|
||||
HistogramCombine(&(*out)[0], &cluster_size[0],
|
||||
&(*histogram_symbols)[0], in_size,
|
||||
max_histograms);
|
||||
|
||||
// Find the optimal map from original histograms to the final ones.
|
||||
HistogramRemap(&in[0], in_size, &(*out)[0], &(*histogram_symbols)[0]);
|
||||
|
||||
// Convert the context map to a canonical form.
|
||||
HistogramReindex(out, histogram_symbols);
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_CLUSTER_H_
|
45
enc/command.h
Normal file
45
enc/command.h
Normal file
@ -0,0 +1,45 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// This class models a sequence of literals and a backward reference copy.
|
||||
|
||||
#ifndef BROTLI_ENC_COMMAND_H_
|
||||
#define BROTLI_ENC_COMMAND_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Command holds a sequence of literals and a backward reference copy.
class Command {
 public:
  // Creates a command with every field set to zero.
  Command()
      : insert_length_(0),
        copy_length_(0),
        copy_distance_(0),
        distance_code_(0),
        distance_prefix_(0),
        command_prefix_(0),
        distance_extra_bits_(0),
        distance_extra_bits_value_(0) {}

  uint32_t insert_length_;
  uint32_t copy_length_;
  uint32_t copy_distance_;
  // Values <= 16 are short codes, values > 16 are distances shifted by 16.
  uint32_t distance_code_;
  uint16_t distance_prefix_;
  uint16_t command_prefix_;
  int distance_extra_bits_;
  uint32_t distance_extra_bits_value_;
};
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_COMMAND_H_
|
130
enc/context.h
Normal file
130
enc/context.h
Normal file
@ -0,0 +1,130 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Functions to map previous bytes into a context id.
|
||||
|
||||
#ifndef BROTLI_ENC_CONTEXT_H_
|
||||
#define BROTLI_ENC_CONTEXT_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Maps a byte value to one of four context ids:
//   0 -> 0,  1..127 -> 1,  128..254 -> 2,  255 -> 3.
static const int kSigned2BitContextLookup[256] = {
  // 0..127
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  // 128..255
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
};
|
||||
|
||||
// Maps a byte value to one of eight context ids:
//   0 -> 0,  1..15 -> 1,  16..63 -> 2,  64..127 -> 3,
//   128..191 -> 4,  192..239 -> 5,  240..254 -> 6,  255 -> 7.
static const int kSigned3BitContextLookup[256] = {
  // 0..15
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  // 16..63
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  // 64..127
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  // 128..191
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  // 192..239
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  // 240..255
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
};
|
||||
|
||||
// Maps a byte value to one of sixteen context ids. The buckets are narrow
// near 0 and near 255 and wide in the middle of the byte range.
static const int kSigned4BitContextLookup[256] = {
  // 0..15
  0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
  // 16..31
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  // 32..63
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  // 64..127
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  // 128..191
  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  // 192..223
  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
  9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
  // 224..239
  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
  // 240..255
  11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 14, 15,
};
|
||||
|
||||
// Context modes understood by Context(). The numeric values matter: they
// index kContextSize, and for the FULL/MSBn modes Context() shifts the
// previous byte right by (mode - 1).
enum ContextType {
  CONTEXT_NONE = 0,                // single context
  CONTEXT_FULL = 1,                // the whole previous byte
  CONTEXT_MSB7 = 2,                // previous byte >> 1
  CONTEXT_MSB6 = 3,                // previous byte >> 2
  CONTEXT_MSB5 = 4,                // previous byte >> 3
  CONTEXT_MSB4 = 5,                // previous byte >> 4
  CONTEXT_MSB3 = 6,                // previous byte >> 5
  CONTEXT_MSB2 = 7,                // previous byte >> 6
  CONTEXT_MSB1 = 8,                // previous byte >> 7
  CONTEXT_IS_ZERO = 9,             // whether the previous byte is zero
  CONTEXT_SIGNED_2BIT = 10,        // kSigned2BitContextLookup
  CONTEXT_SIGNED_3BIT = 11,        // kSigned3BitContextLookup
  CONTEXT_SIGNED_4BIT = 12,        // kSigned4BitContextLookup
  CONTEXT_SIGNED_MIXED_3BYTE = 13, // combination of the last three bytes
};
|
||||
|
||||
// Number of distinct context ids per context mode, indexed by ContextType.
static const int kContextSize[] = {
  1,    // CONTEXT_NONE
  256,  // CONTEXT_FULL
  128,  // CONTEXT_MSB7
  64,   // CONTEXT_MSB6
  32,   // CONTEXT_MSB5
  16,   // CONTEXT_MSB4
  8,    // CONTEXT_MSB3
  4,    // CONTEXT_MSB2
  2,    // CONTEXT_MSB1
  2,    // CONTEXT_IS_ZERO
  4,    // CONTEXT_SIGNED_2BIT
  8,    // CONTEXT_SIGNED_3BIT
  16,   // CONTEXT_SIGNED_4BIT
  64,   // CONTEXT_SIGNED_MIXED_3BYTE
};
|
||||
|
||||
static inline int NumContexts(int mode) {
|
||||
return kContextSize[mode];
|
||||
}
|
||||
|
||||
static inline uint8_t Context(uint8_t prev_byte, uint8_t prev_byte2,
|
||||
uint8_t prev_byte3, int mode) {
|
||||
switch (mode) {
|
||||
case CONTEXT_NONE:
|
||||
return 0;
|
||||
case CONTEXT_IS_ZERO:
|
||||
return prev_byte == 0 ? 0 : 1;
|
||||
case CONTEXT_SIGNED_2BIT:
|
||||
return kSigned2BitContextLookup[prev_byte];
|
||||
case CONTEXT_SIGNED_3BIT:
|
||||
return kSigned3BitContextLookup[prev_byte];
|
||||
case CONTEXT_SIGNED_4BIT:
|
||||
return kSigned4BitContextLookup[prev_byte];
|
||||
case CONTEXT_SIGNED_MIXED_3BYTE:
|
||||
return ((kSigned3BitContextLookup[prev_byte] << 3) +
|
||||
(kSigned2BitContextLookup[prev_byte2] << 1) +
|
||||
(prev_byte3 == 0 ? 0 : 1));
|
||||
default:
|
||||
return prev_byte >> (mode - 1);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_CONTEXT_H_
|
778
enc/encode.cc
Normal file
778
enc/encode.cc
Normal file
@ -0,0 +1,778 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Implementation of Brotli compressor.
|
||||
|
||||
#include "./encode.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "./backward_references.h"
|
||||
#include "./bit_cost.h"
|
||||
#include "./block_splitter.h"
|
||||
#include "./cluster.h"
|
||||
#include "./context.h"
|
||||
#include "./entropy_encode.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./histogram.h"
|
||||
#include "./prefix.h"
|
||||
#include "./write_bits.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
template<int kSize>
|
||||
double Entropy(const std::vector<Histogram<kSize> >& histograms) {
|
||||
double retval = 0;
|
||||
for (int i = 0; i < histograms.size(); ++i) {
|
||||
retval += histograms[i].EntropyBitCost();
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
// Stores len as a 3-bit byte count followed by that many 8-bit groups holding
// the bytes of len, least significant byte first. For len == 0 only the byte
// count (zero) is stored.
void EncodeSize(size_t len, int* storage_ix, uint8_t* storage) {
  std::vector<uint8_t> len_bytes;
  for (size_t rest = len; rest > 0; rest >>= 8) {
    len_bytes.push_back(rest & 0xff);
  }
  WriteBits(3, len_bytes.size(), storage_ix, storage);
  for (std::vector<uint8_t>::const_iterator it = len_bytes.begin();
       it != len_bytes.end(); ++it) {
    WriteBits(8, *it, storage_ix, storage);
  }
}
|
||||
|
||||
// Stores the meta-block header: one bit telling whether this is the last
// meta-block and, if it is not, the meta-block size.
//
// The size is stored in 8-bit groups, least significant byte first, until
// input_size_bits bits are covered; input_size_bits is therefore effectively
// rounded up to a whole number of bytes. Nothing but the flag bit is stored
// for the last meta-block.
void EncodeMetaBlockLength(int input_size_bits,
                           size_t meta_block_size,
                           bool is_last_meta_block,
                           int* storage_ix, uint8_t* storage) {
  WriteBits(1, is_last_meta_block, storage_ix, storage);
  if (is_last_meta_block) return;
  // The loop only ends once input_size_bits <= 0, so there is never a
  // partial group of bits left to store afterwards.
  while (input_size_bits > 0) {
    WriteBits(8, meta_block_size & 0xff, storage_ix, storage);
    meta_block_size >>= 8;
    input_size_bits -= 8;
  }
}
|
||||
|
||||
template<int kSize>
|
||||
void EntropyEncode(int val, const EntropyCode<kSize>& code,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
if (code.count_ <= 1) {
|
||||
return;
|
||||
};
|
||||
WriteBits(code.depth_[val], code.bits_[val], storage_ix, storage);
|
||||
}
|
||||
|
||||
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
|
||||
const uint8_t* code_length_bitdepth,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
static const uint8_t kStorageOrder[kCodeLengthCodes] = {
|
||||
17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
};
|
||||
// Throw away trailing zeros:
|
||||
int codes_to_store = kCodeLengthCodes;
|
||||
for (; codes_to_store > 4; --codes_to_store) {
|
||||
if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
WriteBits(4, codes_to_store - 4, storage_ix, storage);
|
||||
for (int i = 0; i < codes_to_store; ++i) {
|
||||
WriteBits(3, code_length_bitdepth[kStorageOrder[i]], storage_ix, storage);
|
||||
}
|
||||
}
|
||||
|
||||
void StoreHuffmanTreeToBitMask(
|
||||
const uint8_t* huffman_tree,
|
||||
const uint8_t* huffman_tree_extra_bits,
|
||||
const int huffman_tree_size,
|
||||
const EntropyCode<kCodeLengthCodes>& entropy,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
for (int i = 0; i < huffman_tree_size; ++i) {
|
||||
const int ix = huffman_tree[i];
|
||||
const int extra_bits = huffman_tree_extra_bits[i];
|
||||
EntropyEncode(ix, entropy, storage_ix, storage);
|
||||
switch (ix) {
|
||||
case 16:
|
||||
WriteBits(2, extra_bits, storage_ix, storage);
|
||||
break;
|
||||
case 17:
|
||||
WriteBits(3, extra_bits, storage_ix, storage);
|
||||
break;
|
||||
case 18:
|
||||
WriteBits(7, extra_bits, storage_ix, storage);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<int kSize>
|
||||
void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
const int kMaxBits = 8;
|
||||
const int kMaxSymbol = 1 << kMaxBits;
|
||||
|
||||
if (code.count_ == 0) { // emit minimal tree for empty cases
|
||||
// bits: small tree marker: 1, count-1: 0, large 8-bit code: 0, code: 0
|
||||
WriteBits(4, 0x01, storage_ix, storage);
|
||||
return;
|
||||
}
|
||||
if (code.count_ <= 2 &&
|
||||
code.symbols_[0] < kMaxSymbol &&
|
||||
code.symbols_[1] < kMaxSymbol) {
|
||||
// Small tree marker to encode 1 or 2 symbols.
|
||||
WriteBits(1, 1, storage_ix, storage);
|
||||
WriteBits(1, code.count_ - 1, storage_ix, storage);
|
||||
if (code.symbols_[0] <= 1) {
|
||||
// Code bit for small (1 bit) symbol value.
|
||||
WriteBits(1, 0, storage_ix, storage);
|
||||
WriteBits(1, code.symbols_[0], storage_ix, storage);
|
||||
} else {
|
||||
WriteBits(1, 1, storage_ix, storage);
|
||||
WriteBits(8, code.symbols_[0], storage_ix, storage);
|
||||
}
|
||||
if (code.count_ == 2) {
|
||||
WriteBits(8, code.symbols_[1], storage_ix, storage);
|
||||
}
|
||||
return;
|
||||
}
|
||||
WriteBits(1, 0, storage_ix, storage);
|
||||
|
||||
uint8_t huffman_tree[kSize];
|
||||
uint8_t huffman_tree_extra_bits[kSize];
|
||||
int huffman_tree_size = 0;
|
||||
WriteHuffmanTree(&code.depth_[0],
|
||||
alphabet_size,
|
||||
&huffman_tree[0],
|
||||
&huffman_tree_extra_bits[0],
|
||||
&huffman_tree_size);
|
||||
Histogram<kCodeLengthCodes> huffman_tree_histogram;
|
||||
memset(huffman_tree_histogram.data_, 0, sizeof(huffman_tree_histogram.data_));
|
||||
for (int i = 0; i < huffman_tree_size; ++i) {
|
||||
huffman_tree_histogram.Add(huffman_tree[i]);
|
||||
}
|
||||
EntropyCode<kCodeLengthCodes> huffman_tree_entropy;
|
||||
BuildEntropyCode(huffman_tree_histogram, 7, kCodeLengthCodes,
|
||||
&huffman_tree_entropy);
|
||||
Histogram<kCodeLengthCodes> trimmed_histogram = huffman_tree_histogram;
|
||||
uint8_t* last_code = &huffman_tree[huffman_tree_size - 1];
|
||||
while (*last_code == 0 || *last_code >= 17) {
|
||||
trimmed_histogram.Remove(*last_code--);
|
||||
}
|
||||
int trimmed_size = trimmed_histogram.total_count_;
|
||||
bool write_length = false;
|
||||
if (trimmed_size > 1 && trimmed_size < huffman_tree_size) {
|
||||
EntropyCode<kCodeLengthCodes> trimmed_entropy;
|
||||
BuildEntropyCode(trimmed_histogram, 7, kCodeLengthCodes, &trimmed_entropy);
|
||||
int huffman_bit_cost = HuffmanTreeBitCost(huffman_tree_histogram,
|
||||
huffman_tree_entropy);
|
||||
int trimmed_bit_cost = HuffmanTreeBitCost(trimmed_histogram,
|
||||
trimmed_entropy);;
|
||||
const int nbits = Log2Ceiling(trimmed_size - 1);
|
||||
const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
|
||||
if (trimmed_bit_cost + 3 + 2 * nbitpairs < huffman_bit_cost) {
|
||||
write_length = true;
|
||||
huffman_tree_size = trimmed_size;
|
||||
huffman_tree_entropy = trimmed_entropy;
|
||||
}
|
||||
}
|
||||
|
||||
StoreHuffmanTreeOfHuffmanTreeToBitMask(
|
||||
&huffman_tree_entropy.depth_[0], storage_ix, storage);
|
||||
WriteBits(1, write_length, storage_ix, storage);
|
||||
if (write_length) {
|
||||
const int nbits = Log2Ceiling(huffman_tree_size - 1);
|
||||
const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
|
||||
WriteBits(3, nbitpairs - 1, storage_ix, storage);
|
||||
WriteBits(nbitpairs * 2, huffman_tree_size - 2, storage_ix, storage);
|
||||
}
|
||||
StoreHuffmanTreeToBitMask(&huffman_tree[0], &huffman_tree_extra_bits[0],
|
||||
huffman_tree_size, huffman_tree_entropy,
|
||||
storage_ix, storage);
|
||||
}
|
||||
|
||||
template<int kSize>
|
||||
void StoreHuffmanCodes(const std::vector<EntropyCode<kSize> >& codes,
|
||||
int alphabet_size,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
for (int i = 0; i < codes.size(); ++i) {
|
||||
StoreHuffmanCode(codes[i], alphabet_size, storage_ix, storage);
|
||||
}
|
||||
}
|
||||
|
||||
void EncodeCommand(const Command& cmd,
|
||||
const EntropyCodeCommand& entropy,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
int code = cmd.command_prefix_;
|
||||
EntropyEncode(code, entropy, storage_ix, storage);
|
||||
if (code >= 128) {
|
||||
code -= 128;
|
||||
}
|
||||
int insert_extra_bits = InsertLengthExtraBits(code);
|
||||
uint64_t insert_extra_bits_val =
|
||||
cmd.insert_length_ - InsertLengthOffset(code);
|
||||
int copy_extra_bits = CopyLengthExtraBits(code);
|
||||
uint64_t copy_extra_bits_val = cmd.copy_length_ - CopyLengthOffset(code);
|
||||
if (insert_extra_bits > 0) {
|
||||
WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage);
|
||||
}
|
||||
if (copy_extra_bits > 0) {
|
||||
WriteBits(copy_extra_bits, copy_extra_bits_val, storage_ix, storage);
|
||||
}
|
||||
}
|
||||
|
||||
void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
int code = cmd.distance_prefix_;
|
||||
int extra_bits = cmd.distance_extra_bits_;
|
||||
uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
|
||||
EntropyEncode(code, entropy, storage_ix, storage);
|
||||
if (extra_bits > 0) {
|
||||
WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ComputeDistanceShortCodes(std::vector<Command>* cmds) {
|
||||
static const int kIndexOffset[16] = {
|
||||
3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
|
||||
};
|
||||
static const int kValueOffset[16] = {
|
||||
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
|
||||
};
|
||||
int dist_ringbuffer[4] = { 4, 11, 15, 16 };
|
||||
int ringbuffer_idx = 0;
|
||||
for (int i = 0; i < cmds->size(); ++i) {
|
||||
int cur_dist = (*cmds)[i].copy_distance_;
|
||||
if (cur_dist == 0) break;
|
||||
int dist_code = cur_dist + 16;
|
||||
for (int k = 0; k < 16; ++k) {
|
||||
// Only accept more popular choices.
|
||||
if (cur_dist < 11 && ((k >= 2 && k < 4) || k >= 6)) {
|
||||
// Typically unpopular ranges, don't replace a short distance
|
||||
// with them.
|
||||
continue;
|
||||
}
|
||||
int comp = (dist_ringbuffer[(ringbuffer_idx + kIndexOffset[k]) & 3] +
|
||||
kValueOffset[k]);
|
||||
if (cur_dist == comp) {
|
||||
dist_code = k + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (dist_code > 1) {
|
||||
dist_ringbuffer[ringbuffer_idx & 3] = cur_dist;
|
||||
++ringbuffer_idx;
|
||||
}
|
||||
(*cmds)[i].distance_code_ = dist_code;
|
||||
}
|
||||
}
|
||||
|
||||
void ComputeCommandPrefixes(std::vector<Command>* cmds,
|
||||
int num_direct_distance_codes,
|
||||
int distance_postfix_bits) {
|
||||
for (int i = 0; i < cmds->size(); ++i) {
|
||||
Command* cmd = &(*cmds)[i];
|
||||
cmd->command_prefix_ = CommandPrefix(cmd->insert_length_,
|
||||
cmd->copy_length_);
|
||||
if (cmd->copy_length_ > 0) {
|
||||
PrefixEncodeCopyDistance(cmd->distance_code_,
|
||||
num_direct_distance_codes,
|
||||
distance_postfix_bits,
|
||||
&cmd->distance_prefix_,
|
||||
&cmd->distance_extra_bits_,
|
||||
&cmd->distance_extra_bits_value_);
|
||||
}
|
||||
if (cmd->command_prefix_ < 128 && cmd->distance_prefix_ == 0) {
|
||||
cmd->distance_prefix_ = 0xffff;
|
||||
} else {
|
||||
cmd->command_prefix_ += 128;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the index of the first element of v equal to value, or -1 if there
// is no such element.
int IndexOf(const std::vector<int>& v, int value) {
  const std::vector<int>::const_iterator pos =
      std::find(v.begin(), v.end(), value);
  if (pos == v.end()) {
    return -1;
  }
  return static_cast<int>(pos - v.begin());
}
|
||||
|
||||
// Moves the element at position 'index' to the front of *v, shifting the
// elements that were before it one position towards the back.
// 'index' must be a valid position in *v.
void MoveToFront(std::vector<int>* v, int index) {
  std::rotate(v->begin(), v->begin() + index, v->begin() + index + 1);
}
|
||||
|
||||
std::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
|
||||
if (v.empty()) return v;
|
||||
std::vector<int> mtf(*max_element(v.begin(), v.end()) + 1);
|
||||
for (int i = 0; i < mtf.size(); ++i) mtf[i] = i;
|
||||
std::vector<int> result(v.size());
|
||||
for (int i = 0; i < v.size(); ++i) {
|
||||
int index = IndexOf(mtf, v[i]);
|
||||
result[i] = index;
|
||||
MoveToFront(&mtf, index);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Finds runs of zeros in v_in and replaces them with a prefix code of the run
|
||||
// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
|
||||
// shifted by *max_length_prefix. Will not create prefix codes bigger than the
|
||||
// initial value of *max_run_length_prefix. The prefix code of run length L is
|
||||
// simply Log2Floor(L) and the number of extra bits is the same as the prefix
|
||||
// code.
|
||||
void RunLengthCodeZeros(const std::vector<int>& v_in,
|
||||
int* max_run_length_prefix,
|
||||
std::vector<int>* v_out,
|
||||
std::vector<int>* extra_bits) {
|
||||
int max_reps = 0;
|
||||
for (int i = 0; i < v_in.size();) {
|
||||
for (; i < v_in.size() && v_in[i] != 0; ++i) ;
|
||||
int reps = 0;
|
||||
for (; i < v_in.size() && v_in[i] == 0; ++i) {
|
||||
++reps;
|
||||
}
|
||||
max_reps = std::max(reps, max_reps);
|
||||
}
|
||||
int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
|
||||
*max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
|
||||
for (int i = 0; i < v_in.size();) {
|
||||
if (v_in[i] != 0) {
|
||||
v_out->push_back(v_in[i] + *max_run_length_prefix);
|
||||
extra_bits->push_back(0);
|
||||
++i;
|
||||
} else {
|
||||
int reps = 1;
|
||||
for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
|
||||
++reps;
|
||||
}
|
||||
i += reps;
|
||||
while (reps) {
|
||||
if (reps < (2 << *max_run_length_prefix)) {
|
||||
int run_length_prefix = Log2Floor(reps);
|
||||
v_out->push_back(run_length_prefix);
|
||||
extra_bits->push_back(reps - (1 << run_length_prefix));
|
||||
break;
|
||||
} else {
|
||||
v_out->push_back(*max_run_length_prefix);
|
||||
extra_bits->push_back((1 << *max_run_length_prefix) - 1);
|
||||
reps -= (2 << *max_run_length_prefix) - 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a maximum zero-run-length-prefix value such that run-length coding
|
||||
// zeros in v with this maximum prefix value and then encoding the resulting
|
||||
// histogram and entropy-coding v produces the least amount of bits.
|
||||
int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
|
||||
int min_cost = std::numeric_limits<int>::max();
|
||||
int best_max_prefix = 0;
|
||||
for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) {
|
||||
std::vector<int> rle_symbols;
|
||||
std::vector<int> extra_bits;
|
||||
int max_run_length_prefix = max_prefix;
|
||||
RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits);
|
||||
if (max_run_length_prefix < max_prefix) break;
|
||||
HistogramLiteral histogram;
|
||||
for (int i = 0; i < rle_symbols.size(); ++i) {
|
||||
histogram.Add(rle_symbols[i]);
|
||||
}
|
||||
int bit_cost = PopulationCost(histogram);
|
||||
if (max_prefix > 0) {
|
||||
bit_cost += 4;
|
||||
}
|
||||
for (int i = 1; i <= max_prefix; ++i) {
|
||||
bit_cost += histogram.data_[i] * i; // extra bits
|
||||
}
|
||||
if (bit_cost < min_cost) {
|
||||
min_cost = bit_cost;
|
||||
best_max_prefix = max_prefix;
|
||||
}
|
||||
}
|
||||
return best_max_prefix;
|
||||
}
|
||||
|
||||
void EncodeContextMap(const std::vector<int>& context_map,
|
||||
int context_mode,
|
||||
int context_mode_bits,
|
||||
int num_clusters,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
if (context_mode == 0) {
|
||||
WriteBits(1, 0, storage_ix, storage); // no context
|
||||
return;
|
||||
}
|
||||
|
||||
WriteBits(1, 1, storage_ix, storage); // have context
|
||||
if (context_mode_bits > 0) {
|
||||
WriteBits(context_mode_bits, context_mode - 1, storage_ix, storage);
|
||||
}
|
||||
WriteBits(8, num_clusters - 1, storage_ix, storage);
|
||||
|
||||
if (num_clusters == 1 || num_clusters == context_map.size()) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
|
||||
std::vector<int> rle_symbols;
|
||||
std::vector<int> extra_bits;
|
||||
int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
|
||||
RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
|
||||
&rle_symbols, &extra_bits);
|
||||
HistogramLiteral symbol_histogram;
|
||||
for (int i = 0; i < rle_symbols.size(); ++i) {
|
||||
symbol_histogram.Add(rle_symbols[i]);
|
||||
}
|
||||
EntropyCodeLiteral symbol_code;
|
||||
BuildEntropyCode(symbol_histogram, 15, num_clusters + max_run_length_prefix,
|
||||
&symbol_code);
|
||||
bool use_rle = max_run_length_prefix > 0;
|
||||
WriteBits(1, use_rle, storage_ix, storage);
|
||||
if (use_rle) {
|
||||
WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
|
||||
}
|
||||
StoreHuffmanCode(symbol_code, num_clusters + max_run_length_prefix,
|
||||
storage_ix, storage);
|
||||
for (int i = 0; i < rle_symbols.size(); ++i) {
|
||||
EntropyEncode(rle_symbols[i], symbol_code, storage_ix, storage);
|
||||
if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
|
||||
WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
|
||||
}
|
||||
}
|
||||
WriteBits(1, 1, storage_ix, storage); // use move-to-front
|
||||
}
|
||||
|
||||
template<int kSize>
|
||||
void BuildEntropyCodes(const std::vector<Histogram<kSize> >& histograms,
|
||||
int alphabet_size,
|
||||
std::vector<EntropyCode<kSize> >* entropy_codes) {
|
||||
entropy_codes->resize(histograms.size());
|
||||
for (int i = 0; i < histograms.size(); ++i) {
|
||||
BuildEntropyCode(histograms[i], 15, alphabet_size, &(*entropy_codes)[i]);
|
||||
}
|
||||
}
|
||||
|
||||
struct BlockSplitCode {
|
||||
EntropyCodeLiteral block_type_code;
|
||||
EntropyCodeBlockLength block_len_code;
|
||||
};
|
||||
|
||||
void EncodeBlockLength(const EntropyCodeBlockLength& entropy,
|
||||
int length,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
int len_code = BlockLengthPrefix(length);
|
||||
int extra_bits = BlockLengthExtraBits(len_code);
|
||||
int extra_bits_value = length - BlockLengthOffset(len_code);
|
||||
EntropyEncode(len_code, entropy, storage_ix, storage);
|
||||
|
||||
if (extra_bits > 0) {
|
||||
WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
|
||||
}
|
||||
}
|
||||
|
||||
// Appends to split->type_codes_ one short code per entry of split->types_:
//   0         the type equals the type seen two blocks earlier,
//   1         the type equals the previous block's type plus one,
//   type + 2  in every other case.
// The two-entry history starts out as if types 0 and then 1 had been seen.
//
// A split with at most one type gets num_types_ forced to 1 and receives no
// type codes at all.
void ComputeBlockTypeShortCodes(BlockSplit* split) {
  if (split->num_types_ <= 1) {
    split->num_types_ = 1;
    return;
  }
  int second_last_type = 0;
  int last_type = 1;
  for (size_t i = 0; i < split->types_.size(); ++i) {
    const int type = split->types_[i];
    int type_code = type + 2;
    if (type == second_last_type) {
      type_code = 0;
    } else if (type == last_type + 1) {
      type_code = 1;
    }
    // Slide the history window by one block.
    second_last_type = last_type;
    last_type = type;
    split->type_codes_.push_back(type_code);
  }
}
|
||||
|
||||
void BuildAndEncodeBlockSplitCode(const BlockSplit& split,
|
||||
BlockSplitCode* code,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
if (split.num_types_ <= 1) {
|
||||
WriteBits(1, 0, storage_ix, storage);
|
||||
return;
|
||||
}
|
||||
WriteBits(1, 1, storage_ix, storage);
|
||||
HistogramLiteral type_histo;
|
||||
for (int i = 0; i < split.type_codes_.size(); ++i) {
|
||||
type_histo.Add(split.type_codes_[i]);
|
||||
}
|
||||
BuildEntropyCode(type_histo, 15, split.num_types_ + 2,
|
||||
&code->block_type_code);
|
||||
HistogramBlockLength length_histo;
|
||||
for (int i = 0; i < split.lengths_.size(); ++i) {
|
||||
length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
|
||||
}
|
||||
BuildEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
|
||||
&code->block_len_code);
|
||||
WriteBits(8, split.num_types_ - 1, storage_ix, storage);
|
||||
StoreHuffmanCode(code->block_type_code, split.num_types_ + 2,
|
||||
storage_ix, storage);
|
||||
StoreHuffmanCode(code->block_len_code, kNumBlockLenPrefixes,
|
||||
storage_ix, storage);
|
||||
EncodeBlockLength(code->block_len_code, split.lengths_[0],
|
||||
storage_ix, storage);
|
||||
}
|
||||
|
||||
void MoveAndEncode(const BlockSplitCode& code,
|
||||
BlockSplitIterator* it,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
if (it->length_ == 0) {
|
||||
++it->idx_;
|
||||
it->type_ = it->split_.types_[it->idx_];
|
||||
it->length_ = it->split_.lengths_[it->idx_];
|
||||
uint8_t type_code = it->split_.type_codes_[it->idx_];
|
||||
EntropyEncode(type_code, code.block_type_code, storage_ix, storage);
|
||||
EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
|
||||
}
|
||||
--it->length_;
|
||||
}
|
||||
|
||||
// Per-meta-block encoder settings.
struct EncodingParams {
  // Distance coding parameters; StoreMetaBlock writes distance_postfix_bits
  // as 2 bits and (num_direct_distance_codes >> distance_postfix_bits) as
  // 4 bits.
  int num_direct_distance_codes;
  int distance_postfix_bits;
  // Literal context mode; 0 disables literal context modeling.
  int literal_context_mode;
  // Distance context mode; any value > 0 selects 4 distance contexts per
  // block type, otherwise a single one is used.
  int distance_context_mode;
};
|
||||
|
||||
struct MetaBlock {
|
||||
std::vector<Command> cmds;
|
||||
EncodingParams params;
|
||||
BlockSplit literal_split;
|
||||
BlockSplit command_split;
|
||||
BlockSplit distance_split;
|
||||
std::vector<int> literal_context_map;
|
||||
std::vector<int> distance_context_map;
|
||||
std::vector<HistogramLiteral> literal_histograms;
|
||||
std::vector<HistogramCommand> command_histograms;
|
||||
std::vector<HistogramDistance> distance_histograms;
|
||||
};
|
||||
|
||||
void BuildMetaBlock(const EncodingParams& params,
|
||||
const std::vector<Command>& cmds,
|
||||
const uint8_t* input_buffer,
|
||||
size_t pos,
|
||||
MetaBlock* mb) {
|
||||
mb->cmds = cmds;
|
||||
mb->params = params;
|
||||
ComputeCommandPrefixes(&mb->cmds,
|
||||
mb->params.num_direct_distance_codes,
|
||||
mb->params.distance_postfix_bits);
|
||||
SplitBlock(mb->cmds,
|
||||
input_buffer + pos,
|
||||
&mb->literal_split,
|
||||
&mb->command_split,
|
||||
&mb->distance_split);
|
||||
ComputeBlockTypeShortCodes(&mb->literal_split);
|
||||
ComputeBlockTypeShortCodes(&mb->command_split);
|
||||
ComputeBlockTypeShortCodes(&mb->distance_split);
|
||||
|
||||
int num_literal_contexts_per_block_type =
|
||||
NumContexts(mb->params.literal_context_mode);
|
||||
int num_literal_contexts =
|
||||
mb->literal_split.num_types_ *
|
||||
num_literal_contexts_per_block_type;
|
||||
int num_distance_contexts_per_block_type =
|
||||
(mb->params.distance_context_mode > 0 ? 4 : 1);
|
||||
int num_distance_contexts =
|
||||
mb->distance_split.num_types_ *
|
||||
num_distance_contexts_per_block_type;
|
||||
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
|
||||
mb->command_histograms.resize(mb->command_split.num_types_);
|
||||
std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
|
||||
BuildHistograms(mb->cmds,
|
||||
mb->literal_split,
|
||||
mb->command_split,
|
||||
mb->distance_split,
|
||||
input_buffer,
|
||||
pos,
|
||||
mb->params.literal_context_mode,
|
||||
mb->params.distance_context_mode,
|
||||
&literal_histograms,
|
||||
&mb->command_histograms,
|
||||
&distance_histograms);
|
||||
|
||||
// Histogram ids need to fit in one byte and there are 16 ids reserved for
|
||||
// run length codes, which leaves a maximum number of 240 histograms.
|
||||
static const int kMaxNumberOfHistograms = 240;
|
||||
|
||||
mb->literal_histograms = literal_histograms;
|
||||
if (mb->params.literal_context_mode > 0) {
|
||||
ClusterHistograms(literal_histograms,
|
||||
num_literal_contexts_per_block_type,
|
||||
mb->literal_split.num_types_,
|
||||
kMaxNumberOfHistograms,
|
||||
&mb->literal_histograms,
|
||||
&mb->literal_context_map);
|
||||
}
|
||||
|
||||
mb->distance_histograms = distance_histograms;
|
||||
if (mb->params.distance_context_mode > 0) {
|
||||
ClusterHistograms(distance_histograms,
|
||||
num_distance_contexts_per_block_type,
|
||||
mb->distance_split.num_types_,
|
||||
kMaxNumberOfHistograms,
|
||||
&mb->distance_histograms,
|
||||
&mb->distance_context_map);
|
||||
}
|
||||
}
|
||||
|
||||
size_t MetaBlockLength(const std::vector<Command>& cmds) {
|
||||
size_t length = 0;
|
||||
for (int i = 0; i < cmds.size(); ++i) {
|
||||
const Command& cmd = cmds[i];
|
||||
length += cmd.insert_length_ + cmd.copy_length_;
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
void StoreMetaBlock(const MetaBlock& mb,
|
||||
const uint8_t* input_buffer,
|
||||
int input_size_bits,
|
||||
bool is_last,
|
||||
size_t* pos,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
size_t length = MetaBlockLength(mb.cmds);
|
||||
const size_t end_pos = *pos + length;
|
||||
EncodeMetaBlockLength(input_size_bits, length - 1, is_last,
|
||||
storage_ix, storage);
|
||||
BlockSplitCode literal_split_code;
|
||||
BlockSplitCode command_split_code;
|
||||
BlockSplitCode distance_split_code;
|
||||
BuildAndEncodeBlockSplitCode(mb.literal_split, &literal_split_code,
|
||||
storage_ix, storage);
|
||||
BuildAndEncodeBlockSplitCode(mb.command_split, &command_split_code,
|
||||
storage_ix, storage);
|
||||
BuildAndEncodeBlockSplitCode(mb.distance_split, &distance_split_code,
|
||||
storage_ix, storage);
|
||||
WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
|
||||
WriteBits(4,
|
||||
mb.params.num_direct_distance_codes >>
|
||||
mb.params.distance_postfix_bits, storage_ix, storage);
|
||||
int num_distance_codes =
|
||||
kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
|
||||
(48 << mb.params.distance_postfix_bits);
|
||||
EncodeContextMap(mb.literal_context_map, mb.params.literal_context_mode, 4,
|
||||
mb.literal_histograms.size(), storage_ix, storage);
|
||||
EncodeContextMap(mb.distance_context_map, mb.params.distance_context_mode, 0,
|
||||
mb.distance_histograms.size(), storage_ix, storage);
|
||||
std::vector<EntropyCodeLiteral> literal_codes;
|
||||
std::vector<EntropyCodeCommand> command_codes;
|
||||
std::vector<EntropyCodeDistance> distance_codes;
|
||||
BuildEntropyCodes(mb.literal_histograms, 256, &literal_codes);
|
||||
BuildEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
|
||||
&command_codes);
|
||||
BuildEntropyCodes(mb.distance_histograms, num_distance_codes,
|
||||
&distance_codes);
|
||||
StoreHuffmanCodes(literal_codes, 256, storage_ix, storage);
|
||||
StoreHuffmanCodes(command_codes, kNumCommandPrefixes, storage_ix, storage);
|
||||
StoreHuffmanCodes(distance_codes, num_distance_codes, storage_ix, storage);
|
||||
BlockSplitIterator literal_it(mb.literal_split);
|
||||
BlockSplitIterator command_it(mb.command_split);
|
||||
BlockSplitIterator distance_it(mb.distance_split);
|
||||
for (int i = 0; i < mb.cmds.size(); ++i) {
|
||||
const Command& cmd = mb.cmds[i];
|
||||
MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
|
||||
EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
|
||||
for (int j = 0; j < cmd.insert_length_; ++j) {
|
||||
MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
|
||||
int histogram_idx = literal_it.type_;
|
||||
if (mb.params.literal_context_mode > 0) {
|
||||
uint8_t prev_byte = *pos > 0 ? input_buffer[*pos - 1] : 0;
|
||||
uint8_t prev_byte2 = *pos > 1 ? input_buffer[*pos - 2] : 0;
|
||||
uint8_t prev_byte3 = *pos > 2 ? input_buffer[*pos - 3] : 0;
|
||||
int context = (literal_it.type_ *
|
||||
NumContexts(mb.params.literal_context_mode) +
|
||||
Context(prev_byte, prev_byte2, prev_byte3,
|
||||
mb.params.literal_context_mode));
|
||||
histogram_idx = mb.literal_context_map[context];
|
||||
}
|
||||
EntropyEncode(input_buffer[(*pos)++],
|
||||
literal_codes[histogram_idx], storage_ix, storage);
|
||||
}
|
||||
if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
|
||||
MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
|
||||
int histogram_index = distance_it.type_;
|
||||
if (mb.params.distance_context_mode > 0) {
|
||||
int context = distance_it.type_ << 2;
|
||||
context += (cmd.copy_length_ > 4) ? 3 : cmd.copy_length_ - 2;
|
||||
histogram_index = mb.distance_context_map[context];
|
||||
}
|
||||
EncodeCopyDistance(cmd, distance_codes[histogram_index],
|
||||
storage_ix, storage);
|
||||
}
|
||||
*pos += cmd.copy_length_;
|
||||
}
|
||||
}
|
||||
|
||||
int BrotliCompressBuffer(size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer) {
|
||||
int storage_ix = 0;
|
||||
uint8_t* storage = encoded_buffer;
|
||||
WriteBitsPrepareStorage(storage_ix, storage);
|
||||
EncodeSize(input_size, &storage_ix, storage);
|
||||
|
||||
if (input_size == 0) {
|
||||
*encoded_size = (storage_ix + 7) >> 3;
|
||||
return 1;
|
||||
}
|
||||
int input_size_bits = Log2Ceiling(input_size);
|
||||
|
||||
std::vector<Command> all_commands;
|
||||
CreateBackwardReferences(input_buffer, input_size, &all_commands);
|
||||
ComputeDistanceShortCodes(&all_commands);
|
||||
|
||||
std::vector<std::vector<Command> > meta_block_commands;
|
||||
SplitBlockByTotalLength(all_commands, input_size, 2 << 20,
|
||||
&meta_block_commands);
|
||||
|
||||
size_t pos = 0;
|
||||
for (int block_idx = 0; block_idx < meta_block_commands.size(); ++block_idx) {
|
||||
const std::vector<Command>& commands = meta_block_commands[block_idx];
|
||||
bool is_last_meta_block = (block_idx + 1 == meta_block_commands.size());
|
||||
EncodingParams params;
|
||||
params.num_direct_distance_codes = 12;
|
||||
params.distance_postfix_bits = 1;
|
||||
params.literal_context_mode = CONTEXT_SIGNED_MIXED_3BYTE;
|
||||
params.distance_context_mode = 1;
|
||||
MetaBlock mb;
|
||||
BuildMetaBlock(params, commands, input_buffer, pos, &mb);
|
||||
StoreMetaBlock(mb, input_buffer, input_size_bits, is_last_meta_block,
|
||||
&pos, &storage_ix, storage);
|
||||
}
|
||||
|
||||
*encoded_size = (storage_ix + 7) >> 3;
|
||||
return 1;
|
||||
}
|
||||
|
||||
} // namespace brotli
|
37
enc/encode.h
Normal file
37
enc/encode.h
Normal file
@ -0,0 +1,37 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// API for Brotli compression
|
||||
|
||||
#ifndef BROTLI_ENC_ENCODE_H_
|
||||
#define BROTLI_ENC_ENCODE_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Compresses the data in input_buffer into encoded_buffer, and sets
|
||||
// *encoded_size to the compressed length.
|
||||
// Returns 0 if there was an error and 1 otherwise.
|
||||
int BrotliCompressBuffer(size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer);
|
||||
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_ENCODE_H_
|
397
enc/entropy_encode.cc
Normal file
397
enc/entropy_encode.cc
Normal file
@ -0,0 +1,397 @@
|
||||
// Copyright 2010 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Entropy encoding (Huffman) utilities.
|
||||
|
||||
#include "./entropy_encode.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "./histogram.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
namespace {
|
||||
|
||||
// One node of the Huffman tree under construction.
//
// For a leaf, index_left_ is -1 and index_right_or_value_ holds the symbol
// value. For an internal node, both fields are indices of the child nodes in
// the node pool.
struct HuffmanTree {
  // Leaves all members uninitialized.
  HuffmanTree() {}

  HuffmanTree(int count, int16_t left, int16_t right)
      : total_count_(count),
        index_left_(left),
        index_right_or_value_(right) {}

  int total_count_;
  int16_t index_left_;
  int16_t index_right_or_value_;
};
|
||||
|
||||
// Sort the root nodes, least popular first.
|
||||
bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) {
|
||||
return v0.total_count_ < v1.total_count_;
|
||||
}
|
||||
|
||||
void SetDepth(const HuffmanTree &p,
|
||||
HuffmanTree *pool,
|
||||
uint8_t *depth,
|
||||
int level) {
|
||||
if (p.index_left_ >= 0) {
|
||||
++level;
|
||||
SetDepth(pool[p.index_left_], pool, depth, level);
|
||||
SetDepth(pool[p.index_right_or_value_], pool, depth, level);
|
||||
} else {
|
||||
depth[p.index_right_or_value_] = level;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// This function will create a Huffman tree.
|
||||
//
|
||||
// The catch here is that the tree cannot be arbitrarily deep.
|
||||
// Brotli specifies a maximum depth of 15 bits for "code trees"
|
||||
// and 7 bits for "code length code trees."
|
||||
//
|
||||
// count_limit is the value that is to be faked as the minimum value
|
||||
// and this minimum value is raised until the tree matches the
|
||||
// maximum length requirement.
|
||||
//
|
||||
// This algorithm is not of excellent performance for very long data blocks,
|
||||
// especially when population counts are longer than 2**tree_limit, but
|
||||
// we are not planning to use this with extremely long blocks.
|
||||
//
|
||||
// See http://en.wikipedia.org/wiki/Huffman_coding
|
||||
void CreateHuffmanTree(const int *data,
|
||||
const int length,
|
||||
const int tree_limit,
|
||||
uint8_t *depth) {
|
||||
// For block sizes below 64 kB, we never need to do a second iteration
|
||||
// of this loop. Probably all of our block sizes will be smaller than
|
||||
// that, so this loop is mostly of academic interest. If we actually
|
||||
// would need this, we would be better off with the Katajainen algorithm.
|
||||
for (int count_limit = 1; ; count_limit *= 2) {
|
||||
std::vector<HuffmanTree> tree;
|
||||
tree.reserve(2 * length + 1);
|
||||
|
||||
for (int i = 0; i < length; ++i) {
|
||||
if (data[i]) {
|
||||
const int count = std::max(data[i], count_limit);
|
||||
tree.push_back(HuffmanTree(count, -1, i));
|
||||
}
|
||||
}
|
||||
|
||||
const int n = tree.size();
|
||||
if (n == 1) {
|
||||
depth[tree[0].index_right_or_value_] = 1; // Only one element.
|
||||
break;
|
||||
}
|
||||
|
||||
std::sort(tree.begin(), tree.end(), SortHuffmanTree);
|
||||
|
||||
// The nodes are:
|
||||
// [0, n): the sorted leaf nodes that we start with.
|
||||
// [n]: we add a sentinel here.
|
||||
// [n + 1, 2n): new parent nodes are added here, starting from
|
||||
// (n+1). These are naturally in ascending order.
|
||||
// [2n]: we add a sentinel at the end as well.
|
||||
// There will be (2n+1) elements at the end.
|
||||
const HuffmanTree sentinel(std::numeric_limits<int>::max(), -1, -1);
|
||||
tree.push_back(sentinel);
|
||||
tree.push_back(sentinel);
|
||||
|
||||
int i = 0; // Points to the next leaf node.
|
||||
int j = n + 1; // Points to the next non-leaf node.
|
||||
for (int k = n - 1; k > 0; --k) {
|
||||
int left, right;
|
||||
if (tree[i].total_count_ <= tree[j].total_count_) {
|
||||
left = i;
|
||||
++i;
|
||||
} else {
|
||||
left = j;
|
||||
++j;
|
||||
}
|
||||
if (tree[i].total_count_ <= tree[j].total_count_) {
|
||||
right = i;
|
||||
++i;
|
||||
} else {
|
||||
right = j;
|
||||
++j;
|
||||
}
|
||||
|
||||
// The sentinel node becomes the parent node.
|
||||
int j_end = tree.size() - 1;
|
||||
tree[j_end].total_count_ =
|
||||
tree[left].total_count_ + tree[right].total_count_;
|
||||
tree[j_end].index_left_ = left;
|
||||
tree[j_end].index_right_or_value_ = right;
|
||||
|
||||
// Add back the last sentinel node.
|
||||
tree.push_back(sentinel);
|
||||
}
|
||||
SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
|
||||
|
||||
// We need to pack the Huffman tree in tree_limit bits.
|
||||
// If this was not successful, add fake entities to the lowest values
|
||||
// and retry.
|
||||
if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Appends the encoding of `repetitions` consecutive occurrences of the
// non-zero code length `value` to (tree, extra_bits), advancing *tree_size.
//
// If `value` differs from `previous_value`, it is first written once as
// itself. The remaining occurrences are then written as
//   - symbol 16 with extra bits (count - 3), covering 3 to 6 repeats each, or
//   - the plain value when fewer than 3 occurrences are left.
void WriteHuffmanTreeRepetitions(
    const int previous_value,
    const int value,
    int repetitions,
    uint8_t* tree,
    uint8_t* extra_bits,
    int* tree_size) {
  int out = *tree_size;
  if (previous_value != value) {
    tree[out] = value;
    extra_bits[out] = 0;
    ++out;
    --repetitions;
  }
  // Emit full groups of 6 for as long as something is left over afterwards.
  while (repetitions >= 7) {
    tree[out] = 16;
    extra_bits[out] = 3;
    ++out;
    repetitions -= 6;
  }
  if (repetitions >= 3) {
    // 3 to 6 left.
    tree[out] = 16;
    extra_bits[out] = repetitions - 3;
    ++out;
  } else {
    // Too few for a repeat code; write them out literally.
    for (; repetitions > 0; --repetitions) {
      tree[out] = value;
      extra_bits[out] = 0;
      ++out;
    }
  }
  *tree_size = out;
}
|
||||
|
||||
// Appends the encoding of a run of `repetitions` zero code lengths to
// (tree, extra_bits), advancing *tree_size:
//   - symbol 18 with extra bits (count - 11) for 11 to 138 zeros,
//   - symbol 17 with extra bits (count - 3) for 3 to 10 zeros,
//   - plain zeros when fewer than 3 are left.
// Runs longer than 138 are split into several symbol 18 groups.
void WriteHuffmanTreeRepetitionsZeros(
    int repetitions,
    uint8_t* tree,
    uint8_t* extra_bits,
    int* tree_size) {
  int out = *tree_size;
  while (repetitions >= 139) {
    tree[out] = 18;
    extra_bits[out] = 0x7f;  // 138 repeated 0s
    ++out;
    repetitions -= 138;
  }
  if (repetitions >= 11) {
    tree[out] = 18;
    extra_bits[out] = repetitions - 11;
    ++out;
  } else if (repetitions >= 3) {
    tree[out] = 17;
    extra_bits[out] = repetitions - 3;
    ++out;
  } else {
    for (; repetitions > 0; --repetitions) {
      tree[out] = 0;
      extra_bits[out] = 0;
      ++out;
    }
  }
  *tree_size = out;
}
|
||||
|
||||
|
||||
// Heuristic for selecting the stride ranges to collapse: returns 1 when the
// two values differ by less than 4, and 0 otherwise.
int ValuesShouldBeCollapsedToStrideAverage(int a, int b) {
  const int diff = a - b;
  return diff > -4 && diff < 4;
}
|
||||
|
||||
int OptimizeHuffmanCountsForRle(int length, int* counts) {
|
||||
int stride;
|
||||
int limit;
|
||||
int sum;
|
||||
uint8_t* good_for_rle;
|
||||
// Let's make the Huffman code more compatible with rle encoding.
|
||||
int i;
|
||||
for (; length >= 0; --length) {
|
||||
if (length == 0) {
|
||||
return 1; // All zeros.
|
||||
}
|
||||
if (counts[length - 1] != 0) {
|
||||
// Now counts[0..length - 1] does not have trailing zeros.
|
||||
break;
|
||||
}
|
||||
}
|
||||
// 2) Let's mark all population counts that already can be encoded
|
||||
// with an rle code.
|
||||
good_for_rle = (uint8_t*)calloc(length, 1);
|
||||
if (good_for_rle == NULL) {
|
||||
return 0;
|
||||
}
|
||||
{
|
||||
// Let's not spoil any of the existing good rle codes.
|
||||
// Mark any seq of 0's that is longer as 5 as a good_for_rle.
|
||||
// Mark any seq of non-0's that is longer as 7 as a good_for_rle.
|
||||
int symbol = counts[0];
|
||||
int stride = 0;
|
||||
for (i = 0; i < length + 1; ++i) {
|
||||
if (i == length || counts[i] != symbol) {
|
||||
if ((symbol == 0 && stride >= 5) ||
|
||||
(symbol != 0 && stride >= 7)) {
|
||||
int k;
|
||||
for (k = 0; k < stride; ++k) {
|
||||
good_for_rle[i - k - 1] = 1;
|
||||
}
|
||||
}
|
||||
stride = 1;
|
||||
if (i != length) {
|
||||
symbol = counts[i];
|
||||
}
|
||||
} else {
|
||||
++stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
// 3) Let's replace those population counts that lead to more rle codes.
|
||||
stride = 0;
|
||||
limit = counts[0];
|
||||
sum = 0;
|
||||
for (i = 0; i < length + 1; ++i) {
|
||||
if (i == length || good_for_rle[i] ||
|
||||
(i != 0 && good_for_rle[i - 1]) ||
|
||||
!ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) {
|
||||
if (stride >= 4 || (stride >= 3 && sum == 0)) {
|
||||
int k;
|
||||
// The stride must end, collapse what we have, if we have enough (4).
|
||||
int count = (sum + stride / 2) / stride;
|
||||
if (count < 1) {
|
||||
count = 1;
|
||||
}
|
||||
if (sum == 0) {
|
||||
// Don't make an all zeros stride to be upgraded to ones.
|
||||
count = 0;
|
||||
}
|
||||
for (k = 0; k < stride; ++k) {
|
||||
// We don't want to change value at counts[i],
|
||||
// that is already belonging to the next stride. Thus - 1.
|
||||
counts[i - k - 1] = count;
|
||||
}
|
||||
}
|
||||
stride = 0;
|
||||
sum = 0;
|
||||
if (i < length - 3) {
|
||||
// All interesting strides have a count of at least 4,
|
||||
// at least when non-zeros.
|
||||
limit = (counts[i] + counts[i + 1] +
|
||||
counts[i + 2] + counts[i + 3] + 2) / 4;
|
||||
} else if (i < length) {
|
||||
limit = counts[i];
|
||||
} else {
|
||||
limit = 0;
|
||||
}
|
||||
}
|
||||
++stride;
|
||||
if (i != length) {
|
||||
sum += counts[i];
|
||||
if (stride >= 4) {
|
||||
limit = (sum + stride / 2) / stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
free(good_for_rle);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
void WriteHuffmanTree(const uint8_t* depth, const int length,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data,
|
||||
int* huffman_tree_size) {
|
||||
int previous_value = 0;
|
||||
for (uint32_t i = 0; i < length;) {
|
||||
const int value = depth[i];
|
||||
int reps = 1;
|
||||
for (uint32_t k = i + 1; k < length && depth[k] == value; ++k) {
|
||||
++reps;
|
||||
}
|
||||
if (value == 0) {
|
||||
WriteHuffmanTreeRepetitionsZeros(reps, tree, extra_bits_data,
|
||||
huffman_tree_size);
|
||||
} else {
|
||||
WriteHuffmanTreeRepetitions(previous_value, value, reps, tree,
|
||||
extra_bits_data, huffman_tree_size);
|
||||
previous_value = value;
|
||||
}
|
||||
i += reps;
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns the lowest `num_bits` bits of `bits` in reversed order.
//
// The value is reversed four bits at a time through a lookup table; since
// that reverses a whole number of nibbles, the surplus low-order bits are
// shifted out at the end.
uint16_t ReverseBits(int num_bits, uint16_t bits) {
  static const size_t kLut[16] = {  // Pre-reversed 4-bit values.
    0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
    0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
  };
  size_t reversed = kLut[bits & 0xf];
  int done = 4;
  while (done < num_bits) {
    bits >>= 4;
    reversed = (reversed << 4) | kLut[bits & 0xf];
    done += 4;
  }
  // Number of padding bits up to the next multiple of four.
  const int surplus = -num_bits & 0x3;
  return static_cast<uint16_t>(reversed >> surplus);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits) {
|
||||
// In Brotli, all bit depths are [1..15]
|
||||
// 0 bit depth means that the symbol does not exist.
|
||||
const int kMaxBits = 16; // 0..15 are values for bits
|
||||
uint16_t bl_count[kMaxBits] = { 0 };
|
||||
{
|
||||
for (int i = 0; i < len; ++i) {
|
||||
++bl_count[depth[i]];
|
||||
}
|
||||
bl_count[0] = 0;
|
||||
}
|
||||
uint16_t next_code[kMaxBits];
|
||||
next_code[0] = 0;
|
||||
{
|
||||
int code = 0;
|
||||
for (int bits = 1; bits < kMaxBits; ++bits) {
|
||||
code = (code + bl_count[bits - 1]) << 1;
|
||||
next_code[bits] = code;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < len; ++i) {
|
||||
if (depth[i]) {
|
||||
bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
112
enc/entropy_encode.h
Normal file
112
enc/entropy_encode.h
Normal file
@ -0,0 +1,112 @@
|
||||
// Copyright 2010 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Entropy encoding (Huffman) utilities.
|
||||
|
||||
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
|
||||
#define BROTLI_ENC_ENTROPY_ENCODE_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include "./histogram.h"
|
||||
#include "./prefix.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// This function will create a Huffman tree.
|
||||
//
|
||||
// The (data,length) contains the population counts.
|
||||
// The tree_limit is the maximum bit depth of the Huffman codes.
|
||||
//
|
||||
// The depth contains the tree, i.e., how many bits are used for
|
||||
// the symbol.
|
||||
//
|
||||
// See http://en.wikipedia.org/wiki/Huffman_coding
|
||||
void CreateHuffmanTree(const int *data,
|
||||
const int length,
|
||||
const int tree_limit,
|
||||
uint8_t *depth);
|
||||
|
||||
// Change the population counts in a way that the consequent
// Huffman tree compression, especially its rle-part, will be more
// likely to compress this data more efficiently.
|
||||
//
|
||||
// length contains the size of the histogram.
|
||||
// counts contains the population counts.
|
||||
int OptimizeHuffmanCountsForRle(int length, int* counts);
|
||||
|
||||
|
||||
// Write a huffman tree from bit depths into the bitstream representation
|
||||
// of a Huffman tree. The generated Huffman tree is to be compressed once
|
||||
// more using a Huffman tree
|
||||
void WriteHuffmanTree(const uint8_t* depth, const int length,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data,
|
||||
int* huffman_tree_size);
|
||||
|
||||
// Get the actual bit values for a tree of bit depths.
|
||||
void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits);
|
||||
|
||||
// A prefix code over an alphabet of at most kSize symbols.
template<int kSize>
struct EntropyCode {
  // Code length in bits of each symbol.
  uint8_t depth_[kSize];
  // The bits that represent each symbol.
  uint16_t bits_[kSize];
  // Number of symbols that occur in the histogram the code was built from.
  int count_;
  // The first two such symbols.
  int symbols_[2];
};
|
||||
|
||||
template<int kSize>
|
||||
void BuildEntropyCode(const Histogram<kSize>& histogram,
|
||||
const int tree_limit,
|
||||
const int alphabet_size,
|
||||
EntropyCode<kSize>* code) {
|
||||
memset(code->depth_, 0, sizeof(code->depth_));
|
||||
memset(code->bits_, 0, sizeof(code->bits_));
|
||||
memset(code->symbols_, 0, sizeof(code->symbols_));
|
||||
code->count_ = 0;
|
||||
if (histogram.total_count_ == 0) return;
|
||||
for (int i = 0; i < kSize; ++i) {
|
||||
if (histogram.data_[i] > 0) {
|
||||
if (code->count_ < 2) code->symbols_[code->count_] = i;
|
||||
++code->count_;
|
||||
}
|
||||
}
|
||||
if (code->count_ >= 64) {
|
||||
int counts[kSize];
|
||||
memcpy(counts, &histogram.data_[0], sizeof(counts[0]) * kSize);
|
||||
OptimizeHuffmanCountsForRle(alphabet_size, counts);
|
||||
CreateHuffmanTree(counts, alphabet_size, tree_limit, &code->depth_[0]);
|
||||
} else {
|
||||
CreateHuffmanTree(&histogram.data_[0], alphabet_size, tree_limit,
|
||||
&code->depth_[0]);
|
||||
}
|
||||
ConvertBitDepthsToSymbols(&code->depth_[0], alphabet_size, &code->bits_[0]);
|
||||
}
|
||||
|
||||
static const int kCodeLengthCodes = 19;
|
||||
|
||||
// Literal entropy code.
|
||||
typedef EntropyCode<256> EntropyCodeLiteral;
|
||||
// Prefix entropy codes.
|
||||
typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
|
||||
typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
|
||||
typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_ENTROPY_ENCODE_H_
|
161
enc/fast_log.h
Normal file
161
enc/fast_log.h
Normal file
@ -0,0 +1,161 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Utilities for fast computation of logarithms.
|
||||
|
||||
#ifndef BROTLI_ENC_FAST_LOG_H_
|
||||
#define BROTLI_ENC_FAST_LOG_H_
|
||||
|
||||
#include <assert.h>
#include <math.h>
#include <stdint.h>
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Return floor(log2(n)) for positive integer n.  Returns -1 iff n == 0.
//
// Uses the count-leading-zeros builtin on clang and GCC >= 3.4, and a
// portable binary search over the bit positions everywhere else.
inline int Log2Floor(uint32_t n) {
  if (n == 0) {
    return -1;
  }
#if defined(__clang__) ||                       \
  (defined(__GNUC__) &&                         \
   ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4))
  // For a 32-bit value, 31 ^ clz(n) is the index of the highest set bit.
  return 31 ^ __builtin_clz(n);
#else
  int log = 0;
  uint32_t rest = n;
  // Try to drop 16, 8, 4, 2 and finally 1 low-order bits.
  for (int shift = 16; shift > 0; shift >>= 1) {
    const uint32_t high = rest >> shift;
    if (high != 0) {
      rest = high;
      log += shift;
    }
  }
  assert(rest == 1);
  return log;
#endif
}
|
||||
|
||||
// Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0.
|
||||
inline int Log2Ceiling(uint32_t n) {
|
||||
int floor = Log2Floor(n);
|
||||
if (n == (n &~ (n - 1))) // zero or a power of two
|
||||
return floor;
|
||||
else
|
||||
return floor + 1;
|
||||
}
|
||||
|
||||
// A lookup table for small values of log2(int) to be used in entropy
// computation.  Entry i holds log2(i), except that entry 0 holds 0.
//
// Generated with:
// ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
static const float kLog2Table[] = {
  0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
  1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
  2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
  3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
  3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
  3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
  4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
  4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
  4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
  4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
  4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
  5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
  5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
  5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
  5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
  5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
  5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
  5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
  5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
  5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
  5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
  5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
  6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
  6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
  6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
  6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
  6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
  6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
  6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
  6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
  6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
  6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
  6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
  6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
  6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
  6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
  6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
  6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
  6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
  6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
  6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
  6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
  6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
  7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
  7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
  7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
  7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
  7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
  7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
  7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
  7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
  7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
  7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
  7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
  7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
  7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
  7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
  7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
  7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
  7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
  7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
  7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
  7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
  7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
  7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
  7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
  7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
  7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
  7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
  7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
  7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
  7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
  7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
  7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
  7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
  7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
  7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
  7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
  7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
  7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
  7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
  7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
  7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
  7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
  7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
  7.9943534368588578f
};

// Faster logarithm for small integers, with the property of log2(0) == 0.
//
// Arguments inside the table are looked up; every other argument is handed
// to log2().  That includes negative values, which previously passed the
// upper-bound check and read memory in front of the table; they now yield
// whatever log2() returns for a negative argument (NaN).
static inline double FastLog2(int v) {
  const int kTableSize =
      static_cast<int>(sizeof(kLog2Table) / sizeof(kLog2Table[0]));
  if (v >= 0 && v < kTableSize) {
    return kLog2Table[v];
  }
  return log2(v);
}
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_FAST_LOG_H_
|
85
enc/find_match_length.h
Normal file
85
enc/find_match_length.h
Normal file
@ -0,0 +1,85 @@
|
||||
// Copyright 2010 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Function to find maximal matching prefixes of strings.
|
||||
|
||||
#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
||||
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Separate implementation for x86_64, for speed.
|
||||
#if defined(__GNUC__) && defined(ARCH_K8)
|
||||
|
||||
static inline int FindMatchLengthWithLimit(const uint8_t* s1,
|
||||
const uint8_t* s2,
|
||||
size_t limit) {
|
||||
int matched = 0;
|
||||
size_t limit2 = (limit >> 3) + 1; // + 1 is for pre-decrement in while
|
||||
while (PREDICT_TRUE(--limit2)) {
|
||||
if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
|
||||
BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
|
||||
s2 += 8;
|
||||
matched += 8;
|
||||
} else {
|
||||
uint64_t x =
|
||||
BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
|
||||
int matching_bits = __builtin_ctzll(x);
|
||||
matched += matching_bits >> 3;
|
||||
return matched;
|
||||
}
|
||||
}
|
||||
limit = (limit & 7) + 1; // + 1 is for pre-decrement in while
|
||||
while (--limit) {
|
||||
if (PREDICT_TRUE(s1[matched] == *s2)) {
|
||||
++s2;
|
||||
++matched;
|
||||
} else {
|
||||
return matched;
|
||||
}
|
||||
}
|
||||
return matched;
|
||||
}
|
||||
#else
|
||||
static inline int FindMatchLengthWithLimit(const uint8_t* s1,
|
||||
const uint8_t* s2,
|
||||
size_t limit) {
|
||||
int matched = 0;
|
||||
const uint8_t* s2_limit = s2 + limit;
|
||||
const uint8_t* s2_ptr = s2;
|
||||
// Find out how long the match is. We loop over the data 32 bits at a
|
||||
// time until we find a 32-bit block that doesn't match; then we find
|
||||
// the first non-matching bit and use that to calculate the total
|
||||
// length of the match.
|
||||
while (s2_ptr <= s2_limit - 4 &&
|
||||
BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
|
||||
BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
|
||||
s2_ptr += 4;
|
||||
matched += 4;
|
||||
}
|
||||
while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) {
|
||||
++s2_ptr;
|
||||
++matched;
|
||||
}
|
||||
return matched;
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
354
enc/hash.h
Normal file
354
enc/hash.h
Normal file
@ -0,0 +1,354 @@
|
||||
// Copyright 2010 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// A (forgetful) hash table to the data seen by the compressor, to
|
||||
// help create backward references to previous data.
|
||||
|
||||
#ifndef BROTLI_ENC_HASH_H_
|
||||
#define BROTLI_ENC_HASH_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <algorithm>
|
||||
|
||||
#include "./fast_log.h"
|
||||
#include "./find_match_length.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// kHashMul32 multiplier has these properties:
|
||||
// * The multiplier must be odd. Otherwise we may lose the highest bit.
|
||||
// * No long streaks of 1s or 0s.
|
||||
// * There is no effort to ensure that it is a prime, the oddity is enough
|
||||
// for this use.
|
||||
// * The number has been tuned heuristically against compression benchmarks.
|
||||
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
||||
|
||||
inline uint32_t Hash3Bytes(const uint8_t *data, const int bits) {
|
||||
uint32_t h = (BROTLI_UNALIGNED_LOAD32(data) & 0xffffff) * kHashMul32;
|
||||
// The higher bits contain more mixture from the multiplication,
|
||||
// so we take our results from there.
|
||||
return h >> (32 - bits);
|
||||
}
|
||||
|
||||
// Usually, we always choose the longest backward reference. This function
|
||||
// allows for the exception of that rule.
|
||||
//
|
||||
// If we choose a backward reference that is further away, it will
|
||||
// usually be coded with more bits. We approximate this by assuming
|
||||
// log2(distance). If the distance can be expressed in terms of the
|
||||
// last four distances, we use some heuristic constants to estimate
|
||||
// the bits cost. For the first up to four literals we use the bit
|
||||
// cost of the literals from the literal cost model, after that we
|
||||
// use the average bit cost of the cost model.
|
||||
//
|
||||
// This function is used to sometimes discard a longer backward reference
|
||||
// when it is not much longer and the bit cost for encoding it is more
|
||||
// than the saved literals.
|
||||
inline double BackwardReferenceScore(double average_cost,
|
||||
double start_cost4,
|
||||
double start_cost3,
|
||||
double start_cost2,
|
||||
int copy_length,
|
||||
int backward_reference_offset,
|
||||
int last_distance1,
|
||||
int last_distance2,
|
||||
int last_distance3,
|
||||
int last_distance4) {
|
||||
double retval = 0;
|
||||
switch (copy_length) {
|
||||
case 2: retval = start_cost2; break;
|
||||
case 3: retval = start_cost3; break;
|
||||
default: retval = start_cost4 + (copy_length - 4) * average_cost; break;
|
||||
}
|
||||
int diff_last1 = abs(backward_reference_offset - last_distance1);
|
||||
int diff_last2 = abs(backward_reference_offset - last_distance2);
|
||||
if (diff_last1 == 0) {
|
||||
retval += 0.6;
|
||||
} else if (diff_last1 < 4) {
|
||||
retval -= 0.9 + 0.03 * diff_last1;
|
||||
} else if (diff_last2 < 4) {
|
||||
retval -= 0.95 + 0.1 * diff_last2;
|
||||
} else if (backward_reference_offset == last_distance3) {
|
||||
retval -= 1.17;
|
||||
} else if (backward_reference_offset == last_distance4) {
|
||||
retval -= 1.27;
|
||||
} else {
|
||||
retval -= 1.20 * Log2Floor(backward_reference_offset);
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
// A (forgetful) hash table to the data seen by the compressor, to
|
||||
// help create backward references to previous data.
|
||||
//
|
||||
// This is a hash map of fixed size (kBucketSize) to a ring buffer of
|
||||
// fixed size (kBlockSize). The ring buffer contains the last kBlockSize
|
||||
// index positions of the given hash key in the compressed data.
|
||||
template <int kBucketBits, int kBlockBits>
|
||||
class HashLongestMatch {
|
||||
public:
|
||||
HashLongestMatch()
|
||||
: literal_cost_(NULL),
|
||||
last_distance1_(4),
|
||||
last_distance2_(11),
|
||||
last_distance3_(15),
|
||||
last_distance4_(16),
|
||||
insert_length_(0),
|
||||
average_cost_(5.4) {
|
||||
Reset();
|
||||
}
|
||||
void Reset() {
|
||||
std::fill(&num_[0], &num_[sizeof(num_) / sizeof(num_[0])], 0);
|
||||
}
|
||||
void SetLiteralCost(float *cost) {
|
||||
literal_cost_ = cost;
|
||||
}
|
||||
double literal_cost(int i) const { return literal_cost_[i]; }
|
||||
|
||||
// Look at 3 bytes at data.
|
||||
// Compute a hash from these, and store the value of ix at that position.
|
||||
inline void Store(const uint8_t *data, const int ix) {
|
||||
const uint32_t key = Hash3Bytes(data, kBucketBits);
|
||||
const int minor_ix = num_[key] & kBlockMask;
|
||||
buckets_[key][minor_ix] = ix;
|
||||
++num_[key];
|
||||
}
|
||||
|
||||
// Store hashes for a range of data.
|
||||
void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) {
|
||||
for (int p = 0; p < len; ++p) {
|
||||
Store(&data[p & mask], startix + p);
|
||||
}
|
||||
}
|
||||
|
||||
// Find a longest backward match of &data[cur_ix] up to the length of
|
||||
// max_length.
|
||||
//
|
||||
// Does not look for matches longer than max_length.
|
||||
// Does not look for matches further away than max_backward.
|
||||
// Writes the best found match length into best_len_out.
|
||||
// Writes the index (&data[index]) offset from the start of the best match
|
||||
// into best_distance_out.
|
||||
// Write the score of the best match into best_score_out.
|
||||
bool FindLongestMatch(const uint8_t * __restrict data,
|
||||
const uint32_t cur_ix,
|
||||
uint32_t max_length,
|
||||
const uint32_t max_backward,
|
||||
size_t * __restrict best_len_out,
|
||||
size_t * __restrict best_distance_out,
|
||||
double * __restrict best_score_out) {
|
||||
const double start_cost4 = literal_cost_ == NULL ? 20 :
|
||||
literal_cost_[cur_ix] +
|
||||
literal_cost_[cur_ix + 1] +
|
||||
literal_cost_[cur_ix + 2] +
|
||||
literal_cost_[cur_ix + 3];
|
||||
|
||||
const double start_cost3 = literal_cost_ == NULL ? 15 :
|
||||
literal_cost_[cur_ix] +
|
||||
literal_cost_[cur_ix + 1] +
|
||||
literal_cost_[cur_ix + 2] + 0.3;
|
||||
double start_cost2 = literal_cost_ == NULL ? 10 :
|
||||
literal_cost_[cur_ix] +
|
||||
literal_cost_[cur_ix + 1] + 1.2;
|
||||
bool match_found = false;
|
||||
// Don't accept a short copy from far away.
|
||||
double best_score = 8.25;
|
||||
if (insert_length_ < 4) {
|
||||
double cost_diff[4] = { 0.20, 0.09, 0.05, 0.03 };
|
||||
best_score += cost_diff[insert_length_];
|
||||
}
|
||||
size_t best_len = *best_len_out;
|
||||
*best_len_out = 0;
|
||||
size_t best_ix = 1;
|
||||
// Try last distance first.
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
int prev_ix = cur_ix;
|
||||
switch(i) {
|
||||
case 0: prev_ix -= last_distance1_; break;
|
||||
case 1: prev_ix -= last_distance2_; break;
|
||||
case 2: prev_ix -= last_distance3_; break;
|
||||
case 3: prev_ix -= last_distance4_; break;
|
||||
|
||||
case 4: prev_ix -= last_distance1_ - 1; break;
|
||||
case 5: prev_ix -= last_distance1_ + 1; break;
|
||||
case 6: prev_ix -= last_distance1_ - 2; break;
|
||||
case 7: prev_ix -= last_distance1_ + 2; break;
|
||||
case 8: prev_ix -= last_distance1_ - 3; break;
|
||||
case 9: prev_ix -= last_distance1_ + 3; break;
|
||||
|
||||
case 10: prev_ix -= last_distance2_ - 1; break;
|
||||
case 11: prev_ix -= last_distance2_ + 1; break;
|
||||
case 12: prev_ix -= last_distance2_ - 2; break;
|
||||
case 13: prev_ix -= last_distance2_ + 2; break;
|
||||
case 14: prev_ix -= last_distance2_ - 3; break;
|
||||
case 15: prev_ix -= last_distance2_ + 3; break;
|
||||
}
|
||||
if (prev_ix >= cur_ix) {
|
||||
continue;
|
||||
}
|
||||
const size_t backward = cur_ix - prev_ix;
|
||||
if (PREDICT_FALSE(backward > max_backward)) {
|
||||
continue;
|
||||
}
|
||||
if (data[cur_ix + best_len] != data[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
const size_t len =
|
||||
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix], max_length);
|
||||
if (len >= 3 || (len == 2 && i < 2)) {
|
||||
// Comparing for >= 2 does not change the semantics, but just saves for
|
||||
// a few unnecessary binary logarithms in backward reference score,
|
||||
// since we are not interested in such short matches.
|
||||
const double score = BackwardReferenceScore(average_cost_,
|
||||
start_cost4,
|
||||
start_cost3,
|
||||
start_cost2,
|
||||
len, backward,
|
||||
last_distance1_,
|
||||
last_distance2_,
|
||||
last_distance3_,
|
||||
last_distance4_);
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
best_ix = backward;
|
||||
*best_len_out = best_len;
|
||||
*best_distance_out = best_ix;
|
||||
*best_score_out = best_score;
|
||||
match_found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
const uint32_t key = Hash3Bytes(&data[cur_ix], kBucketBits);
|
||||
const uint32_t * __restrict const bucket = &buckets_[key][0];
|
||||
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
|
||||
int stop = int(cur_ix) - 64;
|
||||
if (stop < 0) { stop = 0; }
|
||||
|
||||
start_cost2 -= 1.0;
|
||||
for (int i = cur_ix - 1; i > stop; --i) {
|
||||
size_t prev_ix = i;
|
||||
const size_t backward = cur_ix - prev_ix;
|
||||
if (PREDICT_FALSE(backward > max_backward)) {
|
||||
break;
|
||||
}
|
||||
if (data[cur_ix] != data[prev_ix] ||
|
||||
data[cur_ix + 1] != data[prev_ix + 1]) {
|
||||
continue;
|
||||
}
|
||||
int len = 2;
|
||||
const double score = start_cost2 - 1.70 * Log2Floor(backward);
|
||||
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
best_ix = backward;
|
||||
*best_len_out = best_len;
|
||||
*best_distance_out = best_ix;
|
||||
match_found = true;
|
||||
}
|
||||
}
|
||||
for (int i = num_[key] - 1; i >= down; --i) {
|
||||
size_t prev_ix = bucket[i & kBlockMask];
|
||||
const size_t backward = cur_ix - prev_ix;
|
||||
if (PREDICT_FALSE(backward > max_backward)) {
|
||||
break;
|
||||
}
|
||||
if (data[cur_ix + best_len] != data[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
const size_t len =
|
||||
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix], max_length);
|
||||
if (len >= 3) {
|
||||
// Comparing for >= 3 does not change the semantics, but just saves for
|
||||
// a few unnecessary binary logarithms in backward reference score,
|
||||
// since we are not interested in such short matches.
|
||||
const double score = BackwardReferenceScore(average_cost_,
|
||||
start_cost4,
|
||||
start_cost3,
|
||||
start_cost2,
|
||||
len, backward,
|
||||
last_distance1_,
|
||||
last_distance2_,
|
||||
last_distance3_,
|
||||
last_distance4_);
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
best_ix = backward;
|
||||
*best_len_out = best_len;
|
||||
*best_distance_out = best_ix;
|
||||
*best_score_out = best_score;
|
||||
match_found = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return match_found;
|
||||
}
|
||||
|
||||
void set_last_distance(int v) {
|
||||
if (last_distance1_ != v) {
|
||||
last_distance4_ = last_distance3_;
|
||||
last_distance3_ = last_distance2_;
|
||||
last_distance2_ = last_distance1_;
|
||||
last_distance1_ = v;
|
||||
}
|
||||
}
|
||||
|
||||
int last_distance() const { return last_distance1_; }
|
||||
|
||||
void set_insert_length(int v) { insert_length_ = v; }
|
||||
|
||||
void set_average_cost(double v) { average_cost_ = v; }
|
||||
|
||||
private:
|
||||
// Number of hash buckets.
|
||||
static const uint32_t kBucketSize = 1 << kBucketBits;
|
||||
|
||||
// Only kBlockSize newest backward references are kept,
|
||||
// and the older are forgotten.
|
||||
static const uint32_t kBlockSize = 1 << kBlockBits;
|
||||
|
||||
// Mask for accessing entries in a block (in a ringbuffer manner).
|
||||
static const uint32_t kBlockMask = (1 << kBlockBits) - 1;
|
||||
|
||||
// Number of entries in a particular bucket.
|
||||
uint16_t num_[kBucketSize];
|
||||
|
||||
// Buckets containing kBlockSize of backward references.
|
||||
uint32_t buckets_[kBucketSize][kBlockSize];
|
||||
|
||||
// Model of how much the ith literal costs to encode using
|
||||
// the entropy model.
|
||||
float *literal_cost_;
|
||||
|
||||
int last_distance1_;
|
||||
int last_distance2_;
|
||||
int last_distance3_;
|
||||
int last_distance4_;
|
||||
|
||||
// Cost adjustment for how many literals we are planning to insert
|
||||
// anyway.
|
||||
int insert_length_;
|
||||
|
||||
double average_cost_;
|
||||
};
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_HASH_H_
|
72
enc/histogram.cc
Normal file
72
enc/histogram.cc
Normal file
@ -0,0 +1,72 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Build per-context histograms of literals, commands and distance codes.
|
||||
|
||||
#include "./histogram.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <cmath>
|
||||
|
||||
#include "./block_splitter.h"
|
||||
#include "./command.h"
|
||||
#include "./context.h"
|
||||
#include "./prefix.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
void BuildHistograms(
|
||||
const std::vector<Command>& cmds,
|
||||
const BlockSplit& literal_split,
|
||||
const BlockSplit& insert_and_copy_split,
|
||||
const BlockSplit& dist_split,
|
||||
const uint8_t* input_buffer,
|
||||
size_t pos,
|
||||
int context_mode,
|
||||
int distance_context_mode,
|
||||
std::vector<HistogramLiteral>* literal_histograms,
|
||||
std::vector<HistogramCommand>* insert_and_copy_histograms,
|
||||
std::vector<HistogramDistance>* copy_dist_histograms) {
|
||||
BlockSplitIterator literal_it(literal_split);
|
||||
BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
|
||||
BlockSplitIterator dist_it(dist_split);
|
||||
for (int i = 0; i < cmds.size(); ++i) {
|
||||
const Command &cmd = cmds[i];
|
||||
insert_and_copy_it.Next();
|
||||
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
|
||||
cmd.command_prefix_);
|
||||
for (int j = 0; j < cmd.insert_length_; ++j) {
|
||||
literal_it.Next();
|
||||
uint8_t prev_byte = pos > 0 ? input_buffer[pos - 1] : 0;
|
||||
uint8_t prev_byte2 = pos > 1 ? input_buffer[pos - 2] : 0;
|
||||
uint8_t prev_byte3 = pos > 2 ? input_buffer[pos - 3] : 0;
|
||||
int context = (literal_it.type_ * NumContexts(context_mode) +
|
||||
Context(prev_byte, prev_byte2, prev_byte3, context_mode));
|
||||
(*literal_histograms)[context].Add(input_buffer[pos]);
|
||||
++pos;
|
||||
}
|
||||
pos += cmd.copy_length_;
|
||||
if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
|
||||
dist_it.Next();
|
||||
int context = dist_it.type_;
|
||||
if (distance_context_mode > 0) {
|
||||
context <<= 2;
|
||||
context += (cmd.copy_length_ > 4) ? 3 : cmd.copy_length_ - 2;
|
||||
}
|
||||
(*copy_dist_histograms)[context].Add(cmd.distance_prefix_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
97
enc/histogram.h
Normal file
97
enc/histogram.h
Normal file
@ -0,0 +1,97 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Models the histograms of literals, commands and distance codes.
|
||||
|
||||
#ifndef BROTLI_ENC_HISTOGRAM_H_
|
||||
#define BROTLI_ENC_HISTOGRAM_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include "./command.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./prefix.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
class BlockSplit;
|
||||
|
||||
// A simple container for histograms of data in blocks.
|
||||
template<int kDataSize>
|
||||
struct Histogram {
|
||||
Histogram() {
|
||||
Clear();
|
||||
}
|
||||
void Clear() {
|
||||
memset(data_, 0, sizeof(data_));
|
||||
total_count_ = 0;
|
||||
}
|
||||
void Add(int val) {
|
||||
++data_[val];
|
||||
++total_count_;
|
||||
}
|
||||
void Remove(int val) {
|
||||
--data_[val];
|
||||
--total_count_;
|
||||
}
|
||||
template<typename DataType>
|
||||
void Add(const DataType *p, size_t n) {
|
||||
total_count_ += n;
|
||||
n += 1;
|
||||
while(--n) ++data_[*p++];
|
||||
}
|
||||
void AddHistogram(const Histogram& v) {
|
||||
total_count_ += v.total_count_;
|
||||
for (int i = 0; i < kDataSize; ++i) {
|
||||
data_[i] += v.data_[i];
|
||||
}
|
||||
}
|
||||
double EntropyBitCost() const {
|
||||
double retval = total_count_ * FastLog2(total_count_);
|
||||
for (int i = 0; i < kDataSize; ++i) {
|
||||
retval -= data_[i] * FastLog2(data_[i]);
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
int data_[kDataSize];
|
||||
int total_count_;
|
||||
double bit_cost_;
|
||||
};
|
||||
|
||||
// Literal histogram: one count per possible value of a literal byte.
|
||||
typedef Histogram<256> HistogramLiteral;
|
||||
// Prefix histograms, sized by the command, distance and block length
// prefix counts respectively.
|
||||
typedef Histogram<kNumCommandPrefixes> HistogramCommand;
|
||||
typedef Histogram<kNumDistancePrefixes> HistogramDistance;
|
||||
typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
|
||||
|
||||
// Fills the literal, insert-and-copy and distance histograms from the
// commands in cmds, using the three block splits to select the histogram
// each symbol is counted in.  pos is the index in input_buffer of the first
// literal of the first command.  The output vectors are indexed, not grown,
// so they have to be large enough on entry.
void BuildHistograms(
|
||||
const std::vector<Command>& cmds,
|
||||
const BlockSplit& literal_split,
|
||||
const BlockSplit& insert_and_copy_split,
|
||||
const BlockSplit& dist_split,
|
||||
const uint8_t* input_buffer,
|
||||
size_t pos,
|
||||
int context_mode,
|
||||
int distance_context_mode,
|
||||
std::vector<HistogramLiteral>* literal_histograms,
|
||||
std::vector<HistogramCommand>* insert_and_copy_histograms,
|
||||
std::vector<HistogramDistance>* copy_dist_histograms);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_HISTOGRAM_H_
|
60
enc/literal_cost.cc
Normal file
60
enc/literal_cost.cc
Normal file
@ -0,0 +1,60 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Literal cost model to allow backward reference replacement to be efficient.
|
||||
|
||||
#include "./literal_cost.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <algorithm>
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Estimates, for each of the len bytes in data, how many bits the byte
// would take as an entropy coded literal, and stores the estimate in the
// matching element of cost.
//
// The estimate for position i is log2(window size / occurrences of data[i]
// in the window), where the window reaches up to 2000 bytes to either side
// of i.  A small constant is added, and estimates below one bit are pulled
// halfway towards one bit.
void EstimateBitCostsForLiterals(size_t len, const uint8_t *data, float *cost) {
  const int kHalfWindow = 2000;
  int counts[256] = { 0 };

  // Seed the window with the bytes that lie ahead of position 0.
  int window_size = std::min(static_cast<size_t>(kHalfWindow), len);
  for (int k = 0; k < window_size; ++k) {
    ++counts[data[k]];
  }

  // Slide the window over the data, one position at a time.
  for (int pos = 0; pos < len; ++pos) {
    const int leaving = pos - kHalfWindow;
    if (leaving >= 0) {
      // This byte has fallen out of the back of the window.
      --counts[data[leaving]];
      --window_size;
    }
    const int entering = pos + kHalfWindow;
    if (entering < len) {
      // This byte has come into the front of the window.
      ++counts[data[entering]];
      ++window_size;
    }

    int occurrences = counts[data[pos]];
    if (occurrences == 0) {
      occurrences = 1;
    }

    // The estimate is built up in place, in the output element itself.
    float &bits = cost[pos];
    bits = log2(static_cast<double>(window_size) / occurrences);
    bits += 0.03;
    if (bits < 1.0) {
      bits *= 0.5;
      bits += 0.5;
    }
  }
}
|
||||
|
||||
} // namespace brotli
|
31
enc/literal_cost.h
Normal file
31
enc/literal_cost.h
Normal file
@ -0,0 +1,31 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Literal cost model to allow backward reference replacement to be efficient.
|
||||
|
||||
#ifndef BROTLI_ENC_LITERAL_COST_H_
|
||||
#define BROTLI_ENC_LITERAL_COST_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Input: length of data, and the bytes.
|
||||
// Output: estimate of how many bits the literal will take entropy coded.
|
||||
void EstimateBitCostsForLiterals(size_t len, const uint8_t *data, float *cost);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_LITERAL_COST_H_
|
138
enc/port.h
Normal file
138
enc/port.h
Normal file
@ -0,0 +1,138 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Macros for endianness, branch prediction and unaligned loads and stores.
|
||||
|
||||
#ifndef BROTLI_ENC_PORT_H_
|
||||
#define BROTLI_ENC_PORT_H_
|
||||
|
||||
#if defined OS_LINUX || defined OS_CYGWIN
|
||||
#include <endian.h>
|
||||
#elif defined OS_FREEBSD
|
||||
#include <machine/endian.h>
|
||||
#elif defined OS_MACOSX
|
||||
#include <machine/endian.h>
|
||||
/* Let's try and follow the Linux convention */
|
||||
#define __BYTE_ORDER BYTE_ORDER
|
||||
#define __LITTLE_ENDIAN LITTLE_ENDIAN
|
||||
#define __BIG_ENDIAN BIG_ENDIAN
|
||||
#endif
|
||||
|
||||
// define the macros IS_LITTLE_ENDIAN or IS_BIG_ENDIAN
|
||||
// using the above endian definitions from endian.h if
|
||||
// endian.h was included
|
||||
#ifdef __BYTE_ORDER
|
||||
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
#define IS_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
#if __BYTE_ORDER == __BIG_ENDIAN
|
||||
#define IS_BIG_ENDIAN
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if defined(__LITTLE_ENDIAN__)
|
||||
#define IS_LITTLE_ENDIAN
|
||||
#elif defined(__BIG_ENDIAN__)
|
||||
#define IS_BIG_ENDIAN
|
||||
#endif
|
||||
#endif // __BYTE_ORDER
|
||||
|
||||
// Branch prediction hints. PREDICT_FALSE(x) marks x as expected to be false,
// PREDICT_TRUE(x) as expected to be true. Without compiler support both
// simply evaluate x. The argument and the expansion are fully parenthesized
// so that the macros can be combined with other operators, e.g.
// !PREDICT_TRUE(a == b).
#if defined(COMPILER_GCC3)
#define PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define PREDICT_FALSE(x) (x)
#define PREDICT_TRUE(x) (x)
#endif
|
||||
|
||||
// Portable handling of unaligned loads, stores, and copies.
|
||||
// On some platforms, like ARM, the copy functions can be more efficient
|
||||
// than a load and a store.
|
||||
|
||||
#if defined(ARCH_PIII) || defined(ARCH_ATHLON) || \
|
||||
defined(ARCH_K8) || defined(_ARCH_PPC)
|
||||
|
||||
// x86 and x86-64 can perform unaligned loads/stores directly;
|
||||
// modern PowerPC hardware can also do unaligned integer loads and stores;
|
||||
// but note: the FPU still sends unaligned loads and stores to a trap handler!
|
||||
|
||||
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
|
||||
#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))
|
||||
|
||||
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
|
||||
(*reinterpret_cast<uint32_t *>(_p) = (_val))
|
||||
#define BROTLI_UNALIGNED_STORE64(_p, _val) \
|
||||
(*reinterpret_cast<uint64_t *>(_p) = (_val))
|
||||
|
||||
#elif defined(__arm__) && \
|
||||
!defined(__ARM_ARCH_5__) && \
|
||||
!defined(__ARM_ARCH_5T__) && \
|
||||
!defined(__ARM_ARCH_5TE__) && \
|
||||
!defined(__ARM_ARCH_5TEJ__) && \
|
||||
!defined(__ARM_ARCH_6__) && \
|
||||
!defined(__ARM_ARCH_6J__) && \
|
||||
!defined(__ARM_ARCH_6K__) && \
|
||||
!defined(__ARM_ARCH_6Z__) && \
|
||||
!defined(__ARM_ARCH_6ZK__) && \
|
||||
!defined(__ARM_ARCH_6T2__)
|
||||
|
||||
// ARMv7 and newer support native unaligned accesses, but only of 16-bit
|
||||
// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
|
||||
// do an unaligned read and rotate the words around a bit, or do the reads very
|
||||
// slowly (trip through kernel mode).
|
||||
|
||||
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
|
||||
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
|
||||
(*reinterpret_cast<uint32_t *>(_p) = (_val))
|
||||
|
||||
// Reads a 64-bit value from an address that need not be aligned, by copying
// its bytes into a local variable.
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
  uint64_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}
|
||||
|
||||
// Writes the 64-bit value v to an address that need not be aligned, by
// copying its bytes.
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
  memcpy(p, &v, sizeof(v));
}
|
||||
|
||||
#else
|
||||
|
||||
// These functions are provided for architectures that don't support
|
||||
// unaligned loads and stores.
|
||||
|
||||
// Reads a 32-bit value from an address that need not be aligned, by copying
// its bytes into a local variable.
inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
  uint32_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}
|
||||
|
||||
// Reads a 64-bit value from an address that need not be aligned, by copying
// its bytes into a local variable.
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
  uint64_t value;
  memcpy(&value, p, sizeof(value));
  return value;
}
|
||||
|
||||
// Writes the 32-bit value v to an address that need not be aligned, by
// copying its bytes.
inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
  memcpy(p, &v, sizeof(v));
}
|
||||
|
||||
// Writes the 64-bit value v to an address that need not be aligned, by
// copying its bytes.
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
  memcpy(p, &v, sizeof(v));
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // BROTLI_ENC_PORT_H_
|
166
enc/prefix.cc
Normal file
166
enc/prefix.cc
Normal file
@ -0,0 +1,166 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Functions for encoding of integers into prefix codes, the amount of extra
|
||||
// bits, and the actual values of the extra bits.
|
||||
|
||||
#include "./prefix.h"
|
||||
|
||||
#include "./fast_log.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Represents the range of values belonging to a prefix code:
|
||||
// [offset, offset + 2^nbits)
|
||||
struct PrefixCodeRange {
  // First value covered by the prefix code.
  int offset;
  // Number of extra bits; the code covers 2^nbits consecutive values
  // starting at offset.
  int nbits;
};
|
||||
|
||||
// Value ranges of the block length prefix codes, as {offset, nbits} pairs
// listed in increasing offset order. The array index is the prefix code
// returned by BlockLengthPrefix().
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
  { 1, 2}, { 5, 2}, { 9, 2}, { 13, 2},
  { 17, 3}, { 25, 3}, { 33, 3}, { 41, 3},
  { 49, 4}, { 65, 4}, { 81, 4}, { 97, 4},
  { 113, 5}, { 145, 5}, { 177, 5}, { 209, 5},
  { 241, 6}, { 305, 6}, { 369, 7}, { 497, 8},
  { 753, 9}, { 1265, 10}, {2289, 11}, {4337, 12},
  {8433, 13}, {16625, 24}
};
|
||||
|
||||
// Value ranges of the insert length prefix codes, as {offset, nbits} pairs
// listed in increasing offset order. The array index is the prefix code
// returned by InsertLengthPrefix().
static const PrefixCodeRange kInsertLengthPrefixCode[kNumInsertLenPrefixes] = {
  { 0, 0}, { 1, 0}, { 2, 0}, { 3, 0},
  { 4, 0}, { 5, 0}, { 6, 1}, { 8, 1},
  { 10, 2}, { 14, 2}, { 18, 3}, { 26, 3},
  { 34, 4}, { 50, 4}, { 66, 5}, { 98, 5},
  { 130, 6}, { 194, 7}, { 322, 8}, { 578, 9},
  {1090, 10}, {2114, 12}, {6210, 14}, {22594, 24},
};
|
||||
|
||||
// Value ranges of the copy length prefix codes, as {offset, nbits} pairs
// listed in increasing offset order. The array index is the prefix code
// returned by CopyLengthPrefix(). The smallest representable length is 2.
static const PrefixCodeRange kCopyLengthPrefixCode[kNumCopyLenPrefixes] = {
  { 2, 0}, { 3, 0}, { 4, 0}, { 5, 0},
  { 6, 0}, { 7, 0}, { 8, 0}, { 9, 0},
  { 10, 1}, { 12, 1}, { 14, 2}, { 18, 2},
  { 22, 3}, { 30, 3}, { 38, 4}, { 54, 4},
  { 70, 5}, { 102, 5}, { 134, 6}, { 198, 7},
  {326, 8}, { 582, 9}, {1094, 10}, {2118, 24},
};
|
||||
|
||||
// Maps 3 * (insert_prefix >> 3) + (copy_prefix >> 3) to the value that
// CommandPrefix() stores in bits 6 and above of the command prefix code.
// kInsertRangeLut and kCopyRangeLut are the inverse mappings.
static const int kInsertAndCopyRangeLut[9] = {
  0, 1, 4, 2, 3, 6, 5, 7, 8,
};
|
||||
|
||||
// Indexed by (command prefix code >> 6); gives the upper part
// (insert_prefix >> 3) of the insert length prefix encoded in that code.
static const int kInsertRangeLut[9] = {
  0, 0, 1, 1, 0, 2, 1, 2, 2,
};
|
||||
|
||||
// Indexed by (command prefix code >> 6); gives the upper part
// (copy_prefix >> 3) of the copy length prefix encoded in that code.
static const int kCopyRangeLut[9] = {
  0, 1, 0, 1, 2, 0, 2, 1, 2,
};
|
||||
|
||||
int InsertLengthPrefix(int length) {
|
||||
for (int i = 0; i < kNumInsertLenPrefixes; ++i) {
|
||||
const PrefixCodeRange& range = kInsertLengthPrefixCode[i];
|
||||
if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int CopyLengthPrefix(int length) {
|
||||
for (int i = 0; i < kNumCopyLenPrefixes; ++i) {
|
||||
const PrefixCodeRange& range = kCopyLengthPrefixCode[i];
|
||||
if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int CommandPrefix(int insert_length, int copy_length) {
|
||||
if (copy_length == 0) {
|
||||
copy_length = 3;
|
||||
}
|
||||
int insert_prefix = InsertLengthPrefix(insert_length);
|
||||
int copy_prefix = CopyLengthPrefix(copy_length);
|
||||
int range_idx = 3 * (insert_prefix >> 3) + (copy_prefix >> 3);
|
||||
return ((kInsertAndCopyRangeLut[range_idx] << 6) +
|
||||
((insert_prefix & 7) << 3) + (copy_prefix & 7));
|
||||
}
|
||||
|
||||
int InsertLengthExtraBits(int code) {
|
||||
int insert_code = (kInsertRangeLut[code >> 6] << 3) + ((code >> 3) & 7);
|
||||
return kInsertLengthPrefixCode[insert_code].nbits;
|
||||
}
|
||||
|
||||
int InsertLengthOffset(int code) {
|
||||
int insert_code = (kInsertRangeLut[code >> 6] << 3) + ((code >> 3) & 7);
|
||||
return kInsertLengthPrefixCode[insert_code].offset;
|
||||
}
|
||||
|
||||
int CopyLengthExtraBits(int code) {
|
||||
int copy_code = (kCopyRangeLut[code >> 6] << 3) + (code & 7);
|
||||
return kCopyLengthPrefixCode[copy_code].nbits;
|
||||
}
|
||||
|
||||
int CopyLengthOffset(int code) {
|
||||
int copy_code = (kCopyRangeLut[code >> 6] << 3) + (code & 7);
|
||||
return kCopyLengthPrefixCode[copy_code].offset;
|
||||
}
|
||||
|
||||
void PrefixEncodeCopyDistance(int distance_code,
|
||||
int num_direct_codes,
|
||||
int postfix_bits,
|
||||
uint16_t* code,
|
||||
int* nbits,
|
||||
uint32_t* extra_bits) {
|
||||
distance_code -= 1;
|
||||
if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
|
||||
*code = distance_code;
|
||||
*nbits = 0;
|
||||
*extra_bits = 0;
|
||||
return;
|
||||
}
|
||||
distance_code -= kNumDistanceShortCodes + num_direct_codes;
|
||||
distance_code += (1 << (postfix_bits + 2));
|
||||
int bucket = Log2Floor(distance_code) - 1;
|
||||
int postfix_mask = (1 << postfix_bits) - 1;
|
||||
int postfix = distance_code & postfix_mask;
|
||||
int prefix = (distance_code >> bucket) & 1;
|
||||
int offset = (2 + prefix) << bucket;
|
||||
*nbits = bucket - postfix_bits;
|
||||
*code = kNumDistanceShortCodes + num_direct_codes +
|
||||
((2 * (*nbits - 1) + prefix) << postfix_bits) + postfix;
|
||||
*extra_bits = (distance_code - offset) >> postfix_bits;
|
||||
}
|
||||
|
||||
int BlockLengthPrefix(int length) {
|
||||
for (int i = 0; i < kNumBlockLenPrefixes; ++i) {
|
||||
const PrefixCodeRange& range = kBlockLengthPrefixCode[i];
|
||||
if (length >= range.offset && length < range.offset + (1 << range.nbits)) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int BlockLengthExtraBits(int length_code) {
|
||||
return kBlockLengthPrefixCode[length_code].nbits;
|
||||
}
|
||||
|
||||
int BlockLengthOffset(int length_code) {
|
||||
return kBlockLengthPrefixCode[length_code].offset;
|
||||
}
|
||||
|
||||
} // namespace brotli
|
51
enc/prefix.h
Normal file
51
enc/prefix.h
Normal file
@ -0,0 +1,51 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Functions for encoding of integers into prefix codes, the amount of extra
|
||||
// bits, and the actual values of the extra bits.
|
||||
|
||||
#ifndef BROTLI_ENC_PREFIX_H_
|
||||
#define BROTLI_ENC_PREFIX_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Number of insert length prefix codes.
static const int kNumInsertLenPrefixes = 24;
// Number of copy length prefix codes.
static const int kNumCopyLenPrefixes = 24;
// NOTE(review): presumably the size of the combined insert-and-copy command
// prefix alphabet -- confirm against the users of this constant.
static const int kNumCommandPrefixes = 704;
// Number of block length prefix codes.
static const int kNumBlockLenPrefixes = 26;
// Number of distance short codes; together with the direct codes they are
// encoded without extra bits by PrefixEncodeCopyDistance().
static const int kNumDistanceShortCodes = 16;
// NOTE(review): presumably the size of the distance prefix alphabet --
// confirm against the users of this constant.
static const int kNumDistancePrefixes = 520;
|
||||
|
||||
int CommandPrefix(int insert_length, int copy_length);
|
||||
int InsertLengthExtraBits(int prefix);
|
||||
int InsertLengthOffset(int prefix);
|
||||
int CopyLengthExtraBits(int prefix);
|
||||
int CopyLengthOffset(int prefix);
|
||||
|
||||
void PrefixEncodeCopyDistance(int distance_code,
|
||||
int num_direct_codes,
|
||||
int shift_bits,
|
||||
uint16_t* prefix,
|
||||
int* nbits,
|
||||
uint32_t* extra_bits);
|
||||
|
||||
int BlockLengthPrefix(int length);
|
||||
int BlockLengthExtraBits(int prefix);
|
||||
int BlockLengthOffset(int prefix);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_PREFIX_H_
|
91
enc/write_bits.h
Normal file
91
enc/write_bits.h
Normal file
@ -0,0 +1,91 @@
|
||||
// Copyright 2010 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Write bits into a byte array.
|
||||
|
||||
#ifndef BROTLI_ENC_WRITE_BITS_H_
|
||||
#define BROTLI_ENC_WRITE_BITS_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <endian.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
//#define BIT_WRITER_DEBUG
|
||||
|
||||
// This function writes bits into bytes in increasing addresses, and within
|
||||
// a byte least-significant-bit first.
|
||||
//
|
||||
// The function can write up to 56 bits in one go with WriteBits
|
||||
// Example: let's assume that 3 bits (Rs below) have been written already:
|
||||
//
|
||||
// BYTE-0 BYTE+1 BYTE+2
|
||||
//
|
||||
// 0000 0RRR 0000 0000 0000 0000
|
||||
//
|
||||
// Now, we could write 5 or fewer bits in MSB by just shifting by 3
|
||||
// and OR'ing to BYTE-0.
|
||||
//
|
||||
// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
|
||||
// and locate the rest in BYTE+1, BYTE+2, etc.
|
||||
// Appends the n_bits low bits of bits to the bit stream in array and advances
// *pos (the current position, in bits) by n_bits.
//
// Preconditions, checked with assert:
//   - n_bits is at most 56;
//   - bits has no bit set at or above position n_bits.
// The byte containing position *pos must already hold valid stream data (see
// WriteBitsPrepareStorage). On little-endian builds a full 64-bit word is
// stored starting at that byte, so the array must be writable that far.
inline void WriteBits(int n_bits,
                      uint64_t bits,
                      int * __restrict pos,
                      uint8_t * __restrict array) {
  assert(n_bits >= 0 && n_bits <= 56);
  // Stray high bits would end up in storage that later writes OR into and
  // therefore expect to be zero.
  assert((bits >> n_bits) == 0);
#ifdef BIT_WRITER_DEBUG
  // uint64_t is not necessarily unsigned long long; cast to match %llx.
  printf("WriteBits %2d 0x%016llx %10d\n", n_bits,
         static_cast<unsigned long long>(bits), *pos);
#endif
#ifdef IS_LITTLE_ENDIAN
  // This branch of the code can write up to 56 bits at a time,
  // 7 bits are lost by being perhaps already in *p and at least
  // 1 bit is needed to initialize the bit-stream ahead (i.e. if 7
  // bits are in *p and we write 57 bits, then the next write will
  // access a byte that was never initialized).
  uint8_t *p = &array[*pos >> 3];
  uint64_t v = *p;
  v |= bits << (*pos & 7);
  BROTLI_UNALIGNED_STORE64(p, v);  // Set some bits.
  *pos += n_bits;
#else
  // implicit & 0xff is assumed for uint8_t arithmetics
  uint8_t *array_pos = &array[*pos >> 3];
  const int bits_reserved_in_first_byte = (*pos & 7);
  bits <<= bits_reserved_in_first_byte;
  *array_pos++ |= bits;
  for (int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte;
       bits_left_to_write >= 1;
       bits_left_to_write -= 8) {
    bits >>= 8;
    *array_pos++ = bits;
  }
  // Clear the next byte so that a later write can OR into it.
  *array_pos = 0;
  *pos += n_bits;
#endif
}
|
||||
|
||||
// Zeroes the byte of array at bit position pos so that subsequent writes can
// OR bits into it. pos must be a multiple of 8.
inline void WriteBitsPrepareStorage(int pos, uint8_t *array) {
#ifdef BIT_WRITER_DEBUG
  printf("WriteBitsPrepareStorage %10d\n", pos);
#endif
  assert((pos & 7) == 0);
  const int byte_index = pos >> 3;
  array[byte_index] = 0;
}
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_WRITE_BITS_H_
|
Loading…
Reference in New Issue
Block a user