2015-11-27 10:27:11 +00:00
|
|
|
/* Copyright 2013 Google Inc. All Rights Reserved.
|
|
|
|
|
2015-12-11 10:11:51 +00:00
|
|
|
Distributed under MIT license.
|
2015-11-27 10:27:11 +00:00
|
|
|
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
|
|
|
*/
|
|
|
|
|
2013-10-23 11:06:13 +00:00
|
|
|
// Implementation of Brotli compressor.
|
|
|
|
|
|
|
|
#include "./encode.h"
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <limits>
|
|
|
|
|
|
|
|
#include "./backward_references.h"
|
|
|
|
#include "./bit_cost.h"
|
|
|
|
#include "./block_splitter.h"
|
2014-10-15 12:01:36 +00:00
|
|
|
#include "./brotli_bit_stream.h"
|
2013-10-23 11:06:13 +00:00
|
|
|
#include "./cluster.h"
|
|
|
|
#include "./context.h"
|
2015-03-27 13:20:35 +00:00
|
|
|
#include "./metablock.h"
|
2014-02-17 13:25:36 +00:00
|
|
|
#include "./transform.h"
|
2013-10-23 11:06:13 +00:00
|
|
|
#include "./entropy_encode.h"
|
|
|
|
#include "./fast_log.h"
|
2013-11-15 18:02:17 +00:00
|
|
|
#include "./hash.h"
|
2013-10-23 11:06:13 +00:00
|
|
|
#include "./histogram.h"
|
|
|
|
#include "./prefix.h"
|
2015-10-01 13:10:42 +00:00
|
|
|
#include "./utf8_util.h"
|
2013-10-23 11:06:13 +00:00
|
|
|
#include "./write_bits.h"
|
|
|
|
|
|
|
|
namespace brotli {
|
|
|
|
|
2015-05-07 15:30:10 +00:00
|
|
|
// Qualities below this value are written with StoreMetaBlockTrivial() and use
// the smaller default input block size; qualities at or above it go through
// the MetaBlockSplit path.
static const int kMinQualityForBlockSplit = 4;

// Qualities below this value skip the literal context modeling decision.
static const int kMinQualityForContextModeling = 5;

// Qualities at or above this value run OptimizeHistograms() on the split.
static const int kMinQualityForOptimizeHistograms = 4;

// Below kMinQualityForBlockSplit there is no block splitting, so input blocks
// are only merged while the number of buffered literals plus commands stays
// under this limit.
static const int kMaxNumDelayedSymbols = 0x2fff;
|
2015-04-23 13:26:08 +00:00
|
|
|
|
2015-04-23 11:15:42 +00:00
|
|
|
// Re-encodes the distance prefix and extra bits of every eligible command for
// the given distance parameters.
//
//   cmds / num_commands         commands to update in place
//   num_direct_distance_codes   passed through to PrefixEncodeCopyDistance()
//   distance_postfix_bits       passed through to PrefixEncodeCopyDistance()
//
// Nothing is done when both distance parameters are zero.
void RecomputeDistancePrefixes(Command* cmds,
                               size_t num_commands,
                               int num_direct_distance_codes,
                               int distance_postfix_bits) {
  const bool nothing_to_recompute =
      (num_direct_distance_codes == 0) && (distance_postfix_bits == 0);
  if (nothing_to_recompute) {
    return;
  }
  size_t index = 0;
  while (index < num_commands) {
    Command& command = cmds[index];
    ++index;
    // Only commands that copy something and whose command prefix is >= 128
    // are re-encoded.
    // NOTE(review): presumably prefixes below 128 do not carry an explicit
    // distance -- confirm against the Command definition.
    if (!(command.copy_len_ > 0) || !(command.cmd_prefix_ >= 128)) {
      continue;
    }
    PrefixEncodeCopyDistance(command.DistanceCode(),
                             num_direct_distance_codes,
                             distance_postfix_bits,
                             &command.dist_prefix_,
                             &command.dist_extra_);
  }
}
|
|
|
|
|
2015-04-01 14:10:15 +00:00
|
|
|
uint8_t* BrotliCompressor::GetBrotliStorage(size_t size) {
|
|
|
|
if (storage_size_ < size) {
|
2015-10-01 10:08:14 +00:00
|
|
|
delete[] storage_;
|
|
|
|
storage_ = new uint8_t[size];
|
2015-04-01 14:10:15 +00:00
|
|
|
storage_size_ = size;
|
|
|
|
}
|
2015-10-01 10:08:14 +00:00
|
|
|
return storage_;
|
2015-04-01 14:10:15 +00:00
|
|
|
}
|
|
|
|
|
2014-03-20 13:32:35 +00:00
|
|
|
// Builds a compressor from |params|. The parameters are copied and then
// sanitized (quality, window size and input block size are forced into their
// supported ranges) before any buffer is sized from them.
BrotliCompressor::BrotliCompressor(BrotliParams params)
    : params_(params),
      hashers_(new Hashers()),
      input_pos_(0),
      num_commands_(0),
      num_literals_(0),
      last_insert_len_(0),
      last_flush_pos_(0),
      last_processed_pos_(0),
      prev_byte_(0),
      prev_byte2_(0),
      storage_size_(0),
      storage_(0) {
  // The command array starts out empty; WriteBrotliData() grows it on demand.
  commands_ = 0;
  cmd_alloc_size_ = 0;

  // ---- Sanitize params. ----
  if (params_.quality < 1) {
    params_.quality = 1;
  }
  if (params_.lgwin < kMinWindowBits) {
    params_.lgwin = kMinWindowBits;
  } else if (params_.lgwin > kMaxWindowBits) {
    params_.lgwin = kMaxWindowBits;
  }
  if (params_.lgblock != 0) {
    // Caller-supplied block size: clamp it to the supported range.
    const int at_least_min = std::max(kMinInputBlockBits, params_.lgblock);
    params_.lgblock = std::min(kMaxInputBlockBits, at_least_min);
  } else {
    // No block size requested: derive a default from the quality, and let
    // high qualities use blocks as large as the window (capped at 21 bits).
    if (params_.quality < kMinQualityForBlockSplit) {
      params_.lgblock = 14;
    } else {
      params_.lgblock = 16;
    }
    if (params_.quality >= 9 && params_.lgwin > params_.lgblock) {
      params_.lgblock = std::min(21, params_.lgwin);
    }
  }
  const int lgwin = params_.lgwin;

  // Set maximum distance, see section 9.1. of the spec.
  max_backward_distance_ = (1 << params_.lgwin) - 16;

  // ---- Input ring buffer. ----
  // At least lgwin + 1 bits, so that a newly added block fits completely
  // while lgwin bits of history remain available, and at least lgblock + 1
  // bits, because the copy tail length must be smaller than the ring buffer.
  int ringbuffer_bits = std::max(params_.lgwin + 1, params_.lgblock + 1);
  ringbuffer_ = new RingBuffer(ringbuffer_bits, params_.lgblock);

  // ---- Stream header. ----
  // The header bits that encode the window size are parked in last_byte_ so
  // they get emitted together with the first meta-block.
  if (lgwin > 17) {
    last_byte_ = static_cast<uint8_t>(((params_.lgwin - 17) << 1) | 1);
    last_byte_bits_ = 4;
  } else if (lgwin == 17) {
    last_byte_ = 1;
    last_byte_bits_ = 7;
  } else if (lgwin == 16) {
    last_byte_ = 0;
    last_byte_bits_ = 1;
  } else {
    last_byte_ = static_cast<uint8_t>(((params_.lgwin - 8) << 4) | 1);
    last_byte_bits_ = 7;
  }

  // ---- Distance cache. ----
  static const int kInitialDistances[4] = { 4, 11, 15, 16 };
  for (int slot = 0; slot < 4; ++slot) {
    dist_cache_[slot] = kInitialDistances[slot];
  }
  // Keep a snapshot so the cache can be rolled back when a meta-block ends
  // up being emitted uncompressed.
  memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));

  // ---- Hashers. ----
  // The hash type follows the quality, capped at 9.
  hash_type_ = (params_.quality < 9) ? params_.quality : 9;
  hashers_->Init(hash_type_);
}
|
2013-11-15 18:02:17 +00:00
|
|
|
|
|
|
|
// Releases everything the compressor owns.
BrotliCompressor::~BrotliCompressor() {
  // Single objects created with plain new in the constructor.
  delete hashers_;
  delete ringbuffer_;
  // The command array is grown with realloc(), so it must go through free().
  free(commands_);
  // Output scratch buffer handed out by GetBrotliStorage(); created by new[].
  delete[] storage_;
}
|
|
|
|
|
2015-04-23 13:26:08 +00:00
|
|
|
// Appends |input_size| bytes from |input_buffer| to the ring buffer and
// advances input_pos_ accordingly.
//
// While the ring buffer is still on its first lap, the 7 bytes following the
// freshly written data are zeroed as well.
//
// Why: the hashers index short (at most 8 byte) substrings of the input and,
// for speed, read them with a 64-bit load that can reach up to 7 bytes past
// the last byte written. Zeroing those bytes keeps the hashing independent of
// uninitialized memory, which makes the compressed output deterministic and
// avoids uninitialized-read warnings in Valgrind. The output would be valid
// even without it, because every hash table hit is verified against the ring
// buffer contents before it is used; only the determinism would be lost.
//
// Why only on the first lap: once the write position has passed the end of
// the buffer, the bytes after it hold earlier input (the ring buffer keeps a
// tail that mirrors its beginning), so they are already initialized and
// clearing them would damage data that is still in use.
void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
                                             const uint8_t* input_buffer) {
  // Number of bytes a 64-bit load may read beyond the written data.
  static const size_t kLoadOverreach = 7;

  ringbuffer_->Write(input_buffer, input_size);
  input_pos_ += input_size;

  const size_t write_end = ringbuffer_->position();
  const bool wrapped_already = write_end > ringbuffer_->mask();
  if (wrapped_already) {
    return;
  }
  // First lap: the index may run past mask into the tail area, which has not
  // been written at all yet.
  memset(ringbuffer_->start() + write_end, 0, kLoadOverreach);
}
|
|
|
|
|
2015-06-12 13:43:54 +00:00
|
|
|
void BrotliCompressor::BrotliSetCustomDictionary(
|
|
|
|
const size_t size, const uint8_t* dict) {
|
|
|
|
CopyInputToRingBuffer(size, dict);
|
|
|
|
last_flush_pos_ = size;
|
|
|
|
last_processed_pos_ = size;
|
2015-08-10 11:13:58 +00:00
|
|
|
if (size > 0) {
|
|
|
|
prev_byte_ = dict[size - 1];
|
|
|
|
}
|
|
|
|
if (size > 1) {
|
|
|
|
prev_byte2_ = dict[size - 2];
|
|
|
|
}
|
2015-06-12 13:43:54 +00:00
|
|
|
hashers_->PrependCustomDictionary(hash_type_, size, dict);
|
|
|
|
}
|
|
|
|
|
2015-04-23 13:26:08 +00:00
|
|
|
bool BrotliCompressor::WriteBrotliData(const bool is_last,
|
|
|
|
const bool force_flush,
|
|
|
|
size_t* out_size,
|
|
|
|
uint8_t** output) {
|
|
|
|
const size_t bytes = input_pos_ - last_processed_pos_;
|
|
|
|
const uint8_t* data = ringbuffer_->start();
|
|
|
|
const size_t mask = ringbuffer_->mask();
|
|
|
|
|
|
|
|
if (bytes > input_block_size()) {
|
2015-04-01 14:10:15 +00:00
|
|
|
return false;
|
|
|
|
}
|
2015-04-23 13:26:08 +00:00
|
|
|
|
2015-10-01 15:08:59 +00:00
|
|
|
// Theoretical max number of commands is 1 per 2 bytes.
|
|
|
|
size_t newsize = num_commands_ + bytes / 2 + 1;
|
|
|
|
if (newsize > cmd_alloc_size_) {
|
|
|
|
// Reserve a bit more memory to allow merging with a next block
|
|
|
|
// without realloc: that would impact speed.
|
|
|
|
newsize += bytes / 4;
|
|
|
|
cmd_alloc_size_ = newsize;
|
|
|
|
commands_ =
|
|
|
|
static_cast<Command*>(realloc(commands_, sizeof(Command) * newsize));
|
|
|
|
}
|
|
|
|
|
2015-04-23 13:26:08 +00:00
|
|
|
CreateBackwardReferences(bytes, last_processed_pos_, data, mask,
|
|
|
|
max_backward_distance_,
|
|
|
|
params_.quality,
|
2015-10-01 10:08:14 +00:00
|
|
|
hashers_,
|
2015-04-23 13:26:08 +00:00
|
|
|
hash_type_,
|
|
|
|
dist_cache_,
|
|
|
|
&last_insert_len_,
|
|
|
|
&commands_[num_commands_],
|
2015-04-28 08:12:47 +00:00
|
|
|
&num_commands_,
|
|
|
|
&num_literals_);
|
2015-04-23 13:26:08 +00:00
|
|
|
|
2015-10-28 16:44:47 +00:00
|
|
|
size_t max_length = std::min<size_t>(mask + 1, 1u << kMaxInputBlockBits);
|
2015-04-23 13:26:08 +00:00
|
|
|
if (!is_last && !force_flush &&
|
2015-05-07 15:30:10 +00:00
|
|
|
(params_.quality >= kMinQualityForBlockSplit ||
|
2015-04-28 08:12:47 +00:00
|
|
|
(num_literals_ + num_commands_ < kMaxNumDelayedSymbols)) &&
|
2015-06-12 14:11:50 +00:00
|
|
|
input_pos_ + input_block_size() <= last_flush_pos_ + max_length) {
|
2015-10-01 15:08:59 +00:00
|
|
|
// Merge with next input block. Everything will happen later.
|
2015-04-23 13:26:08 +00:00
|
|
|
last_processed_pos_ = input_pos_;
|
|
|
|
*out_size = 0;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create the last insert-only command.
|
|
|
|
if (last_insert_len_ > 0) {
|
|
|
|
brotli::Command cmd(last_insert_len_);
|
|
|
|
commands_[num_commands_++] = cmd;
|
2015-04-28 08:12:47 +00:00
|
|
|
num_literals_ += last_insert_len_;
|
2015-04-23 13:26:08 +00:00
|
|
|
last_insert_len_ = 0;
|
|
|
|
}
|
|
|
|
|
2015-10-01 13:10:42 +00:00
|
|
|
return WriteMetaBlockInternal(is_last, out_size, output);
|
2015-04-23 13:26:08 +00:00
|
|
|
}
|
|
|
|
|
2015-08-28 14:09:23 +00:00
|
|
|
// Decide about the context map based on the ability of the prediction
|
|
|
|
// ability of the previous byte UTF8-prefix on the next byte. The
|
|
|
|
// prediction ability is calculated as shannon entropy. Here we need
|
|
|
|
// shannon entropy instead of 'BitsEntropy' since the prefix will be
|
|
|
|
// encoded with the remaining 6 bits of the following byte, and
|
|
|
|
// BitsEntropy will assume that symbol to be stored alone using Huffman
|
|
|
|
// coding.
|
|
|
|
void ChooseContextMap(int quality,
|
|
|
|
int* bigram_histo,
|
|
|
|
int* num_literal_contexts,
|
|
|
|
const int** literal_context_map) {
|
|
|
|
int monogram_histo[3] = { 0 };
|
|
|
|
int two_prefix_histo[6] = { 0 };
|
|
|
|
int total = 0;
|
|
|
|
for (int i = 0; i < 9; ++i) {
|
|
|
|
total += bigram_histo[i];
|
|
|
|
monogram_histo[i % 3] += bigram_histo[i];
|
|
|
|
int j = i;
|
|
|
|
if (j >= 6) {
|
|
|
|
j -= 6;
|
|
|
|
}
|
|
|
|
two_prefix_histo[j] += bigram_histo[i];
|
|
|
|
}
|
|
|
|
int dummy;
|
|
|
|
double entropy1 = ShannonEntropy(monogram_histo, 3, &dummy);
|
|
|
|
double entropy2 = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
|
|
|
|
ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
|
|
|
|
double entropy3 = 0;
|
|
|
|
for (int k = 0; k < 3; ++k) {
|
|
|
|
entropy3 += ShannonEntropy(bigram_histo + 3 * k, 3, &dummy);
|
|
|
|
}
|
2015-10-01 09:40:05 +00:00
|
|
|
|
|
|
|
assert(total != 0);
|
2015-08-28 14:09:23 +00:00
|
|
|
entropy1 *= (1.0 / total);
|
|
|
|
entropy2 *= (1.0 / total);
|
|
|
|
entropy3 *= (1.0 / total);
|
|
|
|
|
|
|
|
static const int kStaticContextMapContinuation[64] = {
|
|
|
|
1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
};
|
|
|
|
static const int kStaticContextMapSimpleUTF8[64] = {
|
|
|
|
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
};
|
|
|
|
if (quality < 7) {
|
|
|
|
// 3 context models is a bit slower, don't use it at lower qualities.
|
|
|
|
entropy3 = entropy1 * 10;
|
|
|
|
}
|
|
|
|
// If expected savings by symbol are less than 0.2 bits, skip the
|
|
|
|
// context modeling -- in exchange for faster decoding speed.
|
|
|
|
if (entropy1 - entropy2 < 0.2 &&
|
|
|
|
entropy1 - entropy3 < 0.2) {
|
|
|
|
*num_literal_contexts = 1;
|
|
|
|
} else if (entropy2 - entropy3 < 0.02) {
|
|
|
|
*num_literal_contexts = 2;
|
|
|
|
*literal_context_map = kStaticContextMapSimpleUTF8;
|
|
|
|
} else {
|
|
|
|
*num_literal_contexts = 3;
|
|
|
|
*literal_context_map = kStaticContextMapContinuation;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-07 15:23:07 +00:00
|
|
|
void DecideOverLiteralContextModeling(const uint8_t* input,
|
|
|
|
size_t start_pos,
|
|
|
|
size_t length,
|
|
|
|
size_t mask,
|
|
|
|
int quality,
|
|
|
|
int* literal_context_mode,
|
|
|
|
int* num_literal_contexts,
|
|
|
|
const int** literal_context_map) {
|
2015-05-07 15:30:10 +00:00
|
|
|
if (quality < kMinQualityForContextModeling || length < 64) {
|
2015-05-07 15:23:07 +00:00
|
|
|
return;
|
|
|
|
}
|
2015-08-28 14:09:23 +00:00
|
|
|
// Gather bigram data of the UTF8 byte prefixes. To make the analysis of
|
|
|
|
// UTF8 data faster we only examine 64 byte long strides at every 4kB
|
|
|
|
// intervals.
|
2015-05-07 15:23:07 +00:00
|
|
|
const size_t end_pos = start_pos + length;
|
2015-08-28 14:09:23 +00:00
|
|
|
int bigram_prefix_histo[9] = { 0 };
|
2015-10-01 09:40:05 +00:00
|
|
|
for (; start_pos + 64 <= end_pos; start_pos += 4096) {
|
2015-08-28 14:09:23 +00:00
|
|
|
static const int lut[4] = { 0, 0, 1, 2 };
|
2015-05-07 15:23:07 +00:00
|
|
|
const size_t stride_end_pos = start_pos + 64;
|
2015-08-28 14:09:23 +00:00
|
|
|
int prev = lut[input[start_pos & mask] >> 6] * 3;
|
2015-05-07 15:23:07 +00:00
|
|
|
for (size_t pos = start_pos + 1; pos < stride_end_pos; ++pos) {
|
|
|
|
const uint8_t literal = input[pos & mask];
|
2015-08-28 14:09:23 +00:00
|
|
|
++bigram_prefix_histo[prev + lut[literal >> 6]];
|
|
|
|
prev = lut[literal >> 6] * 3;
|
2015-05-07 15:23:07 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
*literal_context_mode = CONTEXT_UTF8;
|
2015-08-28 14:09:23 +00:00
|
|
|
ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
|
|
|
|
literal_context_map);
|
2015-05-07 15:23:07 +00:00
|
|
|
}
|
|
|
|
|
2015-04-23 13:26:08 +00:00
|
|
|
bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
|
|
|
size_t* out_size,
|
|
|
|
uint8_t** output) {
|
|
|
|
const size_t bytes = input_pos_ - last_flush_pos_;
|
|
|
|
const uint8_t* data = ringbuffer_->start();
|
|
|
|
const size_t mask = ringbuffer_->mask();
|
|
|
|
const size_t max_out_size = 2 * bytes + 500;
|
2015-04-01 14:10:15 +00:00
|
|
|
uint8_t* storage = GetBrotliStorage(max_out_size);
|
|
|
|
storage[0] = last_byte_;
|
|
|
|
int storage_ix = last_byte_bits_;
|
|
|
|
|
2015-04-23 13:26:08 +00:00
|
|
|
bool uncompressed = false;
|
|
|
|
if (num_commands_ < (bytes >> 8) + 2) {
|
2015-10-28 16:44:47 +00:00
|
|
|
if (num_literals_ > 0.99 * static_cast<double>(bytes)) {
|
2015-04-23 13:26:08 +00:00
|
|
|
int literal_histo[256] = { 0 };
|
|
|
|
static const int kSampleRate = 13;
|
|
|
|
static const double kMinEntropy = 7.92;
|
2015-10-28 16:44:47 +00:00
|
|
|
const double bit_cost_threshold =
|
|
|
|
static_cast<double>(bytes) * kMinEntropy / kSampleRate;
|
2015-10-23 09:19:04 +00:00
|
|
|
for (size_t i = last_flush_pos_; i < input_pos_; i += kSampleRate) {
|
2015-04-23 13:26:08 +00:00
|
|
|
++literal_histo[data[i & mask]];
|
|
|
|
}
|
2015-10-11 11:03:51 +00:00
|
|
|
if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
|
2015-04-23 13:26:08 +00:00
|
|
|
uncompressed = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bytes == 0) {
|
|
|
|
if (!StoreCompressedMetaBlockHeader(is_last, 0, &storage_ix, &storage[0])) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
storage_ix = (storage_ix + 7) & ~7;
|
|
|
|
} else if (uncompressed) {
|
2015-06-26 15:37:00 +00:00
|
|
|
// Restore the distance cache, as its last update by
|
|
|
|
// CreateBackwardReferences is now unused.
|
|
|
|
memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
|
2015-04-23 13:26:08 +00:00
|
|
|
if (!StoreUncompressedMetaBlock(is_last,
|
|
|
|
data, last_flush_pos_, mask, bytes,
|
|
|
|
&storage_ix,
|
|
|
|
&storage[0])) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
int num_direct_distance_codes = 0;
|
|
|
|
int distance_postfix_bits = 0;
|
|
|
|
if (params_.quality > 9 && params_.mode == BrotliParams::MODE_FONT) {
|
|
|
|
num_direct_distance_codes = 12;
|
|
|
|
distance_postfix_bits = 1;
|
2015-10-01 10:08:14 +00:00
|
|
|
RecomputeDistancePrefixes(commands_,
|
2015-04-23 14:20:29 +00:00
|
|
|
num_commands_,
|
|
|
|
num_direct_distance_codes,
|
|
|
|
distance_postfix_bits);
|
2015-04-23 13:26:08 +00:00
|
|
|
}
|
2015-05-07 15:30:10 +00:00
|
|
|
if (params_.quality < kMinQualityForBlockSplit) {
|
2015-04-28 08:12:47 +00:00
|
|
|
if (!StoreMetaBlockTrivial(data, last_flush_pos_, bytes, mask, is_last,
|
2015-10-01 10:08:14 +00:00
|
|
|
commands_, num_commands_,
|
2015-04-28 08:12:47 +00:00
|
|
|
&storage_ix,
|
|
|
|
&storage[0])) {
|
|
|
|
return false;
|
|
|
|
}
|
2015-03-27 13:20:35 +00:00
|
|
|
} else {
|
2015-05-07 15:23:07 +00:00
|
|
|
MetaBlockSplit mb;
|
2015-10-01 13:10:42 +00:00
|
|
|
int literal_context_mode = CONTEXT_UTF8;
|
2015-06-12 14:50:49 +00:00
|
|
|
if (params_.quality <= 9) {
|
2015-05-07 15:23:07 +00:00
|
|
|
int num_literal_contexts = 1;
|
|
|
|
const int* literal_context_map = NULL;
|
|
|
|
DecideOverLiteralContextModeling(data, last_flush_pos_, bytes, mask,
|
|
|
|
params_.quality,
|
|
|
|
&literal_context_mode,
|
|
|
|
&num_literal_contexts,
|
|
|
|
&literal_context_map);
|
|
|
|
if (literal_context_map == NULL) {
|
|
|
|
BuildMetaBlockGreedy(data, last_flush_pos_, mask,
|
2015-10-01 10:08:14 +00:00
|
|
|
commands_, num_commands_,
|
2015-05-07 15:23:07 +00:00
|
|
|
&mb);
|
|
|
|
} else {
|
|
|
|
BuildMetaBlockGreedyWithContexts(data, last_flush_pos_, mask,
|
|
|
|
prev_byte_, prev_byte2_,
|
|
|
|
literal_context_mode,
|
|
|
|
num_literal_contexts,
|
|
|
|
literal_context_map,
|
2015-10-01 10:08:14 +00:00
|
|
|
commands_, num_commands_,
|
2015-05-07 15:23:07 +00:00
|
|
|
&mb);
|
|
|
|
}
|
2015-04-28 08:12:47 +00:00
|
|
|
} else {
|
2015-10-01 13:10:42 +00:00
|
|
|
if (!IsMostlyUTF8(data, last_flush_pos_, mask, bytes, kMinUTF8Ratio)) {
|
|
|
|
literal_context_mode = CONTEXT_SIGNED;
|
|
|
|
}
|
2015-04-28 08:12:47 +00:00
|
|
|
BuildMetaBlock(data, last_flush_pos_, mask,
|
|
|
|
prev_byte_, prev_byte2_,
|
2015-10-01 10:08:14 +00:00
|
|
|
commands_, num_commands_,
|
2015-04-28 08:12:47 +00:00
|
|
|
literal_context_mode,
|
|
|
|
&mb);
|
|
|
|
}
|
2015-05-07 15:30:10 +00:00
|
|
|
if (params_.quality >= kMinQualityForOptimizeHistograms) {
|
2015-04-28 08:12:47 +00:00
|
|
|
OptimizeHistograms(num_direct_distance_codes,
|
|
|
|
distance_postfix_bits,
|
|
|
|
&mb);
|
|
|
|
}
|
|
|
|
if (!StoreMetaBlock(data, last_flush_pos_, bytes, mask,
|
|
|
|
prev_byte_, prev_byte2_,
|
|
|
|
is_last,
|
|
|
|
num_direct_distance_codes,
|
|
|
|
distance_postfix_bits,
|
|
|
|
literal_context_mode,
|
2015-10-01 10:08:14 +00:00
|
|
|
commands_, num_commands_,
|
2015-04-28 08:12:47 +00:00
|
|
|
mb,
|
|
|
|
&storage_ix,
|
|
|
|
&storage[0])) {
|
|
|
|
return false;
|
|
|
|
}
|
2014-10-28 13:05:53 +00:00
|
|
|
}
|
2015-10-28 16:44:47 +00:00
|
|
|
if (bytes + 4 < static_cast<size_t>(storage_ix >> 3)) {
|
2015-04-23 13:26:08 +00:00
|
|
|
// Restore the distance cache and last byte.
|
2015-06-12 14:11:50 +00:00
|
|
|
memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
|
2015-04-23 13:26:08 +00:00
|
|
|
storage[0] = last_byte_;
|
|
|
|
storage_ix = last_byte_bits_;
|
|
|
|
if (!StoreUncompressedMetaBlock(is_last, data, last_flush_pos_, mask,
|
|
|
|
bytes, &storage_ix, &storage[0])) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2013-12-12 12:18:04 +00:00
|
|
|
}
|
2015-04-23 13:26:08 +00:00
|
|
|
last_byte_ = storage[storage_ix >> 3];
|
|
|
|
last_byte_bits_ = storage_ix & 7;
|
|
|
|
last_flush_pos_ = input_pos_;
|
|
|
|
last_processed_pos_ = input_pos_;
|
2015-04-23 14:20:29 +00:00
|
|
|
prev_byte_ = data[(last_flush_pos_ - 1) & mask];
|
|
|
|
prev_byte2_ = data[(last_flush_pos_ - 2) & mask];
|
2015-04-23 13:26:08 +00:00
|
|
|
num_commands_ = 0;
|
2015-04-28 08:12:47 +00:00
|
|
|
num_literals_ = 0;
|
2015-06-12 14:11:50 +00:00
|
|
|
// Save the state of the distance cache in case we need to restore it for
|
|
|
|
// emitting an uncompressed block.
|
|
|
|
memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
|
2015-04-23 13:26:08 +00:00
|
|
|
*output = &storage[0];
|
|
|
|
*out_size = storage_ix >> 3;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
|
|
|
|
const uint8_t* input_buffer,
|
|
|
|
const bool is_last,
|
|
|
|
size_t* encoded_size,
|
|
|
|
uint8_t* encoded_buffer) {
|
|
|
|
CopyInputToRingBuffer(input_size, input_buffer);
|
|
|
|
size_t out_size = 0;
|
|
|
|
uint8_t* output;
|
|
|
|
if (!WriteBrotliData(is_last, /* force_flush = */ true, &out_size, &output) ||
|
|
|
|
out_size > *encoded_size) {
|
2015-04-01 14:10:15 +00:00
|
|
|
return false;
|
|
|
|
}
|
2015-04-23 13:26:08 +00:00
|
|
|
if (out_size > 0) {
|
|
|
|
memcpy(encoded_buffer, output, out_size);
|
|
|
|
}
|
|
|
|
*encoded_size = out_size;
|
2014-10-28 13:05:53 +00:00
|
|
|
return true;
|
2013-11-15 18:02:17 +00:00
|
|
|
}
|
|
|
|
|
2015-04-23 13:43:37 +00:00
|
|
|
bool BrotliCompressor::WriteMetadata(const size_t input_size,
|
|
|
|
const uint8_t* input_buffer,
|
|
|
|
const bool is_last,
|
|
|
|
size_t* encoded_size,
|
|
|
|
uint8_t* encoded_buffer) {
|
|
|
|
if (input_size > (1 << 24) || input_size + 6 > *encoded_size) {
|
|
|
|
return false;
|
|
|
|
}
|
2015-12-04 15:09:40 +00:00
|
|
|
uint64_t hdr_buffer_data[2];
|
|
|
|
uint8_t* hdr_buffer = reinterpret_cast<uint8_t*>(&hdr_buffer_data[0]);
|
2015-04-23 13:43:37 +00:00
|
|
|
int storage_ix = last_byte_bits_;
|
2015-12-04 15:09:40 +00:00
|
|
|
hdr_buffer[0] = last_byte_;
|
|
|
|
WriteBits(1, 0, &storage_ix, hdr_buffer);
|
|
|
|
WriteBits(2, 3, &storage_ix, hdr_buffer);
|
|
|
|
WriteBits(1, 0, &storage_ix, hdr_buffer);
|
2015-04-23 13:43:37 +00:00
|
|
|
if (input_size == 0) {
|
2015-12-04 15:09:40 +00:00
|
|
|
WriteBits(2, 0, &storage_ix, hdr_buffer);
|
2015-04-23 13:43:37 +00:00
|
|
|
*encoded_size = (storage_ix + 7) >> 3;
|
2015-12-04 15:09:40 +00:00
|
|
|
memcpy(encoded_buffer, hdr_buffer, *encoded_size);
|
2015-04-23 13:43:37 +00:00
|
|
|
} else {
|
2015-10-28 16:44:47 +00:00
|
|
|
int nbits = Log2Floor(static_cast<uint32_t>(input_size) - 1) + 1;
|
|
|
|
int nbytes = (nbits + 7) / 8;
|
2015-12-04 15:09:40 +00:00
|
|
|
WriteBits(2, nbytes, &storage_ix, hdr_buffer);
|
|
|
|
WriteBits(8 * nbytes, input_size - 1, &storage_ix, hdr_buffer);
|
2015-04-23 13:43:37 +00:00
|
|
|
size_t hdr_size = (storage_ix + 7) >> 3;
|
2015-12-04 15:09:40 +00:00
|
|
|
memcpy(encoded_buffer, hdr_buffer, hdr_size);
|
2015-04-23 13:43:37 +00:00
|
|
|
memcpy(&encoded_buffer[hdr_size], input_buffer, input_size);
|
|
|
|
*encoded_size = hdr_size + input_size;
|
|
|
|
}
|
|
|
|
if (is_last) {
|
|
|
|
encoded_buffer[(*encoded_size)++] = 3;
|
|
|
|
}
|
|
|
|
last_byte_ = 0;
|
|
|
|
last_byte_bits_ = 0;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2014-10-28 13:05:53 +00:00
|
|
|
bool BrotliCompressor::FinishStream(
|
2013-11-15 18:02:17 +00:00
|
|
|
size_t* encoded_size, uint8_t* encoded_buffer) {
|
2014-10-28 13:05:53 +00:00
|
|
|
return WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer);
|
2013-11-15 18:02:17 +00:00
|
|
|
}
|
|
|
|
|
2014-03-20 13:32:35 +00:00
|
|
|
int BrotliCompressBuffer(BrotliParams params,
|
|
|
|
size_t input_size,
|
2013-10-23 11:06:13 +00:00
|
|
|
const uint8_t* input_buffer,
|
|
|
|
size_t* encoded_size,
|
|
|
|
uint8_t* encoded_buffer) {
|
2014-10-28 13:05:53 +00:00
|
|
|
if (*encoded_size == 0) {
|
|
|
|
// Output buffer needs at least one byte.
|
|
|
|
return 0;
|
2013-10-23 11:06:13 +00:00
|
|
|
}
|
2015-04-23 13:26:08 +00:00
|
|
|
BrotliMemIn in(input_buffer, input_size);
|
|
|
|
BrotliMemOut out(encoded_buffer, *encoded_size);
|
|
|
|
if (!BrotliCompress(params, &in, &out)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
*encoded_size = out.position();
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reads up to one input block from |r| into the ring buffer of |compressor|
// and returns the number of bytes copied.
//
// Returns 0 if the very first read yields NULL (no more input).
size_t CopyOneBlockToRingBuffer(BrotliIn* r, BrotliCompressor* compressor) {
  const size_t block_size = compressor->input_block_size();

  size_t copied = 0;
  const uint8_t* chunk = reinterpret_cast<const uint8_t*>(
      r->Read(block_size, &copied));
  if (chunk == NULL) {
    return 0;
  }
  compressor->CopyInputToRingBuffer(copied, chunk);

  // Read more bytes until block_size is filled or an EOF (data == NULL) is
  // received. This is useful to get deterministic compressed output for the
  // same input no matter how r->Read splits the input to chunks.
  size_t still_missing = block_size - copied;
  while (still_missing > 0) {
    size_t chunk_size = 0;
    chunk = reinterpret_cast<const uint8_t*>(
        r->Read(still_missing, &chunk_size));
    if (chunk == NULL) {
      break;
    }
    compressor->CopyInputToRingBuffer(chunk_size, chunk);
    copied += chunk_size;
    still_missing -= chunk_size;
  }
  return copied;
}
|
|
|
|
|
|
|
|
// Returns true if a zero-length read from |r| yields NULL, which is how the
// input signals that it is exhausted.
bool BrotliInIsFinished(BrotliIn* r) {
  size_t ignored_count;
  const bool exhausted = (r->Read(0, &ignored_count) == NULL);
  return exhausted;
}
|
2013-10-23 11:06:13 +00:00
|
|
|
|
2015-04-23 13:26:08 +00:00
|
|
|
// Compresses everything readable from |in| to |out| without a custom
// dictionary. Returns what BrotliCompressWithCustomDictionary() returns.
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out) {
  const size_t no_dictionary_size = 0;
  const uint8_t* const no_dictionary = 0;
  return BrotliCompressWithCustomDictionary(no_dictionary_size, no_dictionary,
                                            params, in, out);
}
|
|
|
|
|
|
|
|
int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
|
|
|
|
BrotliParams params,
|
|
|
|
BrotliIn* in, BrotliOut* out) {
|
2015-04-23 13:26:08 +00:00
|
|
|
size_t in_bytes = 0;
|
|
|
|
size_t out_bytes = 0;
|
|
|
|
uint8_t* output;
|
|
|
|
bool final_block = false;
|
2014-03-20 13:32:35 +00:00
|
|
|
BrotliCompressor compressor(params);
|
2015-06-12 13:43:54 +00:00
|
|
|
if (dictsize != 0) compressor.BrotliSetCustomDictionary(dictsize, dict);
|
2015-04-23 13:26:08 +00:00
|
|
|
while (!final_block) {
|
|
|
|
in_bytes = CopyOneBlockToRingBuffer(in, &compressor);
|
|
|
|
final_block = in_bytes == 0 || BrotliInIsFinished(in);
|
|
|
|
out_bytes = 0;
|
|
|
|
if (!compressor.WriteBrotliData(final_block,
|
|
|
|
/* force_flush = */ false,
|
|
|
|
&out_bytes, &output)) {
|
|
|
|
return false;
|
2013-11-15 18:02:17 +00:00
|
|
|
}
|
2015-04-23 13:26:08 +00:00
|
|
|
if (out_bytes > 0 && !out->Write(output, out_bytes)) {
|
|
|
|
return false;
|
2014-10-28 13:05:53 +00:00
|
|
|
}
|
2013-10-23 11:06:13 +00:00
|
|
|
}
|
2015-04-23 13:26:08 +00:00
|
|
|
return true;
|
2013-10-23 11:06:13 +00:00
|
|
|
}
|
|
|
|
|
2015-10-01 15:08:59 +00:00
|
|
|
|
2013-10-23 11:06:13 +00:00
|
|
|
} // namespace brotli
|