2013-10-23 11:06:13 +00:00
|
|
|
// Copyright 2013 Google Inc. All Rights Reserved.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
//
|
|
|
|
// Implementation of Brotli compressor.
|
|
|
|
|
|
|
|
#include "./encode.h"
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <limits>
|
|
|
|
|
|
|
|
#include "./backward_references.h"
|
|
|
|
#include "./bit_cost.h"
|
|
|
|
#include "./block_splitter.h"
|
2014-10-15 12:01:36 +00:00
|
|
|
#include "./brotli_bit_stream.h"
|
2013-10-23 11:06:13 +00:00
|
|
|
#include "./cluster.h"
|
|
|
|
#include "./context.h"
|
2015-03-27 13:20:35 +00:00
|
|
|
#include "./metablock.h"
|
2014-02-17 13:25:36 +00:00
|
|
|
#include "./transform.h"
|
2013-10-23 11:06:13 +00:00
|
|
|
#include "./entropy_encode.h"
|
|
|
|
#include "./fast_log.h"
|
2013-11-15 18:02:17 +00:00
|
|
|
#include "./hash.h"
|
2013-10-23 11:06:13 +00:00
|
|
|
#include "./histogram.h"
|
2013-11-15 18:02:17 +00:00
|
|
|
#include "./literal_cost.h"
|
2013-10-23 11:06:13 +00:00
|
|
|
#include "./prefix.h"
|
|
|
|
#include "./write_bits.h"
|
|
|
|
|
|
|
|
namespace brotli {
|
|
|
|
|
2014-02-14 14:04:23 +00:00
|
|
|
int ParseAsUTF8(int* symbol, const uint8_t* input, int size) {
|
|
|
|
// ASCII
|
|
|
|
if ((input[0] & 0x80) == 0) {
|
|
|
|
*symbol = input[0];
|
|
|
|
if (*symbol > 0) {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// 2-byte UTF8
|
|
|
|
if (size > 1 &&
|
|
|
|
(input[0] & 0xe0) == 0xc0 &&
|
|
|
|
(input[1] & 0xc0) == 0x80) {
|
|
|
|
*symbol = (((input[0] & 0x1f) << 6) |
|
|
|
|
(input[1] & 0x3f));
|
|
|
|
if (*symbol > 0x7f) {
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// 3-byte UFT8
|
|
|
|
if (size > 2 &&
|
|
|
|
(input[0] & 0xf0) == 0xe0 &&
|
|
|
|
(input[1] & 0xc0) == 0x80 &&
|
|
|
|
(input[2] & 0xc0) == 0x80) {
|
|
|
|
*symbol = (((input[0] & 0x0f) << 12) |
|
|
|
|
((input[1] & 0x3f) << 6) |
|
|
|
|
(input[2] & 0x3f));
|
|
|
|
if (*symbol > 0x7ff) {
|
|
|
|
return 3;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// 4-byte UFT8
|
|
|
|
if (size > 3 &&
|
|
|
|
(input[0] & 0xf8) == 0xf0 &&
|
|
|
|
(input[1] & 0xc0) == 0x80 &&
|
|
|
|
(input[2] & 0xc0) == 0x80 &&
|
|
|
|
(input[3] & 0xc0) == 0x80) {
|
|
|
|
*symbol = (((input[0] & 0x07) << 18) |
|
|
|
|
((input[1] & 0x3f) << 12) |
|
|
|
|
((input[2] & 0x3f) << 6) |
|
|
|
|
(input[3] & 0x3f));
|
|
|
|
if (*symbol > 0xffff && *symbol <= 0x10ffff) {
|
|
|
|
return 4;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Not UTF8, emit a special symbol above the UTF8-code space
|
|
|
|
*symbol = 0x110000 | input[0];
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns true if at least min_fraction of the data is UTF8-encoded.
|
|
|
|
bool IsMostlyUTF8(const uint8_t* data, size_t length, double min_fraction) {
|
|
|
|
size_t size_utf8 = 0;
|
|
|
|
size_t pos = 0;
|
|
|
|
while (pos < length) {
|
|
|
|
int symbol;
|
|
|
|
int bytes_read = ParseAsUTF8(&symbol, data + pos, length - pos);
|
|
|
|
pos += bytes_read;
|
|
|
|
if (symbol < 0x110000) size_utf8 += bytes_read;
|
|
|
|
}
|
|
|
|
return size_utf8 > min_fraction * length;
|
|
|
|
}
|
|
|
|
|
2014-10-28 12:25:22 +00:00
|
|
|
void RecomputeDistancePrefixes(std::vector<Command>* cmds,
|
|
|
|
int num_direct_distance_codes,
|
|
|
|
int distance_postfix_bits) {
|
|
|
|
if (num_direct_distance_codes == 0 &&
|
|
|
|
distance_postfix_bits == 0) {
|
|
|
|
return;
|
2013-10-23 11:06:13 +00:00
|
|
|
}
|
|
|
|
for (int i = 0; i < cmds->size(); ++i) {
|
|
|
|
Command* cmd = &(*cmds)[i];
|
2014-10-28 12:25:22 +00:00
|
|
|
if (cmd->copy_len_ > 0 && cmd->cmd_prefix_ >= 128) {
|
|
|
|
PrefixEncodeCopyDistance(cmd->DistanceCode(),
|
2013-10-23 11:06:13 +00:00
|
|
|
num_direct_distance_codes,
|
|
|
|
distance_postfix_bits,
|
2014-10-28 12:25:22 +00:00
|
|
|
&cmd->dist_prefix_,
|
|
|
|
&cmd->dist_extra_);
|
2013-10-23 11:06:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t MetaBlockLength(const std::vector<Command>& cmds) {
|
|
|
|
size_t length = 0;
|
|
|
|
for (int i = 0; i < cmds.size(); ++i) {
|
|
|
|
const Command& cmd = cmds[i];
|
2014-10-28 12:25:22 +00:00
|
|
|
length += cmd.insert_len_ + cmd.copy_len_;
|
2013-10-23 11:06:13 +00:00
|
|
|
}
|
|
|
|
return length;
|
|
|
|
}
|
|
|
|
|
2015-04-01 14:10:15 +00:00
|
|
|
uint8_t* BrotliCompressor::GetBrotliStorage(size_t size) {
|
|
|
|
if (storage_size_ < size) {
|
|
|
|
storage_.reset(new uint8_t[size]);
|
|
|
|
storage_size_ = size;
|
|
|
|
}
|
|
|
|
return &storage_[0];
|
|
|
|
}
|
|
|
|
|
2014-03-20 13:32:35 +00:00
|
|
|
BrotliCompressor::BrotliCompressor(BrotliParams params)
|
|
|
|
: params_(params),
|
|
|
|
hashers_(new Hashers()),
|
2015-04-01 14:10:15 +00:00
|
|
|
input_pos_(0) {
|
|
|
|
// Sanitize params.
|
|
|
|
if (params_.lgwin < kMinWindowBits) {
|
|
|
|
params_.lgwin = kMinWindowBits;
|
2015-04-01 14:29:04 +00:00
|
|
|
} else if (params_.lgwin > kMaxWindowBits) {
|
2015-04-01 14:10:15 +00:00
|
|
|
params_.lgwin = kMaxWindowBits;
|
|
|
|
}
|
2015-04-01 14:29:04 +00:00
|
|
|
if (params_.lgblock == 0) {
|
|
|
|
params_.lgblock = 16;
|
|
|
|
if (params_.quality >= 9 && params_.lgwin > params_.lgblock) {
|
|
|
|
params_.lgblock = std::min(21, params_.lgwin);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
params_.lgblock = std::min(kMaxInputBlockBits,
|
|
|
|
std::max(kMinInputBlockBits, params_.lgblock));
|
|
|
|
}
|
2015-04-01 14:10:15 +00:00
|
|
|
|
|
|
|
// Set maximum distance, see section 9.1. of the spec.
|
|
|
|
max_backward_distance_ = (1 << params_.lgwin) - 16;
|
|
|
|
|
|
|
|
// Initialize input and literal cost ring buffers.
|
|
|
|
// We allocate at least lgwin + 1 bits for the ring buffer so that the newly
|
|
|
|
// added block fits there completely and we still get lgwin bits and at least
|
|
|
|
// read_block_size_bits + 1 bits because the copy tail length needs to be
|
|
|
|
// smaller than ringbuffer size.
|
2015-04-01 14:29:04 +00:00
|
|
|
int ringbuffer_bits = std::max(params_.lgwin + 1, params_.lgblock + 1);
|
|
|
|
ringbuffer_.reset(new RingBuffer(ringbuffer_bits, params_.lgblock));
|
2015-04-01 14:10:15 +00:00
|
|
|
literal_cost_.resize(1 << ringbuffer_bits);
|
|
|
|
|
|
|
|
// Initialize storage.
|
|
|
|
storage_size_ = 1 << 16;
|
|
|
|
storage_.reset(new uint8_t[storage_size_]);
|
|
|
|
if (params_.lgwin == 16) {
|
|
|
|
last_byte_ = 0;
|
|
|
|
last_byte_bits_ = 1;
|
|
|
|
} else {
|
|
|
|
last_byte_ = ((params_.lgwin - 17) << 1) | 1;
|
|
|
|
last_byte_bits_ = 4;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Initialize distance cache.
|
2014-10-28 12:25:22 +00:00
|
|
|
dist_cache_[0] = 4;
|
|
|
|
dist_cache_[1] = 11;
|
|
|
|
dist_cache_[2] = 15;
|
|
|
|
dist_cache_[3] = 16;
|
2015-04-01 14:10:15 +00:00
|
|
|
|
|
|
|
// Initialize hashers.
|
2014-03-20 13:32:35 +00:00
|
|
|
switch (params.mode) {
|
2014-10-28 12:25:22 +00:00
|
|
|
case BrotliParams::MODE_TEXT: hash_type_ = 8; break;
|
|
|
|
case BrotliParams::MODE_FONT: hash_type_ = 9; break;
|
2014-03-20 13:32:35 +00:00
|
|
|
default: break;
|
|
|
|
}
|
|
|
|
hashers_->Init(hash_type_);
|
|
|
|
if (params.mode == BrotliParams::MODE_TEXT) {
|
2014-10-29 14:39:35 +00:00
|
|
|
StoreDictionaryWordHashes(params.enable_transforms);
|
2014-03-20 13:32:35 +00:00
|
|
|
}
|
2015-04-01 14:10:15 +00:00
|
|
|
|
2013-11-19 22:32:56 +00:00
|
|
|
}
|
2013-11-15 18:02:17 +00:00
|
|
|
|
|
|
|
BrotliCompressor::~BrotliCompressor() {
|
|
|
|
}
|
|
|
|
|
2014-03-20 13:32:35 +00:00
|
|
|
StaticDictionary *BrotliCompressor::static_dictionary_ = NULL;
|
|
|
|
|
2014-10-29 14:39:35 +00:00
|
|
|
void BrotliCompressor::StoreDictionaryWordHashes(bool enable_transforms) {
|
2014-03-20 13:32:35 +00:00
|
|
|
if (static_dictionary_ == NULL) {
|
|
|
|
static_dictionary_ = new StaticDictionary;
|
2015-04-01 14:29:04 +00:00
|
|
|
static_dictionary_->Fill(enable_transforms);
|
2014-02-17 13:25:36 +00:00
|
|
|
}
|
2014-03-20 13:32:35 +00:00
|
|
|
hashers_->SetStaticDictionary(static_dictionary_);
|
2014-02-17 13:25:36 +00:00
|
|
|
}
|
|
|
|
|
2014-10-28 13:05:53 +00:00
|
|
|
bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
|
2013-11-15 18:02:17 +00:00
|
|
|
const uint8_t* input_buffer,
|
2013-12-12 12:18:04 +00:00
|
|
|
const bool is_last,
|
2013-11-15 18:02:17 +00:00
|
|
|
size_t* encoded_size,
|
|
|
|
uint8_t* encoded_buffer) {
|
2015-04-01 14:29:04 +00:00
|
|
|
if (input_size > input_block_size()) {
|
2015-04-01 14:10:15 +00:00
|
|
|
return false;
|
|
|
|
}
|
2014-02-14 14:04:23 +00:00
|
|
|
static const double kMinUTF8Ratio = 0.75;
|
|
|
|
bool utf8_mode = false;
|
2014-10-28 12:25:22 +00:00
|
|
|
std::vector<Command> commands((input_size + 1) >> 1);
|
2015-04-01 14:10:15 +00:00
|
|
|
// Save the state of the distance cache in case we need to restore it for
|
|
|
|
// emitting an uncompressed block.
|
|
|
|
int saved_dist_cache[4];
|
|
|
|
memcpy(saved_dist_cache, dist_cache_, sizeof(dist_cache_));
|
2013-12-12 12:18:04 +00:00
|
|
|
if (input_size > 0) {
|
2015-04-01 14:10:15 +00:00
|
|
|
ringbuffer_->Write(input_buffer, input_size);
|
2014-02-14 14:04:23 +00:00
|
|
|
utf8_mode = IsMostlyUTF8(
|
2015-04-01 14:10:15 +00:00
|
|
|
&ringbuffer_->start()[input_pos_ & ringbuffer_->mask()],
|
|
|
|
input_size, kMinUTF8Ratio);
|
2014-02-14 14:04:23 +00:00
|
|
|
if (utf8_mode) {
|
|
|
|
EstimateBitCostsForLiteralsUTF8(input_pos_, input_size,
|
2015-04-01 14:10:15 +00:00
|
|
|
ringbuffer_->mask(), ringbuffer_->mask(),
|
|
|
|
ringbuffer_->start(), &literal_cost_[0]);
|
2014-02-14 14:04:23 +00:00
|
|
|
} else {
|
|
|
|
EstimateBitCostsForLiterals(input_pos_, input_size,
|
2015-04-01 14:10:15 +00:00
|
|
|
ringbuffer_->mask(), ringbuffer_->mask(),
|
|
|
|
ringbuffer_->start(), &literal_cost_[0]);
|
2014-03-20 13:32:35 +00:00
|
|
|
}
|
2014-10-28 12:25:22 +00:00
|
|
|
int last_insert_len = 0;
|
|
|
|
int num_commands = 0;
|
|
|
|
double base_min_score = 8.115;
|
2014-03-20 13:32:35 +00:00
|
|
|
CreateBackwardReferences(
|
|
|
|
input_size, input_pos_,
|
2015-04-01 14:10:15 +00:00
|
|
|
ringbuffer_->start(), ringbuffer_->mask(),
|
|
|
|
&literal_cost_[0], ringbuffer_->mask(),
|
|
|
|
max_backward_distance_,
|
2014-10-28 12:25:22 +00:00
|
|
|
base_min_score,
|
2015-04-01 14:10:15 +00:00
|
|
|
params_.quality,
|
2014-03-20 13:32:35 +00:00
|
|
|
hashers_.get(),
|
|
|
|
hash_type_,
|
2014-10-28 12:25:22 +00:00
|
|
|
dist_cache_,
|
|
|
|
&last_insert_len,
|
|
|
|
&commands[0],
|
|
|
|
&num_commands);
|
|
|
|
commands.resize(num_commands);
|
|
|
|
if (last_insert_len > 0) {
|
|
|
|
commands.push_back(Command(last_insert_len));
|
|
|
|
}
|
2013-12-12 12:18:04 +00:00
|
|
|
}
|
2015-03-27 13:20:35 +00:00
|
|
|
int num_direct_distance_codes =
|
2014-03-20 13:32:35 +00:00
|
|
|
params_.mode == BrotliParams::MODE_FONT ? 12 : 0;
|
2015-03-27 13:20:35 +00:00
|
|
|
int distance_postfix_bits = params_.mode == BrotliParams::MODE_FONT ? 1 : 0;
|
|
|
|
int literal_context_mode = CONTEXT_SIGNED;
|
2015-04-01 14:10:15 +00:00
|
|
|
const size_t max_out_size = 2 * input_size + 500;
|
|
|
|
uint8_t* storage = GetBrotliStorage(max_out_size);
|
|
|
|
storage[0] = last_byte_;
|
|
|
|
int storage_ix = last_byte_bits_;
|
|
|
|
|
2015-03-27 13:20:35 +00:00
|
|
|
MetaBlockSplit mb;
|
|
|
|
size_t len = MetaBlockLength(commands);
|
|
|
|
if (!commands.empty()) {
|
|
|
|
if (params_.greedy_block_split) {
|
2015-04-01 14:10:15 +00:00
|
|
|
BuildMetaBlockGreedy(ringbuffer_->start(), input_pos_,
|
|
|
|
ringbuffer_->mask(),
|
|
|
|
commands.data(), commands.size(), params_.quality,
|
|
|
|
&mb);
|
2015-03-27 13:20:35 +00:00
|
|
|
} else {
|
|
|
|
RecomputeDistancePrefixes(&commands,
|
|
|
|
num_direct_distance_codes,
|
|
|
|
distance_postfix_bits);
|
2015-04-01 14:10:15 +00:00
|
|
|
BuildMetaBlock(ringbuffer_->start(), input_pos_, ringbuffer_->mask(),
|
2015-03-27 13:20:35 +00:00
|
|
|
commands,
|
|
|
|
num_direct_distance_codes,
|
|
|
|
distance_postfix_bits,
|
|
|
|
literal_context_mode,
|
|
|
|
&mb);
|
|
|
|
}
|
|
|
|
}
|
2015-04-01 14:10:15 +00:00
|
|
|
if (!StoreMetaBlock(ringbuffer_->start(), input_pos_, len,
|
|
|
|
ringbuffer_->mask(),
|
|
|
|
is_last, params_.quality,
|
2015-03-27 13:20:35 +00:00
|
|
|
num_direct_distance_codes,
|
|
|
|
distance_postfix_bits,
|
|
|
|
literal_context_mode,
|
|
|
|
commands.data(), commands.size(),
|
|
|
|
mb,
|
2015-04-01 14:10:15 +00:00
|
|
|
&storage_ix, storage)) {
|
2015-03-24 09:18:06 +00:00
|
|
|
return false;
|
|
|
|
}
|
2015-04-01 14:10:15 +00:00
|
|
|
size_t output_size = storage_ix >> 3;
|
2013-12-12 12:18:04 +00:00
|
|
|
if (input_size + 4 < output_size) {
|
2015-04-01 14:10:15 +00:00
|
|
|
// Restore the distance cache and last byte.
|
|
|
|
memcpy(dist_cache_, saved_dist_cache, sizeof(dist_cache_));
|
|
|
|
storage[0] = last_byte_;
|
|
|
|
storage_ix = last_byte_bits_;
|
|
|
|
if (!StoreUncompressedMetaBlock(is_last,
|
|
|
|
ringbuffer_->start(), input_pos_,
|
|
|
|
ringbuffer_->mask(), len,
|
|
|
|
&storage_ix, storage)) {
|
2014-10-28 13:05:53 +00:00
|
|
|
return false;
|
|
|
|
}
|
2015-04-01 14:10:15 +00:00
|
|
|
output_size = storage_ix >> 3;
|
2013-12-12 12:18:04 +00:00
|
|
|
}
|
2015-04-01 14:10:15 +00:00
|
|
|
if (output_size > *encoded_size) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
memcpy(encoded_buffer, storage, output_size);
|
|
|
|
*encoded_size = output_size;
|
|
|
|
last_byte_ = storage[output_size];
|
|
|
|
last_byte_bits_ = storage_ix & 7;
|
|
|
|
input_pos_ += len;
|
2014-10-28 13:05:53 +00:00
|
|
|
return true;
|
2013-11-15 18:02:17 +00:00
|
|
|
}
|
|
|
|
|
2014-10-28 13:05:53 +00:00
|
|
|
bool BrotliCompressor::FinishStream(
|
2013-11-15 18:02:17 +00:00
|
|
|
size_t* encoded_size, uint8_t* encoded_buffer) {
|
2014-10-28 13:05:53 +00:00
|
|
|
return WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer);
|
2013-11-15 18:02:17 +00:00
|
|
|
}
|
|
|
|
|
2014-03-20 13:32:35 +00:00
|
|
|
int BrotliCompressBuffer(BrotliParams params,
|
|
|
|
size_t input_size,
|
2013-10-23 11:06:13 +00:00
|
|
|
const uint8_t* input_buffer,
|
|
|
|
size_t* encoded_size,
|
|
|
|
uint8_t* encoded_buffer) {
|
2014-10-28 13:05:53 +00:00
|
|
|
if (*encoded_size == 0) {
|
|
|
|
// Output buffer needs at least one byte.
|
|
|
|
return 0;
|
|
|
|
} else if (input_size == 0) {
|
2013-11-28 16:37:13 +00:00
|
|
|
encoded_buffer[0] = 6;
|
|
|
|
*encoded_size = 1;
|
2013-10-23 11:06:13 +00:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2014-03-20 13:32:35 +00:00
|
|
|
BrotliCompressor compressor(params);
|
2015-04-01 14:10:15 +00:00
|
|
|
const int max_block_size = compressor.input_block_size();
|
2013-11-15 18:02:17 +00:00
|
|
|
size_t max_output_size = *encoded_size;
|
|
|
|
const uint8_t* input_end = input_buffer + input_size;
|
|
|
|
*encoded_size = 0;
|
2013-10-23 11:06:13 +00:00
|
|
|
|
2013-11-15 18:02:17 +00:00
|
|
|
while (input_buffer < input_end) {
|
|
|
|
int block_size = max_block_size;
|
2013-12-12 12:18:04 +00:00
|
|
|
bool is_last = false;
|
2013-11-15 18:02:17 +00:00
|
|
|
if (block_size >= input_end - input_buffer) {
|
|
|
|
block_size = input_end - input_buffer;
|
2013-12-12 12:18:04 +00:00
|
|
|
is_last = true;
|
2013-11-15 18:02:17 +00:00
|
|
|
}
|
|
|
|
size_t output_size = max_output_size;
|
2014-10-28 13:05:53 +00:00
|
|
|
if (!compressor.WriteMetaBlock(block_size, input_buffer,
|
|
|
|
is_last, &output_size,
|
|
|
|
&encoded_buffer[*encoded_size])) {
|
|
|
|
return 0;
|
|
|
|
}
|
2013-11-15 18:02:17 +00:00
|
|
|
input_buffer += block_size;
|
|
|
|
*encoded_size += output_size;
|
|
|
|
max_output_size -= output_size;
|
2013-10-23 11:06:13 +00:00
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace brotli
|