mirror of
https://github.com/google/brotli.git
synced 2024-11-25 04:50:05 +00:00
Merge pull request #83 from szabadka/master
Encoder implementation using input/output classes.
This commit is contained in:
commit
5ea92475af
@ -139,7 +139,7 @@ static const uint8_t kUTF8ContextLookup[512] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Context lookup table for small signed integers.
|
// Context lookup table for small signed integers.
|
||||||
static const int kSigned3BitContextLookup[] = {
|
static const uint8_t kSigned3BitContextLookup[] = {
|
||||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||||
|
371
enc/encode.cc
371
enc/encode.cc
@ -37,6 +37,8 @@
|
|||||||
|
|
||||||
namespace brotli {
|
namespace brotli {
|
||||||
|
|
||||||
|
static const double kMinUTF8Ratio = 0.75;
|
||||||
|
|
||||||
int ParseAsUTF8(int* symbol, const uint8_t* input, int size) {
|
int ParseAsUTF8(int* symbol, const uint8_t* input, int size) {
|
||||||
// ASCII
|
// ASCII
|
||||||
if ((input[0] & 0x80) == 0) {
|
if ((input[0] & 0x80) == 0) {
|
||||||
@ -130,8 +132,14 @@ uint8_t* BrotliCompressor::GetBrotliStorage(size_t size) {
|
|||||||
BrotliCompressor::BrotliCompressor(BrotliParams params)
|
BrotliCompressor::BrotliCompressor(BrotliParams params)
|
||||||
: params_(params),
|
: params_(params),
|
||||||
hashers_(new Hashers()),
|
hashers_(new Hashers()),
|
||||||
input_pos_(0) {
|
input_pos_(0),
|
||||||
|
num_commands_(0),
|
||||||
|
last_insert_len_(0),
|
||||||
|
last_flush_pos_(0),
|
||||||
|
last_processed_pos_(0),
|
||||||
|
storage_size_(0) {
|
||||||
// Sanitize params.
|
// Sanitize params.
|
||||||
|
params_.quality = std::max(0, params_.quality);
|
||||||
if (params_.lgwin < kMinWindowBits) {
|
if (params_.lgwin < kMinWindowBits) {
|
||||||
params_.lgwin = kMinWindowBits;
|
params_.lgwin = kMinWindowBits;
|
||||||
} else if (params_.lgwin > kMaxWindowBits) {
|
} else if (params_.lgwin > kMaxWindowBits) {
|
||||||
@ -168,9 +176,11 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
|
|||||||
literal_cost_.reset(new float[literal_cost_mask_ + 1]);
|
literal_cost_.reset(new float[literal_cost_mask_ + 1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize storage.
|
// Allocate command buffer.
|
||||||
storage_size_ = 1 << 16;
|
cmd_buffer_size_ = std::max(1 << 18, 1 << params_.lgblock);
|
||||||
storage_.reset(new uint8_t[storage_size_]);
|
commands_.reset(new brotli::Command[cmd_buffer_size_]);
|
||||||
|
|
||||||
|
// Initialize last byte with stream header.
|
||||||
if (params_.lgwin == 16) {
|
if (params_.lgwin == 16) {
|
||||||
last_byte_ = 0;
|
last_byte_ = 0;
|
||||||
last_byte_bits_ = 1;
|
last_byte_bits_ = 1;
|
||||||
@ -186,10 +196,19 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
|
|||||||
dist_cache_[3] = 16;
|
dist_cache_[3] = 16;
|
||||||
|
|
||||||
// Initialize hashers.
|
// Initialize hashers.
|
||||||
switch (params_.mode) {
|
switch (params_.quality) {
|
||||||
case BrotliParams::MODE_TEXT: hash_type_ = 8; break;
|
case 0:
|
||||||
case BrotliParams::MODE_FONT: hash_type_ = 9; break;
|
case 1: hash_type_ = 1; break;
|
||||||
default: break;
|
case 2:
|
||||||
|
case 3: hash_type_ = 2; break;
|
||||||
|
case 4: hash_type_ = 3; break;
|
||||||
|
case 5:
|
||||||
|
case 6: hash_type_ = 4; break;
|
||||||
|
case 7: hash_type_ = 5; break;
|
||||||
|
case 8: hash_type_ = 6; break;
|
||||||
|
case 9: hash_type_ = 7; break;
|
||||||
|
default: // quality > 9
|
||||||
|
hash_type_ = (params_.mode == BrotliParams::MODE_TEXT) ? 8 : 9;
|
||||||
}
|
}
|
||||||
hashers_->Init(hash_type_);
|
hashers_->Init(hash_type_);
|
||||||
if (params_.mode == BrotliParams::MODE_TEXT &&
|
if (params_.mode == BrotliParams::MODE_TEXT &&
|
||||||
@ -201,7 +220,7 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
|
|||||||
BrotliCompressor::~BrotliCompressor() {
|
BrotliCompressor::~BrotliCompressor() {
|
||||||
}
|
}
|
||||||
|
|
||||||
StaticDictionary *BrotliCompressor::static_dictionary_ = NULL;
|
StaticDictionary* BrotliCompressor::static_dictionary_ = NULL;
|
||||||
|
|
||||||
void BrotliCompressor::StoreDictionaryWordHashes(bool enable_transforms) {
|
void BrotliCompressor::StoreDictionaryWordHashes(bool enable_transforms) {
|
||||||
if (static_dictionary_ == NULL) {
|
if (static_dictionary_ == NULL) {
|
||||||
@ -211,123 +230,240 @@ void BrotliCompressor::StoreDictionaryWordHashes(bool enable_transforms) {
|
|||||||
hashers_->SetStaticDictionary(static_dictionary_);
|
hashers_->SetStaticDictionary(static_dictionary_);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
|
void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
|
||||||
const uint8_t* input_buffer,
|
const uint8_t* input_buffer) {
|
||||||
const bool is_last,
|
ringbuffer_->Write(input_buffer, input_size);
|
||||||
size_t* encoded_size,
|
input_pos_ += input_size;
|
||||||
uint8_t* encoded_buffer) {
|
|
||||||
if (input_size > input_block_size()) {
|
// Erase a few more bytes in the ring buffer to make hashing not
|
||||||
|
// depend on uninitialized data. This makes compression deterministic
|
||||||
|
// and it prevents uninitialized memory warnings in Valgrind. Even
|
||||||
|
// without erasing, the output would be valid (but nondeterministic).
|
||||||
|
//
|
||||||
|
// Background information: The compressor stores short (at most 8 bytes)
|
||||||
|
// substrings of the input already read in a hash table, and detects
|
||||||
|
// repetitions by looking up such substrings in the hash table. If it
|
||||||
|
// can find a substring, it checks whether the substring is really there
|
||||||
|
// in the ring buffer (or it's just a hash collision). Should the hash
|
||||||
|
// table become corrupt, this check makes sure that the output is
|
||||||
|
// still valid, albeit the compression ratio would be bad.
|
||||||
|
//
|
||||||
|
// The compressor populates the hash table from the ring buffer as it's
|
||||||
|
// reading new bytes from the input. However, at the last few indexes of
|
||||||
|
// the ring buffer, there are not enough bytes to build full-length
|
||||||
|
// substrings from. Since the hash table always contains full-length
|
||||||
|
// substrings, we erase with dummy 0s here to make sure that those
|
||||||
|
// substrings will contain 0s at the end instead of uninitialized
|
||||||
|
// data.
|
||||||
|
//
|
||||||
|
// Please note that erasing is not necessary (because the
|
||||||
|
// memory region is already initialized since the ring buffer
|
||||||
|
// has a `tail' that holds a copy of the beginning), so we
|
||||||
|
// skip erasing if we have already gone around at least once in
|
||||||
|
// the ring buffer.
|
||||||
|
size_t pos = ringbuffer_->position();
|
||||||
|
// Only clear during the first round of ringbuffer writes. On
|
||||||
|
// subsequent rounds data in the ringbuffer would be affected.
|
||||||
|
if (pos <= ringbuffer_->mask()) {
|
||||||
|
// This is the first time when the ring buffer is being written.
|
||||||
|
// We clear 3 bytes just after the bytes that have been copied from
|
||||||
|
// the input buffer.
|
||||||
|
//
|
||||||
|
// The ringbuffer has a "tail" that holds a copy of the beginning,
|
||||||
|
// but only once the ring buffer has been fully written once, i.e.,
|
||||||
|
// pos > mask. For the first time (pos <= mask), we need to write values
|
||||||
|
// in this tail (where index may be larger than mask), so that
|
||||||
|
// we have exactly defined behavior and don't read un-initialized
|
||||||
|
// memory. Due to performance reasons, hashing reads data using a
|
||||||
|
// LOAD32, which can go 3 bytes beyond the bytes written in the
|
||||||
|
// ringbuffer.
|
||||||
|
memset(ringbuffer_->start() + pos, 0, 3);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool BrotliCompressor::WriteBrotliData(const bool is_last,
|
||||||
|
const bool force_flush,
|
||||||
|
size_t* out_size,
|
||||||
|
uint8_t** output) {
|
||||||
|
const size_t bytes = input_pos_ - last_processed_pos_;
|
||||||
|
const uint8_t* data = ringbuffer_->start();
|
||||||
|
const size_t mask = ringbuffer_->mask();
|
||||||
|
|
||||||
|
if (bytes > input_block_size()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
static const double kMinUTF8Ratio = 0.75;
|
|
||||||
bool utf8_mode = false;
|
bool utf8_mode =
|
||||||
std::vector<Command> commands((input_size + 1) >> 1);
|
|
||||||
// Save the state of the distance cache in case we need to restore it for
|
|
||||||
// emitting an uncompressed block.
|
|
||||||
int saved_dist_cache[4];
|
|
||||||
memcpy(saved_dist_cache, dist_cache_, sizeof(dist_cache_));
|
|
||||||
if (input_size > 0) {
|
|
||||||
ringbuffer_->Write(input_buffer, input_size);
|
|
||||||
utf8_mode =
|
|
||||||
params_.enable_context_modeling &&
|
params_.enable_context_modeling &&
|
||||||
IsMostlyUTF8(&ringbuffer_->start()[input_pos_ & ringbuffer_->mask()],
|
IsMostlyUTF8(&data[last_processed_pos_ & mask], bytes, kMinUTF8Ratio);
|
||||||
input_size, kMinUTF8Ratio);
|
|
||||||
if (literal_cost_.get()) {
|
if (literal_cost_.get()) {
|
||||||
if (utf8_mode) {
|
if (utf8_mode) {
|
||||||
EstimateBitCostsForLiteralsUTF8(input_pos_, input_size,
|
EstimateBitCostsForLiteralsUTF8(last_processed_pos_, bytes, mask,
|
||||||
ringbuffer_->mask(),
|
literal_cost_mask_, data,
|
||||||
literal_cost_mask_,
|
|
||||||
ringbuffer_->start(),
|
|
||||||
literal_cost_.get());
|
literal_cost_.get());
|
||||||
} else {
|
} else {
|
||||||
EstimateBitCostsForLiterals(input_pos_, input_size,
|
EstimateBitCostsForLiterals(last_processed_pos_, bytes, mask,
|
||||||
ringbuffer_->mask(), literal_cost_mask_,
|
literal_cost_mask_,
|
||||||
ringbuffer_->start(), literal_cost_.get());
|
data, literal_cost_.get());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
int last_insert_len = 0;
|
|
||||||
int num_commands = 0;
|
|
||||||
double base_min_score = params_.enable_context_modeling ? 8.115 : 4.0;
|
double base_min_score = params_.enable_context_modeling ? 8.115 : 4.0;
|
||||||
CreateBackwardReferences(
|
CreateBackwardReferences(bytes, last_processed_pos_, data, mask,
|
||||||
input_size, input_pos_,
|
literal_cost_.get(),
|
||||||
ringbuffer_->start(), ringbuffer_->mask(),
|
literal_cost_mask_,
|
||||||
literal_cost_.get(), literal_cost_mask_,
|
|
||||||
max_backward_distance_,
|
max_backward_distance_,
|
||||||
base_min_score,
|
base_min_score,
|
||||||
params_.quality,
|
params_.quality,
|
||||||
hashers_.get(),
|
hashers_.get(),
|
||||||
hash_type_,
|
hash_type_,
|
||||||
dist_cache_,
|
dist_cache_,
|
||||||
&last_insert_len,
|
&last_insert_len_,
|
||||||
&commands[0],
|
&commands_[num_commands_],
|
||||||
&num_commands);
|
&num_commands_);
|
||||||
commands.resize(num_commands);
|
|
||||||
if (last_insert_len > 0) {
|
if (!is_last && !force_flush &&
|
||||||
commands.push_back(Command(last_insert_len));
|
num_commands_ + (input_block_size() >> 1) < cmd_buffer_size_ &&
|
||||||
|
input_pos_ + input_block_size() + 2 <= last_flush_pos_ + mask + 1) {
|
||||||
|
// Everything will happen later.
|
||||||
|
last_processed_pos_ = input_pos_;
|
||||||
|
*out_size = 0;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create the last insert-only command.
|
||||||
|
if (last_insert_len_ > 0) {
|
||||||
|
brotli::Command cmd(last_insert_len_);
|
||||||
|
commands_[num_commands_++] = cmd;
|
||||||
|
last_insert_len_ = 0;
|
||||||
}
|
}
|
||||||
int num_direct_distance_codes =
|
|
||||||
params_.mode == BrotliParams::MODE_FONT ? 12 : 0;
|
return WriteMetaBlockInternal(is_last, utf8_mode, out_size, output);
|
||||||
int distance_postfix_bits = params_.mode == BrotliParams::MODE_FONT ? 1 : 0;
|
}
|
||||||
int literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
|
|
||||||
const size_t max_out_size = 2 * input_size + 500;
|
bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
||||||
|
const bool utf8_mode,
|
||||||
|
size_t* out_size,
|
||||||
|
uint8_t** output) {
|
||||||
|
const size_t bytes = input_pos_ - last_flush_pos_;
|
||||||
|
const uint8_t* data = ringbuffer_->start();
|
||||||
|
const size_t mask = ringbuffer_->mask();
|
||||||
|
const size_t max_out_size = 2 * bytes + 500;
|
||||||
uint8_t* storage = GetBrotliStorage(max_out_size);
|
uint8_t* storage = GetBrotliStorage(max_out_size);
|
||||||
storage[0] = last_byte_;
|
storage[0] = last_byte_;
|
||||||
int storage_ix = last_byte_bits_;
|
int storage_ix = last_byte_bits_;
|
||||||
|
|
||||||
|
bool uncompressed = false;
|
||||||
|
if (num_commands_ < (bytes >> 8) + 2) {
|
||||||
|
int num_literals = 0;
|
||||||
|
for (int i = 0; i < num_commands_; ++i) {
|
||||||
|
num_literals += commands_[i].insert_len_;
|
||||||
|
}
|
||||||
|
if (num_literals > 0.99 * bytes) {
|
||||||
|
int literal_histo[256] = { 0 };
|
||||||
|
static const int kSampleRate = 13;
|
||||||
|
static const double kMinEntropy = 7.92;
|
||||||
|
static const double kBitCostThreshold = bytes * kMinEntropy / kSampleRate;
|
||||||
|
for (int i = last_flush_pos_; i < input_pos_; i += kSampleRate) {
|
||||||
|
++literal_histo[data[i & mask]];
|
||||||
|
}
|
||||||
|
if (BitsEntropy(literal_histo, 256) > kBitCostThreshold) {
|
||||||
|
uncompressed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bytes == 0) {
|
||||||
|
if (!StoreCompressedMetaBlockHeader(is_last, 0, &storage_ix, &storage[0])) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
storage_ix = (storage_ix + 7) & ~7;
|
||||||
|
} else if (uncompressed) {
|
||||||
|
if (!StoreUncompressedMetaBlock(is_last,
|
||||||
|
data, last_flush_pos_, mask, bytes,
|
||||||
|
&storage_ix,
|
||||||
|
&storage[0])) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Save the state of the distance cache in case we need to restore it for
|
||||||
|
// emitting an uncompressed block.
|
||||||
|
int saved_dist_cache[4];
|
||||||
|
memcpy(saved_dist_cache, dist_cache_, sizeof(dist_cache_));
|
||||||
|
int num_direct_distance_codes = 0;
|
||||||
|
int distance_postfix_bits = 0;
|
||||||
|
if (params_.quality > 9 && params_.mode == BrotliParams::MODE_FONT) {
|
||||||
|
num_direct_distance_codes = 12;
|
||||||
|
distance_postfix_bits = 1;
|
||||||
|
}
|
||||||
|
int literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
|
||||||
MetaBlockSplit mb;
|
MetaBlockSplit mb;
|
||||||
if (!commands.empty()) {
|
|
||||||
if (params_.greedy_block_split) {
|
if (params_.greedy_block_split) {
|
||||||
BuildMetaBlockGreedy(ringbuffer_->start(), input_pos_,
|
BuildMetaBlockGreedy(data, last_flush_pos_, mask,
|
||||||
ringbuffer_->mask(),
|
commands_.get(), num_commands_,
|
||||||
commands.data(), commands.size(), params_.quality,
|
params_.quality,
|
||||||
&mb);
|
&mb);
|
||||||
} else {
|
} else {
|
||||||
RecomputeDistancePrefixes(&commands[0], commands.size(),
|
RecomputeDistancePrefixes(commands_.get(),
|
||||||
|
num_commands_,
|
||||||
num_direct_distance_codes,
|
num_direct_distance_codes,
|
||||||
distance_postfix_bits);
|
distance_postfix_bits);
|
||||||
BuildMetaBlock(ringbuffer_->start(), input_pos_, ringbuffer_->mask(),
|
BuildMetaBlock(data, last_flush_pos_, mask,
|
||||||
commands.data(), commands.size(),
|
commands_.get(), num_commands_,
|
||||||
num_direct_distance_codes,
|
num_direct_distance_codes,
|
||||||
distance_postfix_bits,
|
distance_postfix_bits,
|
||||||
literal_context_mode,
|
literal_context_mode,
|
||||||
params_.enable_context_modeling,
|
params_.enable_context_modeling,
|
||||||
&mb);
|
&mb);
|
||||||
}
|
}
|
||||||
}
|
if (!StoreMetaBlock(data, last_flush_pos_, bytes, mask,
|
||||||
if (!StoreMetaBlock(ringbuffer_->start(), input_pos_, input_size,
|
|
||||||
ringbuffer_->mask(),
|
|
||||||
is_last, params_.quality,
|
is_last, params_.quality,
|
||||||
num_direct_distance_codes,
|
num_direct_distance_codes,
|
||||||
distance_postfix_bits,
|
distance_postfix_bits,
|
||||||
literal_context_mode,
|
literal_context_mode,
|
||||||
commands.data(), commands.size(),
|
commands_.get(), num_commands_,
|
||||||
mb,
|
mb,
|
||||||
&storage_ix, storage)) {
|
&storage_ix,
|
||||||
|
&storage[0])) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
size_t output_size = storage_ix >> 3;
|
if (bytes + 4 < (storage_ix >> 3)) {
|
||||||
if (input_size + 4 < output_size) {
|
|
||||||
// Restore the distance cache and last byte.
|
// Restore the distance cache and last byte.
|
||||||
memcpy(dist_cache_, saved_dist_cache, sizeof(dist_cache_));
|
memcpy(dist_cache_, saved_dist_cache, sizeof(dist_cache_));
|
||||||
storage[0] = last_byte_;
|
storage[0] = last_byte_;
|
||||||
storage_ix = last_byte_bits_;
|
storage_ix = last_byte_bits_;
|
||||||
if (!StoreUncompressedMetaBlock(is_last,
|
if (!StoreUncompressedMetaBlock(is_last, data, last_flush_pos_, mask,
|
||||||
ringbuffer_->start(), input_pos_,
|
bytes, &storage_ix, &storage[0])) {
|
||||||
ringbuffer_->mask(), input_size,
|
|
||||||
&storage_ix, storage)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
output_size = storage_ix >> 3;
|
|
||||||
}
|
}
|
||||||
if (output_size > *encoded_size) {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
memcpy(encoded_buffer, storage, output_size);
|
last_byte_ = storage[storage_ix >> 3];
|
||||||
*encoded_size = output_size;
|
|
||||||
last_byte_ = storage[output_size];
|
|
||||||
last_byte_bits_ = storage_ix & 7;
|
last_byte_bits_ = storage_ix & 7;
|
||||||
input_pos_ += input_size;
|
last_flush_pos_ = input_pos_;
|
||||||
|
last_processed_pos_ = input_pos_;
|
||||||
|
num_commands_ = 0;
|
||||||
|
*output = &storage[0];
|
||||||
|
*out_size = storage_ix >> 3;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
|
||||||
|
const uint8_t* input_buffer,
|
||||||
|
const bool is_last,
|
||||||
|
size_t* encoded_size,
|
||||||
|
uint8_t* encoded_buffer) {
|
||||||
|
CopyInputToRingBuffer(input_size, input_buffer);
|
||||||
|
size_t out_size = 0;
|
||||||
|
uint8_t* output;
|
||||||
|
if (!WriteBrotliData(is_last, /* force_flush = */ true, &out_size, &output) ||
|
||||||
|
out_size > *encoded_size) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (out_size > 0) {
|
||||||
|
memcpy(encoded_buffer, output, out_size);
|
||||||
|
}
|
||||||
|
*encoded_size = out_size;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -344,36 +480,69 @@ int BrotliCompressBuffer(BrotliParams params,
|
|||||||
if (*encoded_size == 0) {
|
if (*encoded_size == 0) {
|
||||||
// Output buffer needs at least one byte.
|
// Output buffer needs at least one byte.
|
||||||
return 0;
|
return 0;
|
||||||
} else if (input_size == 0) {
|
|
||||||
encoded_buffer[0] = 6;
|
|
||||||
*encoded_size = 1;
|
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
BrotliCompressor compressor(params);
|
BrotliCompressor compressor(params);
|
||||||
const int max_block_size = compressor.input_block_size();
|
BrotliMemIn in(input_buffer, input_size);
|
||||||
size_t max_output_size = *encoded_size;
|
BrotliMemOut out(encoded_buffer, *encoded_size);
|
||||||
const uint8_t* input_end = input_buffer + input_size;
|
if (!BrotliCompress(params, &in, &out)) {
|
||||||
*encoded_size = 0;
|
|
||||||
|
|
||||||
while (input_buffer < input_end) {
|
|
||||||
int block_size = max_block_size;
|
|
||||||
bool is_last = false;
|
|
||||||
if (block_size >= input_end - input_buffer) {
|
|
||||||
block_size = input_end - input_buffer;
|
|
||||||
is_last = true;
|
|
||||||
}
|
|
||||||
size_t output_size = max_output_size;
|
|
||||||
if (!compressor.WriteMetaBlock(block_size, input_buffer,
|
|
||||||
is_last, &output_size,
|
|
||||||
&encoded_buffer[*encoded_size])) {
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
input_buffer += block_size;
|
*encoded_size = out.position();
|
||||||
*encoded_size += output_size;
|
|
||||||
max_output_size -= output_size;
|
|
||||||
}
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t CopyOneBlockToRingBuffer(BrotliIn* r, BrotliCompressor* compressor) {
|
||||||
|
const size_t block_size = compressor->input_block_size();
|
||||||
|
size_t bytes_read = 0;
|
||||||
|
const uint8_t* data = reinterpret_cast<const uint8_t*>(
|
||||||
|
r->Read(block_size, &bytes_read));
|
||||||
|
if (data == NULL) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
compressor->CopyInputToRingBuffer(bytes_read, data);
|
||||||
|
|
||||||
|
// Read more bytes until block_size is filled or an EOF (data == NULL) is
|
||||||
|
// received. This is useful to get deterministic compressed output for the
|
||||||
|
// same input no matter how r->Read splits the input to chunks.
|
||||||
|
for (size_t remaining = block_size - bytes_read; remaining > 0; ) {
|
||||||
|
size_t more_bytes_read = 0;
|
||||||
|
data = reinterpret_cast<const uint8_t*>(
|
||||||
|
r->Read(remaining, &more_bytes_read));
|
||||||
|
if (data == NULL) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
compressor->CopyInputToRingBuffer(more_bytes_read, data);
|
||||||
|
bytes_read += more_bytes_read;
|
||||||
|
remaining -= more_bytes_read;
|
||||||
|
}
|
||||||
|
return bytes_read;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool BrotliInIsFinished(BrotliIn* r) {
|
||||||
|
size_t read_bytes;
|
||||||
|
return r->Read(0, &read_bytes) == NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out) {
|
||||||
|
size_t in_bytes = 0;
|
||||||
|
size_t out_bytes = 0;
|
||||||
|
uint8_t* output;
|
||||||
|
bool final_block = false;
|
||||||
|
BrotliCompressor compressor(params);
|
||||||
|
while (!final_block) {
|
||||||
|
in_bytes = CopyOneBlockToRingBuffer(in, &compressor);
|
||||||
|
final_block = in_bytes == 0 || BrotliInIsFinished(in);
|
||||||
|
out_bytes = 0;
|
||||||
|
if (!compressor.WriteBrotliData(final_block,
|
||||||
|
/* force_flush = */ false,
|
||||||
|
&out_bytes, &output)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (out_bytes > 0 && !out->Write(output, out_bytes)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace brotli
|
} // namespace brotli
|
||||||
|
35
enc/encode.h
35
enc/encode.h
@ -21,9 +21,11 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include "./command.h"
|
||||||
#include "./hash.h"
|
#include "./hash.h"
|
||||||
#include "./ringbuffer.h"
|
#include "./ringbuffer.h"
|
||||||
#include "./static_dict.h"
|
#include "./static_dict.h"
|
||||||
|
#include "./streams.h"
|
||||||
|
|
||||||
namespace brotli {
|
namespace brotli {
|
||||||
|
|
||||||
@ -90,6 +92,24 @@ class BrotliCompressor {
|
|||||||
// an error and true otherwise.
|
// an error and true otherwise.
|
||||||
bool FinishStream(size_t* encoded_size, uint8_t* encoded_buffer);
|
bool FinishStream(size_t* encoded_size, uint8_t* encoded_buffer);
|
||||||
|
|
||||||
|
// Copies the given input data to the internal ring buffer of the compressor.
|
||||||
|
// No processing of the data occurs at this time and this function can be
|
||||||
|
// called multiple times before calling WriteBrotliData() to process the
|
||||||
|
// accumulated input. At most input_block_size() bytes of input data can be
|
||||||
|
// copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
|
||||||
|
void CopyInputToRingBuffer(const size_t input_size,
|
||||||
|
const uint8_t* input_buffer);
|
||||||
|
|
||||||
|
// Processes the accumulated input data and sets *out_size to the length of
|
||||||
|
// the new output meta-block, or to zero if no new output meta-block was
|
||||||
|
// created (in this case the processed input data is buffered internally).
|
||||||
|
// If *out_size is positive, *output points to the start of the output data.
|
||||||
|
// Returns false if the size of the input data is larger than
|
||||||
|
// input_block_size() or if there was an error during writing the output.
|
||||||
|
// If is_last or force_flush is true, an output meta-block is always created.
|
||||||
|
bool WriteBrotliData(const bool is_last, const bool force_flush,
|
||||||
|
size_t* out_size, uint8_t** output);
|
||||||
|
|
||||||
// No-op, but we keep it here for API backward-compatibility.
|
// No-op, but we keep it here for API backward-compatibility.
|
||||||
void WriteStreamHeader() {}
|
void WriteStreamHeader() {}
|
||||||
|
|
||||||
@ -99,6 +119,11 @@ class BrotliCompressor {
|
|||||||
|
|
||||||
uint8_t* GetBrotliStorage(size_t size);
|
uint8_t* GetBrotliStorage(size_t size);
|
||||||
|
|
||||||
|
bool WriteMetaBlockInternal(const bool is_last,
|
||||||
|
const bool utf8_mode,
|
||||||
|
size_t* out_size,
|
||||||
|
uint8_t** output);
|
||||||
|
|
||||||
BrotliParams params_;
|
BrotliParams params_;
|
||||||
int max_backward_distance_;
|
int max_backward_distance_;
|
||||||
std::unique_ptr<Hashers> hashers_;
|
std::unique_ptr<Hashers> hashers_;
|
||||||
@ -107,6 +132,12 @@ class BrotliCompressor {
|
|||||||
std::unique_ptr<RingBuffer> ringbuffer_;
|
std::unique_ptr<RingBuffer> ringbuffer_;
|
||||||
std::unique_ptr<float[]> literal_cost_;
|
std::unique_ptr<float[]> literal_cost_;
|
||||||
size_t literal_cost_mask_;
|
size_t literal_cost_mask_;
|
||||||
|
size_t cmd_buffer_size_;
|
||||||
|
std::unique_ptr<Command[]> commands_;
|
||||||
|
int num_commands_;
|
||||||
|
int last_insert_len_;
|
||||||
|
size_t last_flush_pos_;
|
||||||
|
size_t last_processed_pos_;
|
||||||
int dist_cache_[4];
|
int dist_cache_[4];
|
||||||
uint8_t last_byte_;
|
uint8_t last_byte_;
|
||||||
uint8_t last_byte_bits_;
|
uint8_t last_byte_bits_;
|
||||||
@ -124,6 +155,10 @@ int BrotliCompressBuffer(BrotliParams params,
|
|||||||
size_t* encoded_size,
|
size_t* encoded_size,
|
||||||
uint8_t* encoded_buffer);
|
uint8_t* encoded_buffer);
|
||||||
|
|
||||||
|
// Same as above, but uses the specified input and output classes instead
|
||||||
|
// of reading from and writing to pre-allocated memory buffers.
|
||||||
|
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out);
|
||||||
|
|
||||||
} // namespace brotli
|
} // namespace brotli
|
||||||
|
|
||||||
#endif // BROTLI_ENC_ENCODE_H_
|
#endif // BROTLI_ENC_ENCODE_H_
|
||||||
|
26
tools/bro.cc
26
tools/bro.cc
@ -24,6 +24,7 @@
|
|||||||
|
|
||||||
#include "../dec/decode.h"
|
#include "../dec/decode.h"
|
||||||
#include "../enc/encode.h"
|
#include "../enc/encode.h"
|
||||||
|
#include "../enc/streams.h"
|
||||||
|
|
||||||
|
|
||||||
static void ParseArgv(int argc, char **argv,
|
static void ParseArgv(int argc, char **argv,
|
||||||
@ -133,32 +134,15 @@ int main(int argc, char** argv) {
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const int max_block_size = 1 << 21;
|
|
||||||
const size_t max_output_size = 1 << 22;
|
|
||||||
uint8_t* input_buffer = new uint8_t[max_block_size];
|
|
||||||
uint8_t* output_buffer = new uint8_t[max_output_size];
|
|
||||||
bool input_end = false;
|
|
||||||
int block_size;
|
|
||||||
brotli::BrotliParams params;
|
brotli::BrotliParams params;
|
||||||
brotli::BrotliCompressor compressor(params);
|
brotli::BrotliFileIn in(fin, 1 << 16);
|
||||||
compressor.WriteStreamHeader();
|
brotli::BrotliFileOut out(fout);
|
||||||
while (!input_end) {
|
if (!BrotliCompress(params, &in, &out)) {
|
||||||
block_size = fread(input_buffer, 1, max_block_size, fin);
|
fprintf(stderr, "compression failed\n");
|
||||||
if (block_size == 0) {
|
|
||||||
input_end = true;
|
|
||||||
}
|
|
||||||
size_t output_size = max_output_size;
|
|
||||||
compressor.WriteMetaBlock(block_size, input_buffer, input_end,
|
|
||||||
&output_size, output_buffer);
|
|
||||||
if (fwrite(output_buffer, output_size, 1, fout) != 1) {
|
|
||||||
perror("fwrite");
|
|
||||||
unlink(output_path);
|
unlink(output_path);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
delete[] input_buffer;
|
|
||||||
delete[] output_buffer;
|
|
||||||
}
|
|
||||||
if (fclose(fin) != 0) {
|
if (fclose(fin) != 0) {
|
||||||
perror("fclose");
|
perror("fclose");
|
||||||
exit(1);
|
exit(1);
|
||||||
|
Loading…
Reference in New Issue
Block a user