Merge pull request #115 from szabadka/master

Bug fixes for the brotli encoder.
This commit is contained in:
szabadka 2015-06-12 16:12:38 +02:00
commit a0d0ecfead
8 changed files with 26 additions and 24 deletions

View File

@ -91,6 +91,9 @@ double PopulationCost(const Histogram<kSize>& histogram) {
// Approximate the bit depth by round(-log2(P(symbol)))
int depth = static_cast<int>(log2p + 0.5);
bits += histogram.data_[i] * log2p;
if (depth > 15) {
depth = 15;
}
if (depth > max_depth) {
max_depth = depth;
}

View File

@ -74,7 +74,7 @@ void CopyLiteralsToByteArray(const Command* cmds,
void CopyCommandsToByteArray(const Command* cmds,
const size_t num_commands,
std::vector<uint16_t>* insert_and_copy_codes,
std::vector<uint8_t>* distance_prefixes) {
std::vector<uint16_t>* distance_prefixes) {
for (int i = 0; i < num_commands; ++i) {
const Command& cmd = cmds[i];
insert_and_copy_codes->push_back(cmd.cmd_prefix_);
@ -356,7 +356,7 @@ void SplitBlock(const Command* cmds,
// Compute prefix codes for commands.
std::vector<uint16_t> insert_and_copy_codes;
std::vector<uint8_t> distance_prefixes;
std::vector<uint16_t> distance_prefixes;
CopyCommandsToByteArray(cmds, num_commands,
&insert_and_copy_codes,
&distance_prefixes);

View File

@ -203,13 +203,12 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
dist_cache_[1] = 11;
dist_cache_[2] = 15;
dist_cache_[3] = 16;
// Save the state of the distance cache in case we need to restore it for
// emitting an uncompressed block.
memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
// Initialize hashers.
if (params_.quality <= 9) {
hash_type_ = params_.quality;
} else {
hash_type_ = 10;
}
hash_type_ = std::min(10, params_.quality);
hashers_->Init(hash_type_);
if ((params_.mode == BrotliParams::MODE_GENERIC ||
params_.mode == BrotliParams::MODE_TEXT) &&
@ -338,11 +337,12 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
// For quality 1 there is no block splitting, so we buffer at most this much
// literals and commands.
static const int kMaxNumDelayedSymbols = 0x2fff;
int max_length = std::min<int>(mask + 1, 1 << kMaxInputBlockBits);
if (!is_last && !force_flush &&
(params_.quality >= kMinQualityForBlockSplit ||
(num_literals_ + num_commands_ < kMaxNumDelayedSymbols)) &&
num_commands_ + (input_block_size() >> 1) < cmd_buffer_size_ &&
input_pos_ + input_block_size() <= last_flush_pos_ + mask + 1) {
input_pos_ + input_block_size() <= last_flush_pos_ + max_length) {
// Everything will happen later.
last_processed_pos_ = input_pos_;
*out_size = 0;
@ -448,10 +448,6 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
return false;
}
} else {
// Save the state of the distance cache in case we need to restore it for
// emitting an uncompressed block.
int saved_dist_cache[4];
memcpy(saved_dist_cache, dist_cache_, sizeof(dist_cache_));
int num_direct_distance_codes = 0;
int distance_postfix_bits = 0;
if (params_.quality > 9 && params_.mode == BrotliParams::MODE_FONT) {
@ -521,7 +517,7 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
}
if (bytes + 4 < (storage_ix >> 3)) {
// Restore the distance cache and last byte.
memcpy(dist_cache_, saved_dist_cache, sizeof(dist_cache_));
memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
storage[0] = last_byte_;
storage_ix = last_byte_bits_;
if (!StoreUncompressedMetaBlock(is_last, data, last_flush_pos_, mask,
@ -538,6 +534,9 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
prev_byte2_ = data[(last_flush_pos_ - 2) & mask];
num_commands_ = 0;
num_literals_ = 0;
// Save the state of the distance cache in case we need to restore it for
// emitting an uncompressed block.
memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
*output = &storage[0];
*out_size = storage_ix >> 3;
return true;

View File

@ -165,6 +165,7 @@ class BrotliCompressor {
size_t last_flush_pos_;
size_t last_processed_pos_;
int dist_cache_[4];
int saved_dist_cache_[4];
uint8_t last_byte_;
uint8_t last_byte_bits_;
uint8_t prev_byte_;

View File

@ -136,7 +136,9 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
// Copy prefix + next input block into a continuous area.
size_t input_pos = prefix_size;
std::vector<uint8_t> input(prefix_size + input_size);
// CreateBackwardReferences reads up to 3 bytes past the end of input if the
// mask points past the end of input.
std::vector<uint8_t> input(prefix_size + input_size + 4);
memcpy(&input[0], prefix_buffer, prefix_size);
memcpy(&input[input_pos], input_buffer, input_size);
// Since we don't have a ringbuffer, masking is a no-op.
@ -151,8 +153,10 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
static const double kMinUTF8Ratio = 0.75;
bool utf8_mode = IsMostlyUTF8(&input[input_pos], input_size, kMinUTF8Ratio);
// Compute literal costs.
std::vector<float> literal_cost(prefix_size + input_size);
// Compute literal costs. The 4 bytes at the end are there to cover for an
// over-read past the end of input, but not past the mask, in
// CreateBackwardReferences.
std::vector<float> literal_cost(prefix_size + input_size + 4);
if (utf8_mode) {
EstimateBitCostsForLiteralsUTF8(input_pos, input_size, mask, mask,
&input[0], &literal_cost[0]);

View File

@ -40,7 +40,7 @@ void CreateHuffmanTree(const int *data,
uint8_t *depth);
// Change the population counts in a way that the consequent
// Hufmann tree compression, especially its rle-part will be more
// Huffman tree compression, especially its rle-part will be more
// likely to compress this data more efficiently.
//
// length contains the size of the histogram.

View File

@ -59,13 +59,6 @@ struct Histogram {
data_[i] += v.data_[i];
}
}
double EntropyBitCost() const {
double retval = total_count_ * FastLog2(total_count_);
for (int i = 0; i < kDataSize; ++i) {
retval -= data_[i] * FastLog2(data_[i]);
}
return retval;
}
int data_[kDataSize];
int total_count_;

View File

@ -17,6 +17,8 @@
#ifndef BROTLI_ENC_PORT_H_
#define BROTLI_ENC_PORT_H_
#include <string.h>
#if defined OS_LINUX || defined OS_CYGWIN
#include <endian.h>
#elif defined OS_FREEBSD