mirror of
https://github.com/google/brotli.git
synced 2024-11-22 11:40:06 +00:00
Merge pull request #115 from szabadka/master
Bug fixes for the brotli encoder.
This commit is contained in:
commit
a0d0ecfead
@ -91,6 +91,9 @@ double PopulationCost(const Histogram<kSize>& histogram) {
|
||||
// Approximate the bit depth by round(-log2(P(symbol)))
|
||||
int depth = static_cast<int>(log2p + 0.5);
|
||||
bits += histogram.data_[i] * log2p;
|
||||
if (depth > 15) {
|
||||
depth = 15;
|
||||
}
|
||||
if (depth > max_depth) {
|
||||
max_depth = depth;
|
||||
}
|
||||
|
@ -74,7 +74,7 @@ void CopyLiteralsToByteArray(const Command* cmds,
|
||||
void CopyCommandsToByteArray(const Command* cmds,
|
||||
const size_t num_commands,
|
||||
std::vector<uint16_t>* insert_and_copy_codes,
|
||||
std::vector<uint8_t>* distance_prefixes) {
|
||||
std::vector<uint16_t>* distance_prefixes) {
|
||||
for (int i = 0; i < num_commands; ++i) {
|
||||
const Command& cmd = cmds[i];
|
||||
insert_and_copy_codes->push_back(cmd.cmd_prefix_);
|
||||
@ -356,7 +356,7 @@ void SplitBlock(const Command* cmds,
|
||||
|
||||
// Compute prefix codes for commands.
|
||||
std::vector<uint16_t> insert_and_copy_codes;
|
||||
std::vector<uint8_t> distance_prefixes;
|
||||
std::vector<uint16_t> distance_prefixes;
|
||||
CopyCommandsToByteArray(cmds, num_commands,
|
||||
&insert_and_copy_codes,
|
||||
&distance_prefixes);
|
||||
|
@ -203,13 +203,12 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
|
||||
dist_cache_[1] = 11;
|
||||
dist_cache_[2] = 15;
|
||||
dist_cache_[3] = 16;
|
||||
// Save the state of the distance cache in case we need to restore it for
|
||||
// emitting an uncompressed block.
|
||||
memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
|
||||
|
||||
// Initialize hashers.
|
||||
if (params_.quality <= 9) {
|
||||
hash_type_ = params_.quality;
|
||||
} else {
|
||||
hash_type_ = 10;
|
||||
}
|
||||
hash_type_ = std::min(10, params_.quality);
|
||||
hashers_->Init(hash_type_);
|
||||
if ((params_.mode == BrotliParams::MODE_GENERIC ||
|
||||
params_.mode == BrotliParams::MODE_TEXT) &&
|
||||
@ -338,11 +337,12 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
|
||||
// For quality 1 there is no block splitting, so we buffer at most this much
|
||||
// literals and commands.
|
||||
static const int kMaxNumDelayedSymbols = 0x2fff;
|
||||
int max_length = std::min<int>(mask + 1, 1 << kMaxInputBlockBits);
|
||||
if (!is_last && !force_flush &&
|
||||
(params_.quality >= kMinQualityForBlockSplit ||
|
||||
(num_literals_ + num_commands_ < kMaxNumDelayedSymbols)) &&
|
||||
num_commands_ + (input_block_size() >> 1) < cmd_buffer_size_ &&
|
||||
input_pos_ + input_block_size() <= last_flush_pos_ + mask + 1) {
|
||||
input_pos_ + input_block_size() <= last_flush_pos_ + max_length) {
|
||||
// Everything will happen later.
|
||||
last_processed_pos_ = input_pos_;
|
||||
*out_size = 0;
|
||||
@ -448,10 +448,6 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// Save the state of the distance cache in case we need to restore it for
|
||||
// emitting an uncompressed block.
|
||||
int saved_dist_cache[4];
|
||||
memcpy(saved_dist_cache, dist_cache_, sizeof(dist_cache_));
|
||||
int num_direct_distance_codes = 0;
|
||||
int distance_postfix_bits = 0;
|
||||
if (params_.quality > 9 && params_.mode == BrotliParams::MODE_FONT) {
|
||||
@ -521,7 +517,7 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
||||
}
|
||||
if (bytes + 4 < (storage_ix >> 3)) {
|
||||
// Restore the distance cache and last byte.
|
||||
memcpy(dist_cache_, saved_dist_cache, sizeof(dist_cache_));
|
||||
memcpy(dist_cache_, saved_dist_cache_, sizeof(dist_cache_));
|
||||
storage[0] = last_byte_;
|
||||
storage_ix = last_byte_bits_;
|
||||
if (!StoreUncompressedMetaBlock(is_last, data, last_flush_pos_, mask,
|
||||
@ -538,6 +534,9 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
||||
prev_byte2_ = data[(last_flush_pos_ - 2) & mask];
|
||||
num_commands_ = 0;
|
||||
num_literals_ = 0;
|
||||
// Save the state of the distance cache in case we need to restore it for
|
||||
// emitting an uncompressed block.
|
||||
memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
|
||||
*output = &storage[0];
|
||||
*out_size = storage_ix >> 3;
|
||||
return true;
|
||||
|
@ -165,6 +165,7 @@ class BrotliCompressor {
|
||||
size_t last_flush_pos_;
|
||||
size_t last_processed_pos_;
|
||||
int dist_cache_[4];
|
||||
int saved_dist_cache_[4];
|
||||
uint8_t last_byte_;
|
||||
uint8_t last_byte_bits_;
|
||||
uint8_t prev_byte_;
|
||||
|
@ -136,7 +136,9 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
|
||||
|
||||
// Copy prefix + next input block into a continuous area.
|
||||
size_t input_pos = prefix_size;
|
||||
std::vector<uint8_t> input(prefix_size + input_size);
|
||||
// CreateBackwardReferences reads up to 3 bytes past the end of input if the
|
||||
// mask points past the end of input.
|
||||
std::vector<uint8_t> input(prefix_size + input_size + 4);
|
||||
memcpy(&input[0], prefix_buffer, prefix_size);
|
||||
memcpy(&input[input_pos], input_buffer, input_size);
|
||||
// Since we don't have a ringbuffer, masking is a no-op.
|
||||
@ -151,8 +153,10 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
|
||||
static const double kMinUTF8Ratio = 0.75;
|
||||
bool utf8_mode = IsMostlyUTF8(&input[input_pos], input_size, kMinUTF8Ratio);
|
||||
|
||||
// Compute literal costs.
|
||||
std::vector<float> literal_cost(prefix_size + input_size);
|
||||
// Compute literal costs. The 4 bytes at the end are there to cover for an
|
||||
// over-read past the end of input, but not past the mask, in
|
||||
// CreateBackwardReferences.
|
||||
std::vector<float> literal_cost(prefix_size + input_size + 4);
|
||||
if (utf8_mode) {
|
||||
EstimateBitCostsForLiteralsUTF8(input_pos, input_size, mask, mask,
|
||||
&input[0], &literal_cost[0]);
|
||||
|
@ -40,7 +40,7 @@ void CreateHuffmanTree(const int *data,
|
||||
uint8_t *depth);
|
||||
|
||||
// Change the population counts in a way that the consequent
|
||||
// Hufmann tree compression, especially its rle-part will be more
|
||||
// Huffman tree compression, especially its rle-part will be more
|
||||
// likely to compress this data more efficiently.
|
||||
//
|
||||
// length contains the size of the histogram.
|
||||
|
@ -59,13 +59,6 @@ struct Histogram {
|
||||
data_[i] += v.data_[i];
|
||||
}
|
||||
}
|
||||
double EntropyBitCost() const {
|
||||
double retval = total_count_ * FastLog2(total_count_);
|
||||
for (int i = 0; i < kDataSize; ++i) {
|
||||
retval -= data_[i] * FastLog2(data_[i]);
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
int data_[kDataSize];
|
||||
int total_count_;
|
||||
|
@ -17,6 +17,8 @@
|
||||
#ifndef BROTLI_ENC_PORT_H_
|
||||
#define BROTLI_ENC_PORT_H_
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#if defined OS_LINUX || defined OS_CYGWIN
|
||||
#include <endian.h>
|
||||
#elif defined OS_FREEBSD
|
||||
|
Loading…
Reference in New Issue
Block a user