Mirror of https://github.com/google/brotli.git (synced 2025-01-08 15:50:16 +00:00)

Merge pull request #295 from szabadka/master
Add two more fast modes to the brotli compressor.

This commit is contained in: commit 1f01d61bcf
@@ -2,7 +2,7 @@
 include ../shared.mk

-OBJS_NODICT = backward_references.o block_splitter.o brotli_bit_stream.o encode.o encode_parallel.o entropy_encode.o histogram.o literal_cost.o metablock.o static_dict.o streams.o utf8_util.o
+OBJS_NODICT = backward_references.o block_splitter.o brotli_bit_stream.o compress_fragment.o compress_fragment_two_pass.o encode.o encode_parallel.o entropy_encode.o histogram.o literal_cost.o metablock.o static_dict.o streams.o utf8_util.o
 OBJS = $(OBJS_NODICT) dictionary.o

 nodict : $(OBJS_NODICT)
@@ -715,13 +715,6 @@ void CreateBackwardReferences(size_t num_bytes,
  }

  switch (hash_type) {
    case 1:
      CreateBackwardReferences<Hashers::H1>(
          num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
          max_backward_limit, quality, hashers->hash_h1, dist_cache,
          last_insert_len, commands, num_commands, num_literals);
      break;
    case 2:
      CreateBackwardReferences<Hashers::H2>(
          num_bytes, position, is_last, ringbuffer, ringbuffer_mask,
enc/compress_fragment.cc (new file, 693 lines)
@@ -0,0 +1,693 @@
/* Copyright 2015 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Function for fast encoding of an input fragment, independently from the input
// history. This function uses one-pass processing: when we find a backward
// match, we immediately emit the corresponding command and literal codes to
// the bit stream.
//
// Adapted from the CompressFragment() function in
// https://github.com/google/snappy/blob/master/snappy.cc

#include "./compress_fragment.h"

#include <algorithm>
#include <cassert>
#include <cstring>

#include "./brotli_bit_stream.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./port.h"
#include "./write_bits.h"

namespace brotli {

// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime, the oddity is enough
//   for this use.
// * The number has been tuned heuristically against compression benchmarks.
static const uint32_t kHashMul32 = 0x1e35a7bd;

static inline uint32_t Hash(const uint8_t* p, size_t shift) {
  const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 24) * kHashMul32;
  return static_cast<uint32_t>(h >> shift);
}

static inline uint32_t HashBytesAtOffset(uint64_t v, int offset, size_t shift) {
  assert(offset >= 0);
  assert(offset <= 3);
  const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
  return static_cast<uint32_t>(h >> shift);
}

static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
  return (BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
          p1[4] == p2[4]);
}
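Note: the "<< 24" in Hash() drops the three high bytes of the 64-bit load, so the hash depends on exactly the five bytes that IsMatch() later verifies. A minimal standalone sketch of the same computation, with the internal BROTLI_UNALIGNED_LOAD64 macro replaced by memcpy (HashSketch is a hypothetical name, not part of the commit):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Hedged sketch of the multiplicative hash above, with a portable load.
static uint32_t HashSketch(const uint8_t* p, size_t shift) {
  uint64_t v;
  std::memcpy(&v, p, sizeof(v));  // stands in for BROTLI_UNALIGNED_LOAD64
  // "<< 24" keeps only the low 5 bytes (on a little-endian load), i.e. the
  // same 5 bytes that IsMatch() compares.
  const uint64_t h = (v << 24) * 0x1e35a7bd;
  // ">> shift" keeps the top (64 - shift) bits; with a 2^15-entry table,
  // shift = 64 - 15 = 49 and the result is always in [0, 1 << 15).
  return static_cast<uint32_t>(h >> shift);
}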
// Builds a literal prefix code into "depths" and "bits" based on the statistics
// of the "input" string and stores it into the bit stream.
// Note that the prefix code here is built from the pre-LZ77 input, therefore
// we can only approximate the statistics of the actual literal stream.
// Moreover, for long inputs we build a histogram from a sample of the input
// and thus have to assign a non-zero depth for each literal.
void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
                                    const size_t input_size,
                                    uint8_t depths[256],
                                    uint16_t bits[256],
                                    size_t* storage_ix,
                                    uint8_t* storage) {
  uint32_t histogram[256] = { 0 };
  size_t histogram_total;
  if (input_size < (1 << 15)) {
    for (size_t i = 0; i < input_size; ++i) {
      ++histogram[input[i]];
    }
    histogram_total = input_size;
    for (size_t i = 0; i < 256; ++i) {
      // We weigh the first 11 samples with weight 3 to account for the
      // balancing effect of the LZ77 phase on the histogram.
      const uint32_t adjust = 2 * std::min(histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
    }
  } else {
    static const size_t kSampleRate = 29;
    for (size_t i = 0; i < input_size; i += kSampleRate) {
      ++histogram[input[i]];
    }
    histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
    for (size_t i = 0; i < 256; ++i) {
      // We add 1 to each population count to avoid 0 bit depths (since this is
      // only a sample and we don't know if the symbol appears or not), and we
      // weigh the first 11 samples with weight 3 to account for the balancing
      // effect of the LZ77 phase on the histogram (more frequent symbols are
      // more likely to occur in backward references instead of as literals).
      const uint32_t adjust = 1 + 2 * std::min(histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
    }
  }
  BuildAndStoreHuffmanTreeFast(histogram, histogram_total,
                               /* max_bits = */ 8,
                               depths, bits, storage_ix, storage);
}
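Note: to make the weighting concrete, in the sampled branch a byte seen 5 times gets adjust = 1 + 2 * min(5, 11) = 11 and is counted as 16, while a byte that was never sampled still receives a count of 1, guaranteeing a nonzero depth. A small hedged check of that arithmetic (standalone, not part of the commit):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  uint32_t histogram[3] = { 0, 5, 100 };  // hypothetical sample counts
  for (uint32_t& h : histogram) {
    h += 1 + 2 * std::min(h, 11u);        // the sampled-branch adjustment
  }
  assert(histogram[0] == 1);    // never-sampled byte still gets a slot
  assert(histogram[1] == 16);   // 5 + (1 + 2*5): first 11 samples count triple
  assert(histogram[2] == 123);  // 100 + (1 + 2*11): the extra weight is capped
  return 0;
}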
// Builds a command and distance prefix code (each 64 symbols) into "depth" and
// "bits" based on "histogram" and stores it into the bit stream.
void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
                                    uint8_t depth[128], uint16_t bits[128],
                                    size_t* storage_ix, uint8_t* storage) {
  CreateHuffmanTree(histogram, 64, 15, depth);
  CreateHuffmanTree(&histogram[64], 64, 14, &depth[64]);
  // We have to jump through a few hoops here in order to compute
  // the command bits because the symbols are in a different order than in
  // the full alphabet. This looks complicated, but having the symbols
  // in this order in the command bits saves a few branches in the Emit*
  // functions.
  uint8_t cmd_depth[64];
  uint16_t cmd_bits[64];
  memcpy(cmd_depth, depth, 24);
  memcpy(cmd_depth + 24, depth + 40, 8);
  memcpy(cmd_depth + 32, depth + 24, 8);
  memcpy(cmd_depth + 40, depth + 48, 8);
  memcpy(cmd_depth + 48, depth + 32, 8);
  memcpy(cmd_depth + 56, depth + 56, 8);
  ConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
  memcpy(bits, cmd_bits, 48);
  memcpy(bits + 24, cmd_bits + 32, 16);
  memcpy(bits + 32, cmd_bits + 48, 16);
  memcpy(bits + 40, cmd_bits + 24, 16);
  memcpy(bits + 48, cmd_bits + 40, 16);
  memcpy(bits + 56, cmd_bits + 56, 16);
  ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
  {
    // Create the bit length array for the full command alphabet.
    uint8_t cmd_depth[704] = { 0 };
    memcpy(cmd_depth, depth, 8);
    memcpy(cmd_depth + 64, depth + 8, 8);
    memcpy(cmd_depth + 128, depth + 16, 8);
    memcpy(cmd_depth + 192, depth + 24, 8);
    memcpy(cmd_depth + 384, depth + 32, 8);
    for (size_t i = 0; i < 8; ++i) {
      cmd_depth[128 + 8 * i] = depth[40 + i];
      cmd_depth[256 + 8 * i] = depth[48 + i];
      cmd_depth[448 + 8 * i] = depth[56 + i];
    }
    StoreHuffmanTree(cmd_depth, 704, storage_ix, storage);
  }
  StoreHuffmanTree(&depth[64], 64, storage_ix, storage);
}

// REQUIRES: insertlen < 6210
inline void EmitInsertLen(size_t insertlen,
                          const uint8_t depth[128],
                          const uint16_t bits[128],
                          uint32_t histo[128],
                          size_t* storage_ix,
                          uint8_t* storage) {
  if (insertlen < 6) {
    const size_t code = insertlen + 40;
    WriteBits(depth[code], bits[code], storage_ix, storage);
    ++histo[code];
  } else if (insertlen < 130) {
    insertlen -= 2;
    const uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
    const size_t prefix = insertlen >> nbits;
    const size_t inscode = (nbits << 1) + prefix + 42;
    WriteBits(depth[inscode], bits[inscode], storage_ix, storage);
    WriteBits(nbits, insertlen - (prefix << nbits), storage_ix, storage);
    ++histo[inscode];
  } else if (insertlen < 2114) {
    insertlen -= 66;
    const uint32_t nbits = Log2FloorNonZero(insertlen);
    const size_t code = nbits + 50;
    WriteBits(depth[code], bits[code], storage_ix, storage);
    WriteBits(nbits, insertlen - (1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    WriteBits(depth[61], bits[61], storage_ix, storage);
    WriteBits(12, insertlen - 2114, storage_ix, storage);
    ++histo[21];
  }
}

inline void EmitLongInsertLen(size_t insertlen,
                              const uint8_t depth[128],
                              const uint16_t bits[128],
                              uint32_t histo[128],
                              size_t* storage_ix,
                              uint8_t* storage) {
  if (insertlen < 22594) {
    WriteBits(depth[62], bits[62], storage_ix, storage);
    WriteBits(14, insertlen - 6210, storage_ix, storage);
    ++histo[22];
  } else {
    WriteBits(depth[63], bits[63], storage_ix, storage);
    WriteBits(24, insertlen - 22594, storage_ix, storage);
    ++histo[23];
  }
}

inline void EmitCopyLen(size_t copylen,
                        const uint8_t depth[128],
                        const uint16_t bits[128],
                        uint32_t histo[128],
                        size_t* storage_ix,
                        uint8_t* storage) {
  if (copylen < 10) {
    WriteBits(depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
    ++histo[copylen + 14];
  } else if (copylen < 134) {
    copylen -= 6;
    const uint32_t nbits = Log2FloorNonZero(copylen) - 1u;
    const size_t prefix = copylen >> nbits;
    const size_t code = (nbits << 1) + prefix + 20;
    WriteBits(depth[code], bits[code], storage_ix, storage);
    WriteBits(nbits, copylen - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 2118) {
    copylen -= 70;
    const uint32_t nbits = Log2FloorNonZero(copylen);
    const size_t code = nbits + 28;
    WriteBits(depth[code], bits[code], storage_ix, storage);
    WriteBits(nbits, copylen - (1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    WriteBits(depth[39], bits[39], storage_ix, storage);
    WriteBits(24, copylen - 2118, storage_ix, storage);
    ++histo[47];
  }
}

inline void EmitCopyLenLastDistance(size_t copylen,
                                    const uint8_t depth[128],
                                    const uint16_t bits[128],
                                    uint32_t histo[128],
                                    size_t* storage_ix,
                                    uint8_t* storage) {
  if (copylen < 12) {
    WriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
    ++histo[copylen - 4];
  } else if (copylen < 72) {
    copylen -= 8;
    const uint32_t nbits = Log2FloorNonZero(copylen) - 1;
    const size_t prefix = copylen >> nbits;
    const size_t code = (nbits << 1) + prefix + 4;
    WriteBits(depth[code], bits[code], storage_ix, storage);
    WriteBits(nbits, copylen - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 136) {
    copylen -= 8;
    const size_t code = (copylen >> 5) + 30;
    WriteBits(depth[code], bits[code], storage_ix, storage);
    WriteBits(5, copylen & 31, storage_ix, storage);
    WriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else if (copylen < 2120) {
    copylen -= 72;
    const uint32_t nbits = Log2FloorNonZero(copylen);
    const size_t code = nbits + 28;
    WriteBits(depth[code], bits[code], storage_ix, storage);
    WriteBits(nbits, copylen - (1 << nbits), storage_ix, storage);
    WriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else {
    WriteBits(depth[39], bits[39], storage_ix, storage);
    WriteBits(24, copylen - 2120, storage_ix, storage);
    WriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[47];
    ++histo[64];
  }
}

inline void EmitDistance(size_t distance,
                         const uint8_t depth[128],
                         const uint16_t bits[128],
                         uint32_t histo[128],
                         size_t* storage_ix, uint8_t* storage) {
  distance += 3;
  const uint32_t nbits = Log2FloorNonZero(distance) - 1u;
  const size_t prefix = (distance >> nbits) & 1;
  const size_t offset = (2 + prefix) << nbits;
  const size_t distcode = 2 * (nbits - 1) + prefix + 80;
  WriteBits(depth[distcode], bits[distcode], storage_ix, storage);
  WriteBits(nbits, distance - offset, storage_ix, storage);
  ++histo[distcode];
}
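Note: the distance-code arithmetic in EmitDistance() can be checked by hand. For distance 100: distance + 3 = 103, nbits = Log2FloorNonZero(103) - 1 = 5, prefix = (103 >> 5) & 1 = 1, offset = (2 + 1) << 5 = 96, so distcode = 2 * (5 - 1) + 1 + 80 = 89 with extra value 103 - 96 = 7 in 5 extra bits. A hedged standalone check (Log2Floor is a portable stand-in, not the brotli helper):

#include <cassert>
#include <cstddef>
#include <cstdint>

static uint32_t Log2Floor(uint64_t v) {  // stand-in for Log2FloorNonZero
  uint32_t r = 0;
  while (v >>= 1) ++r;
  return r;
}

// Recomputes the mapping from EmitDistance() above; a sketch for checking
// the arithmetic, not part of the brotli API.
static void DistanceCode(size_t distance, size_t* code, size_t* extra,
                         uint32_t* n_extra_bits) {
  distance += 3;
  const uint32_t nbits = Log2Floor(distance) - 1u;
  const size_t prefix = (distance >> nbits) & 1;
  *code = 2 * (nbits - 1) + prefix + 80;
  *extra = distance - ((2 + prefix) << nbits);
  *n_extra_bits = nbits;
}

int main() {
  size_t code, extra;
  uint32_t nbits;
  DistanceCode(100, &code, &extra, &nbits);
  assert(code == 89 && extra == 7 && nbits == 5);  // matches the hand computation
  return 0;
}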
inline void EmitLiterals(const uint8_t* input, const size_t len,
                         const uint8_t depth[256], const uint16_t bits[256],
                         size_t* storage_ix, uint8_t* storage) {
  for (size_t j = 0; j < len; j++) {
    const uint8_t lit = input[j];
    WriteBits(depth[lit], bits[lit], storage_ix, storage);
  }
}

// REQUIRES: len <= 1 << 20.
static void StoreMetaBlockHeader(
    size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
  // ISLAST
  WriteBits(1, 0, storage_ix, storage);
  if (len <= (1U << 16)) {
    // MNIBBLES is 4
    WriteBits(2, 0, storage_ix, storage);
    WriteBits(16, len - 1, storage_ix, storage);
  } else {
    // MNIBBLES is 5
    WriteBits(2, 1, storage_ix, storage);
    WriteBits(20, len - 1, storage_ix, storage);
  }
  // ISUNCOMPRESSED
  WriteBits(1, is_uncompressed, storage_ix, storage);
}

void UpdateBits(size_t n_bits,
                uint32_t bits,
                size_t pos,
                uint8_t* array) {
  while (n_bits > 0) {
    size_t byte_pos = pos >> 3;
    size_t n_unchanged_bits = pos & 7;
    size_t n_changed_bits = std::min(n_bits, 8 - n_unchanged_bits);
    size_t total_bits = n_unchanged_bits + n_changed_bits;
    uint32_t mask = (~((1 << total_bits) - 1)) | ((1 << n_unchanged_bits) - 1);
    uint32_t unchanged_bits = array[byte_pos] & mask;
    uint32_t changed_bits = bits & ((1 << n_changed_bits) - 1);
    array[byte_pos] =
        static_cast<uint8_t>((changed_bits << n_unchanged_bits) |
                             unchanged_bits);
    n_bits -= n_changed_bits;
    bits >>= n_changed_bits;
    pos += n_changed_bits;
  }
}
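Note: UpdateBits() is what makes the meta-block merging below possible: the 20-bit MLEN field already emitted by StoreMetaBlockHeader() is patched in place once the encoder decides to extend the block. A hedged round-trip demonstration (bits are LSB-first within each byte, matching WriteBits; assumes UpdateBits as defined above is in scope, and the patched value is arbitrary):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

int main() {
  uint8_t storage[8];
  std::memset(storage, 0, sizeof(storage));
  const size_t mlen_storage_ix = 3;  // MLEN starts after ISLAST + MNIBBLES
  UpdateBits(20, 0x12345, mlen_storage_ix, storage);

  // Read the 20 bits back, LSB-first, to confirm the round trip.
  uint32_t v = 0;
  for (size_t i = 0; i < 20; ++i) {
    const size_t pos = mlen_storage_ix + i;
    v |= static_cast<uint32_t>((storage[pos >> 3] >> (pos & 7)) & 1) << i;
  }
  assert(v == 0x12345);
  return 0;
}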
void RewindBitPosition(const size_t new_storage_ix,
                       size_t* storage_ix, uint8_t* storage) {
  const size_t bitpos = new_storage_ix & 7;
  const size_t mask = (1u << bitpos) - 1;
  storage[new_storage_ix >> 3] &= static_cast<uint8_t>(mask);
  *storage_ix = new_storage_ix;
}

bool ShouldMergeBlock(const uint8_t* data, size_t len, const uint8_t* depths) {
  size_t histo[256] = { 0 };
  static const size_t kSampleRate = 43;
  for (size_t i = 0; i < len; i += kSampleRate) {
    ++histo[data[i]];
  }
  const size_t total = (len + kSampleRate - 1) / kSampleRate;
  double r = (FastLog2(total) + 0.5) * static_cast<double>(total) + 200;
  for (size_t i = 0; i < 256; ++i) {
    r -= static_cast<double>(histo[i]) * (depths[i] + FastLog2(histo[i]));
  }
  return r >= 0.0;
}

inline bool ShouldUseUncompressedMode(const uint8_t* metablock_start,
                                      const uint8_t* next_emit,
                                      const size_t insertlen,
                                      const uint8_t literal_depths[256]) {
  const size_t compressed = static_cast<size_t>(next_emit - metablock_start);
  if (compressed * 50 > insertlen) {
    return false;
  }
  static const double kAcceptableLossForUncompressibleSpeedup = 0.02;
  static const double kMinEntropy =
      8 * (1.0 - kAcceptableLossForUncompressibleSpeedup);
  uint32_t sum = 0;
  for (int i = 0; i < 256; ++i) {
    const uint32_t n = literal_depths[i];
    sum += n << (15 - n);
  }
  return sum > static_cast<uint32_t>((1 << 15) * kMinEntropy);
}

void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
                               const size_t storage_ix_start,
                               size_t* storage_ix, uint8_t* storage) {
  const size_t len = static_cast<size_t>(end - begin);
  RewindBitPosition(storage_ix_start, storage_ix, storage);
  StoreMetaBlockHeader(len, 1, storage_ix, storage);
  *storage_ix = (*storage_ix + 7u) & ~7u;
  memcpy(&storage[*storage_ix >> 3], begin, len);
  *storage_ix += len << 3;
  storage[*storage_ix >> 3] = 0;
}

void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
                                bool is_last,
                                int* table, size_t table_size,
                                uint8_t cmd_depth[128], uint16_t cmd_bits[128],
                                size_t* cmd_code_numbits, uint8_t* cmd_code,
                                size_t* storage_ix, uint8_t* storage) {
  if (input_size == 0) {
    assert(is_last);
    WriteBits(1, 1, storage_ix, storage);  // islast
    WriteBits(1, 1, storage_ix, storage);  // isempty
    *storage_ix = (*storage_ix + 7u) & ~7u;
    return;
  }

  // "next_emit" is a pointer to the first byte that is not covered by a
  // previous copy. Bytes between "next_emit" and the start of the next copy or
  // the end of the input will be emitted as literal bytes.
  const uint8_t* next_emit = input;
  // Save the start of the first block for position and distance computations.
  const uint8_t* base_ip = input;

  static const size_t kFirstBlockSize = 3 << 15;
  static const size_t kMergeBlockSize = 1 << 16;

  const uint8_t* metablock_start = input;
  size_t block_size = std::min(input_size, kFirstBlockSize);
  size_t total_block_size = block_size;
  // Save the bit position of the MLEN field of the meta-block header, so that
  // we can update it later if we decide to extend this meta-block.
  size_t mlen_storage_ix = *storage_ix + 3;
  StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
  // No block splits, no contexts.
  WriteBits(13, 0, storage_ix, storage);

  uint8_t lit_depth[256] = { 0 };
  uint16_t lit_bits[256] = { 0 };
  BuildAndStoreLiteralPrefixCode(input, block_size, lit_depth, lit_bits,
                                 storage_ix, storage);

  // Store the pre-compressed command and distance prefix codes.
  for (size_t i = 0; i + 7 < *cmd_code_numbits; i += 8) {
    WriteBits(8, cmd_code[i >> 3], storage_ix, storage);
  }
  WriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
            storage_ix, storage);

 emit_commands:
  // Initialize the command and distance histograms. We will gather
  // statistics of command and distance codes during the processing
  // of this block and use it to update the command and distance
  // prefix codes for the next block.
  uint32_t cmd_histo[128] = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 0, 0, 0, 0,
  };

  // "ip" is the input pointer.
  const uint8_t* ip = input;
  assert(table_size);
  assert(table_size <= (1u << 31));
  assert((table_size & (table_size - 1)) == 0);  // table must be power of two
  const size_t shift = 64u - Log2FloorNonZero(table_size);
  assert(static_cast<size_t>(0xffffffffffffffffU >> shift) == table_size - 1);
  const uint8_t* ip_end = input + block_size;

  int last_distance = -1;
  const size_t kInputMarginBytes = 16;
  const size_t kMinMatchLen = 5;
  if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
    // For the last block, we need to keep a 16 bytes margin so that we can be
    // sure that all distances are at most window size - 16.
    // For all other blocks, we only need to keep a margin of 5 bytes so that
    // we don't go over the block size with a copy.
    const size_t len_limit = std::min(block_size - kMinMatchLen,
                                      input_size - kInputMarginBytes);
    const uint8_t* ip_limit = input + len_limit;

    for (uint32_t next_hash = Hash(++ip, shift); ; ) {
      assert(next_emit < ip);
      // Step 1: Scan forward in the input looking for a 5-byte-long match.
      // If we get close to exhausting the input then goto emit_remainder.
      //
      // Heuristic match skipping: If 32 bytes are scanned with no matches
      // found, start looking only at every other byte. If 32 more bytes are
      // scanned, look at every third byte, etc.. When a match is found,
      // immediately go back to looking at every byte. This is a small loss
      // (~5% performance, ~0.1% density) for compressible data due to more
      // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
      // win since the compressor quickly "realizes" the data is incompressible
      // and doesn't bother looking for matches everywhere.
      //
      // The "skip" variable keeps track of how many bytes there are since the
      // last match; dividing it by 32 (i.e. right-shifting by five) gives the
      // number of bytes to move ahead for each iteration.
      uint32_t skip = 32;

      const uint8_t* next_ip = ip;
      const uint8_t* candidate;
      do {
        ip = next_ip;
        uint32_t hash = next_hash;
        assert(hash == Hash(ip, shift));
        uint32_t bytes_between_hash_lookups = skip++ >> 5;
        next_ip = ip + bytes_between_hash_lookups;
        if (PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift);
        candidate = ip - last_distance;
        if (IsMatch(ip, candidate)) {
          if (PREDICT_TRUE(candidate < ip)) {
            table[hash] = static_cast<int>(ip - base_ip);
            break;
          }
        }
        candidate = base_ip + table[hash];
        assert(candidate >= base_ip);
        assert(candidate < ip);

        table[hash] = static_cast<int>(ip - base_ip);
      } while (PREDICT_TRUE(!IsMatch(ip, candidate)));

      // Step 2: Emit the found match together with the literal bytes from
      // "next_emit" to the bit stream, and then see if we can find a next
      // match immediately afterwards. Repeat until we find no match for the
      // input without emitting some literal bytes.
      uint64_t input_bytes;

      {
        // We have a 5-byte match at ip, and we need to emit bytes in
        // [next_emit, ip).
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
        ip += matched;
        int distance = static_cast<int>(base - candidate);  /* > 0 */
        size_t insert = static_cast<size_t>(base - next_emit);
        assert(0 == memcmp(base, candidate, matched));
        if (PREDICT_TRUE(insert < 6210)) {
          EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
        } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                             lit_depth)) {
          EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
                                    storage_ix, storage);
          input_size -= static_cast<size_t>(base - input);
          input = base;
          next_emit = input;
          goto next_block;
        } else {
          EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                            storage_ix, storage);
        }
        EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                     storage_ix, storage);
        if (distance == last_distance) {
          WriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
          ++cmd_histo[64];
        } else {
          EmitDistance(static_cast<size_t>(distance), cmd_depth, cmd_bits,
                       cmd_histo, storage_ix, storage);
          last_distance = distance;
        }
        EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
                                storage_ix, storage);

        next_emit = ip;
        if (PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        // We could immediately start working at ip now, but to improve
        // compression we first update "table" with the hashes of some
        // positions within the last copy.
        input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
        uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 3);
        prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 2);
        prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 1);

        uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
        candidate = base_ip + table[cur_hash];
        table[cur_hash] = static_cast<int>(ip - base_ip);
      }

      while (IsMatch(ip, candidate)) {
        // We have a 5-byte match at ip, and no need to emit any literal bytes
        // prior to ip.
        const uint8_t* base = ip;
        size_t matched = 5 + FindMatchLengthWithLimit(
            candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
        ip += matched;
        last_distance = static_cast<int>(base - candidate);  /* > 0 */
        assert(0 == memcmp(base, candidate, matched));
        EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
        EmitDistance(static_cast<size_t>(last_distance), cmd_depth, cmd_bits,
                     cmd_histo, storage_ix, storage);

        next_emit = ip;
        if (PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        // We could immediately start working at ip now, but to improve
        // compression we first update "table" with the hashes of some
        // positions within the last copy.
        input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
        uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 3);
        prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 2);
        prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 1);

        uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
        candidate = base_ip + table[cur_hash];
        table[cur_hash] = static_cast<int>(ip - base_ip);
      }

      next_hash = Hash(++ip, shift);
    }
  }

 emit_remainder:
  assert(next_emit <= ip_end);
  input += block_size;
  input_size -= block_size;
  block_size = std::min(input_size, kMergeBlockSize);

  // Decide if we want to continue this meta-block instead of emitting the
  // last insert-only command.
  if (input_size > 0 &&
      total_block_size + block_size <= (1 << 20) &&
      ShouldMergeBlock(input, block_size, lit_depth)) {
    assert(total_block_size > (1 << 16));
    // Update the size of the current meta-block and continue emitting
    // commands. We can do this because the current size and the new size both
    // have 5 nibbles.
    total_block_size += block_size;
    UpdateBits(20, static_cast<uint32_t>(total_block_size - 1),
               mlen_storage_ix, storage);
    goto emit_commands;
  }

  // Emit the remaining bytes as literals.
  if (next_emit < ip_end) {
    const size_t insert = static_cast<size_t>(ip_end - next_emit);
    if (PREDICT_TRUE(insert < 6210)) {
      EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                    storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
    } else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
                                         lit_depth)) {
      EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
                                storage_ix, storage);
    } else {
      EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
                        storage_ix, storage);
      EmitLiterals(next_emit, insert, lit_depth, lit_bits,
                   storage_ix, storage);
    }
  }
  next_emit = ip_end;

 next_block:
  // If we have more data, write a new meta-block header and prefix codes and
  // then continue emitting commands.
  if (input_size > 0) {
    metablock_start = input;
    block_size = std::min(input_size, kFirstBlockSize);
    total_block_size = block_size;
    // Save the bit position of the MLEN field of the meta-block header, so
    // that we can update it later if we decide to extend this meta-block.
    mlen_storage_ix = *storage_ix + 3;
    StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
    // No block splits, no contexts.
    WriteBits(13, 0, storage_ix, storage);
    memset(lit_depth, 0, sizeof(lit_depth));
    memset(lit_bits, 0, sizeof(lit_bits));
    BuildAndStoreLiteralPrefixCode(input, block_size, lit_depth, lit_bits,
                                   storage_ix, storage);
    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
                                   storage_ix, storage);
    goto emit_commands;
  }

  if (is_last) {
    WriteBits(1, 1, storage_ix, storage);  // islast
    WriteBits(1, 1, storage_ix, storage);  // isempty
    *storage_ix = (*storage_ix + 7u) & ~7u;
  } else {
    // If this is not the last block, update the command and distance prefix
    // codes for the next block and store the compressed forms.
    cmd_code[0] = 0;
    *cmd_code_numbits = 0;
    BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
                                   cmd_code_numbits, cmd_code);
  }
}

}  // namespace brotli
enc/compress_fragment.h (new file, 47 lines)
@@ -0,0 +1,47 @@
/* Copyright 2015 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Function for fast encoding of an input fragment, independently from the input
// history. This function uses one-pass processing: when we find a backward
// match, we immediately emit the corresponding command and literal codes to
// the bit stream.

#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
#define BROTLI_ENC_COMPRESS_FRAGMENT_H_

#include "./types.h"

namespace brotli {

// Compresses "input" string to the "*storage" buffer as one or more complete
// meta-blocks, and updates the "*storage_ix" bit position.
//
// If "is_last" is true, emits an additional empty last meta-block.
//
// "cmd_depth" and "cmd_bits" contain the command and distance prefix codes
// (see comment in encode.h) used for the encoding of this input fragment.
// If "is_last" is false, they are updated to reflect the statistics
// of this input fragment, to be used for the encoding of the next fragment.
//
// "*cmd_code_numbits" is the number of bits of the compressed representation
// of the command and distance prefix codes, and "cmd_code" is an array of
// at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed
// command and distance prefix codes. If "is_last" is false, these are also
// updated to represent the updated "cmd_depth" and "cmd_bits".
//
// REQUIRES: "input_size" is greater than zero, or "is_last" is true.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two
void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
                                bool is_last,
                                int* table, size_t table_size,
                                uint8_t cmd_depth[128], uint16_t cmd_bits[128],
                                size_t* cmd_code_numbits, uint8_t* cmd_code,
                                size_t* storage_ix, uint8_t* storage);

}  // namespace brotli

#endif  // BROTLI_ENC_COMPRESS_FRAGMENT_H_
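Note: the contract above implies a calling pattern: zero the hash table, seed the command codes for the first fragment, then feed fragments in order, passing is_last on the final one. A hedged sketch of that pattern (not the actual encoder driver in this commit; it assumes the InitCommandPrefixCodes() helper defined in enc/encode.cc below is visible to the caller, and the fragment size is illustrative):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

#include "./compress_fragment.h"

void OnePassSketch(const uint8_t* data, size_t size,
                   size_t* storage_ix, uint8_t* storage) {
  std::vector<int> table(1 << 15, 0);  // REQUIRES: zero-initialized
  uint8_t cmd_depth[128];
  uint16_t cmd_bits[128];
  uint8_t cmd_code[512];
  size_t cmd_code_numbits;
  brotli::InitCommandPrefixCodes(cmd_depth, cmd_bits, cmd_code,
                                 &cmd_code_numbits);

  const size_t kFragmentSize = 1 << 17;  // illustrative
  size_t pos = 0;
  while (true) {
    const size_t chunk = std::min(kFragmentSize, size - pos);
    const bool is_last = (pos + chunk == size);
    // encode.cc re-zeroes the table per call (GetHashTable); do the same here.
    std::fill(table.begin(), table.end(), 0);
    brotli::BrotliCompressFragmentFast(data + pos, chunk, is_last,
                                       table.data(), table.size(),
                                       cmd_depth, cmd_bits,
                                       &cmd_code_numbits, cmd_code,
                                       storage_ix, storage);
    pos += chunk;
    if (is_last) break;
  }
}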
enc/compress_fragment_two_pass.cc (new file, 519 lines)
@@ -0,0 +1,519 @@
/* Copyright 2015 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Function for fast encoding of an input fragment, independently from the input
// history. This function uses two-pass processing: in the first pass we save
// the found backward matches and literal bytes into a buffer, and in the
// second pass we emit them into the bit stream using prefix codes built based
// on the actual command and literal byte histograms.

#include "./compress_fragment_two_pass.h"

#include <algorithm>
#include <cassert>
#include <cstring>

#include "./brotli_bit_stream.h"
#include "./bit_cost.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./port.h"
#include "./write_bits.h"

namespace brotli {

// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime, the oddity is enough
//   for this use.
// * The number has been tuned heuristically against compression benchmarks.
static const uint32_t kHashMul32 = 0x1e35a7bd;

static inline uint32_t Hash(const uint8_t* p, size_t shift) {
  const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 16) * kHashMul32;
  return static_cast<uint32_t>(h >> shift);
}

static inline uint32_t HashBytesAtOffset(uint64_t v, int offset, size_t shift) {
  assert(offset >= 0);
  assert(offset <= 2);
  const uint64_t h = ((v >> (8 * offset)) << 16) * kHashMul32;
  return static_cast<uint32_t>(h >> shift);
}

static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
  return (BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
          p1[4] == p2[4] &&
          p1[5] == p2[5]);
}

// Builds a command and distance prefix code (each 64 symbols) into "depth" and
// "bits" based on "histogram" and stores it into the bit stream.
static void BuildAndStoreCommandPrefixCode(
    const uint32_t histogram[128],
    uint8_t depth[128], uint16_t bits[128],
    size_t* storage_ix, uint8_t* storage) {
  CreateHuffmanTree(histogram, 64, 15, depth);
  CreateHuffmanTree(&histogram[64], 64, 14, &depth[64]);
  // We have to jump through a few hoops here in order to compute
  // the command bits because the symbols are in a different order than in
  // the full alphabet. This looks complicated, but having the symbols
  // in this order in the command bits saves a few branches in the Emit*
  // functions.
  uint8_t cmd_depth[64];
  uint16_t cmd_bits[64];
  memcpy(cmd_depth, depth + 24, 24);
  memcpy(cmd_depth + 24, depth, 8);
  memcpy(cmd_depth + 32, depth + 48, 8);
  memcpy(cmd_depth + 40, depth + 8, 8);
  memcpy(cmd_depth + 48, depth + 56, 8);
  memcpy(cmd_depth + 56, depth + 16, 8);
  ConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
  memcpy(bits, cmd_bits + 24, 16);
  memcpy(bits + 8, cmd_bits + 40, 16);
  memcpy(bits + 16, cmd_bits + 56, 16);
  memcpy(bits + 24, cmd_bits, 48);
  memcpy(bits + 48, cmd_bits + 32, 16);
  memcpy(bits + 56, cmd_bits + 48, 16);
  ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
  {
    // Create the bit length array for the full command alphabet.
    uint8_t cmd_depth[704] = { 0 };
    memcpy(cmd_depth, depth + 24, 8);
    memcpy(cmd_depth + 64, depth + 32, 8);
    memcpy(cmd_depth + 128, depth + 40, 8);
    memcpy(cmd_depth + 192, depth + 48, 8);
    memcpy(cmd_depth + 384, depth + 56, 8);
    for (size_t i = 0; i < 8; ++i) {
      cmd_depth[128 + 8 * i] = depth[i];
      cmd_depth[256 + 8 * i] = depth[8 + i];
      cmd_depth[448 + 8 * i] = depth[16 + i];
    }
    StoreHuffmanTree(cmd_depth, 704, storage_ix, storage);
  }
  StoreHuffmanTree(&depth[64], 64, storage_ix, storage);
}

inline void EmitInsertLen(uint32_t insertlen, uint32_t** commands) {
  if (insertlen < 6) {
    **commands = insertlen;
  } else if (insertlen < 130) {
    insertlen -= 2;
    const uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
    const uint32_t prefix = insertlen >> nbits;
    const uint32_t inscode = (nbits << 1) + prefix + 2;
    const uint32_t extra = insertlen - (prefix << nbits);
    **commands = inscode | (extra << 8);
  } else if (insertlen < 2114) {
    insertlen -= 66;
    const uint32_t nbits = Log2FloorNonZero(insertlen);
    const uint32_t code = nbits + 10;
    const uint32_t extra = insertlen - (1 << nbits);
    **commands = code | (extra << 8);
  } else if (insertlen < 6210) {
    const uint32_t extra = insertlen - 2114;
    **commands = 21 | (extra << 8);
  } else if (insertlen < 22594) {
    const uint32_t extra = insertlen - 6210;
    **commands = 22 | (extra << 8);
  } else {
    const uint32_t extra = insertlen - 22594;
    **commands = 23 | (extra << 8);
  }
  ++(*commands);
}

inline void EmitCopyLen(size_t copylen, uint32_t** commands) {
  if (copylen < 10) {
    **commands = static_cast<uint32_t>(copylen + 38);
  } else if (copylen < 134) {
    copylen -= 6;
    const size_t nbits = Log2FloorNonZero(copylen) - 1;
    const size_t prefix = copylen >> nbits;
    const size_t code = (nbits << 1) + prefix + 44;
    const size_t extra = copylen - (prefix << nbits);
    **commands = static_cast<uint32_t>(code | (extra << 8));
  } else if (copylen < 2118) {
    copylen -= 70;
    const size_t nbits = Log2FloorNonZero(copylen);
    const size_t code = nbits + 52;
    const size_t extra = copylen - (1 << nbits);
    **commands = static_cast<uint32_t>(code | (extra << 8));
  } else {
    const size_t extra = copylen - 2118;
    **commands = static_cast<uint32_t>(63 | (extra << 8));
  }
  ++(*commands);
}

inline void EmitCopyLenLastDistance(size_t copylen, uint32_t** commands) {
  if (copylen < 12) {
    **commands = static_cast<uint32_t>(copylen + 20);
    ++(*commands);
  } else if (copylen < 72) {
    copylen -= 8;
    const size_t nbits = Log2FloorNonZero(copylen) - 1;
    const size_t prefix = copylen >> nbits;
    const size_t code = (nbits << 1) + prefix + 28;
    const size_t extra = copylen - (prefix << nbits);
    **commands = static_cast<uint32_t>(code | (extra << 8));
    ++(*commands);
  } else if (copylen < 136) {
    copylen -= 8;
    const size_t code = (copylen >> 5) + 54;
    const size_t extra = copylen & 31;
    **commands = static_cast<uint32_t>(code | (extra << 8));
    ++(*commands);
    **commands = 64;
    ++(*commands);
  } else if (copylen < 2120) {
    copylen -= 72;
    const size_t nbits = Log2FloorNonZero(copylen);
    const size_t code = nbits + 52;
    const size_t extra = copylen - (1 << nbits);
    **commands = static_cast<uint32_t>(code | (extra << 8));
    ++(*commands);
    **commands = 64;
    ++(*commands);
  } else {
    const size_t extra = copylen - 2120;
    **commands = static_cast<uint32_t>(63 | (extra << 8));
    ++(*commands);
    **commands = 64;
    ++(*commands);
  }
}

inline void EmitDistance(uint32_t distance, uint32_t** commands) {
  distance += 3;
  uint32_t nbits = Log2FloorNonZero(distance) - 1;
  const uint32_t prefix = (distance >> nbits) & 1;
  const uint32_t offset = (2 + prefix) << nbits;
  const uint32_t distcode = 2 * (nbits - 1) + prefix + 80;
  uint32_t extra = distance - offset;
  **commands = distcode | (extra << 8);
  ++(*commands);
}
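Note: unlike the one-pass variant, these Emit* helpers write no bits; each appends a 32-bit packed command whose low byte is the (reordered) command code and whose upper 24 bits carry the extra-bits value, later expanded by StoreCommands() in the second pass. A hedged decoding sketch of that layout (type and function names are illustrative, not part of the commit):

#include <cassert>
#include <cstdint>

struct PackedCommand {
  uint32_t code;   // bits 0..7: indexes cmd_depths/cmd_bits in StoreCommands()
  uint32_t extra;  // bits 8..31: extra-bits payload, width kNumExtraBits[code]
};

static PackedCommand UnpackCommand(uint32_t cmd) {
  return PackedCommand{ cmd & 0xffu, cmd >> 8 };
}

int main() {
  // EmitInsertLen(7, ...): insertlen - 2 = 5, nbits = 1, prefix = 2,
  // inscode = (1 << 1) + 2 + 2 = 6, extra = 5 - (2 << 1) = 1, so the stored
  // word is 6 | (1 << 8); kNumExtraBits[6] is indeed 1.
  const PackedCommand c = UnpackCommand(6u | (1u << 8));
  assert(c.code == 6 && c.extra == 1);
  return 0;
}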
// REQUIRES: len <= 1 << 20.
static void StoreMetaBlockHeader(
    size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
  // ISLAST
  WriteBits(1, 0, storage_ix, storage);
  if (len <= (1U << 16)) {
    // MNIBBLES is 4
    WriteBits(2, 0, storage_ix, storage);
    WriteBits(16, len - 1, storage_ix, storage);
  } else {
    // MNIBBLES is 5
    WriteBits(2, 1, storage_ix, storage);
    WriteBits(20, len - 1, storage_ix, storage);
  }
  // ISUNCOMPRESSED
  WriteBits(1, is_uncompressed, storage_ix, storage);
}

void CreateCommands(const uint8_t* input, size_t block_size, size_t input_size,
                    const uint8_t* base_ip,
                    int* table, size_t table_size,
                    uint8_t** literals, uint32_t** commands) {
  // "ip" is the input pointer.
  const uint8_t* ip = input;
  assert(table_size);
  assert(table_size <= (1u << 31));
  assert((table_size & (table_size - 1)) == 0);  // table must be power of two
  const size_t shift = 64u - Log2FloorNonZero(table_size);
  assert(static_cast<size_t>(0xffffffffffffffffU >> shift) == table_size - 1);
  const uint8_t* ip_end = input + block_size;
  // "next_emit" is a pointer to the first byte that is not covered by a
  // previous copy. Bytes between "next_emit" and the start of the next copy or
  // the end of the input will be emitted as literal bytes.
  const uint8_t* next_emit = input;

  int last_distance = -1;
  const size_t kInputMarginBytes = 16;
  const size_t kMinMatchLen = 6;
  if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
    // For the last block, we need to keep a 16 bytes margin so that we can be
    // sure that all distances are at most window size - 16.
    // For all other blocks, we only need to keep a margin of 6 bytes so that
    // we don't go over the block size with a copy.
    const size_t len_limit = std::min(block_size - kMinMatchLen,
                                      input_size - kInputMarginBytes);
    const uint8_t* ip_limit = input + len_limit;

    for (uint32_t next_hash = Hash(++ip, shift); ; ) {
      assert(next_emit < ip);
      // Step 1: Scan forward in the input looking for a 6-byte-long match.
      // If we get close to exhausting the input then goto emit_remainder.
      //
      // Heuristic match skipping: If 32 bytes are scanned with no matches
      // found, start looking only at every other byte. If 32 more bytes are
      // scanned, look at every third byte, etc.. When a match is found,
      // immediately go back to looking at every byte. This is a small loss
      // (~5% performance, ~0.1% density) for compressible data due to more
      // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
      // win since the compressor quickly "realizes" the data is incompressible
      // and doesn't bother looking for matches everywhere.
      //
      // The "skip" variable keeps track of how many bytes there are since the
      // last match; dividing it by 32 (i.e. right-shifting by five) gives the
      // number of bytes to move ahead for each iteration.
      uint32_t skip = 32;

      const uint8_t* next_ip = ip;
      const uint8_t* candidate;
      do {
        ip = next_ip;
        uint32_t hash = next_hash;
        assert(hash == Hash(ip, shift));
        uint32_t bytes_between_hash_lookups = skip++ >> 5;
        next_ip = ip + bytes_between_hash_lookups;
        if (PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift);
        candidate = ip - last_distance;
        if (IsMatch(ip, candidate)) {
          if (PREDICT_TRUE(candidate < ip)) {
            table[hash] = static_cast<int>(ip - base_ip);
            break;
          }
        }
        candidate = base_ip + table[hash];
        assert(candidate >= base_ip);
        assert(candidate < ip);

        table[hash] = static_cast<int>(ip - base_ip);
      } while (PREDICT_TRUE(!IsMatch(ip, candidate)));

      // Step 2: Emit the found match together with the literal bytes from
      // "next_emit", and then see if we can find a next match immediately
      // afterwards. Repeat until we find no match for the input
      // without emitting some literal bytes.
      uint64_t input_bytes;

      {
        // We have a 6-byte match at ip, and we need to emit bytes in
        // [next_emit, ip).
        const uint8_t* base = ip;
        size_t matched = 6 + FindMatchLengthWithLimit(
            candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
        ip += matched;
        int distance = static_cast<int>(base - candidate);  /* > 0 */
        int insert = static_cast<int>(base - next_emit);
        assert(0 == memcmp(base, candidate, matched));
        EmitInsertLen(static_cast<uint32_t>(insert), commands);
        memcpy(*literals, next_emit, static_cast<size_t>(insert));
        *literals += insert;
        if (distance == last_distance) {
          **commands = 64;
          ++(*commands);
        } else {
          EmitDistance(static_cast<uint32_t>(distance), commands);
          last_distance = distance;
        }
        EmitCopyLenLastDistance(matched, commands);

        next_emit = ip;
        if (PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        // We could immediately start working at ip now, but to improve
        // compression we first update "table" with the hashes of some
        // positions within the last copy.
        input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
        uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 5);
        prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 4);
        prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 3);
        input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
        prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 2);
        prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 1);

        uint32_t cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
        candidate = base_ip + table[cur_hash];
        table[cur_hash] = static_cast<int>(ip - base_ip);
      }

      while (IsMatch(ip, candidate)) {
        // We have a 6-byte match at ip, and no need to emit any
        // literal bytes prior to ip.
        const uint8_t* base = ip;
        size_t matched = 6 + FindMatchLengthWithLimit(
            candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
        ip += matched;
        last_distance = static_cast<int>(base - candidate);  /* > 0 */
        assert(0 == memcmp(base, candidate, matched));
        EmitCopyLen(matched, commands);
        EmitDistance(static_cast<uint32_t>(last_distance), commands);

        next_emit = ip;
        if (PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        // We could immediately start working at ip now, but to improve
        // compression we first update "table" with the hashes of some
        // positions within the last copy.
        input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
        uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 5);
        prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 4);
        prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 3);
        input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
        prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 2);
        prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 1);

        uint32_t cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
        candidate = base_ip + table[cur_hash];
        table[cur_hash] = static_cast<int>(ip - base_ip);
      }

      next_hash = Hash(++ip, shift);
    }
  }

 emit_remainder:
  assert(next_emit <= ip_end);
  // Emit the remaining bytes as literals.
  if (next_emit < ip_end) {
    const uint32_t insert = static_cast<uint32_t>(ip_end - next_emit);
    EmitInsertLen(insert, commands);
    memcpy(*literals, next_emit, insert);
    *literals += insert;
  }
}

void StoreCommands(const uint8_t* literals, const size_t num_literals,
                   const uint32_t* commands, const size_t num_commands,
                   size_t* storage_ix, uint8_t* storage) {
  uint8_t lit_depths[256] = { 0 };
  uint16_t lit_bits[256] = { 0 };
  uint32_t lit_histo[256] = { 0 };
  for (size_t i = 0; i < num_literals; ++i) {
    ++lit_histo[literals[i]];
  }
  BuildAndStoreHuffmanTreeFast(lit_histo, num_literals,
                               /* max_bits = */ 8,
                               lit_depths, lit_bits,
                               storage_ix, storage);

  uint8_t cmd_depths[128] = { 0 };
  uint16_t cmd_bits[128] = { 0 };
  uint32_t cmd_histo[128] = { 0 };
  for (size_t i = 0; i < num_commands; ++i) {
    ++cmd_histo[commands[i] & 0xff];
  }
  cmd_histo[1] += 1;
  cmd_histo[2] += 1;
  cmd_histo[64] += 1;
  cmd_histo[84] += 1;
  BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
                                 storage_ix, storage);

  static const uint32_t kNumExtraBits[128] = {
    0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
    9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
    17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
  };
  static const uint32_t kInsertOffset[24] = {
    0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578,
    1090, 2114, 6210, 22594,
  };

  for (size_t i = 0; i < num_commands; ++i) {
    const uint32_t cmd = commands[i];
    const uint32_t code = cmd & 0xff;
    const uint32_t extra = cmd >> 8;
    WriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
    WriteBits(kNumExtraBits[code], extra, storage_ix, storage);
    if (code < 24) {
      const uint32_t insert = kInsertOffset[code] + extra;
      for (uint32_t j = 0; j < insert; ++j) {
        const uint8_t lit = *literals;
        WriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
        ++literals;
      }
    }
  }
}

bool ShouldCompress(const uint8_t* input, size_t input_size,
                    size_t num_literals) {
  static const double kAcceptableLossForUncompressibleSpeedup = 0.02;
  static const double kMaxRatioOfLiterals =
      1.0 - kAcceptableLossForUncompressibleSpeedup;
  if (num_literals < kMaxRatioOfLiterals * static_cast<double>(input_size)) {
    return true;
  }
  uint32_t literal_histo[256] = { 0 };
  static const uint32_t kSampleRate = 43;
  static const double kMaxEntropy =
      8 * (1.0 - kAcceptableLossForUncompressibleSpeedup);
  const double max_total_bit_cost =
      static_cast<double>(input_size) * kMaxEntropy / kSampleRate;
  for (size_t i = 0; i < input_size; i += kSampleRate) {
    ++literal_histo[input[i]];
  }
  return BitsEntropy(literal_histo, 256) < max_total_bit_cost;
}

void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
                                   bool is_last,
                                   uint32_t* command_buf, uint8_t* literal_buf,
                                   int* table, size_t table_size,
                                   size_t* storage_ix, uint8_t* storage) {
  // Save the start of the first block for position and distance computations.
  const uint8_t* base_ip = input;

  while (input_size > 0) {
    size_t block_size = std::min(input_size, kCompressFragmentTwoPassBlockSize);
    uint32_t* commands = command_buf;
    uint8_t* literals = literal_buf;
    CreateCommands(input, block_size, input_size, base_ip, table, table_size,
                   &literals, &commands);
    const size_t num_literals = static_cast<size_t>(literals - literal_buf);
    const size_t num_commands = static_cast<size_t>(commands - command_buf);
    if (ShouldCompress(input, block_size, num_literals)) {
      StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
      // No block splits, no contexts.
      WriteBits(13, 0, storage_ix, storage);
      StoreCommands(literal_buf, num_literals, command_buf, num_commands,
                    storage_ix, storage);
    } else {
      // Since we did not find many backward references and the entropy of
      // the data is close to 8 bits, we can simply emit an uncompressed block.
      // This makes compression speed of uncompressible data about 3x faster.
      StoreMetaBlockHeader(block_size, 1, storage_ix, storage);
      *storage_ix = (*storage_ix + 7u) & ~7u;
      memcpy(&storage[*storage_ix >> 3], input, block_size);
      *storage_ix += block_size << 3;
      storage[*storage_ix >> 3] = 0;
    }
    input += block_size;
    input_size -= block_size;
  }

  if (is_last) {
    WriteBits(1, 1, storage_ix, storage);  // islast
    WriteBits(1, 1, storage_ix, storage);  // isempty
    *storage_ix = (*storage_ix + 7u) & ~7u;
  }
}

}  // namespace brotli
enc/compress_fragment_two_pass.h (new file, 40 lines)
@@ -0,0 +1,40 @@
/* Copyright 2015 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Function for fast encoding of an input fragment, independently from the input
// history. This function uses two-pass processing: in the first pass we save
// the found backward matches and literal bytes into a buffer, and in the
// second pass we emit them into the bit stream using prefix codes built based
// on the actual command and literal byte histograms.

#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_

#include "./types.h"

namespace brotli {

static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;

// Compresses "input" string to the "*storage" buffer as one or more complete
// meta-blocks, and updates the "*storage_ix" bit position.
//
// If "is_last" is true, emits an additional empty last meta-block.
//
// REQUIRES: "input_size" is greater than zero, or "is_last" is true.
// REQUIRES: "command_buf" and "literal_buf" point to at least
//           kCompressFragmentTwoPassBlockSize long arrays.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two.
void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
                                   bool is_last,
                                   uint32_t* command_buf, uint8_t* literal_buf,
                                   int* table, size_t table_size,
                                   size_t* storage_ix, uint8_t* storage);

}  // namespace brotli

#endif  // BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
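
A hedged usage sketch of the contract documented above (the buffer sizes follow the REQUIRES comments; the surrounding stream setup, such as emitting the window bits first, is elided, and the helper name is illustrative):

#include <vector>
#include "./compress_fragment_two_pass.h"

// Sketch: compress one fragment with the two-pass coder. "storage" must
// already hold the stream header bits and should be about
// 2 * input_size + 500 bytes, mirroring how encode.cc sizes its buffer.
void TwoPassFragmentSketch(const uint8_t* input, size_t input_size,
                           uint8_t* storage, size_t* storage_ix) {
  using namespace brotli;
  // Scratch buffers at least kCompressFragmentTwoPassBlockSize long.
  std::vector<uint32_t> command_buf(kCompressFragmentTwoPassBlockSize);
  std::vector<uint8_t> literal_buf(kCompressFragmentTwoPassBlockSize);
  // Power-of-two hash table, all elements initialized to zero.
  std::vector<int> table(1 << 10, 0);
  BrotliCompressFragmentTwoPass(input, input_size, /* is_last = */ true,
                                command_buf.data(), literal_buf.data(),
                                table.data(), table.size(),
                                storage_ix, storage);
}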
270
enc/encode.cc
@ -20,6 +20,8 @@
#include "./context.h"
#include "./metablock.h"
#include "./transform.h"
#include "./compress_fragment.h"
#include "./compress_fragment_two_pass.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./hash.h"
@ -33,10 +35,12 @@ namespace brotli {
static const int kMinQualityForBlockSplit = 4;
static const int kMinQualityForContextModeling = 5;
static const int kMinQualityForOptimizeHistograms = 4;
// For quality 1 there is no block splitting, so we buffer at most this much
// For quality 2 there is no block splitting, so we buffer at most this much
// literals and commands.
static const int kMaxNumDelayedSymbols = 0x2fff;

#define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));

void RecomputeDistancePrefixes(Command* cmds,
                               size_t num_commands,
                               uint32_t num_direct_distance_codes,
@ -75,6 +79,44 @@ uint8_t* BrotliCompressor::GetBrotliStorage(size_t size) {
  return storage_;
}

size_t MaxHashTableSize(int quality) {
  return quality == 0 ? 1 << 15 : 1 << 17;
}

size_t HashTableSize(size_t max_table_size, size_t input_size) {
  size_t htsize = 256;
  while (htsize < max_table_size && htsize < input_size) {
    htsize <<= 1;
  }
  return htsize;
}

int* BrotliCompressor::GetHashTable(int quality,
                                    size_t input_size,
                                    size_t* table_size) {
  // Use smaller hash table when input.size() is smaller, since we
  // fill the table, incurring O(hash table size) overhead for
  // compression, and if the input is short, we won't need that
  // many hash table entries anyway.
  const size_t max_table_size = MaxHashTableSize(quality);
  assert(max_table_size >= 256);
  size_t htsize = HashTableSize(max_table_size, input_size);

  int* table;
  if (htsize <= sizeof(small_table_) / sizeof(small_table_[0])) {
    table = small_table_;
  } else {
    if (large_table_ == NULL) {
      large_table_ = new int[max_table_size];
    }
    table = large_table_;
  }

  *table_size = htsize;
  memset(table, 0, htsize * sizeof(*table));
  return table;
}
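
For intuition, HashTableSize() starts at 256 buckets and doubles until the table covers the input or reaches the cap. A self-contained copy of that loop with worked values (the sketch name is illustrative):

#include <cassert>
#include <cstddef>

// Standalone copy of the doubling logic from HashTableSize() above,
// for illustration only.
static size_t HashTableSizeSketch(size_t max_table_size, size_t input_size) {
  size_t htsize = 256;
  while (htsize < max_table_size && htsize < input_size) {
    htsize <<= 1;
  }
  return htsize;
}

int main() {
  assert(HashTableSizeSketch(1 << 15, 100) == 256);          // floor of 256
  assert(HashTableSizeSketch(1 << 15, 5000) == 8192);        // next power of two
  assert(HashTableSizeSketch(1 << 15, 1 << 20) == 1 << 15);  // capped
  return 0;
}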

void EncodeWindowBits(int lgwin, uint8_t* last_byte, uint8_t* last_byte_bits) {
  if (lgwin == 16) {
    *last_byte = 0;
@ -91,6 +133,52 @@ void EncodeWindowBits(int lgwin, uint8_t* last_byte, uint8_t* last_byte_bits) {
  }
}

// Initializes the command and distance prefix codes for the first block.
void InitCommandPrefixCodes(uint8_t cmd_depths[128],
                            uint16_t cmd_bits[128],
                            uint8_t cmd_code[512],
                            size_t* cmd_code_numbits) {
  static const uint8_t kDefaultCommandDepths[128] = {
    0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
    0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,
    7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6,
    7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,
    4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
  };
  static const uint16_t kDefaultCommandBits[128] = {
    0, 0, 8, 9, 3, 35, 7, 71,
    39, 103, 23, 47, 175, 111, 239, 31,
    0, 0, 0, 4, 12, 2, 10, 6,
    13, 29, 11, 43, 27, 59, 87, 55,
    15, 79, 319, 831, 191, 703, 447, 959,
    0, 14, 1, 25, 5, 21, 19, 51,
    119, 159, 95, 223, 479, 991, 63, 575,
    127, 639, 383, 895, 255, 767, 511, 1023,
    14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12,
    2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,
    767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,
  };
  COPY_ARRAY(cmd_depths, kDefaultCommandDepths);
  COPY_ARRAY(cmd_bits, kDefaultCommandBits);

  // Initialize the pre-compressed form of the command and distance prefix
  // codes.
  static const uint8_t kDefaultCommandCode[] = {
    0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
    0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
    0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa,
    0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,
    0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,
  };
  static const int kDefaultCommandCodeNumBits = 448;
  COPY_ARRAY(cmd_code, kDefaultCommandCode);
  *cmd_code_numbits = kDefaultCommandCodeNumBits;
}

BrotliCompressor::BrotliCompressor(BrotliParams params)
    : params_(params),
      hashers_(new Hashers()),
@ -103,16 +191,23 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
      prev_byte_(0),
      prev_byte2_(0),
      storage_size_(0),
      storage_(0) {
      storage_(0),
      large_table_(NULL),
      command_buf_(NULL),
      literal_buf_(NULL) {
  // Sanitize params.
  params_.quality = std::max(1, params_.quality);
  params_.quality = std::max(0, params_.quality);
  if (params_.lgwin < kMinWindowBits) {
    params_.lgwin = kMinWindowBits;
  } else if (params_.lgwin > kMaxWindowBits) {
    params_.lgwin = kMaxWindowBits;
  }
  if (params_.lgblock == 0) {
    params_.lgblock = params_.quality < kMinQualityForBlockSplit ? 14 : 16;
  if (params_.quality <= 1) {
    params_.lgblock = params_.lgwin;
  } else if (params_.quality < kMinQualityForBlockSplit) {
    params_.lgblock = 14;
  } else if (params_.lgblock == 0) {
    params_.lgblock = 16;
    if (params_.quality >= 9 && params_.lgwin > params_.lgblock) {
      params_.lgblock = std::min(21, params_.lgwin);
    }
@ -147,6 +242,14 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
  // emitting an uncompressed block.
  memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));

  if (params_.quality == 0) {
    InitCommandPrefixCodes(cmd_depths_, cmd_bits_,
                           cmd_code_, &cmd_code_numbits_);
  } else if (params_.quality == 1) {
    command_buf_ = new uint32_t[kCompressFragmentTwoPassBlockSize];
    literal_buf_ = new uint8_t[kCompressFragmentTwoPassBlockSize];
  }

  // Initialize hashers.
  hash_type_ = std::min(9, params_.quality);
  hashers_->Init(hash_type_);
@ -157,6 +260,9 @@ BrotliCompressor::~BrotliCompressor() {
  free(commands_);
  delete ringbuffer_;
  delete hashers_;
  delete[] large_table_;
  delete[] command_buf_;
  delete[] literal_buf_;
}

void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
@ -237,6 +343,37 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
  }
  const uint32_t bytes = static_cast<uint32_t>(delta);

  if (params_.quality <= 1) {
    const size_t max_out_size = 2 * bytes + 500;
    uint8_t* storage = GetBrotliStorage(max_out_size);
    storage[0] = last_byte_;
    size_t storage_ix = last_byte_bits_;
    size_t table_size;
    int* table = GetHashTable(params_.quality, bytes, &table_size);
    if (params_.quality == 0) {
      BrotliCompressFragmentFast(
          &data[WrapPosition(last_processed_pos_) & mask],
          bytes, is_last,
          table, table_size,
          cmd_depths_, cmd_bits_,
          &cmd_code_numbits_, cmd_code_,
          &storage_ix, storage);
    } else {
      BrotliCompressFragmentTwoPass(
          &data[WrapPosition(last_processed_pos_) & mask],
          bytes, is_last,
          command_buf_, literal_buf_,
          table, table_size,
          &storage_ix, storage);
    }
    last_byte_ = storage[storage_ix >> 3];
    last_byte_bits_ = storage_ix & 7u;
    last_processed_pos_ = input_pos_;
    *output = &storage[0];
    *out_size = storage_ix >> 3;
    return true;
  }
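
Note how the fast path above flushes only whole bytes: the trailing partial byte is carried in last_byte_/last_byte_bits_ so the next block continues writing mid-byte and consecutive meta-blocks stitch together at the bit level. A minimal sketch of that carry in isolation (names are illustrative, not from the patch):

#include <cstddef>
#include <cstdint>

// After emitting a block that ends at bit position storage_ix, only the
// whole bytes are written out; the trailing 0-7 bits are saved and become
// the first byte of the next block's storage.
struct BitCarry {
  uint8_t last_byte;
  uint8_t last_byte_bits;
};

static BitCarry SaveCarry(const uint8_t* storage, size_t storage_ix) {
  BitCarry c;
  c.last_byte = storage[storage_ix >> 3];  // partially filled byte
  c.last_byte_bits = storage_ix & 7u;      // number of valid bits in it
  return c;
}

static size_t RestoreCarry(const BitCarry& c, uint8_t* storage) {
  storage[0] = c.last_byte;  // seed the next block's first byte
  return c.last_byte_bits;   // continue writing at this bit offset
}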

  // Theoretical max number of commands is 1 per 2 bytes.
  size_t newsize = num_commands_ + bytes / 2 + 1;
  if (newsize > cmd_alloc_size_) {
@ -439,7 +576,7 @@ void BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
                         num_direct_distance_codes,
                         distance_postfix_bits);
  }
  if (params_.quality == 1) {
  if (params_.quality == 2) {
    StoreMetaBlockFast(data, WrapPosition(last_flush_pos_),
                       bytes, mask, is_last,
                       commands_, num_commands_,
@ -659,9 +796,130 @@ int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out) {
  return BrotliCompressWithCustomDictionary(0, 0, params, in, out);
}

// Reads the provided input in 'block_size' blocks. Only the last read can be
// smaller than 'block_size'.
class BrotliBlockReader {
 public:
  explicit BrotliBlockReader(size_t block_size)
      : block_size_(block_size), buf_(NULL) {}
  ~BrotliBlockReader() { delete[] buf_; }

  const uint8_t* Read(BrotliIn* in, size_t* bytes_read, bool* is_last) {
    *bytes_read = 0;
    const uint8_t* data = BrotliInReadAndCheckEnd(block_size_, in,
                                                  bytes_read, is_last);
    if (data == NULL || *bytes_read == block_size_ || *is_last) {
      // If we could get the whole block in one read, or it is the last block,
      // we just return the pointer to the data without copying.
      return data;
    }
    // If the data comes in smaller chunks, we need to copy it into an internal
    // buffer until we get a whole block or reach the last chunk.
    if (buf_ == NULL) {
      buf_ = new uint8_t[block_size_];
    }
    memcpy(buf_, data, *bytes_read);
    do {
      size_t cur_bytes_read = 0;
      data = BrotliInReadAndCheckEnd(block_size_ - *bytes_read, in,
                                     &cur_bytes_read, is_last);
      if (data == NULL) {
        return *is_last ? buf_ : NULL;
      }
      memcpy(&buf_[*bytes_read], data, cur_bytes_read);
      *bytes_read += cur_bytes_read;
    } while (*bytes_read < block_size_ && !*is_last);
    return buf_;
  }

 private:
  const size_t block_size_;
  uint8_t* buf_;
};

int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
                                       BrotliParams params,
                                       BrotliIn* in, BrotliOut* out) {
  if (params.quality <= 1) {
    const int quality = std::max(0, params.quality);
    const int lgwin = std::min(kMaxWindowBits,
                               std::max(kMinWindowBits, params.lgwin));
    uint8_t* storage = NULL;
    int* table = NULL;
    uint32_t* command_buf = NULL;
    uint8_t* literal_buf = NULL;
    uint8_t cmd_depths[128];
    uint16_t cmd_bits[128];
    uint8_t cmd_code[512];
    size_t cmd_code_numbits;
    if (quality == 0) {
      InitCommandPrefixCodes(cmd_depths, cmd_bits, cmd_code, &cmd_code_numbits);
    }
    uint8_t last_byte;
    uint8_t last_byte_bits;
    EncodeWindowBits(lgwin, &last_byte, &last_byte_bits);
    BrotliBlockReader r(1u << lgwin);
    int ok = 1;
    bool is_last = false;
    while (ok && !is_last) {
      // Read next block of input.
      size_t bytes;
      const uint8_t* data = r.Read(in, &bytes, &is_last);
      if (data == NULL) {
        if (!is_last) {
          ok = 0;
          break;
        }
        assert(bytes == 0);
      }
      // Set up output storage.
      const size_t max_out_size = 2 * bytes + 500;
      if (storage == NULL) {
        storage = new uint8_t[max_out_size];
      }
      storage[0] = last_byte;
      size_t storage_ix = last_byte_bits;
      // Set up hash table.
      size_t htsize = HashTableSize(MaxHashTableSize(quality), bytes);
      if (table == NULL) {
        table = new int[htsize];
      }
      memset(table, 0, htsize * sizeof(table[0]));
      // Set up command and literal buffers for two pass mode.
      if (quality == 1 && command_buf == NULL) {
        size_t buf_size = std::min(bytes, kCompressFragmentTwoPassBlockSize);
        command_buf = new uint32_t[buf_size];
        literal_buf = new uint8_t[buf_size];
      }
      // Do the actual compression.
      if (quality == 0) {
        BrotliCompressFragmentFast(data, bytes, is_last, table, htsize,
                                   cmd_depths, cmd_bits,
                                   &cmd_code_numbits, cmd_code,
                                   &storage_ix, storage);
      } else {
        BrotliCompressFragmentTwoPass(data, bytes, is_last,
                                      command_buf, literal_buf,
                                      table, htsize,
                                      &storage_ix, storage);
      }
      // Save last bytes to stitch it together with the next output block.
      last_byte = storage[storage_ix >> 3];
      last_byte_bits = storage_ix & 7u;
      // Write output block.
      size_t out_bytes = storage_ix >> 3;
      if (out_bytes > 0 && !out->Write(storage, out_bytes)) {
        ok = 0;
        break;
      }
    }
    delete[] storage;
    delete[] table;
    delete[] command_buf;
    delete[] literal_buf;
    return ok;
  }

  size_t in_bytes = 0;
  size_t out_bytes = 0;
  uint8_t* output;
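
To exercise the new fast paths end to end, a caller only has to lower params.quality. A hedged sketch driving the public entry point; BrotliMemIn and BrotliMemOut are the memory adapters declared in enc/streams.h, but their exact constructor signatures are assumed here:

#include <cstddef>
#include <cstdint>
#include "./encode.h"
#include "./streams.h"

// Sketch: run the quality-0 one-pass coder over an in-memory buffer.
// Quality 1 would select the two-pass coder instead.
bool CompressFastSketch(const uint8_t* in_buf, size_t in_size,
                        uint8_t* out_buf, size_t out_capacity) {
  brotli::BrotliParams params;
  params.quality = 0;  // 0 = one-pass fast mode, 1 = two-pass fast mode
  brotli::BrotliMemIn in(in_buf, in_size);        // assumed signature
  brotli::BrotliMemOut out(out_buf, out_capacity);  // assumed signature
  return brotli::BrotliCompress(params, &in, &out) != 0;
}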
26
enc/encode.h
@ -134,6 +134,12 @@ class BrotliCompressor {
 private:
  uint8_t* GetBrotliStorage(size_t size);

  // Allocates and clears a hash table using memory in "*this",
  // stores the number of buckets in "*table_size" and returns a pointer to
  // the base of the hash table.
  int* GetHashTable(int quality,
                    size_t input_size, size_t* table_size);

  void WriteMetaBlockInternal(const bool is_last,
                              size_t* out_size,
                              uint8_t** output);
@ -159,6 +165,26 @@ class BrotliCompressor {
  uint8_t prev_byte2_;
  size_t storage_size_;
  uint8_t* storage_;
  // Hash table for quality 0 mode.
  int small_table_[1 << 10];  // 2KB
  int* large_table_;          // Allocated only when needed
  // Command and distance prefix codes (each 64 symbols, stored back-to-back)
  // used for the next block in quality 0. The command prefix code is over a
  // smaller alphabet with the following 64 symbols:
  //   0 - 15: insert length code 0, copy length code 0 - 15, same distance
  //   16 - 39: insert length code 0, copy length code 0 - 23
  //   40 - 63: insert length code 0 - 23, copy length code 0
  // Note that symbols 16 and 40 represent the same code in the full alphabet,
  // but we do not use either of them in quality 0.
  uint8_t cmd_depths_[128];
  uint16_t cmd_bits_[128];
  // The compressed form of the command and distance prefix codes for the next
  // block in quality 0.
  uint8_t cmd_code_[512];
  size_t cmd_code_numbits_;
  // Command and literal buffers for quality 1.
  uint32_t* command_buf_;
  uint8_t* literal_buf_;
};
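
The comment on cmd_depths_ above fixes the layout of the compact 64-symbol command alphabet. A hedged helper decoding a symbol index back into its (insert code, copy code) pair under that layout (illustrative only; the encoder never materializes this mapping):

#include <cassert>

// Decodes a symbol of the compact 64-symbol command alphabet described
// above into its (insert length code, copy length code) pair.
struct CmdCodePair { int insert_code; int copy_code; bool same_distance; };

static CmdCodePair DecodeCompactCommandSymbol(int symbol) {
  assert(symbol >= 0 && symbol < 64);
  CmdCodePair p;
  if (symbol < 16) {
    // 0 - 15: insert length code 0, copy length code 0 - 15, same distance.
    p.insert_code = 0; p.copy_code = symbol; p.same_distance = true;
  } else if (symbol < 40) {
    // 16 - 39: insert length code 0, copy length code 0 - 23.
    p.insert_code = 0; p.copy_code = symbol - 16; p.same_distance = false;
  } else {
    // 40 - 63: insert length code 0 - 23, copy length code 0.
    p.insert_code = symbol - 40; p.copy_code = 0; p.same_distance = false;
  }
  return p;
}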

// Compresses the data in input_buffer into encoded_buffer, and sets
13
enc/hash.h
@ -144,7 +144,6 @@ class HashLongestMatchQuickly {
      need_init_ = false;
    }
  }

  // Look at 4 bytes at data.
  // Compute a hash from these, and store the value somewhere within
  // [ix .. ix+3].
@ -621,6 +620,7 @@ class HashLongestMatch {

  // Buckets containing kBlockSize of backward references.
  uint32_t buckets_[kBucketSize][kBlockSize];

  // True if num_ array needs to be initialized.
  bool need_init_;

@ -632,9 +632,8 @@ struct Hashers {
  // For kBucketSweep == 1, enabling the dictionary lookup makes compression
  // a little faster (0.5% - 1%) and it compresses 0.15% better on small text
  // and html inputs.
  typedef HashLongestMatchQuickly<16, 1, true> H1;
  typedef HashLongestMatchQuickly<16, 2, false> H2;
  typedef HashLongestMatchQuickly<16, 4, false> H3;
  typedef HashLongestMatchQuickly<16, 1, true> H2;
  typedef HashLongestMatchQuickly<16, 2, false> H3;
  typedef HashLongestMatchQuickly<17, 4, true> H4;
  typedef HashLongestMatch<14, 4, 4> H5;
  typedef HashLongestMatch<14, 5, 4> H6;
@ -642,11 +641,10 @@ struct Hashers {
  typedef HashLongestMatch<15, 7, 10> H8;
  typedef HashLongestMatch<15, 8, 16> H9;

  Hashers() : hash_h1(0), hash_h2(0), hash_h3(0), hash_h4(0), hash_h5(0),
  Hashers() : hash_h2(0), hash_h3(0), hash_h4(0), hash_h5(0),
              hash_h6(0), hash_h7(0), hash_h8(0), hash_h9(0) {}

  ~Hashers() {
    delete hash_h1;
    delete hash_h2;
    delete hash_h3;
    delete hash_h4;
@ -659,7 +657,6 @@ struct Hashers {

  void Init(int type) {
    switch (type) {
      case 1: hash_h1 = new H1; break;
      case 2: hash_h2 = new H2; break;
      case 3: hash_h3 = new H3; break;
      case 4: hash_h4 = new H4; break;
@ -684,7 +681,6 @@ struct Hashers {
  void PrependCustomDictionary(
      int type, const size_t size, const uint8_t* dict) {
    switch (type) {
      case 1: WarmupHash(size, dict, hash_h1); break;
      case 2: WarmupHash(size, dict, hash_h2); break;
      case 3: WarmupHash(size, dict, hash_h3); break;
      case 4: WarmupHash(size, dict, hash_h4); break;
@ -698,7 +694,6 @@ struct Hashers {
  }


  H1* hash_h1;
  H2* hash_h2;
  H3* hash_h3;
  H4* hash_h4;
5
setup.py
@ -124,6 +124,8 @@ brotli = Extension("brotli",
                    "enc/backward_references.cc",
                    "enc/block_splitter.cc",
                    "enc/brotli_bit_stream.cc",
                    "enc/compress_fragment.cc",
                    "enc/compress_fragment_two_pass.cc",
                    "enc/encode.cc",
                    "enc/entropy_encode.cc",
                    "enc/histogram.cc",
@ -146,11 +148,14 @@ brotli = Extension("brotli",
                    "enc/brotli_bit_stream.h",
                    "enc/cluster.h",
                    "enc/command.h",
                    "enc/compress_fragment.h",
                    "enc/compress_fragment_two_pass.h",
                    "enc/context.h",
                    "enc/dictionary.h",
                    "enc/dictionary_hash.h",
                    "enc/encode.h",
                    "enc/entropy_encode.h",
                    "enc/entropy_encode_static.h",
                    "enc/fast_log.h",
                    "enc/find_match_length.h",
                    "enc/hash.h",