mirror of
https://github.com/google/brotli.git
synced 2024-11-21 19:20:09 +00:00
Updates to Brotli compression format, decoder and encoder
This commit contains a batch of changes that were made to the Brotli compression algorithm in the last three weeks. Most important changes: * Added UTF8 context model for good text compression. * Simplified context modeling by having only 4 context modes. * Per-block context mode selection. * Faster backward copying and bit reading functions. * More efficient histogram coding. * Streaming support for the decoder and encoder.
This commit is contained in:
parent
c66e4e3e4f
commit
c6b9c7c5c8
@ -15,6 +15,7 @@
|
||||
// Bit reading helpers
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./bit_reader.h"
|
||||
|
||||
@ -22,99 +23,24 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MAX_NUM_BIT_READ 25
|
||||
|
||||
#define LBITS 64 // Number of bits prefetched.
|
||||
#define WBITS 32 // Minimum number of bytes needed after
|
||||
// BrotliFillBitWindow.
|
||||
#define LOG8_WBITS 4 // Number of bytes needed to store WBITS bits.
|
||||
|
||||
static const uint32_t kBitMask[MAX_NUM_BIT_READ] = {
|
||||
0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767,
|
||||
65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215
|
||||
};
|
||||
|
||||
void BrotliInitBitReader(BrotliBitReader* const br,
|
||||
const uint8_t* const start,
|
||||
size_t length) {
|
||||
int BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input) {
|
||||
size_t i;
|
||||
assert(br != NULL);
|
||||
assert(start != NULL);
|
||||
assert(length < 0xfffffff8u); // can't happen with a RIFF chunk.
|
||||
|
||||
br->buf_ = start;
|
||||
br->len_ = length;
|
||||
br->input_ = input;
|
||||
br->val_ = 0;
|
||||
br->pos_ = 0;
|
||||
br->bit_pos_ = 0;
|
||||
br->end_pos_ = 0;
|
||||
br->eos_ = 0;
|
||||
br->error_ = 0;
|
||||
for (i = 0; i < sizeof(br->val_) && i < br->len_; ++i) {
|
||||
if (!BrotliReadMoreInput(br)) {
|
||||
return 0;
|
||||
}
|
||||
for (i = 0; i < sizeof(br->val_); ++i) {
|
||||
br->val_ |= ((uint64_t)br->buf_[br->pos_]) << (8 * i);
|
||||
++br->pos_;
|
||||
}
|
||||
}
|
||||
|
||||
void BrotliBitReaderSetBuffer(BrotliBitReader* const br,
|
||||
const uint8_t* const buf, size_t len) {
|
||||
assert(br != NULL);
|
||||
assert(buf != NULL);
|
||||
assert(len < 0xfffffff8u); // can't happen with a RIFF chunk.
|
||||
br->eos_ = (br->pos_ >= len);
|
||||
br->buf_ = buf;
|
||||
br->len_ = len;
|
||||
}
|
||||
|
||||
// If not at EOS, reload up to LBITS byte-by-byte
|
||||
static void ShiftBytes(BrotliBitReader* const br) {
|
||||
while (br->bit_pos_ >= 8 && br->pos_ < br->len_) {
|
||||
br->val_ >>= 8;
|
||||
br->val_ |= ((uint64_t)br->buf_[br->pos_]) << (LBITS - 8);
|
||||
++br->pos_;
|
||||
br->bit_pos_ -= 8;
|
||||
}
|
||||
}
|
||||
|
||||
void BrotliFillBitWindow(BrotliBitReader* const br) {
|
||||
if (br->bit_pos_ >= WBITS) {
|
||||
#if (defined(__x86_64__) || defined(_M_X64))
|
||||
if (br->pos_ + sizeof(br->val_) < br->len_) {
|
||||
br->val_ >>= WBITS;
|
||||
br->bit_pos_ -= WBITS;
|
||||
// The expression below needs a little-endian arch to work correctly.
|
||||
// This gives a large speedup for decoding speed.
|
||||
br->val_ |= *(const uint64_t*)(br->buf_ + br->pos_) << (LBITS - WBITS);
|
||||
br->pos_ += LOG8_WBITS;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
ShiftBytes(br); // Slow path.
|
||||
if (br->pos_ == br->len_ && br->bit_pos_ == LBITS) {
|
||||
br->eos_ = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t BrotliReadBits(BrotliBitReader* const br, int n_bits) {
|
||||
assert(n_bits >= 0);
|
||||
// Flag an error if end_of_stream or n_bits is more than allowed limit.
|
||||
if (n_bits == 0 || (!br->eos_ && n_bits < MAX_NUM_BIT_READ)) {
|
||||
const uint32_t val =
|
||||
(uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits];
|
||||
const int new_bits = br->bit_pos_ + n_bits;
|
||||
br->bit_pos_ = new_bits;
|
||||
// If this read is going to cross the read buffer, set the eos flag.
|
||||
if (br->pos_ == br->len_) {
|
||||
if (new_bits >= LBITS) {
|
||||
br->eos_ = 1;
|
||||
}
|
||||
}
|
||||
ShiftBytes(br);
|
||||
return val;
|
||||
} else {
|
||||
br->error_ = 1;
|
||||
return 0;
|
||||
}
|
||||
return (br->end_pos_ > 0);
|
||||
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
|
131
dec/bit_reader.h
131
dec/bit_reader.h
@ -17,34 +17,39 @@
|
||||
#ifndef BROTLI_DEC_BIT_READER_H_
|
||||
#define BROTLI_DEC_BIT_READER_H_
|
||||
|
||||
#include <string.h>
|
||||
#include "./streams.h"
|
||||
#include "./types.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define BROTLI_MAX_NUM_BIT_READ 25
|
||||
#define BROTLI_READ_SIZE 4096
|
||||
#define BROTLI_IBUF_SIZE (2 * BROTLI_READ_SIZE + 32)
|
||||
#define BROTLI_IBUF_MASK (2 * BROTLI_READ_SIZE - 1)
|
||||
|
||||
#define UNALIGNED_COPY64(dst, src) *(uint64_t*)(dst) = *(const uint64_t*)(src)
|
||||
|
||||
static const uint32_t kBitMask[BROTLI_MAX_NUM_BIT_READ] = {
|
||||
0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767,
|
||||
65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
uint64_t val_; // pre-fetched bits
|
||||
const uint8_t* buf_; // input byte buffer
|
||||
size_t len_; // buffer length
|
||||
size_t pos_; // byte position in buf_
|
||||
int bit_pos_; // current bit-reading position in val_
|
||||
int eos_; // bitstream is finished
|
||||
int error_; // an error occurred (buffer overflow attempt...)
|
||||
// Input byte buffer, consists of a ringbuffer and a "slack" region where
|
||||
// bytes from the start of the ringbuffer are copied.
|
||||
uint8_t buf_[BROTLI_IBUF_SIZE];
|
||||
BrotliInput input_; // input callback
|
||||
uint64_t val_; // pre-fetched bits
|
||||
size_t pos_; // byte position in stream
|
||||
int bit_pos_; // current bit-reading position in val_
|
||||
size_t end_pos_; // current end position in stream
|
||||
int eos_; // input stream is finished
|
||||
} BrotliBitReader;
|
||||
|
||||
void BrotliInitBitReader(BrotliBitReader* const br,
|
||||
const uint8_t* const start,
|
||||
size_t length);
|
||||
|
||||
// Sets a new data buffer.
|
||||
void BrotliBitReaderSetBuffer(BrotliBitReader* const br,
|
||||
const uint8_t* const buffer, size_t length);
|
||||
|
||||
// Reads the specified number of bits from Read Buffer.
|
||||
// Flags an error in case end_of_stream or n_bits is more than allowed limit.
|
||||
// Flags eos if this read attempt is going to cross the read buffer.
|
||||
uint32_t BrotliReadBits(BrotliBitReader* const br, int n_bits);
|
||||
int BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input);
|
||||
|
||||
// Return the prefetched bits, so they can be looked up.
|
||||
static BROTLI_INLINE uint32_t BrotliPrefetchBits(BrotliBitReader* const br) {
|
||||
@ -57,8 +62,92 @@ static BROTLI_INLINE void BrotliSetBitPos(BrotliBitReader* const br, int val) {
|
||||
br->bit_pos_ = val;
|
||||
}
|
||||
|
||||
// Advances the Read buffer by 4 bytes to make room for reading next 32 bits.
|
||||
void BrotliFillBitWindow(BrotliBitReader* const br);
|
||||
// Reload up to 64 bits byte-by-byte
|
||||
static BROTLI_INLINE void ShiftBytes(BrotliBitReader* const br) {
|
||||
while (br->bit_pos_ >= 8) {
|
||||
br->val_ >>= 8;
|
||||
br->val_ |= ((uint64_t)br->buf_[br->pos_ & BROTLI_IBUF_MASK]) << 56;
|
||||
++br->pos_;
|
||||
br->bit_pos_ -= 8;
|
||||
}
|
||||
}
|
||||
|
||||
// Fills up the input ringbuffer by calling the input callback.
|
||||
//
|
||||
// Does nothing if there are at least 32 bytes present after current position.
|
||||
//
|
||||
// Returns 0 if either:
|
||||
// - the input callback returned an error, or
|
||||
// - there is no more input and the position is past the end of the stream.
|
||||
//
|
||||
// After encountering the end of the input stream, 32 additional zero bytes are
|
||||
// copied to the ringbuffer, therefore it is safe to call this function after
|
||||
// every 32 bytes of input is read.
|
||||
static BROTLI_INLINE int BrotliReadMoreInput(BrotliBitReader* const br) {
|
||||
if (br->pos_ + 32 < br->end_pos_) {
|
||||
return 1;
|
||||
} else if (br->eos_) {
|
||||
return (br->pos_ << 3) + br->bit_pos_ <= (br->end_pos_ << 3) + 64;
|
||||
} else {
|
||||
uint8_t* dst = br->buf_ + (br->end_pos_ & BROTLI_IBUF_MASK);
|
||||
int bytes_read = BrotliRead(br->input_, dst, BROTLI_READ_SIZE);
|
||||
if (bytes_read < 0) {
|
||||
return 0;
|
||||
}
|
||||
if (bytes_read < BROTLI_READ_SIZE) {
|
||||
br->eos_ = 1;
|
||||
// Store 32 bytes of zero after the stream end.
|
||||
#if (defined(__x86_64__) || defined(_M_X64))
|
||||
*(uint64_t*)(dst + bytes_read) = 0;
|
||||
*(uint64_t*)(dst + bytes_read + 8) = 0;
|
||||
*(uint64_t*)(dst + bytes_read + 16) = 0;
|
||||
*(uint64_t*)(dst + bytes_read + 24) = 0;
|
||||
#else
|
||||
memset(dst + bytes_read, 0, 32);
|
||||
#endif
|
||||
}
|
||||
if (dst == br->buf_) {
|
||||
// Copy the head of the ringbuffer to the slack region.
|
||||
#if (defined(__x86_64__) || defined(_M_X64))
|
||||
UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 32, br->buf_);
|
||||
UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 24, br->buf_ + 8);
|
||||
UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 16, br->buf_ + 16);
|
||||
UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 8, br->buf_ + 24);
|
||||
#else
|
||||
memcpy(br->buf_ + (BROTLI_READ_SIZE << 1), br->buf_, 32);
|
||||
#endif
|
||||
}
|
||||
br->end_pos_ += bytes_read;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Advances the Read buffer by 5 bytes to make room for reading next 24 bits.
|
||||
static BROTLI_INLINE void BrotliFillBitWindow(BrotliBitReader* const br) {
|
||||
if (br->bit_pos_ >= 40) {
|
||||
#if (defined(__x86_64__) || defined(_M_X64))
|
||||
br->val_ >>= 40;
|
||||
br->bit_pos_ -= 40;
|
||||
// The expression below needs a little-endian arch to work correctly.
|
||||
// This gives a large speedup for decoding speed.
|
||||
br->val_ |= *(const uint64_t*)(
|
||||
br->buf_ + (br->pos_ & BROTLI_IBUF_MASK)) << 24;
|
||||
br->pos_ += 5;
|
||||
#else
|
||||
ShiftBytes(br);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Reads the specified number of bits from Read Buffer.
|
||||
// Requires that n_bits is positive.
|
||||
static BROTLI_INLINE uint32_t BrotliReadBits(
|
||||
BrotliBitReader* const br, int n_bits) {
|
||||
BrotliFillBitWindow(br);
|
||||
const uint32_t val = (uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits];
|
||||
br->bit_pos_ += n_bits;
|
||||
return val;
|
||||
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} // extern "C"
|
||||
|
298
dec/context.h
298
dec/context.h
@ -12,34 +12,154 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Lookup tables to map the previous one to three bytes to a context id.
|
||||
// Lookup table to map the previous two bytes to a context id.
|
||||
//
|
||||
// There are four different context modeling modes defined here:
|
||||
// CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
|
||||
// CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
|
||||
// CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
|
||||
// CONTEXT_SIGNED: second-order context model tuned for signed integers.
|
||||
//
|
||||
// The context id for the UTF8 context model is calculated as follows. If p1
|
||||
// and p2 are the previous two bytes, we calculate the context as
|
||||
//
|
||||
// context = kContextLookup[p1] | kContextLookup[p2 + 256].
|
||||
//
|
||||
// If the previous two bytes are ASCII characters (i.e. < 128), this will be
|
||||
// equivalent to
|
||||
//
|
||||
// context = 4 * context1(p1) + context2(p2),
|
||||
//
|
||||
// where context1 is based on the previous byte in the following way:
|
||||
//
|
||||
// 0 : non-ASCII control
|
||||
// 1 : \t, \n, \r
|
||||
// 2 : space
|
||||
// 3 : other punctuation
|
||||
// 4 : " '
|
||||
// 5 : %
|
||||
// 6 : ( < [ {
|
||||
// 7 : ) > ] }
|
||||
// 8 : , ; :
|
||||
// 9 : .
|
||||
// 10 : =
|
||||
// 11 : number
|
||||
// 12 : upper-case vowel
|
||||
// 13 : upper-case consonant
|
||||
// 14 : lower-case vowel
|
||||
// 15 : lower-case consonant
|
||||
//
|
||||
// and context2 is based on the second last byte:
|
||||
//
|
||||
// 0 : control, space
|
||||
// 1 : punctuation
|
||||
// 2 : upper-case letter, number
|
||||
// 3 : lower-case letter
|
||||
//
|
||||
// If the last byte is ASCII, and the second last byte is not (in a valid UTF8
|
||||
// stream it will be a continuation byte, value between 128 and 191), the
|
||||
// context is the same as if the second last byte was an ASCII control or space.
|
||||
//
|
||||
// If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
|
||||
// be a continuation byte and the context id is 2 or 3 depending on the LSB of
|
||||
// the last byte and to a lesser extent on the second last byte if it is ASCII.
|
||||
//
|
||||
// If the last byte is a UTF8 continuation byte, the second last byte can be:
|
||||
// - continuation byte: the next byte is probably ASCII or lead byte (assuming
|
||||
// 4-byte UTF8 characters are rare) and the context id is 0 or 1.
|
||||
// - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
|
||||
// - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
|
||||
//
|
||||
// The possible value combinations of the previous two bytes, the range of
|
||||
// context ids and the type of the next byte is summarized in the table below:
|
||||
//
|
||||
// |--------\-----------------------------------------------------------------|
|
||||
// | \ Last byte |
|
||||
// | Second \---------------------------------------------------------------|
|
||||
// | last byte \ ASCII | cont. byte | lead byte |
|
||||
// | \ (0-127) | (128-191) | (192-) |
|
||||
// |=============|===================|=====================|==================|
|
||||
// | ASCII | next: ASCII/lead | not valid | next: cont. |
|
||||
// | (0-127) | context: 4 - 63 | | context: 2 - 3 |
|
||||
// |-------------|-------------------|---------------------|------------------|
|
||||
// | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
|
||||
// | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
|
||||
// |-------------|-------------------|---------------------|------------------|
|
||||
// | lead byte | not valid | next: ASCII/lead | not valid |
|
||||
// | (192-207) | | context: 0 - 1 | |
|
||||
// |-------------|-------------------|---------------------|------------------|
|
||||
// | lead byte | not valid | next: cont. | not valid |
|
||||
// | (208-) | | context: 2 - 3 | |
|
||||
// |-------------|-------------------|---------------------|------------------|
|
||||
//
|
||||
// The context id for the signed context mode is calculated as:
|
||||
//
|
||||
// context = (kContextLookup[512 + p1] << 3) | kContextLookup[512 + p2].
|
||||
//
|
||||
// For any context modeling mode, the context ids can be calculated by |-ing
|
||||
// together two lookups from one table using context model dependent offsets:
|
||||
//
|
||||
// context = kContextLookup[offset1 + p1] | kContextLookup[offset2 + p2].
|
||||
//
|
||||
// where offset1 and offset2 are dependent on the context mode.
|
||||
|
||||
#ifndef BROTLI_DEC_CONTEXT_H_
|
||||
#define BROTLI_DEC_CONTEXT_H_
|
||||
|
||||
|
||||
#include "./types.h"
|
||||
|
||||
static const int kSigned2BitContextLookup[] = {
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
|
||||
enum ContextType {
|
||||
CONTEXT_LSB6 = 0,
|
||||
CONTEXT_MSB6 = 1,
|
||||
CONTEXT_UTF8 = 2,
|
||||
CONTEXT_SIGNED = 3
|
||||
};
|
||||
|
||||
static const int kSigned3BitContextLookup[] = {
|
||||
// Common context lookup table for all context modes.
|
||||
static const uint8_t kContextLookup[1792] = {
|
||||
// CONTEXT_UTF8, last byte.
|
||||
//
|
||||
// ASCII range.
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
|
||||
44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
|
||||
12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
|
||||
52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
|
||||
12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
|
||||
60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
|
||||
// UTF8 continuation byte range.
|
||||
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
||||
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
||||
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
||||
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
||||
// UTF8 lead byte range.
|
||||
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
||||
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
||||
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
||||
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
||||
// CONTEXT_UTF8 second last byte.
|
||||
//
|
||||
// ASCII range.
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
|
||||
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
|
||||
1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
|
||||
// UTF8 continuation byte range.
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
// UTF8 lead byte range.
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
// CONTEXT_SIGNED, second last byte.
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
@ -56,69 +176,85 @@ static const int kSigned3BitContextLookup[] = {
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
|
||||
// CONTEXT_SIGNED, last byte, same as the above values shifted by 3 bits.
|
||||
0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
||||
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
||||
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
|
||||
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
|
||||
40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
|
||||
48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56,
|
||||
// CONTEXT_LSB6, last byte.
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
||||
// CONTEXT_MSB6, last byte.
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
|
||||
4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
|
||||
8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11,
|
||||
12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
|
||||
16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19,
|
||||
20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
|
||||
24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27,
|
||||
28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31,
|
||||
32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35,
|
||||
36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39,
|
||||
40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43,
|
||||
44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47,
|
||||
48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51,
|
||||
52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55,
|
||||
56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59,
|
||||
60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
|
||||
// CONTEXT_{M,L}SB6, second last byte,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
static const int kSigned4BitContextLookup[] = {
|
||||
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 14, 15,
|
||||
static const int kContextLookupOffsets[8] = {
|
||||
// CONTEXT_LSB6
|
||||
1024, 1536,
|
||||
// CONTEXT_MSB6
|
||||
1280, 1536,
|
||||
// CONTEXT_UTF8
|
||||
0, 256,
|
||||
// CONTEXT_SIGNED
|
||||
768, 512,
|
||||
};
|
||||
|
||||
enum ContextType {
|
||||
CONTEXT_FULL = 0,
|
||||
CONTEXT_MSB7 = 1,
|
||||
CONTEXT_MSB6 = 2,
|
||||
CONTEXT_MSB5 = 3,
|
||||
CONTEXT_MSB4 = 4,
|
||||
CONTEXT_MSB3 = 5,
|
||||
CONTEXT_MSB2 = 6,
|
||||
CONTEXT_MSB1 = 7,
|
||||
CONTEXT_IS_ZERO = 8,
|
||||
CONTEXT_SIGNED_2BIT = 9,
|
||||
CONTEXT_SIGNED_3BIT = 10,
|
||||
CONTEXT_SIGNED_4BIT = 11,
|
||||
CONTEXT_SIGNED_MIXED_3BYTE = 12
|
||||
};
|
||||
|
||||
static const int kContextSize[] = {
|
||||
256, 128, 64, 32, 16, 8, 4, 2, 2, 4, 8, 16, 64,
|
||||
};
|
||||
|
||||
static BROTLI_INLINE int NumContexts(int mode) {
|
||||
return kContextSize[mode];
|
||||
}
|
||||
|
||||
static BROTLI_INLINE uint8_t Context(uint8_t prev_byte, uint8_t prev_byte2,
|
||||
uint8_t prev_byte3, int mode) {
|
||||
switch (mode) {
|
||||
case CONTEXT_IS_ZERO:
|
||||
return prev_byte == 0 ? 0 : 1;
|
||||
case CONTEXT_SIGNED_2BIT:
|
||||
return kSigned2BitContextLookup[prev_byte];
|
||||
case CONTEXT_SIGNED_3BIT:
|
||||
return kSigned3BitContextLookup[prev_byte];
|
||||
case CONTEXT_SIGNED_4BIT:
|
||||
return kSigned4BitContextLookup[prev_byte];
|
||||
case CONTEXT_SIGNED_MIXED_3BYTE:
|
||||
return ((kSigned3BitContextLookup[prev_byte] << 3) +
|
||||
(kSigned2BitContextLookup[prev_byte2] << 1) +
|
||||
(prev_byte3 == 0 ? 0 : 1));
|
||||
default:
|
||||
return prev_byte >> mode;
|
||||
}
|
||||
}
|
||||
|
||||
#endif // BROTLI_DEC_CONTEXT_H_
|
||||
|
721
dec/decode.c
721
dec/decode.c
File diff suppressed because it is too large
Load Diff
@ -17,6 +17,7 @@
|
||||
#ifndef BROTLI_DEC_DECODE_H_
|
||||
#define BROTLI_DEC_DECODE_H_
|
||||
|
||||
#include "./streams.h"
|
||||
#include "./types.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
@ -39,6 +40,10 @@ int BrotliDecompressBuffer(size_t encoded_size,
|
||||
size_t* decoded_size,
|
||||
uint8_t* decoded_buffer);
|
||||
|
||||
// Same as above, but uses the specified input and output callbacks instead of
|
||||
// reading from and writing to pre-allocated memory buffers.
|
||||
int BrotliDecompress(BrotliInput input, BrotliOutput output);
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
@ -24,10 +24,6 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Uncomment the following to use look-up table for ReverseBits()
|
||||
// (might be faster on some platform)
|
||||
// #define USE_LUT_REVERSE_BITS
|
||||
|
||||
#define NON_EXISTENT_SYMBOL (-1)
|
||||
#define MAX_ALLOWED_CODE_LENGTH 15
|
||||
|
||||
@ -55,7 +51,6 @@ static void AssignChildren(HuffmanTree* const tree,
|
||||
|
||||
static int TreeInit(HuffmanTree* const tree, int num_leaves) {
|
||||
assert(tree != NULL);
|
||||
tree->fixed_bit_length_ = 0;
|
||||
if (num_leaves == 0) return 0;
|
||||
// We allocate maximum possible nodes in the tree at once.
|
||||
// Note that a Huffman tree is a full binary tree; and in a full binary tree
|
||||
@ -84,7 +79,7 @@ void BrotliHuffmanTreeRelease(HuffmanTree* const tree) {
|
||||
// Utility: converts Huffman code lengths to corresponding Huffman codes.
|
||||
// 'huff_codes' should be pre-allocated.
|
||||
// Returns false in case of error (memory allocation, invalid codes).
|
||||
static int HuffmanCodeLengthsToCodes(const int* const code_lengths,
|
||||
static int HuffmanCodeLengthsToCodes(const uint8_t* const code_lengths,
|
||||
int code_lengths_size,
|
||||
int* const huff_codes) {
|
||||
int symbol;
|
||||
@ -133,35 +128,21 @@ static int HuffmanCodeLengthsToCodes(const int* const code_lengths,
|
||||
return 1;
|
||||
}
|
||||
|
||||
#ifndef USE_LUT_REVERSE_BITS
|
||||
|
||||
static int ReverseBitsShort(int bits, int num_bits) {
|
||||
int retval = 0;
|
||||
int i;
|
||||
assert(num_bits <= 8); // Not a hard requirement, just for coherency.
|
||||
for (i = 0; i < num_bits; ++i) {
|
||||
retval <<= 1;
|
||||
retval |= bits & 1;
|
||||
bits >>= 1;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static const uint8_t kReversedBits[16] = { // Pre-reversed 4-bit values.
|
||||
0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
|
||||
0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
|
||||
static const uint8_t kReverse7[128] = {
|
||||
0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120,
|
||||
4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124,
|
||||
2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122,
|
||||
6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126,
|
||||
1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121,
|
||||
5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125,
|
||||
3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123,
|
||||
7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127
|
||||
};
|
||||
|
||||
static int ReverseBitsShort(int bits, int num_bits) {
|
||||
const uint8_t v = (kReversedBits[bits & 0xf] << 4) | kReversedBits[bits >> 4];
|
||||
assert(num_bits <= 8);
|
||||
return v >> (8 - num_bits);
|
||||
return kReverse7[bits] >> (7 - num_bits);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static int TreeAddSymbol(HuffmanTree* const tree,
|
||||
int symbol, int code, int code_length) {
|
||||
int step = HUFF_LUT_BITS;
|
||||
@ -170,13 +151,14 @@ static int TreeAddSymbol(HuffmanTree* const tree,
|
||||
const HuffmanTreeNode* const max_node = tree->root_ + tree->max_nodes_;
|
||||
assert(symbol == (int16_t)symbol);
|
||||
if (code_length <= HUFF_LUT_BITS) {
|
||||
int i;
|
||||
int i = 1 << (HUFF_LUT_BITS - code_length);
|
||||
base_code = ReverseBitsShort(code, code_length);
|
||||
for (i = 0; i < (1 << (HUFF_LUT_BITS - code_length)); ++i) {
|
||||
do {
|
||||
--i;
|
||||
const int idx = base_code | (i << code_length);
|
||||
tree->lut_symbol_[idx] = (int16_t)symbol;
|
||||
tree->lut_bits_[idx] = code_length;
|
||||
}
|
||||
} while (i > 0);
|
||||
} else {
|
||||
base_code = ReverseBitsShort((code >> (code_length - HUFF_LUT_BITS)),
|
||||
HUFF_LUT_BITS);
|
||||
@ -206,7 +188,7 @@ static int TreeAddSymbol(HuffmanTree* const tree,
|
||||
}
|
||||
|
||||
int BrotliHuffmanTreeBuildImplicit(HuffmanTree* const tree,
|
||||
const int* const code_lengths,
|
||||
const uint8_t* const code_lengths,
|
||||
int code_lengths_size) {
|
||||
int symbol;
|
||||
int num_symbols = 0;
|
||||
@ -264,41 +246,6 @@ int BrotliHuffmanTreeBuildImplicit(HuffmanTree* const tree,
|
||||
}
|
||||
}
|
||||
|
||||
int BrotliHuffmanTreeBuildExplicit(HuffmanTree* const tree,
|
||||
const int* const code_lengths,
|
||||
const int* const codes,
|
||||
const int* const symbols,
|
||||
int max_symbol,
|
||||
int num_symbols) {
|
||||
int ok = 0;
|
||||
int i;
|
||||
|
||||
assert(tree != NULL);
|
||||
assert(code_lengths != NULL);
|
||||
assert(codes != NULL);
|
||||
assert(symbols != NULL);
|
||||
|
||||
// Initialize the tree. Will fail if num_symbols = 0.
|
||||
if (!TreeInit(tree, num_symbols)) return 0;
|
||||
|
||||
// Add symbols one-by-one.
|
||||
for (i = 0; i < num_symbols; ++i) {
|
||||
if (codes[i] != NON_EXISTENT_SYMBOL) {
|
||||
if (symbols[i] < 0 || symbols[i] >= max_symbol) {
|
||||
goto End;
|
||||
}
|
||||
if (!TreeAddSymbol(tree, symbols[i], codes[i], code_lengths[i])) {
|
||||
goto End;
|
||||
}
|
||||
}
|
||||
}
|
||||
ok = 1;
|
||||
End:
|
||||
ok = ok && IsFull(tree);
|
||||
if (!ok) BrotliHuffmanTreeRelease(tree);
|
||||
return ok;
|
||||
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
@ -43,7 +43,6 @@ struct HuffmanTree {
|
||||
HuffmanTreeNode* root_; // all the nodes, starting at root.
|
||||
int max_nodes_; // max number of nodes
|
||||
int num_nodes_; // number of currently occupied nodes
|
||||
int fixed_bit_length_; // If non-zero, uses fixed length coding
|
||||
};
|
||||
|
||||
// Returns true if the given node is not a leaf of the Huffman tree.
|
||||
@ -65,19 +64,9 @@ void BrotliHuffmanTreeRelease(HuffmanTree* const tree);
|
||||
// Builds Huffman tree assuming code lengths are implicitly in symbol order.
|
||||
// Returns false in case of error (invalid tree or memory error).
|
||||
int BrotliHuffmanTreeBuildImplicit(HuffmanTree* const tree,
|
||||
const int* const code_lengths,
|
||||
const uint8_t* const code_lengths,
|
||||
int code_lengths_size);
|
||||
|
||||
// Build a Huffman tree with explicitly given lists of code lengths, codes
|
||||
// and symbols. Verifies that all symbols added are smaller than max_symbol.
|
||||
// Returns false in case of an invalid symbol, invalid tree or memory error.
|
||||
int BrotliHuffmanTreeBuildExplicit(HuffmanTree* const tree,
|
||||
const int* const code_lengths,
|
||||
const int* const codes,
|
||||
const int* const symbols,
|
||||
int max_symbol,
|
||||
int num_symbols);
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
106
dec/streams.c
Normal file
106
dec/streams.c
Normal file
@ -0,0 +1,106 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Functions for streaming input and output.
|
||||
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include "./streams.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int BrotliMemInputFunction(void* data, uint8_t* buf, size_t count) {
|
||||
BrotliMemInput* input = (BrotliMemInput*)data;
|
||||
if (input->pos > input->length) {
|
||||
return -1;
|
||||
}
|
||||
if (input->pos + count > input->length) {
|
||||
count = input->length - input->pos;
|
||||
}
|
||||
memcpy(buf, input->buffer + input->pos, count);
|
||||
input->pos += count;
|
||||
return count;
|
||||
}
|
||||
|
||||
BrotliInput BrotliInitMemInput(const uint8_t* buffer, size_t length,
|
||||
BrotliMemInput* mem_input) {
|
||||
mem_input->buffer = buffer;
|
||||
mem_input->length = length;
|
||||
mem_input->pos = 0;
|
||||
BrotliInput input;
|
||||
input.cb_ = &BrotliMemInputFunction;
|
||||
input.data_ = mem_input;
|
||||
return input;
|
||||
}
|
||||
|
||||
int BrotliMemOutputFunction(void* data, const uint8_t* buf, size_t count) {
|
||||
BrotliMemOutput* output = (BrotliMemOutput*)data;
|
||||
if (output->pos + count > output->length) {
|
||||
return -1;
|
||||
}
|
||||
memcpy(output->buffer + output->pos, buf, count);
|
||||
output->pos += count;
|
||||
return count;
|
||||
}
|
||||
|
||||
BrotliOutput BrotliInitMemOutput(uint8_t* buffer, size_t length,
|
||||
BrotliMemOutput* mem_output) {
|
||||
mem_output->buffer = buffer;
|
||||
mem_output->length = length;
|
||||
mem_output->pos = 0;
|
||||
BrotliOutput output;
|
||||
output.cb_ = &BrotliMemOutputFunction;
|
||||
output.data_ = mem_output;
|
||||
return output;
|
||||
}
|
||||
|
||||
// Input callback that reads up to count bytes from standard input into buf
// and returns the result of read(). The data argument is not used.
int BrotliStdinInputFunction(void* data, uint8_t* buf, size_t count) {
  const ssize_t bytes_read = read(STDIN_FILENO, buf, count);
  (void)data;
  return (int)bytes_read;
}
|
||||
|
||||
BrotliInput BrotliStdinInput() {
|
||||
BrotliInput in;
|
||||
in.cb_ = BrotliStdinInputFunction;
|
||||
in.data_ = NULL;
|
||||
return in;
|
||||
}
|
||||
|
||||
// Output callback that writes count bytes from buf to standard output and
// returns the result of write(). The data argument is not used.
int BrotliStdoutOutputFunction(void* data, const uint8_t* buf, size_t count) {
  const ssize_t bytes_written = write(STDOUT_FILENO, buf, count);
  (void)data;
  return (int)bytes_written;
}
|
||||
|
||||
BrotliOutput BrotliStdoutOutput() {
|
||||
BrotliOutput out;
|
||||
out.cb_ = BrotliStdoutOutputFunction;
|
||||
out.data_ = NULL;
|
||||
return out;
|
||||
}
|
||||
|
||||
// Output callback that writes count bytes from buf to the FILE* passed as
// data. Returns the number of bytes written, or -1 on error.
int BrotliFileOutputFunction(void* data, const uint8_t* buf, size_t count) {
  const size_t written = fwrite(buf, 1, count, (FILE*)data);
  // fwrite never returns a negative value: it signals failure by returning
  // fewer items than requested. Map a short write to -1 so callers that test
  // for a negative result, as the callback contract describes, see the error.
  if (written != count) {
    return -1;
  }
  return (int)written;
}
|
||||
|
||||
BrotliOutput BrotliFileOutput(FILE* f) {
|
||||
BrotliOutput out;
|
||||
out.cb_ = BrotliFileOutputFunction;
|
||||
out.data_ = f;
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} // extern "C"
|
||||
#endif
|
102
dec/streams.h
Normal file
102
dec/streams.h
Normal file
@ -0,0 +1,102 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Functions for streaming input and output.
|
||||
|
||||
#ifndef BROTLI_DEC_STREAMS_H_
|
||||
#define BROTLI_DEC_STREAMS_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include "./types.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Function pointer type used to read len bytes into buf. Returns the
|
||||
// number of bytes read or -1 on error.
|
||||
typedef int (*BrotliInputFunction)(void* data, uint8_t* buf, size_t len);
|
||||
|
||||
// Input callback function with associated data.
|
||||
typedef struct {
|
||||
BrotliInputFunction cb_;
|
||||
void* data_;
|
||||
} BrotliInput;
|
||||
|
||||
// Reads len bytes into buf, using the in callback.
|
||||
static BROTLI_INLINE int BrotliRead(BrotliInput in, uint8_t* buf, size_t len) {
  // Forward the request to the callback together with its user data.
  const BrotliInputFunction read_cb = in.cb_;
  return read_cb(in.data_, buf, len);
}
|
||||
|
||||
// Function pointer type used to write len bytes from buf. Returns the
|
||||
// number of bytes written or -1 on error.
|
||||
typedef int (*BrotliOutputFunction)(void* data, const uint8_t* buf, size_t len);
|
||||
|
||||
// Output callback function with associated data.
|
||||
typedef struct {
|
||||
BrotliOutputFunction cb_;
|
||||
void* data_;
|
||||
} BrotliOutput;
|
||||
|
||||
// Writes len bytes from buf, using the out callback.
|
||||
static BROTLI_INLINE int BrotliWrite(BrotliOutput out,
                                     const uint8_t* buf, size_t len) {
  // Forward the request to the callback together with its user data.
  const BrotliOutputFunction write_cb = out.cb_;
  return write_cb(out.data_, buf, len);
}
|
||||
|
||||
// Memory region with position.
|
||||
typedef struct {
|
||||
const uint8_t* buffer;
|
||||
size_t length;
|
||||
size_t pos;
|
||||
} BrotliMemInput;
|
||||
|
||||
// Input callback where *data is a BrotliMemInput struct.
|
||||
int BrotliMemInputFunction(void* data, uint8_t* buf, size_t count);
|
||||
|
||||
// Returns an input callback that wraps the given memory region.
|
||||
BrotliInput BrotliInitMemInput(const uint8_t* buffer, size_t length,
|
||||
BrotliMemInput* mem_input);
|
||||
|
||||
// Output buffer with position.
|
||||
typedef struct {
|
||||
uint8_t* buffer;
|
||||
size_t length;
|
||||
size_t pos;
|
||||
} BrotliMemOutput;
|
||||
|
||||
// Output callback where *data is a BrotliMemOutput struct.
|
||||
int BrotliMemOutputFunction(void* data, const uint8_t* buf, size_t count);
|
||||
|
||||
// Returns an output callback that wraps the given memory region.
|
||||
BrotliOutput BrotliInitMemOutput(uint8_t* buffer, size_t length,
|
||||
BrotliMemOutput* mem_output);
|
||||
|
||||
// Input callback that reads from standard input.
|
||||
int BrotliStdinInputFunction(void* data, uint8_t* buf, size_t count);
|
||||
BrotliInput BrotliStdinInput();
|
||||
|
||||
// Output callback that writes to standard output.
|
||||
int BrotliStdoutOutputFunction(void* data, const uint8_t* buf, size_t count);
|
||||
BrotliOutput BrotliStdoutOutput();
|
||||
|
||||
// Output callback that writes to a file.
|
||||
int BrotliFileOutputFunction(void* data, const uint8_t* buf, size_t count);
|
||||
BrotliOutput BrotliFileOutput(FILE* f);
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // BROTLI_DEC_STREAMS_H_
|
@ -20,60 +20,64 @@
|
||||
#include <vector>
|
||||
|
||||
#include "./command.h"
|
||||
#include "./hash.h"
|
||||
#include "./literal_cost.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
void CreateBackwardReferences(const uint8_t* data,
|
||||
int length,
|
||||
void CreateBackwardReferences(size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
const float* literal_cost,
|
||||
size_t ringbuffer_mask,
|
||||
const size_t max_backward_limit,
|
||||
Hasher* hasher,
|
||||
std::vector<Command>* commands) {
|
||||
HashLongestMatch<13,11> *hasher = new HashLongestMatch<13,11>;
|
||||
float *literal_cost = new float[length];
|
||||
EstimateBitCostsForLiterals(length, data, literal_cost);
|
||||
hasher->SetLiteralCost(literal_cost);
|
||||
|
||||
// Length heuristic that seems to help probably by better selection
|
||||
// of lazy matches of similar lengths.
|
||||
int insert_length = 0;
|
||||
size_t i = 0;
|
||||
size_t i = position & ringbuffer_mask;
|
||||
const int i_diff = position - i;
|
||||
const size_t i_end = i + num_bytes;
|
||||
|
||||
double average_cost = 0.0;
|
||||
for (int i = 0; i < length; ++i) {
|
||||
average_cost += literal_cost[i];
|
||||
for (int k = position; k < position + num_bytes; ++k) {
|
||||
average_cost += literal_cost[k & ringbuffer_mask];
|
||||
}
|
||||
average_cost /= length;
|
||||
average_cost /= num_bytes;
|
||||
hasher->set_average_cost(average_cost);
|
||||
|
||||
while (i + 2 < length) {
|
||||
while (i + 2 < i_end) {
|
||||
size_t best_len = 0;
|
||||
size_t best_dist = 0;
|
||||
double best_score = 0;
|
||||
const size_t max_distance = std::min(i, 1UL << 24);
|
||||
const size_t max_distance = std::min(i + i_diff, max_backward_limit);
|
||||
hasher->set_insert_length(insert_length);
|
||||
bool match_found = hasher->FindLongestMatch(
|
||||
data, i, length - i, max_distance,
|
||||
ringbuffer, literal_cost, ringbuffer_mask,
|
||||
i + i_diff, i_end - i, max_distance,
|
||||
&best_len, &best_dist, &best_score);
|
||||
if (match_found) {
|
||||
// Found a match. Let's look for something even better ahead.
|
||||
int delayed_backward_references_in_row = 0;
|
||||
while (i + 4 < length &&
|
||||
while (i + 4 < i_end &&
|
||||
delayed_backward_references_in_row < 4) {
|
||||
size_t best_len_2 = 0;
|
||||
size_t best_dist_2 = 0;
|
||||
double best_score_2 = 0;
|
||||
hasher->Store(data + i, i);
|
||||
hasher->Store(ringbuffer + i, i + i_diff);
|
||||
match_found = hasher->FindLongestMatch(
|
||||
data, i + 1, length - i - 1, max_distance,
|
||||
ringbuffer, literal_cost, ringbuffer_mask,
|
||||
i + i_diff + 1, i_end - i - 1, max_distance,
|
||||
&best_len_2, &best_dist_2, &best_score_2);
|
||||
double cost_diff_lazy = 0;
|
||||
if (best_len >= 4) {
|
||||
cost_diff_lazy += hasher->literal_cost(i + 4) - average_cost;
|
||||
cost_diff_lazy +=
|
||||
literal_cost[(i + 4) & ringbuffer_mask] - average_cost;
|
||||
}
|
||||
{
|
||||
const int tail_length = best_len_2 - best_len + 1;
|
||||
for (int k = 0; k < tail_length; ++k) {
|
||||
cost_diff_lazy -= hasher->literal_cost(i + best_len + k) -
|
||||
cost_diff_lazy -=
|
||||
literal_cost[(i + best_len + k) & ringbuffer_mask] -
|
||||
average_cost;
|
||||
}
|
||||
}
|
||||
@ -84,7 +88,7 @@ void CreateBackwardReferences(const uint8_t* data,
|
||||
}
|
||||
// Add bias to slightly avoid lazy matching.
|
||||
cost_diff_lazy += 2.0 + delayed_backward_references_in_row * 0.2;
|
||||
cost_diff_lazy += 0.04 * hasher->literal_cost(i);
|
||||
cost_diff_lazy += 0.04 * literal_cost[i & ringbuffer_mask];
|
||||
|
||||
if (match_found && best_score_2 >= best_score + cost_diff_lazy) {
|
||||
// Ok, let's just write one byte for now and start a match from the
|
||||
@ -109,18 +113,18 @@ void CreateBackwardReferences(const uint8_t* data,
|
||||
insert_length = 0;
|
||||
++i;
|
||||
for (int j = 1; j < best_len; ++j) {
|
||||
if (i + 2 < length) {
|
||||
hasher->Store(data + i, i);
|
||||
if (i + 2 < i_end) {
|
||||
hasher->Store(ringbuffer + i, i + i_diff);
|
||||
}
|
||||
++i;
|
||||
}
|
||||
} else {
|
||||
++insert_length;
|
||||
hasher->Store(data + i, i);
|
||||
hasher->Store(ringbuffer + i, i + i_diff);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
insert_length += (length - i);
|
||||
insert_length += (i_end - i);
|
||||
|
||||
if (insert_length > 0) {
|
||||
Command cmd;
|
||||
@ -129,9 +133,6 @@ void CreateBackwardReferences(const uint8_t* data,
|
||||
cmd.copy_distance_ = 0;
|
||||
commands->push_back(cmd);
|
||||
}
|
||||
|
||||
delete[] literal_cost;
|
||||
delete hasher;
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
|
@ -20,12 +20,18 @@
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
|
||||
#include "./hash.h"
|
||||
#include "./command.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
void CreateBackwardReferences(const uint8_t* data,
|
||||
int length,
|
||||
void CreateBackwardReferences(size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
const float* literal_cost,
|
||||
size_t ringbuffer_mask,
|
||||
const size_t max_backward_limit,
|
||||
Hasher* hasher,
|
||||
std::vector<Command>* commands);
|
||||
|
||||
} // namespace brotli
|
||||
|
@ -122,26 +122,31 @@ static inline int HuffmanBitCost(const uint8_t* depth, int length) {
|
||||
template<int kSize>
|
||||
double PopulationCost(const Histogram<kSize>& histogram) {
|
||||
if (histogram.total_count_ == 0) {
|
||||
return 4;
|
||||
return 11;
|
||||
}
|
||||
int symbols[2] = { 0 };
|
||||
int count = 0;
|
||||
for (int i = 0; i < kSize && count < 3; ++i) {
|
||||
for (int i = 0; i < kSize && count < 5; ++i) {
|
||||
if (histogram.data_[i] > 0) {
|
||||
if (count < 2) symbols[count] = i;
|
||||
++count;
|
||||
}
|
||||
}
|
||||
if (count <= 2 && symbols[0] < 256 && symbols[1] < 256) {
|
||||
return ((symbols[0] <= 1 ? 4 : 11) +
|
||||
(count == 2 ? 8 + histogram.total_count_ : 0));
|
||||
if (count == 1) {
|
||||
return 11;
|
||||
}
|
||||
if (count == 2) {
|
||||
return 19 + histogram.total_count_;
|
||||
}
|
||||
uint8_t depth[kSize] = { 0 };
|
||||
CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth);
|
||||
int bits = HuffmanBitCost(depth, kSize);
|
||||
int bits = 0;
|
||||
for (int i = 0; i < kSize; ++i) {
|
||||
bits += histogram.data_[i] * depth[i];
|
||||
}
|
||||
if (count == 3) {
|
||||
bits += 27;
|
||||
} else {
|
||||
bits += HuffmanBitCost(depth, kSize);
|
||||
}
|
||||
return bits;
|
||||
}
|
||||
|
||||
|
199
enc/context.h
199
enc/context.h
@ -21,25 +21,124 @@
|
||||
|
||||
namespace brotli {
|
||||
|
||||
static const int kSigned2BitContextLookup[] = {
|
||||
// Second-order context lookup table for UTF8 byte streams.
|
||||
//
|
||||
// If p1 and p2 are the previous two bytes, we calculate the context as
|
||||
//
|
||||
// context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
|
||||
//
|
||||
// If the previous two bytes are ASCII characters (i.e. < 128), this will be
|
||||
// equivalent to
|
||||
//
|
||||
// context = 4 * context1(p1) + context2(p2),
|
||||
//
|
||||
// where context1 is based on the previous byte in the following way:
|
||||
//
|
||||
// 0 : non-ASCII control
|
||||
// 1 : \t, \n, \r
|
||||
// 2 : space
|
||||
// 3 : other punctuation
|
||||
// 4 : " '
|
||||
// 5 : %
|
||||
// 6 : ( < [ {
|
||||
// 7 : ) > ] }
|
||||
// 8 : , ; :
|
||||
// 9 : .
|
||||
// 10 : =
|
||||
// 11 : number
|
||||
// 12 : upper-case vowel
|
||||
// 13 : upper-case consonant
|
||||
// 14 : lower-case vowel
|
||||
// 15 : lower-case consonant
|
||||
//
|
||||
// and context2 is based on the second last byte:
|
||||
//
|
||||
// 0 : control, space
|
||||
// 1 : punctuation
|
||||
// 2 : upper-case letter, number
|
||||
// 3 : lower-case letter
|
||||
//
|
||||
// If the last byte is ASCII, and the second last byte is not (in a valid UTF8
|
||||
// stream it will be a continuation byte, value between 128 and 191), the
|
||||
// context is the same as if the second last byte was an ASCII control or space.
|
||||
//
|
||||
// If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
|
||||
// be a continuation byte and the context id is 2 or 3 depending on the LSB of
|
||||
// the last byte and to a lesser extent on the second last byte if it is ASCII.
|
||||
//
|
||||
// If the last byte is a UTF8 continuation byte, the second last byte can be:
|
||||
// - continuation byte: the next byte is probably ASCII or lead byte (assuming
|
||||
// 4-byte UTF8 characters are rare) and the context id is 0 or 1.
|
||||
// - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
|
||||
// - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
|
||||
//
|
||||
// The possible value combinations of the previous two bytes, the range of
|
||||
// context ids and the type of the next byte is summarized in the table below:
|
||||
//
|
||||
// |--------\-----------------------------------------------------------------|
|
||||
// | \ Last byte |
|
||||
// | Second \---------------------------------------------------------------|
|
||||
// | last byte \ ASCII | cont. byte | lead byte |
|
||||
// | \ (0-127) | (128-191) | (192-) |
|
||||
// |=============|===================|=====================|==================|
|
||||
// | ASCII | next: ASCII/lead | not valid | next: cont. |
|
||||
// | (0-127) | context: 4 - 63 | | context: 2 - 3 |
|
||||
// |-------------|-------------------|---------------------|------------------|
|
||||
// | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
|
||||
// | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
|
||||
// |-------------|-------------------|---------------------|------------------|
|
||||
// | lead byte | not valid | next: ASCII/lead | not valid |
|
||||
// | (192-207) | | context: 0 - 1 | |
|
||||
// |-------------|-------------------|---------------------|------------------|
|
||||
// | lead byte | not valid | next: cont. | not valid |
|
||||
// | (208-) | | context: 2 - 3 | |
|
||||
// |-------------|-------------------|---------------------|------------------|
|
||||
static const uint8_t kUTF8ContextLookup[512] = {
|
||||
// Last byte.
|
||||
//
|
||||
// ASCII range.
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
|
||||
44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
|
||||
12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
|
||||
52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
|
||||
12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
|
||||
60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
|
||||
// UTF8 continuation byte range.
|
||||
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
||||
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
||||
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
||||
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
|
||||
// UTF8 lead byte range.
|
||||
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
||||
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
||||
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
||||
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
|
||||
// Second last byte.
|
||||
//
|
||||
// ASCII range.
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
|
||||
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
|
||||
1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
|
||||
// UTF8 continuation byte range.
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
// UTF8 lead byte range.
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
|
||||
};
|
||||
|
||||
// Context lookup table for small signed integers.
|
||||
static const int kSigned3BitContextLookup[] = {
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
@ -59,69 +158,25 @@ static const int kSigned3BitContextLookup[] = {
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
|
||||
};
|
||||
|
||||
static const int kSigned4BitContextLookup[] = {
|
||||
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
|
||||
11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 14, 15,
|
||||
};
|
||||
|
||||
enum ContextType {
|
||||
CONTEXT_NONE = 0,
|
||||
CONTEXT_FULL = 1,
|
||||
CONTEXT_MSB7 = 2,
|
||||
CONTEXT_MSB6 = 3,
|
||||
CONTEXT_MSB5 = 4,
|
||||
CONTEXT_MSB4 = 5,
|
||||
CONTEXT_MSB3 = 6,
|
||||
CONTEXT_MSB2 = 7,
|
||||
CONTEXT_MSB1 = 8,
|
||||
CONTEXT_IS_ZERO = 9,
|
||||
CONTEXT_SIGNED_2BIT = 10,
|
||||
CONTEXT_SIGNED_3BIT = 11,
|
||||
CONTEXT_SIGNED_4BIT = 12,
|
||||
CONTEXT_SIGNED_MIXED_3BYTE = 13,
|
||||
CONTEXT_LSB6 = 0,
|
||||
CONTEXT_MSB6 = 1,
|
||||
CONTEXT_UTF8 = 2,
|
||||
CONTEXT_SIGNED = 3
|
||||
};
|
||||
|
||||
static const int kContextSize[] = {
|
||||
1, 256, 128, 64, 32, 16, 8, 4, 2, 2, 4, 8, 16, 64,
|
||||
};
|
||||
|
||||
static inline int NumContexts(int mode) {
|
||||
return kContextSize[mode];
|
||||
}
|
||||
|
||||
static inline uint8_t Context(uint8_t prev_byte, uint8_t prev_byte2,
|
||||
uint8_t prev_byte3, int mode) {
|
||||
static inline uint8_t Context(uint8_t p1, uint8_t p2, int mode) {
|
||||
switch (mode) {
|
||||
case CONTEXT_NONE:
|
||||
return 0;
|
||||
case CONTEXT_IS_ZERO:
|
||||
return prev_byte == 0 ? 0 : 1;
|
||||
case CONTEXT_SIGNED_2BIT:
|
||||
return kSigned2BitContextLookup[prev_byte];
|
||||
case CONTEXT_SIGNED_3BIT:
|
||||
return kSigned3BitContextLookup[prev_byte];
|
||||
case CONTEXT_SIGNED_4BIT:
|
||||
return kSigned4BitContextLookup[prev_byte];
|
||||
case CONTEXT_SIGNED_MIXED_3BYTE:
|
||||
return ((kSigned3BitContextLookup[prev_byte] << 3) +
|
||||
(kSigned2BitContextLookup[prev_byte2] << 1) +
|
||||
(prev_byte3 == 0 ? 0 : 1));
|
||||
case CONTEXT_LSB6:
|
||||
return p1 & 0x3f;
|
||||
case CONTEXT_MSB6:
|
||||
return p1 >> 2;
|
||||
case CONTEXT_UTF8:
|
||||
return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
|
||||
case CONTEXT_SIGNED:
|
||||
return (kSigned3BitContextLookup[p1] << 3) + kSigned3BitContextLookup[p2];
|
||||
default:
|
||||
return prev_byte >> (mode - 1);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
371
enc/encode.cc
371
enc/encode.cc
@ -26,7 +26,9 @@
|
||||
#include "./context.h"
|
||||
#include "./entropy_encode.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./hash.h"
|
||||
#include "./histogram.h"
|
||||
#include "./literal_cost.h"
|
||||
#include "./prefix.h"
|
||||
#include "./write_bits.h"
|
||||
|
||||
@ -41,31 +43,39 @@ double Entropy(const std::vector<Histogram<kSize> >& histograms) {
|
||||
return retval;
|
||||
}
|
||||
|
||||
template<int kSize>
|
||||
double TotalBitCost(const std::vector<Histogram<kSize> >& histograms) {
|
||||
double retval = 0;
|
||||
for (int i = 0; i < histograms.size(); ++i) {
|
||||
retval += PopulationCost(histograms[i]);
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
void EncodeSize(size_t len, int* storage_ix, uint8_t* storage) {
|
||||
std::vector<uint8_t> len_bytes;
|
||||
while (len > 0) {
|
||||
do {
|
||||
len_bytes.push_back(len & 0xff);
|
||||
len >>= 8;
|
||||
};
|
||||
} while (len > 0);
|
||||
WriteBits(3, len_bytes.size(), storage_ix, storage);
|
||||
for (int i = 0; i < len_bytes.size(); ++i) {
|
||||
WriteBits(8, len_bytes[i], storage_ix, storage);
|
||||
}
|
||||
}
|
||||
|
||||
void EncodeMetaBlockLength(int input_size_bits,
|
||||
size_t meta_block_size,
|
||||
bool is_last_meta_block,
|
||||
void EncodeMetaBlockLength(size_t meta_block_size,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
WriteBits(1, is_last_meta_block, storage_ix, storage);
|
||||
if (is_last_meta_block) return;
|
||||
while (input_size_bits > 0) {
|
||||
WriteBits(8, meta_block_size & 0xff, storage_ix, storage);
|
||||
meta_block_size >>= 8;
|
||||
input_size_bits -= 8;
|
||||
WriteBits(1, 0, storage_ix, storage);
|
||||
int num_bits = Log2Floor(meta_block_size) + 1;
|
||||
WriteBits(3, (num_bits + 3) >> 2, storage_ix, storage);
|
||||
while (num_bits > 0) {
|
||||
WriteBits(4, meta_block_size & 0xf, storage_ix, storage);
|
||||
meta_block_size >>= 4;
|
||||
num_bits -= 4;
|
||||
}
|
||||
if (input_size_bits > 0) {
|
||||
WriteBits(input_size_bits, meta_block_size, storage_ix, storage);
|
||||
if (num_bits > 0) {
|
||||
WriteBits(num_bits, meta_block_size, storage_ix, storage);
|
||||
}
|
||||
}
|
||||
|
||||
@ -82,7 +92,7 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
|
||||
const uint8_t* code_length_bitdepth,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
static const uint8_t kStorageOrder[kCodeLengthCodes] = {
|
||||
17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
1, 2, 3, 4, 0, 17, 18, 5, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
};
|
||||
// Throw away trailing zeros:
|
||||
int codes_to_store = kCodeLengthCodes;
|
||||
@ -92,8 +102,16 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
|
||||
}
|
||||
}
|
||||
WriteBits(4, codes_to_store - 4, storage_ix, storage);
|
||||
for (int i = 0; i < codes_to_store; ++i) {
|
||||
WriteBits(3, code_length_bitdepth[kStorageOrder[i]], storage_ix, storage);
|
||||
const int skip_two_first =
|
||||
code_length_bitdepth[kStorageOrder[0]] == 0 &&
|
||||
code_length_bitdepth[kStorageOrder[1]] == 0;
|
||||
WriteBits(1, skip_two_first, storage_ix, storage);
|
||||
|
||||
for (int i = skip_two_first * 2; i < codes_to_store; ++i) {
|
||||
uint8_t len[] = { 2, 4, 3, 2, 2, 4 };
|
||||
uint8_t bits[] = { 0, 7, 3, 1, 2, 15 };
|
||||
int v = code_length_bitdepth[kStorageOrder[i]];
|
||||
WriteBits(len[v], bits[v], storage_ix, storage);
|
||||
}
|
||||
}
|
||||
|
||||
@ -124,30 +142,49 @@ void StoreHuffmanTreeToBitMask(
|
||||
template<int kSize>
|
||||
void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
const int kMaxBits = 8;
|
||||
const int kMaxSymbol = 1 << kMaxBits;
|
||||
|
||||
const uint8_t *depth = &code.depth_[0];
|
||||
int max_bits_counter = alphabet_size - 1;
|
||||
int max_bits = 0;
|
||||
while (max_bits_counter) {
|
||||
max_bits_counter >>= 1;
|
||||
++max_bits;
|
||||
}
|
||||
if (code.count_ == 0) { // emit minimal tree for empty cases
|
||||
// bits: small tree marker: 1, count-1: 0, large 8-bit code: 0, code: 0
|
||||
WriteBits(4, 0x01, storage_ix, storage);
|
||||
// bits: small tree marker: 1, count-1: 0, max_bits-sized encoding for 0
|
||||
WriteBits(3 + max_bits, 0x01, storage_ix, storage);
|
||||
return;
|
||||
}
|
||||
if (code.count_ <= 2 &&
|
||||
code.symbols_[0] < kMaxSymbol &&
|
||||
code.symbols_[1] < kMaxSymbol) {
|
||||
// Small tree marker to encode 1 or 2 symbols.
|
||||
WriteBits(1, 1, storage_ix, storage);
|
||||
WriteBits(1, code.count_ - 1, storage_ix, storage);
|
||||
if (code.symbols_[0] <= 1) {
|
||||
// Code bit for small (1 bit) symbol value.
|
||||
WriteBits(1, 0, storage_ix, storage);
|
||||
WriteBits(1, code.symbols_[0], storage_ix, storage);
|
||||
} else {
|
||||
WriteBits(1, 1, storage_ix, storage);
|
||||
WriteBits(8, code.symbols_[0], storage_ix, storage);
|
||||
if (code.count_ <= 4) {
|
||||
int symbols[4];
|
||||
// Quadratic sort.
|
||||
int k, j;
|
||||
for (k = 0; k < code.count_; ++k) {
|
||||
symbols[k] = code.symbols_[k];
|
||||
}
|
||||
if (code.count_ == 2) {
|
||||
WriteBits(8, code.symbols_[1], storage_ix, storage);
|
||||
for (k = 0; k < code.count_; ++k) {
|
||||
for (j = k + 1; j < code.count_; ++j) {
|
||||
if (depth[symbols[j]] < depth[symbols[k]]) {
|
||||
int t = symbols[k];
|
||||
symbols[k] = symbols[j];
|
||||
symbols[j] = t;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Small tree marker to encode 1-4 symbols.
|
||||
WriteBits(1, 1, storage_ix, storage);
|
||||
WriteBits(2, code.count_ - 1, storage_ix, storage);
|
||||
for (int i = 0; i < code.count_; ++i) {
|
||||
WriteBits(max_bits, symbols[i], storage_ix, storage);
|
||||
}
|
||||
if (code.count_ == 4) {
|
||||
if (depth[symbols[0]] == 2 &&
|
||||
depth[symbols[1]] == 2 &&
|
||||
depth[symbols[2]] == 2 &&
|
||||
depth[symbols[3]] == 2) {
|
||||
WriteBits(1, 0, storage_ix, storage);
|
||||
} else {
|
||||
WriteBits(1, 1, storage_ix, storage);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -156,7 +193,7 @@ void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
|
||||
uint8_t huffman_tree[kSize];
|
||||
uint8_t huffman_tree_extra_bits[kSize];
|
||||
int huffman_tree_size = 0;
|
||||
WriteHuffmanTree(&code.depth_[0],
|
||||
WriteHuffmanTree(depth,
|
||||
alphabet_size,
|
||||
&huffman_tree[0],
|
||||
&huffman_tree_extra_bits[0],
|
||||
@ -167,7 +204,7 @@ void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
|
||||
huffman_tree_histogram.Add(huffman_tree[i]);
|
||||
}
|
||||
EntropyCode<kCodeLengthCodes> huffman_tree_entropy;
|
||||
BuildEntropyCode(huffman_tree_histogram, 7, kCodeLengthCodes,
|
||||
BuildEntropyCode(huffman_tree_histogram, 5, kCodeLengthCodes,
|
||||
&huffman_tree_entropy);
|
||||
Histogram<kCodeLengthCodes> trimmed_histogram = huffman_tree_histogram;
|
||||
uint8_t* last_code = &huffman_tree[huffman_tree_size - 1];
|
||||
@ -178,7 +215,7 @@ void StoreHuffmanCode(const EntropyCode<kSize>& code, int alphabet_size,
|
||||
bool write_length = false;
|
||||
if (trimmed_size > 1 && trimmed_size < huffman_tree_size) {
|
||||
EntropyCode<kCodeLengthCodes> trimmed_entropy;
|
||||
BuildEntropyCode(trimmed_histogram, 7, kCodeLengthCodes, &trimmed_entropy);
|
||||
BuildEntropyCode(trimmed_histogram, 5, kCodeLengthCodes, &trimmed_entropy);
|
||||
int huffman_bit_cost = HuffmanTreeBitCost(huffman_tree_histogram,
|
||||
huffman_tree_entropy);
|
||||
int trimmed_bit_cost = HuffmanTreeBitCost(trimmed_histogram,
|
||||
@ -247,16 +284,15 @@ void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ComputeDistanceShortCodes(std::vector<Command>* cmds) {
|
||||
void ComputeDistanceShortCodes(std::vector<Command>* cmds,
|
||||
int* dist_ringbuffer,
|
||||
size_t* ringbuffer_idx) {
|
||||
static const int kIndexOffset[16] = {
|
||||
3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
|
||||
};
|
||||
static const int kValueOffset[16] = {
|
||||
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
|
||||
};
|
||||
int dist_ringbuffer[4] = { 4, 11, 15, 16 };
|
||||
int ringbuffer_idx = 0;
|
||||
for (int i = 0; i < cmds->size(); ++i) {
|
||||
int cur_dist = (*cmds)[i].copy_distance_;
|
||||
if (cur_dist == 0) break;
|
||||
@ -268,7 +304,7 @@ void ComputeDistanceShortCodes(std::vector<Command>* cmds) {
|
||||
// with them.
|
||||
continue;
|
||||
}
|
||||
int comp = (dist_ringbuffer[(ringbuffer_idx + kIndexOffset[k]) & 3] +
|
||||
int comp = (dist_ringbuffer[(*ringbuffer_idx + kIndexOffset[k]) & 3] +
|
||||
kValueOffset[k]);
|
||||
if (cur_dist == comp) {
|
||||
dist_code = k + 1;
|
||||
@ -276,8 +312,8 @@ void ComputeDistanceShortCodes(std::vector<Command>* cmds) {
|
||||
}
|
||||
}
|
||||
if (dist_code > 1) {
|
||||
dist_ringbuffer[ringbuffer_idx & 3] = cur_dist;
|
||||
++ringbuffer_idx;
|
||||
dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
|
||||
++(*ringbuffer_idx);
|
||||
}
|
||||
(*cmds)[i].distance_code_ = dist_code;
|
||||
}
|
||||
@ -414,19 +450,8 @@ int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
|
||||
}
|
||||
|
||||
void EncodeContextMap(const std::vector<int>& context_map,
|
||||
int context_mode,
|
||||
int context_mode_bits,
|
||||
int num_clusters,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
if (context_mode == 0) {
|
||||
WriteBits(1, 0, storage_ix, storage); // no context
|
||||
return;
|
||||
}
|
||||
|
||||
WriteBits(1, 1, storage_ix, storage); // have context
|
||||
if (context_mode_bits > 0) {
|
||||
WriteBits(context_mode_bits, context_mode - 1, storage_ix, storage);
|
||||
}
|
||||
WriteBits(8, num_clusters - 1, storage_ix, storage);
|
||||
|
||||
if (num_clusters == 1 || num_clusters == context_map.size()) {
|
||||
@ -560,7 +585,6 @@ struct EncodingParams {
|
||||
int num_direct_distance_codes;
|
||||
int distance_postfix_bits;
|
||||
int literal_context_mode;
|
||||
int distance_context_mode;
|
||||
};
|
||||
|
||||
struct MetaBlock {
|
||||
@ -569,6 +593,7 @@ struct MetaBlock {
|
||||
BlockSplit literal_split;
|
||||
BlockSplit command_split;
|
||||
BlockSplit distance_split;
|
||||
std::vector<int> literal_context_modes;
|
||||
std::vector<int> literal_context_map;
|
||||
std::vector<int> distance_context_map;
|
||||
std::vector<HistogramLiteral> literal_histograms;
|
||||
@ -578,8 +603,9 @@ struct MetaBlock {
|
||||
|
||||
void BuildMetaBlock(const EncodingParams& params,
|
||||
const std::vector<Command>& cmds,
|
||||
const uint8_t* input_buffer,
|
||||
size_t pos,
|
||||
const uint8_t* ringbuffer,
|
||||
const size_t pos,
|
||||
const size_t mask,
|
||||
MetaBlock* mb) {
|
||||
mb->cmds = cmds;
|
||||
mb->params = params;
|
||||
@ -587,7 +613,7 @@ void BuildMetaBlock(const EncodingParams& params,
|
||||
mb->params.num_direct_distance_codes,
|
||||
mb->params.distance_postfix_bits);
|
||||
SplitBlock(mb->cmds,
|
||||
input_buffer + pos,
|
||||
&ringbuffer[pos & mask],
|
||||
&mb->literal_split,
|
||||
&mb->command_split,
|
||||
&mb->distance_split);
|
||||
@ -595,16 +621,14 @@ void BuildMetaBlock(const EncodingParams& params,
|
||||
ComputeBlockTypeShortCodes(&mb->command_split);
|
||||
ComputeBlockTypeShortCodes(&mb->distance_split);
|
||||
|
||||
int num_literal_contexts_per_block_type =
|
||||
NumContexts(mb->params.literal_context_mode);
|
||||
mb->literal_context_modes.resize(mb->literal_split.num_types_,
|
||||
mb->params.literal_context_mode);
|
||||
|
||||
|
||||
int num_literal_contexts =
|
||||
mb->literal_split.num_types_ *
|
||||
num_literal_contexts_per_block_type;
|
||||
int num_distance_contexts_per_block_type =
|
||||
(mb->params.distance_context_mode > 0 ? 4 : 1);
|
||||
mb->literal_split.num_types_ << kLiteralContextBits;
|
||||
int num_distance_contexts =
|
||||
mb->distance_split.num_types_ *
|
||||
num_distance_contexts_per_block_type;
|
||||
mb->distance_split.num_types_ << kDistanceContextBits;
|
||||
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
|
||||
mb->command_histograms.resize(mb->command_split.num_types_);
|
||||
std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
|
||||
@ -612,10 +636,10 @@ void BuildMetaBlock(const EncodingParams& params,
|
||||
mb->literal_split,
|
||||
mb->command_split,
|
||||
mb->distance_split,
|
||||
input_buffer,
|
||||
ringbuffer,
|
||||
pos,
|
||||
mb->params.literal_context_mode,
|
||||
mb->params.distance_context_mode,
|
||||
mask,
|
||||
mb->literal_context_modes,
|
||||
&literal_histograms,
|
||||
&mb->command_histograms,
|
||||
&distance_histograms);
|
||||
@ -625,24 +649,20 @@ void BuildMetaBlock(const EncodingParams& params,
|
||||
static const int kMaxNumberOfHistograms = 240;
|
||||
|
||||
mb->literal_histograms = literal_histograms;
|
||||
if (mb->params.literal_context_mode > 0) {
|
||||
ClusterHistograms(literal_histograms,
|
||||
num_literal_contexts_per_block_type,
|
||||
mb->literal_split.num_types_,
|
||||
kMaxNumberOfHistograms,
|
||||
&mb->literal_histograms,
|
||||
&mb->literal_context_map);
|
||||
}
|
||||
ClusterHistograms(literal_histograms,
|
||||
1 << kLiteralContextBits,
|
||||
mb->literal_split.num_types_,
|
||||
kMaxNumberOfHistograms,
|
||||
&mb->literal_histograms,
|
||||
&mb->literal_context_map);
|
||||
|
||||
mb->distance_histograms = distance_histograms;
|
||||
if (mb->params.distance_context_mode > 0) {
|
||||
ClusterHistograms(distance_histograms,
|
||||
num_distance_contexts_per_block_type,
|
||||
mb->distance_split.num_types_,
|
||||
kMaxNumberOfHistograms,
|
||||
&mb->distance_histograms,
|
||||
&mb->distance_context_map);
|
||||
}
|
||||
ClusterHistograms(distance_histograms,
|
||||
1 << kDistanceContextBits,
|
||||
mb->distance_split.num_types_,
|
||||
kMaxNumberOfHistograms,
|
||||
&mb->distance_histograms,
|
||||
&mb->distance_context_map);
|
||||
}
|
||||
|
||||
size_t MetaBlockLength(const std::vector<Command>& cmds) {
|
||||
@ -655,14 +675,13 @@ size_t MetaBlockLength(const std::vector<Command>& cmds) {
|
||||
}
|
||||
|
||||
void StoreMetaBlock(const MetaBlock& mb,
|
||||
const uint8_t* input_buffer,
|
||||
int input_size_bits,
|
||||
bool is_last,
|
||||
const uint8_t* ringbuffer,
|
||||
const size_t mask,
|
||||
size_t* pos,
|
||||
int* storage_ix, uint8_t* storage) {
|
||||
size_t length = MetaBlockLength(mb.cmds);
|
||||
const size_t end_pos = *pos + length;
|
||||
EncodeMetaBlockLength(input_size_bits, length - 1, is_last,
|
||||
EncodeMetaBlockLength(length - 1,
|
||||
storage_ix, storage);
|
||||
BlockSplitCode literal_split_code;
|
||||
BlockSplitCode command_split_code;
|
||||
@ -680,10 +699,11 @@ void StoreMetaBlock(const MetaBlock& mb,
|
||||
int num_distance_codes =
|
||||
kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
|
||||
(48 << mb.params.distance_postfix_bits);
|
||||
EncodeContextMap(mb.literal_context_map, mb.params.literal_context_mode, 4,
|
||||
mb.literal_histograms.size(), storage_ix, storage);
|
||||
EncodeContextMap(mb.distance_context_map, mb.params.distance_context_mode, 0,
|
||||
mb.distance_histograms.size(), storage_ix, storage);
|
||||
for (int i = 0; i < mb.literal_split.num_types_; ++i) {
|
||||
WriteBits(2, mb.literal_context_modes[i], storage_ix, storage);
|
||||
}
|
||||
EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(), storage_ix, storage);
|
||||
EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(), storage_ix, storage);
|
||||
std::vector<EntropyCodeLiteral> literal_codes;
|
||||
std::vector<EntropyCodeCommand> command_codes;
|
||||
std::vector<EntropyCodeDistance> distance_codes;
|
||||
@ -705,27 +725,22 @@ void StoreMetaBlock(const MetaBlock& mb,
|
||||
for (int j = 0; j < cmd.insert_length_; ++j) {
|
||||
MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
|
||||
int histogram_idx = literal_it.type_;
|
||||
if (mb.params.literal_context_mode > 0) {
|
||||
uint8_t prev_byte = *pos > 0 ? input_buffer[*pos - 1] : 0;
|
||||
uint8_t prev_byte2 = *pos > 1 ? input_buffer[*pos - 2] : 0;
|
||||
uint8_t prev_byte3 = *pos > 2 ? input_buffer[*pos - 3] : 0;
|
||||
int context = (literal_it.type_ *
|
||||
NumContexts(mb.params.literal_context_mode) +
|
||||
Context(prev_byte, prev_byte2, prev_byte3,
|
||||
mb.params.literal_context_mode));
|
||||
histogram_idx = mb.literal_context_map[context];
|
||||
}
|
||||
EntropyEncode(input_buffer[(*pos)++],
|
||||
uint8_t prev_byte = *pos > 0 ? ringbuffer[(*pos - 1) & mask] : 0;
|
||||
uint8_t prev_byte2 = *pos > 1 ? ringbuffer[(*pos - 2) & mask] : 0;
|
||||
int context = ((literal_it.type_ << kLiteralContextBits) +
|
||||
Context(prev_byte, prev_byte2,
|
||||
mb.literal_context_modes[literal_it.type_]));
|
||||
histogram_idx = mb.literal_context_map[context];
|
||||
EntropyEncode(ringbuffer[*pos & mask],
|
||||
literal_codes[histogram_idx], storage_ix, storage);
|
||||
++(*pos);
|
||||
}
|
||||
if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
|
||||
MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
|
||||
int histogram_index = distance_it.type_;
|
||||
if (mb.params.distance_context_mode > 0) {
|
||||
int context = distance_it.type_ << 2;
|
||||
context += (cmd.copy_length_ > 4) ? 3 : cmd.copy_length_ - 2;
|
||||
histogram_index = mb.distance_context_map[context];
|
||||
}
|
||||
int context = (distance_it.type_ << 2) +
|
||||
((cmd.copy_length_ > 4) ? 3 : cmd.copy_length_ - 2);
|
||||
histogram_index = mb.distance_context_map[context];
|
||||
EncodeCopyDistance(cmd, distance_codes[histogram_index],
|
||||
storage_ix, storage);
|
||||
}
|
||||
@ -733,45 +748,123 @@ void StoreMetaBlock(const MetaBlock& mb,
|
||||
}
|
||||
}
|
||||
|
||||
// Window size exponent. WriteStreamHeader() stores it in the stream header as
// kWindowBits - 17 using three bits.
static const int kWindowBits = 22;
|
||||
// To make decoding faster, we allow the decoder to write 16 bytes ahead in
|
||||
// its ringbuffer, therefore the encoder has to decrease max distance by this
|
||||
// amount.
|
||||
static const int kDecoderRingBufferWriteAheadSlack = 16;
|
||||
// Largest backward distance handed to CreateBackwardReferences().
static const int kMaxBackwardDistance =
|
||||
(1 << kWindowBits) - kDecoderRingBufferWriteAheadSlack;
|
||||
|
|
||||
// Exponent of the per-meta-block sizes: passed as the second argument of the
// RingBuffer constructor, and the internal output buffer is allocated with
// 2 << kMetaBlockSizeBits bytes.
static const int kMetaBlockSizeBits = 21;
|
||||
// Exponent of the ring buffer size: first argument of the RingBuffer
// constructor, and the literal cost table has 1 << kRingBufferBits entries.
static const int kRingBufferBits = 23;
|
||||
// Mask passed along with ring buffer positions to the helpers that index
// into the ring buffer.
static const int kRingBufferMask = (1 << kRingBufferBits) - 1;
|
||||
|
||||
// Builds a compressor whose bit-output buffer is empty (storage_ix_ == 0) and
// whose input position is 0. The hasher and the output buffer are allocated
// on the heap here; the destructor releases them.
BrotliCompressor::BrotliCompressor()
|
||||
: hasher_(new Hasher),
|
||||
dist_ringbuffer_idx_(0),
|
||||
input_pos_(0),
|
||||
ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
|
||||
literal_cost_(1 << kRingBufferBits),
|
||||
storage_ix_(0),
|
||||
storage_(new uint8_t[2 << kMetaBlockSizeBits]) {
|
||||
// Initial contents of the four-entry distance ring buffer.
dist_ringbuffer_[0] = 4;
|
||||
dist_ringbuffer_[1] = 11;
|
||||
dist_ringbuffer_[2] = 15;
|
||||
dist_ringbuffer_[3] = 16;
|
||||
// NOTE(review): presumably WriteBits() accumulates bits into the current
// byte, so the first output byte has to start cleared -- confirm against
// WriteBits().
storage_[0] = 0;
|
||||
}
|
||||
|
||||
// Releases the hasher and the bit-output buffer that the constructor
// allocated with new / new[].
BrotliCompressor::~BrotliCompressor() {
|
||||
delete hasher_;
|
||||
delete[] storage_;
|
||||
}
|
||||
|
||||
// Appends the stream header to the internal bit buffer (storage_ at bit
// position storage_ix_): three zero bits, then a one bit followed by
// kWindowBits - 17 in three bits. Nothing is copied to a caller buffer here;
// the header bits leave the compressor with the next call that copies
// storage_ out.
void BrotliCompressor::WriteStreamHeader() {
|
||||
// Don't encode input size.
|
||||
WriteBits(3, 0, &storage_ix_, storage_);
|
||||
// Encode window size.
|
||||
WriteBits(1, 1, &storage_ix_, storage_);
|
||||
WriteBits(3, kWindowBits - 17, &storage_ix_, storage_);
|
||||
}
|
||||
|
||||
// Compresses one chunk of input as a meta-block.
//
//   input_size, input_buffer: the chunk to compress; it is first written into
//       the internal ring buffer and all later steps read it from there.
//   encoded_size: output only; set to the number of whole bytes copied to
//       encoded_buffer.
//   encoded_buffer: receives the completed output bytes.
//
// NOTE(review): the incoming value of *encoded_size is never read, so no
// capacity check is made against encoded_buffer -- callers must supply a
// buffer that is large enough.
void BrotliCompressor::WriteMetaBlock(const size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer) {
|
||||
ringbuffer_.Write(input_buffer, input_size);
|
||||
// Literal cost estimates for the new chunk; they are handed to the
// backward reference search below.
EstimateBitCostsForLiterals(input_pos_, input_size,
|
||||
kRingBufferMask, ringbuffer_.start(),
|
||||
&literal_cost_[0]);
|
||||
std::vector<Command> commands;
|
||||
CreateBackwardReferences(input_size, input_pos_,
|
||||
ringbuffer_.start(),
|
||||
&literal_cost_[0],
|
||||
kRingBufferMask, kMaxBackwardDistance,
|
||||
hasher_,
|
||||
&commands);
|
||||
// The distance ring buffer and its index are members, so the short-code
// state carries over from one meta-block to the next.
ComputeDistanceShortCodes(&commands, dist_ringbuffer_,
|
||||
&dist_ringbuffer_idx_);
|
||||
EncodingParams params;
|
||||
params.num_direct_distance_codes = 12;
|
||||
params.distance_postfix_bits = 1;
|
||||
params.literal_context_mode = CONTEXT_SIGNED;
|
||||
MetaBlock mb;
|
||||
BuildMetaBlock(params, commands, ringbuffer_.start(), input_pos_,
|
||||
kRingBufferMask, &mb);
|
||||
// StoreMetaBlock() receives a pointer to input_pos_ and advances it while
// emitting the meta-block.
StoreMetaBlock(mb, ringbuffer_.start(), kRingBufferMask,
|
||||
&input_pos_, &storage_ix_, storage_);
|
||||
// Only whole bytes are handed to the caller.
size_t output_size = storage_ix_ >> 3;
|
||||
memcpy(encoded_buffer, storage_, output_size);
|
||||
*encoded_size = output_size;
|
||||
// Keep the 0..7 leftover bits: storage_ix_ drops below 8, so the partially
// filled byte is moved to storage_[0] for the next call to continue from.
storage_ix_ -= output_size << 3;
|
||||
storage_[storage_ix_ >> 3] = storage_[output_size];
|
||||
}
|
||||
|
||||
// Terminates the stream: appends a single 1 bit to the internal bit buffer,
// rounds the bit count up to whole bytes, stores that byte count in
// *encoded_size and copies that many bytes from storage_ to encoded_buffer.
// NOTE(review): as in WriteMetaBlock(), the incoming value of *encoded_size is
// not read, so the capacity of encoded_buffer is not checked.
void BrotliCompressor::FinishStream(
|
||||
size_t* encoded_size, uint8_t* encoded_buffer) {
|
||||
WriteBits(1, 1, &storage_ix_, storage_);
|
||||
// Round up so the final, partially filled byte is included.
*encoded_size = (storage_ix_ + 7) >> 3;
|
||||
memcpy(encoded_buffer, storage_, *encoded_size);
|
||||
}
|
||||
|
||||
|
||||
int BrotliCompressBuffer(size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer) {
|
||||
int storage_ix = 0;
|
||||
uint8_t* storage = encoded_buffer;
|
||||
WriteBitsPrepareStorage(storage_ix, storage);
|
||||
EncodeSize(input_size, &storage_ix, storage);
|
||||
|
||||
if (input_size == 0) {
|
||||
*encoded_size = (storage_ix + 7) >> 3;
|
||||
encoded_buffer[0] = 1;
|
||||
encoded_buffer[1] = 0;
|
||||
*encoded_size = 2;
|
||||
return 1;
|
||||
}
|
||||
int input_size_bits = Log2Ceiling(input_size);
|
||||
|
||||
std::vector<Command> all_commands;
|
||||
CreateBackwardReferences(input_buffer, input_size, &all_commands);
|
||||
ComputeDistanceShortCodes(&all_commands);
|
||||
BrotliCompressor compressor;
|
||||
compressor.WriteStreamHeader();
|
||||
|
||||
std::vector<std::vector<Command> > meta_block_commands;
|
||||
SplitBlockByTotalLength(all_commands, input_size, 2 << 20,
|
||||
&meta_block_commands);
|
||||
const int max_block_size = 1 << kMetaBlockSizeBits;
|
||||
size_t max_output_size = *encoded_size;
|
||||
const uint8_t* input_end = input_buffer + input_size;
|
||||
*encoded_size = 0;
|
||||
|
||||
size_t pos = 0;
|
||||
for (int block_idx = 0; block_idx < meta_block_commands.size(); ++block_idx) {
|
||||
const std::vector<Command>& commands = meta_block_commands[block_idx];
|
||||
bool is_last_meta_block = (block_idx + 1 == meta_block_commands.size());
|
||||
EncodingParams params;
|
||||
params.num_direct_distance_codes = 12;
|
||||
params.distance_postfix_bits = 1;
|
||||
params.literal_context_mode = CONTEXT_SIGNED_MIXED_3BYTE;
|
||||
params.distance_context_mode = 1;
|
||||
MetaBlock mb;
|
||||
BuildMetaBlock(params, commands, input_buffer, pos, &mb);
|
||||
StoreMetaBlock(mb, input_buffer, input_size_bits, is_last_meta_block,
|
||||
&pos, &storage_ix, storage);
|
||||
while (input_buffer < input_end) {
|
||||
int block_size = max_block_size;
|
||||
if (block_size >= input_end - input_buffer) {
|
||||
block_size = input_end - input_buffer;
|
||||
}
|
||||
size_t output_size = max_output_size;
|
||||
compressor.WriteMetaBlock(block_size, input_buffer,
|
||||
&output_size, &encoded_buffer[*encoded_size]);
|
||||
input_buffer += block_size;
|
||||
*encoded_size += output_size;
|
||||
max_output_size -= output_size;
|
||||
}
|
||||
|
||||
*encoded_size = (storage_ix + 7) >> 3;
|
||||
size_t output_size = max_output_size;
|
||||
compressor.FinishStream(&output_size, &encoded_buffer[*encoded_size]);
|
||||
*encoded_size += output_size;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
36
enc/encode.h
36
enc/encode.h
@ -20,9 +20,45 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "./hash.h"
|
||||
#include "./ringbuffer.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Streaming encoder: the caller writes the stream header once, then one
// meta-block per input chunk, then finishes the stream. Output bits are
// accumulated in an internal buffer between calls.
// NOTE(review): the class owns raw pointers (hasher_, storage_) but declares
// no copy constructor or assignment operator, so copying an instance looks
// unsafe -- confirm that it is never copied.
class BrotliCompressor {
|
||||
public:
|
||||
BrotliCompressor();
|
||||
~BrotliCompressor();
|
||||
|
|
||||
// Writes the stream header into the internal output buffer.
|
||||
void WriteStreamHeader();
|
||||
|
|
||||
// Encodes the data in input_buffer as a meta-block, writes it to
|
||||
// encoded_buffer and sets *encoded_size to the number of bytes that were
|
||||
// written.
|
||||
void WriteMetaBlock(const size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer);
|
||||
|
|
||||
// Writes a zero-length meta-block with end-of-input bit set to the
|
||||
// internal output buffer and copies the output buffer to encoded_buffer and
|
||||
// sets *encoded_size to the number of bytes written.
|
||||
void FinishStream(size_t* encoded_size, uint8_t* encoded_buffer);
|
||||
|
|
||||
|
|
||||
private:
|
||||
// Hasher passed to CreateBackwardReferences(); allocated in the
// constructor and deleted in the destructor.
Hasher* hasher_;
|
||||
// Distance ring buffer and its running index, passed to
// ComputeDistanceShortCodes() on every WriteMetaBlock() call.
int dist_ringbuffer_[4];
|
||||
size_t dist_ringbuffer_idx_;
|
||||
// Input position passed to the per-meta-block helpers; StoreMetaBlock()
// advances it through a pointer.
size_t input_pos_;
|
||||
// Each WriteMetaBlock() call writes its input chunk into this buffer.
RingBuffer ringbuffer_;
|
||||
// Literal cost estimates filled in by EstimateBitCostsForLiterals().
std::vector<float> literal_cost_;
|
||||
// Number of bits currently held in storage_.
int storage_ix_;
|
||||
// Internal output buffer; allocated in the constructor and released with
// delete[] in the destructor.
uint8_t* storage_;
|
||||
};
|
||||
|
||||
// Compresses the data in input_buffer into encoded_buffer, and sets
|
||||
// *encoded_size to the compressed length.
|
||||
// Returns 0 if there was an error and 1 otherwise.
|
||||
|
@ -43,6 +43,9 @@ HuffmanTree::HuffmanTree() {}
|
||||
|
||||
// Sort the root nodes, least popular first.
|
||||
// Comparison predicate: returns true when v0 has to be ordered before v1.
// Nodes with equal total_count_ are ordered by index_right_or_value_, the
// larger value first, so ties are resolved the same way on every run.
bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) {
|
||||
if (v0.total_count_ == v1.total_count_) {
|
||||
return v0.index_right_or_value_ > v1.index_right_or_value_;
|
||||
}
|
||||
return v0.total_count_ < v1.total_count_;
|
||||
}
|
||||
|
||||
@ -276,7 +279,7 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) {
|
||||
}
|
||||
// 3) Let's replace those population counts that lead to more rle codes.
|
||||
stride = 0;
|
||||
limit = counts[0];
|
||||
limit = (counts[0] + counts[1] + counts[2]) / 3 + 1;
|
||||
sum = 0;
|
||||
for (i = 0; i < length + 1; ++i) {
|
||||
if (i == length || good_for_rle[i] ||
|
||||
@ -301,11 +304,10 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) {
|
||||
}
|
||||
stride = 0;
|
||||
sum = 0;
|
||||
if (i < length - 3) {
|
||||
if (i < length - 2) {
|
||||
// All interesting strides have a count of at least 4,
|
||||
// at least when non-zeros.
|
||||
limit = (counts[i] + counts[i + 1] +
|
||||
counts[i + 2] + counts[i + 3] + 2) / 4;
|
||||
limit = (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 1;
|
||||
} else if (i < length) {
|
||||
limit = counts[i];
|
||||
} else {
|
||||
@ -329,7 +331,7 @@ void WriteHuffmanTree(const uint8_t* depth, const int length,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data,
|
||||
int* huffman_tree_size) {
|
||||
int previous_value = 0;
|
||||
int previous_value = 8;
|
||||
for (uint32_t i = 0; i < length;) {
|
||||
const int value = depth[i];
|
||||
int reps = 1;
|
||||
|
@ -66,8 +66,8 @@ struct EntropyCode {
|
||||
uint16_t bits_[kSize];
|
||||
// Number of symbols with non-zero depth.
|
||||
int count_;
|
||||
// First two symbols with non-zero depth.
|
||||
int symbols_[2];
|
||||
// First four symbols with non-zero depth.
|
||||
int symbols_[4];
|
||||
};
|
||||
|
||||
template<int kSize>
|
||||
@ -82,7 +82,7 @@ void BuildEntropyCode(const Histogram<kSize>& histogram,
|
||||
if (histogram.total_count_ == 0) return;
|
||||
for (int i = 0; i < kSize; ++i) {
|
||||
if (histogram.data_[i] > 0) {
|
||||
if (code->count_ < 2) code->symbols_[code->count_] = i;
|
||||
if (code->count_ < 4) code->symbols_[code->count_] = i;
|
||||
++code->count_;
|
||||
}
|
||||
}
|
||||
|
62
enc/hash.h
62
enc/hash.h
@ -103,8 +103,7 @@ template <int kBucketBits, int kBlockBits>
|
||||
class HashLongestMatch {
|
||||
public:
|
||||
HashLongestMatch()
|
||||
: literal_cost_(NULL),
|
||||
last_distance1_(4),
|
||||
: last_distance1_(4),
|
||||
last_distance2_(11),
|
||||
last_distance3_(15),
|
||||
last_distance4_(16),
|
||||
@ -115,10 +114,6 @@ class HashLongestMatch {
|
||||
void Reset() {
|
||||
std::fill(&num_[0], &num_[sizeof(num_) / sizeof(num_[0])], 0);
|
||||
}
|
||||
void SetLiteralCost(float *cost) {
|
||||
literal_cost_ = cost;
|
||||
}
|
||||
double literal_cost(int i) const { return literal_cost_[i]; }
|
||||
|
||||
// Look at 3 bytes at data.
|
||||
// Compute a hash from these, and store the value of ix at that position.
|
||||
@ -146,25 +141,27 @@ class HashLongestMatch {
|
||||
// into best_distance_out.
|
||||
// Write the score of the best match into best_score_out.
|
||||
bool FindLongestMatch(const uint8_t * __restrict data,
|
||||
const float * __restrict literal_cost,
|
||||
const size_t ring_buffer_mask,
|
||||
const uint32_t cur_ix,
|
||||
uint32_t max_length,
|
||||
const uint32_t max_backward,
|
||||
size_t * __restrict best_len_out,
|
||||
size_t * __restrict best_distance_out,
|
||||
double * __restrict best_score_out) {
|
||||
const double start_cost4 = literal_cost_ == NULL ? 20 :
|
||||
literal_cost_[cur_ix] +
|
||||
literal_cost_[cur_ix + 1] +
|
||||
literal_cost_[cur_ix + 2] +
|
||||
literal_cost_[cur_ix + 3];
|
||||
|
||||
const double start_cost3 = literal_cost_ == NULL ? 15 :
|
||||
literal_cost_[cur_ix] +
|
||||
literal_cost_[cur_ix + 1] +
|
||||
literal_cost_[cur_ix + 2] + 0.3;
|
||||
double start_cost2 = literal_cost_ == NULL ? 10 :
|
||||
literal_cost_[cur_ix] +
|
||||
literal_cost_[cur_ix + 1] + 1.2;
|
||||
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
||||
const double start_cost4 = literal_cost == NULL ? 20 :
|
||||
literal_cost[cur_ix_masked] +
|
||||
literal_cost[(cur_ix + 1) & ring_buffer_mask] +
|
||||
literal_cost[(cur_ix + 2) & ring_buffer_mask] +
|
||||
literal_cost[(cur_ix + 3) & ring_buffer_mask];
|
||||
const double start_cost3 = literal_cost == NULL ? 15 :
|
||||
literal_cost[cur_ix_masked] +
|
||||
literal_cost[(cur_ix + 1) & ring_buffer_mask] +
|
||||
literal_cost[(cur_ix + 2) & ring_buffer_mask] + 0.3;
|
||||
double start_cost2 = literal_cost == NULL ? 10 :
|
||||
literal_cost[cur_ix_masked] +
|
||||
literal_cost[(cur_ix + 1) & ring_buffer_mask] + 1.2;
|
||||
bool match_found = false;
|
||||
// Don't accept a short copy from far away.
|
||||
double best_score = 8.25;
|
||||
@ -177,7 +174,7 @@ class HashLongestMatch {
|
||||
size_t best_ix = 1;
|
||||
// Try last distance first.
|
||||
for (int i = 0; i < 16; ++i) {
|
||||
int prev_ix = cur_ix;
|
||||
size_t prev_ix = cur_ix;
|
||||
switch(i) {
|
||||
case 0: prev_ix -= last_distance1_; break;
|
||||
case 1: prev_ix -= last_distance2_; break;
|
||||
@ -205,11 +202,13 @@ class HashLongestMatch {
|
||||
if (PREDICT_FALSE(backward > max_backward)) {
|
||||
continue;
|
||||
}
|
||||
if (data[cur_ix + best_len] != data[prev_ix + best_len]) {
|
||||
prev_ix &= ring_buffer_mask;
|
||||
if (data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
const size_t len =
|
||||
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix], max_length);
|
||||
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 3 || (len == 2 && i < 2)) {
|
||||
// Comparing for >= 2 does not change the semantics, but just saves for
|
||||
// a few unnecessary binary logarithms in backward reference score,
|
||||
@ -234,7 +233,7 @@ class HashLongestMatch {
|
||||
}
|
||||
}
|
||||
}
|
||||
const uint32_t key = Hash3Bytes(&data[cur_ix], kBucketBits);
|
||||
const uint32_t key = Hash3Bytes(&data[cur_ix_masked], kBucketBits);
|
||||
const uint32_t * __restrict const bucket = &buckets_[key][0];
|
||||
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
|
||||
int stop = int(cur_ix) - 64;
|
||||
@ -247,8 +246,9 @@ class HashLongestMatch {
|
||||
if (PREDICT_FALSE(backward > max_backward)) {
|
||||
break;
|
||||
}
|
||||
if (data[cur_ix] != data[prev_ix] ||
|
||||
data[cur_ix + 1] != data[prev_ix + 1]) {
|
||||
prev_ix &= ring_buffer_mask;
|
||||
if (data[cur_ix_masked] != data[prev_ix] ||
|
||||
data[cur_ix_masked + 1] != data[prev_ix + 1]) {
|
||||
continue;
|
||||
}
|
||||
int len = 2;
|
||||
@ -269,11 +269,13 @@ class HashLongestMatch {
|
||||
if (PREDICT_FALSE(backward > max_backward)) {
|
||||
break;
|
||||
}
|
||||
if (data[cur_ix + best_len] != data[prev_ix + best_len]) {
|
||||
prev_ix &= ring_buffer_mask;
|
||||
if (data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
const size_t len =
|
||||
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix], max_length);
|
||||
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 3) {
|
||||
// Comparing for >= 3 does not change the semantics, but just saves for
|
||||
// a few unnecessary binary logarithms in backward reference score,
|
||||
@ -333,10 +335,6 @@ class HashLongestMatch {
|
||||
// Buckets containing kBlockSize of backward references.
|
||||
uint32_t buckets_[kBucketSize][kBlockSize];
|
||||
|
||||
// Model of how much the ith literal costs to encode using
|
||||
// the entropy model.
|
||||
float *literal_cost_;
|
||||
|
||||
int last_distance1_;
|
||||
int last_distance2_;
|
||||
int last_distance3_;
|
||||
@ -349,6 +347,8 @@ class HashLongestMatch {
|
||||
double average_cost_;
|
||||
};
|
||||
|
||||
typedef HashLongestMatch<13, 11> Hasher;
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_HASH_H_
|
||||
|
@ -31,10 +31,10 @@ void BuildHistograms(
|
||||
const BlockSplit& literal_split,
|
||||
const BlockSplit& insert_and_copy_split,
|
||||
const BlockSplit& dist_split,
|
||||
const uint8_t* input_buffer,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
int context_mode,
|
||||
int distance_context_mode,
|
||||
size_t mask,
|
||||
const std::vector<int>& context_modes,
|
||||
std::vector<HistogramLiteral>* literal_histograms,
|
||||
std::vector<HistogramCommand>* insert_and_copy_histograms,
|
||||
std::vector<HistogramDistance>* copy_dist_histograms) {
|
||||
@ -48,25 +48,47 @@ void BuildHistograms(
|
||||
cmd.command_prefix_);
|
||||
for (int j = 0; j < cmd.insert_length_; ++j) {
|
||||
literal_it.Next();
|
||||
uint8_t prev_byte = pos > 0 ? input_buffer[pos - 1] : 0;
|
||||
uint8_t prev_byte2 = pos > 1 ? input_buffer[pos - 2] : 0;
|
||||
uint8_t prev_byte3 = pos > 2 ? input_buffer[pos - 3] : 0;
|
||||
int context = (literal_it.type_ * NumContexts(context_mode) +
|
||||
Context(prev_byte, prev_byte2, prev_byte3, context_mode));
|
||||
(*literal_histograms)[context].Add(input_buffer[pos]);
|
||||
uint8_t prev_byte = pos > 0 ? ringbuffer[(pos - 1) & mask] : 0;
|
||||
uint8_t prev_byte2 = pos > 1 ? ringbuffer[(pos - 2) & mask] : 0;
|
||||
int context = (literal_it.type_ << kLiteralContextBits) +
|
||||
Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
|
||||
(*literal_histograms)[context].Add(ringbuffer[pos & mask]);
|
||||
++pos;
|
||||
}
|
||||
pos += cmd.copy_length_;
|
||||
if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
|
||||
dist_it.Next();
|
||||
int context = dist_it.type_;
|
||||
if (distance_context_mode > 0) {
|
||||
context <<= 2;
|
||||
context += (cmd.copy_length_ > 4) ? 3 : cmd.copy_length_ - 2;
|
||||
}
|
||||
int context = (dist_it.type_ << kDistanceContextBits) +
|
||||
((cmd.copy_length_ > 4) ? 3 : cmd.copy_length_ - 2);
|
||||
(*copy_dist_histograms)[context].Add(cmd.distance_prefix_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Accumulates literal histograms for the literals of one block type only.
//
//   cmds:          commands whose inserted literals are visited in order.
//   literal_split: block split used to obtain each literal's block type.
//   ringbuffer, mask: literal bytes are read as ringbuffer[pos & mask].
//   pos:           position of the first literal of the first command.
//   block_type:    only literals of this block type are counted.
//   context_mode:  mode passed to Context() to derive the histogram index.
//   histograms:    output; (*histograms)[context] receives each counted
//                  literal. It is indexed directly, so the caller has to size
//                  it for every context value that can occur.
void BuildLiteralHistogramsForBlockType(
|
||||
const std::vector<Command>& cmds,
|
||||
const BlockSplit& literal_split,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
int block_type,
|
||||
int context_mode,
|
||||
std::vector<HistogramLiteral>* histograms) {
|
||||
BlockSplitIterator literal_it(literal_split);
|
||||
for (int i = 0; i < cmds.size(); ++i) {
|
||||
const Command &cmd = cmds[i];
|
||||
for (int j = 0; j < cmd.insert_length_; ++j) {
|
||||
// The iterator is advanced for every literal, counted or not, so that it
// stays in step with pos.
literal_it.Next();
|
||||
if (literal_it.type_ == block_type) {
|
||||
// The two preceding bytes form the context; 0 is used for bytes that
// would lie before position 0.
uint8_t prev_byte = pos > 0 ? ringbuffer[(pos - 1) & mask] : 0;
|
||||
uint8_t prev_byte2 = pos > 1 ? ringbuffer[(pos - 2) & mask] : 0;
|
||||
int context = Context(prev_byte, prev_byte2, context_mode);
|
||||
(*histograms)[context].Add(ringbuffer[pos & mask]);
|
||||
}
|
||||
++pos;
|
||||
}
|
||||
// Skip the bytes covered by the copy part of the command.
pos += cmd.copy_length_;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
|
@ -79,19 +79,32 @@ typedef Histogram<kNumCommandPrefixes> HistogramCommand;
|
||||
typedef Histogram<kNumDistancePrefixes> HistogramDistance;
|
||||
typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
|
||||
|
||||
static const int kLiteralContextBits = 6;
|
||||
static const int kDistanceContextBits = 2;
|
||||
|
||||
void BuildHistograms(
|
||||
const std::vector<Command>& cmds,
|
||||
const BlockSplit& literal_split,
|
||||
const BlockSplit& insert_and_copy_split,
|
||||
const BlockSplit& dist_split,
|
||||
const uint8_t* input_buffer,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
int context_mode,
|
||||
int distance_context_mode,
|
||||
size_t mask,
|
||||
const std::vector<int>& context_modes,
|
||||
std::vector<HistogramLiteral>* literal_histograms,
|
||||
std::vector<HistogramCommand>* insert_and_copy_histograms,
|
||||
std::vector<HistogramDistance>* copy_dist_histograms);
|
||||
|
||||
void BuildLiteralHistogramsForBlockType(
|
||||
const std::vector<Command>& cmds,
|
||||
const BlockSplit& literal_split,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
int block_type,
|
||||
int context_mode,
|
||||
std::vector<HistogramLiteral>* histograms);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_HISTOGRAM_H_
|
||||
|
@ -22,37 +22,39 @@
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Estimates how many bits each literal in the interval [pos, pos + len) of the
// ring buffer (data, mask) will take when entropy coded.
//
//   pos, len  logical start and length of the interval.
//   mask      applied to every logical position before indexing, so `data`
//             and `cost` must both hold at least mask + 1 elements.
//   data      ring buffer holding the literals (read only).
//   cost      output ring buffer; the estimate for logical position p is
//             written to cost[p & mask]. Other entries are left untouched.
//
// The estimate for a byte is log2(bytes in window / occurrences of that byte
// in the window) + 0.03, where the window slides along with the position and
// reaches up to 2000 bytes to either side, clipped to the interval. Estimates
// below one bit are moved halfway towards one bit.
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
                                 const uint8_t *data, float *cost) {
  static const size_t kWindowHalf = 2000;
  int histogram[256] = { 0 };
  size_t in_window = std::min(kWindowHalf, len);

  // Bootstrap the histogram with the first in_window bytes of the interval.
  for (size_t i = 0; i < in_window; ++i) {
    ++histogram[data[(pos + i) & mask]];
  }

  // Compute bit costs with a sliding window.
  for (size_t i = 0; i < len; ++i) {
    if (i >= kWindowHalf) {
      // Remove a byte in the past.
      --histogram[data[(pos + i - kWindowHalf) & mask]];
      --in_window;
    }
    if (i + kWindowHalf < len) {
      // Add a byte in the future.
      ++histogram[data[(pos + i + kWindowHalf) & mask]];
      ++in_window;
    }

    const size_t masked_pos = (pos + i) & mask;
    int histo = histogram[data[masked_pos]];
    if (histo == 0) {
      // Guard against dividing by zero; treat the byte as seen once.
      histo = 1;
    }

    float bits = log2(static_cast<double>(in_window) / histo);
    bits += 0.03;
    if (bits < 1.0) {
      // Do not trust very cheap estimates fully: pull them towards one bit.
      bits *= 0.5;
      bits += 0.5;
    }
    cost[masked_pos] = bits;
  }
}
|
||||
|
@ -22,9 +22,11 @@
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// NOTE(review): the three-argument declaration below looks like the
// pre-change line of the commit diff shown next to its replacement — confirm
// that a matching definition still exists before calling it.
// Input: length of data, and the bytes.
|
||||
// Output: estimate of how many bits the literal will take entropy coded.
|
||||
void EstimateBitCostsForLiterals(size_t len, const uint8_t *data, float *cost);
|
||||
// Estimates how many bits the literals in the interval [pos, pos + len) in the
|
||||
// ringbuffer (data, mask) will take entropy coded and writes these estimates
|
||||
// to the ringbuffer (cost, mask).
|
||||
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
||||
const uint8_t *data, float *cost);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
|
89
enc/ringbuffer.h
Normal file
89
enc/ringbuffer.h
Normal file
@ -0,0 +1,89 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Sliding window over the input data.
|
||||
|
||||
#ifndef BROTLI_ENC_RINGBUFFER_H_
|
||||
#define BROTLI_ENC_RINGBUFFER_H_
|
||||
|
||||
// A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
|
||||
// data in a circular manner: writing a byte writes it to
|
||||
// `position() % (1 << window_bits)'. For convenience, the RingBuffer array
|
||||
// contains another copy of the first `1 << tail_bits' bytes:
|
||||
// buffer_[i] == buffer_[i + (1 << window_bits)] if i < (1 << tail_bits).
|
||||
// The RingBuffer owns its storage: it is allocated in the constructor and
// released in the destructor. Copying is disabled because a copy would share
// the same allocation and free it twice.
class RingBuffer {
 public:
  // Allocates (1 << window_bits) + (1 << tail_bits) bytes followed by two
  // zeroed slack bytes. The slack exists, per the constant's name, so that
  // three-byte hashing can be applied at every position of the buffer.
  // The window and tail bytes are not initialized until they are written.
  RingBuffer(int window_bits, int tail_bits)
      : window_bits_(window_bits), tail_bits_(tail_bits), pos_(0) {
    static const int kSlackForThreeByteHashingEverywhere = 2;
    const size_t buflen = window_size() + tail_size();
    buffer_ = new uint8_t[buflen + kSlackForThreeByteHashingEverywhere];
    for (int i = 0; i < kSlackForThreeByteHashingEverywhere; ++i) {
      buffer_[buflen + i] = 0;
    }
  }
  ~RingBuffer() {
    delete [] buffer_;
  }

  // Pushes n bytes into the ring buffer at the current position and advances
  // the position by n. Bytes that land in the first (1 << tail_bits)
  // positions of the window are also mirrored into the tail.
  //
  // The length is not checked: the caller must keep n small enough that the
  // part of a write that wraps around still fits in the buffer (a write of
  // at most one window size always does).
  void Write(const uint8_t *bytes, size_t n) {
    if (n == 0) {
      // Nothing to copy; also keeps a null `bytes` away from memcpy.
      return;
    }
    const size_t masked_pos = pos_ & (window_size() - 1);
    WriteTail(bytes, n);
    if (masked_pos + n <= window_size()) {
      // A single write fits.
      memcpy(&buffer_[masked_pos], bytes, n);
    } else {
      // Split into two writes.
      const size_t until_wrap = window_size() - masked_pos;
      // Copy into the end of the buffer, including the tail buffer.
      memcpy(&buffer_[masked_pos], bytes,
             std::min(n, until_wrap + tail_size()));
      // Copy the wrapped-around part into the beginning of the buffer.
      memcpy(&buffer_[0], bytes + until_wrap, n - until_wrap);
    }
    pos_ += n;
  }

  // Logical cursor position in the ring buffer: the total number of bytes
  // written so far (not masked to the window size).
  size_t position() const { return pos_; }

  // Pointer to the first byte of the underlying array.
  uint8_t *start() { return &buffer_[0]; }
  const uint8_t *start() const { return &buffer_[0]; }

 private:
  // Not copyable: declared but intentionally left undefined.
  RingBuffer(const RingBuffer&);
  RingBuffer& operator=(const RingBuffer&);

  // Number of bytes in the circular window.
  size_t window_size() const { return static_cast<size_t>(1) << window_bits_; }

  // Number of bytes in the mirrored tail.
  size_t tail_size() const { return static_cast<size_t>(1) << tail_bits_; }

  // If the write starts inside the first tail_size() bytes of the window,
  // mirrors the part that falls into that range into the tail.
  void WriteTail(const uint8_t *bytes, size_t n) {
    const size_t masked_pos = pos_ & (window_size() - 1);
    if (masked_pos < tail_size()) {
      // Just fill the tail buffer with the beginning data.
      const size_t p = window_size() + masked_pos;
      memcpy(&buffer_[p], bytes, std::min(n, tail_size() - masked_pos));
    }
  }

  // Size of the ringbuffer is (1 << window_bits) + (1 << tail_bits).
  const int window_bits_;
  const int tail_bits_;

  // Position to write in the ring buffer.
  size_t pos_;
  // The actual ring buffer containing the data and the copy of the beginning
  // as a tail.
  uint8_t *buffer_;
};
|
||||
|
||||
#endif // BROTLI_ENC_RINGBUFFER_H_
|
Loading…
Reference in New Issue
Block a user