brotli decoder performance improvements

This commit is contained in:
Lode Vandevenne 2015-08-10 13:35:23 +02:00
parent 17ed258993
commit 94cd7085f7
17 changed files with 1466 additions and 1346 deletions

View File

@ -24,34 +24,27 @@
extern "C" {
#endif
void BrotliInitBitReader(BrotliBitReader* const br,
BrotliInput input, int finish) {
void BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input) {
BROTLI_DCHECK(br != NULL);
br->finish_ = finish;
br->tmp_bytes_read_ = 0;
br->buf_ptr_ = br->buf_;
br->input_ = input;
br->val_ = 0;
br->pos_ = 0;
br->bit_pos_ = 0;
br->bit_end_pos_ = 0;
br->avail_in = 0;
br->eos_ = 0;
br->tmp_bytes_read_ = 0;
br->next_in = NULL;
}
int BrotliWarmupBitReader(BrotliBitReader* const br) {
void BrotliWarmupBitReader(BrotliBitReader* const br) {
size_t i;
if (!BrotliReadMoreInput(br)) {
return 0;
}
br->val_ = 0;
for (i = 0; i < sizeof(br->val_); ++i) {
br->val_ |= ((uint64_t)br->buf_[br->pos_]) << (8 * i);
++br->pos_;
br->val_ |= ((uint64_t)*br->next_in) << (8 * i);
++br->next_in;
--br->avail_in;
}
return (br->bit_end_pos_ > 0);
}
#if defined(__cplusplus) || defined(c_plusplus)

View File

@ -27,53 +27,27 @@
extern "C" {
#endif
#if (defined(__x86_64__) || defined(_M_X64))
/* This should be set to 1 only on little-endian machines. */
#define BROTLI_USE_64_BITS 1
#elif (defined(__arm__) && defined(__BYTE_ORDER__) \
&& (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
/* Enable some optimizations for ARM architectures with little endian byte
order. So far the optimizations have been tested on a Cortex-A7. */
#define ARMv7
#define BROTLI_USE_64_BITS 1
#else
#define BROTLI_USE_64_BITS 0
#endif
#define BROTLI_MAX_NUM_BIT_READ 25
#define BROTLI_READ_SIZE 4096
#define BROTLI_IBUF_SIZE (2 * BROTLI_READ_SIZE + 128)
#define BROTLI_IBUF_MASK (2 * BROTLI_READ_SIZE - 1)
#define BROTLI_READ_SIZE 1024
#define BROTLI_IBUF_SIZE (BROTLI_READ_SIZE + 128)
#define BROTLI_IBUF_MASK (BROTLI_READ_SIZE - 1)
#define UNALIGNED_COPY64(dst, src) memcpy(dst, src, 8)
#define UNALIGNED_MOVE64(dst, src) memmove(dst, src, 8)
#ifdef ARMv7
/* Arm instructions can shift and negate registers before an AND operation. */
/* Masking with this expression turns to a single "Unsigned Bit Field Extract"
UBFX instruction on ARM. */
static BROTLI_INLINE uint32_t BitMask(int n) { return ~((0xffffffff) << n); }
#else
static const uint32_t kBitMask[BROTLI_MAX_NUM_BIT_READ] = {
0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767,
65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215
};
static BROTLI_INLINE uint32_t BitMask(int n) { return kBitMask[n]; }
#endif
typedef struct {
#if (BROTLI_USE_64_BITS)
#if (BROTLI_64_BITS_LITTLE_ENDIAN)
uint64_t val_; /* pre-fetched bits */
#else
uint32_t val_; /* pre-fetched bits */
#endif
uint32_t pos_; /* byte position in stream */
uint32_t bit_pos_; /* current bit-reading position in val_ */
uint32_t bit_end_pos_; /* bit-reading end position from LSB of val_ */
uint8_t* next_in; /* the byte we're reading from */
uint32_t avail_in;
int eos_; /* input stream is finished */
uint8_t* buf_ptr_; /* next input will write here */
BrotliInput input_; /* input callback */
/* Set to 0 to support partial data streaming. Set to 1 to expect full data or
for the last chunk of partial data. */
int finish_;
/* indicates how many bytes were already read when reading partial data */
int tmp_bytes_read_;
@ -82,38 +56,15 @@ typedef struct {
uint8_t buf_[BROTLI_IBUF_SIZE];
} BrotliBitReader;
/* Initializes the bitreader fields. After this, BrotliWarmupBitReader must
be used. */
void BrotliInitBitReader(BrotliBitReader* const br,
BrotliInput input, int finish);
/* Initializes the bitreader fields. After this, BrotliReadInput then
BrotliWarmupBitReader must be used. */
void BrotliInitBitReader(BrotliBitReader* const br, BrotliInput input);
/* Fetches data to fill up internal buffers. Returns 0 if there wasn't enough */
/* data to read. It then buffers the read data and can be called again with */
/* more data. If br->finish_ is 1, never fails. */
int BrotliWarmupBitReader(BrotliBitReader* const br);
/* Initializes bit reading and bit position with the first input data available.
Requires that there is enough input available (BrotliCheckInputAmount). */
void BrotliWarmupBitReader(BrotliBitReader* const br);
/* Peeks at the bits already held in br->val_, starting at the current bit
   position. Does not refill the window and does not advance the position. */
static BROTLI_INLINE uint32_t BrotliPrefetchBits(BrotliBitReader* const br) {
  const uint32_t window = (uint32_t)(br->val_ >> br->bit_pos_);
  return window;
}
/*
* Reload up to 32 bits byte-by-byte.
* This function works on both little and big endian.
*/
static BROTLI_INLINE void ShiftBytes32(BrotliBitReader* const br) {
while (br->bit_pos_ >= 8) {
br->val_ >>= 8;
br->val_ |= ((uint32_t)br->buf_[br->pos_ & BROTLI_IBUF_MASK]) << 24;
++br->pos_;
br->bit_pos_ -= 8;
br->bit_end_pos_ -= 8;
}
}
/* Fills up the input ringbuffer by calling the input callback.
Does nothing if there are at least 32 bytes present after current position.
/* Pulls data from the input to the read buffer.
Returns 0 if one of:
- the input callback returned an error, or
@ -122,18 +73,27 @@ static BROTLI_INLINE void ShiftBytes32(BrotliBitReader* const br) {
when more data is available makes it continue including the partially read
data
After encountering the end of the input stream, 32 additional zero bytes are
copied to the ringbuffer, therefore it is safe to call this function after
every 32 bytes of input is read.
If finish is true and the end of the stream is reached, 128 additional zero
bytes are copied to the ringbuffer.
*/
static BROTLI_INLINE int BrotliReadMoreInput(BrotliBitReader* const br) {
if (PREDICT_TRUE(br->bit_end_pos_ > 256)) {
return 1;
} else if (PREDICT_FALSE(br->eos_)) {
return br->bit_pos_ <= br->bit_end_pos_;
static BROTLI_INLINE int BrotliReadInput(
BrotliBitReader* const br, int finish) {
if (PREDICT_FALSE(br->eos_)) {
return 0;
} else {
uint8_t* dst = br->buf_ptr_;
int bytes_read = BrotliRead(br->input_, dst + br->tmp_bytes_read_,
size_t i;
int bytes_read;
uint8_t* dst = br->buf_;
if (br->next_in != br->buf_) {
int num = (int)(br->avail_in);
for (i = 0; i < num; i++) {
br->buf_[i] = br->next_in[i];
}
br->next_in = br->buf_;
br->tmp_bytes_read_ = num;
}
bytes_read = BrotliRead(br->input_, dst + br->tmp_bytes_read_,
(size_t) (BROTLI_READ_SIZE - br->tmp_bytes_read_));
if (bytes_read < 0) {
return 0;
@ -141,132 +101,135 @@ static BROTLI_INLINE int BrotliReadMoreInput(BrotliBitReader* const br) {
bytes_read += br->tmp_bytes_read_;
br->tmp_bytes_read_ = 0;
if (bytes_read < BROTLI_READ_SIZE) {
if (!br->finish_) {
if (!finish) {
br->tmp_bytes_read_ = bytes_read;
return 0;
}
br->eos_ = 1;
/* Store 32 bytes of zero after the stream end. */
#if (BROTLI_USE_64_BITS) && !defined(ARMv7)
*(uint64_t*)(dst + bytes_read) = 0;
*(uint64_t*)(dst + bytes_read + 8) = 0;
*(uint64_t*)(dst + bytes_read + 16) = 0;
*(uint64_t*)(dst + bytes_read + 24) = 0;
#else
memset(dst + bytes_read, 0, 32);
#endif
/* Store 128 bytes of zero after the stream end. */
memset(dst + bytes_read, 0, 128);
bytes_read += 128;
}
if (dst == br->buf_) {
/* Copy the head of the ringbuffer to the slack region. */
#if (BROTLI_USE_64_BITS) && !defined(ARMv7)
UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 32, br->buf_);
UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 24, br->buf_ + 8);
UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 16, br->buf_ + 16);
UNALIGNED_COPY64(br->buf_ + BROTLI_IBUF_SIZE - 8, br->buf_ + 24);
#else
memcpy(br->buf_ + (BROTLI_READ_SIZE << 1), br->buf_, 32);
#endif
br->buf_ptr_ = br->buf_ + BROTLI_READ_SIZE;
} else {
br->buf_ptr_ = br->buf_;
}
br->bit_end_pos_ += ((uint32_t)bytes_read << 3);
br->avail_in = (uint32_t)bytes_read;
br->next_in = br->buf_;
return 1;
}
}
/* Similar to BrotliReadMoreInput, but guarantees num bytes available. The
maximum value for num is 128 bytes, the slack region size. */
static BROTLI_INLINE int BrotliReadInputAmount(
/* Reports how many unread bytes the bit reader is still holding: the bytes
   left in the input buffer (br->avail_in) plus the whole bytes of br->val_
   that lie at or beyond the current bit position. */
static BROTLI_INLINE size_t BrotliGetRemainingBytes(BrotliBitReader* br) {
  /* Whole bytes of val_ that the bit position has already moved past. */
  const size_t consumed_in_val = br->bit_pos_ >> 3;
  const size_t held_in_val = sizeof(br->val_) - consumed_in_val;
  return br->avail_in + held_in_val;
}
/* Checks if there is at least num bytes left in the input ringbuffer (excluding
the bits remaining in br->val_). The maximum value for num is 128 bytes. */
static BROTLI_INLINE int BrotliCheckInputAmount(
BrotliBitReader* const br, size_t num) {
if (PREDICT_TRUE(br->bit_end_pos_ > (num << 3))) {
return 1;
} else if (PREDICT_FALSE(br->eos_)) {
return br->bit_pos_ <= br->bit_end_pos_;
} else {
uint8_t* dst = br->buf_ptr_;
int bytes_read = BrotliRead(br->input_, dst + br->tmp_bytes_read_,
(size_t) (BROTLI_READ_SIZE - br->tmp_bytes_read_));
if (bytes_read < 0) {
return 0;
}
bytes_read += br->tmp_bytes_read_;
br->tmp_bytes_read_ = 0;
if (bytes_read < BROTLI_READ_SIZE) {
if (!br->finish_) {
br->tmp_bytes_read_ = bytes_read;
return 0;
}
br->eos_ = 1;
/* Store num bytes of zero after the stream end. */
memset(dst + bytes_read, 0, num);
}
if (dst == br->buf_) {
/* Copy the head of the ringbuffer to the slack region. */
memcpy(br->buf_ + (BROTLI_READ_SIZE << 1), br->buf_, num);
br->buf_ptr_ = br->buf_ + BROTLI_READ_SIZE;
} else {
br->buf_ptr_ = br->buf_;
}
br->bit_end_pos_ += ((uint32_t)bytes_read << 3);
return 1;
}
return br->avail_in >= num;
}
/* Guarantees that there are at least 24 bits in the buffer. */
static BROTLI_INLINE void BrotliFillBitWindow(BrotliBitReader* const br) {
#if (BROTLI_USE_64_BITS)
/* Guarantees that there are at least n_bits in the buffer.
n_bits should be in the range [1..24] */
static BROTLI_INLINE void BrotliFillBitWindow(
BrotliBitReader* const br, int n_bits) {
#if (BROTLI_64_BITS_LITTLE_ENDIAN)
if (br->bit_pos_ >= 32) {
/*
* Advances the Read buffer by 4 bytes to make room for reading next
* 24 bits.
* The expression below needs a little-endian arch to work correctly.
* This gives a large speedup for decoding speed.
*/
br->val_ >>= 32;
br->val_ |= ((uint64_t)(*(const uint32_t*)(
br->buf_ + (br->pos_ & BROTLI_IBUF_MASK)))) << 32;
br->pos_ += 4;
br->bit_pos_ -= 32;
br->bit_end_pos_ -= 32;
br->bit_pos_ ^= 32; /* here same as -= 32 because of the if condition */
br->val_ |= ((uint64_t)(*(const uint32_t*)(br->next_in))) << 32;
br->avail_in -= 4;
br->next_in += 4;
}
#elif (BROTLI_LITTLE_ENDIAN)
if (br->bit_pos_ >= 16) {
br->val_ >>= 16;
br->bit_pos_ ^= 16; /* here same as -= 16 because of the if condition */
br->val_ |= ((uint32_t)(*(const uint16_t*)(br->next_in))) << 16;
br->avail_in -= 2;
br->next_in += 2;
}
if (!IS_CONSTANT(n_bits) || (n_bits > 16)) {
if (br->bit_pos_ >= 8) {
br->val_ >>= 8;
br->bit_pos_ ^= 8; /* here same as -= 8 because of the if condition */
br->val_ |= ((uint32_t)*br->next_in) << 24;
--br->avail_in;
++br->next_in;
}
}
#else
ShiftBytes32(br);
while (br->bit_pos_ >= 8) {
br->val_ >>= 8;
br->val_ |= ((uint32_t)*br->next_in) << 24;
++br->pos_;
br->bit_pos_ -= 8;
--br->avail_in;
}
#endif
}
/* Reads the specified number of bits from Read Buffer. */
/* Same as BrotliGetBits except that the result is not masked: the low n_bits
   of the return value are the requested bits, and any higher bits are
   whatever follows them in the window. The bit position is not advanced. */
static BROTLI_INLINE uint32_t BrotliGetBitsUnmasked(
    BrotliBitReader* const br, int n_bits) {
  uint32_t raw;
  BrotliFillBitWindow(br, n_bits);
  raw = (uint32_t)(br->val_ >> br->bit_pos_);
  return raw;
}
/* Peeks at the next n_bits of input: refills the bit window first, then
   returns the bits masked to n_bits. The bit position is left unchanged. */
static BROTLI_INLINE uint32_t BrotliGetBits(
    BrotliBitReader* const br, int n_bits) {
  uint32_t window;
  BrotliFillBitWindow(br, n_bits);
  window = (uint32_t)(br->val_ >> br->bit_pos_);
  return window & BitMask(n_bits);
}
/* Consumes n_bits from the window by moving the bit position forward.
   Performs no refill. */
static BROTLI_INLINE void BrotliDropBits(
    BrotliBitReader* const br, int n_bits) {
  const uint32_t step = (uint32_t)n_bits;
  br->bit_pos_ = br->bit_pos_ + step;
}
/* Reads the specified number of bits from br and advances the bit pos. */
static BROTLI_INLINE uint32_t BrotliReadBits(
BrotliBitReader* const br, int n_bits) {
uint32_t val;
#if (BROTLI_USE_64_BITS)
#if defined(ARMv7)
if ((64 - br->bit_pos_) < ((uint32_t) n_bits)) {
BrotliFillBitWindow(br);
}
BrotliFillBitWindow(br, n_bits);
val = (uint32_t)(br->val_ >> br->bit_pos_) & BitMask(n_bits);
#else
BrotliFillBitWindow(br);
val = (uint32_t)(br->val_ >> br->bit_pos_) & BitMask(n_bits);
#endif /* defined (ARMv7) */
#else
/*
* The if statement gives 2-4% speed boost on Canterbury data set with
* asm.js/firefox/x86-64.
*/
if ((32 - br->bit_pos_) < ((uint32_t) n_bits)) {
BrotliFillBitWindow(br);
}
val = (br->val_ >> br->bit_pos_) & BitMask(n_bits);
#endif /* BROTLI_USE_64_BITS */
#ifdef BROTLI_DECODE_DEBUG
printf("[BrotliReadBits] %010d %2d val: %6x\n",
(br->pos_ << 3) + br->bit_pos_ - 64, n_bits, val);
printf("[BrotliReadBits] %d %d %d val: %6x\n",
(int)br->avail_in, (int)br->bit_pos_, n_bits, val);
#endif
br->bit_pos_ += (uint32_t)n_bits;
return val;
}
/* Skips forward to the next byte boundary by reading the 0..7 padding bits.
   Returns 1 when every skipped bit was zero, 0 otherwise. */
static BROTLI_INLINE int BrotliJumpToByteBoundary(BrotliBitReader* br) {
  /* Bit position rounded up to a multiple of 8. */
  const uint32_t aligned = (br->bit_pos_ + 7u) & ~(uint32_t)7u;
  const int pad_count = (int)(aligned - br->bit_pos_);
  return BrotliReadBits(br, pad_count) == 0;
}
/* Copies num buffered input bytes to dest. Whole bytes still held in
   br->val_ are emitted first; the rest comes straight from br->next_in.
   num must not exceed BrotliGetRemainingBytes. The bit position is reset to
   0 on return, so the bit reader has to be warmed up again before further
   bit reads. */
static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest,
                                          BrotliBitReader* br, size_t num) {
  const uint32_t val_bits = BROTLI_64_BITS_LITTLE_ENDIAN ? 64 : 32;
  /* Drain the whole bytes that remain inside val_. */
  for (; num > 0 && br->bit_pos_ + 8 <= val_bits; --num) {
    *dest++ = (uint8_t)(br->val_ >> br->bit_pos_);
    br->bit_pos_ += 8;
  }
  /* Everything else is still sitting in the input buffer. */
  memcpy(dest, br->next_in, num);
  br->next_in += num;
  br->avail_in -= (uint32_t)num;
  br->bit_pos_ = 0;
}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

File diff suppressed because it is too large Load Diff

View File

@ -44,7 +44,7 @@ typedef enum {
#else
#define BROTLI_FAILURE() \
BrotliFailure(__FILE__, __LINE__, __PRETTY_FUNCTION__)
inline BrotliResult BrotliFailure(const char *f, int l, const char *fn) {
static inline BrotliResult BrotliFailure(const char *f, int l, const char *fn) {
fprintf(stderr, "ERROR at %s:%d (%s)\n", f, l, fn);
fflush(stderr);
return BROTLI_RESULT_ERROR;
@ -56,9 +56,9 @@ inline BrotliResult BrotliFailure(const char *f, int l, const char *fn) {
/* or if it has two meta-blocks, where the first is uncompressed and the */
/* second is empty. */
/* Returns 1 on success, 0 on failure. */
BrotliResult BrotliDecompressedSize(size_t encoded_size,
const uint8_t* encoded_buffer,
size_t* decoded_size);
int BrotliDecompressedSize(size_t encoded_size,
const uint8_t* encoded_buffer,
size_t* decoded_size);
/* Decompresses the data in encoded_buffer into decoded_buffer, and sets */
/* *decoded_size to the decompressed length. */
@ -150,6 +150,9 @@ BrotliResult BrotliDecompressBufferStreaming(size_t* available_in,
void BrotliSetCustomDictionary(
size_t size, const uint8_t* dict, BrotliState* s);
/* Escalate internal functions visibility; for testing purposes only. */
void InverseMoveToFrontTransformForTesting(uint8_t* v, int l, BrotliState* s);
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@ -9478,7 +9478,7 @@ static const int kBrotliDictionaryOffsetsByLength[] = {
115968, 118528, 119872, 121280, 122016,
};
static const int kBrotliDictionarySizeBitsByLength[] = {
static const int8_t kBrotliDictionarySizeBitsByLength[] = {
0, 0, 0, 0, 10, 10, 11, 11, 10, 10,
10, 10, 10, 9, 9, 8, 7, 7, 8, 7,
7, 6, 6, 5, 5,

View File

@ -55,7 +55,7 @@ static BROTLI_INLINE void ReplicateValue(HuffmanCode* table,
/* Returns the table width of the next 2nd level table. count is the histogram
of bit lengths for the remaining symbols, len is the code length of the next
processed symbol */
static BROTLI_INLINE int NextTableBitSize(const int* const count,
static BROTLI_INLINE int NextTableBitSize(const uint16_t* const count,
int len, int root_bits) {
int left = 1 << (len - root_bits);
while (len < MAX_LENGTH) {
@ -70,11 +70,12 @@ static BROTLI_INLINE int NextTableBitSize(const int* const count,
int BrotliBuildHuffmanTable(HuffmanCode* root_table,
int root_bits,
const uint8_t* const code_lengths,
int code_lengths_size) {
int code_lengths_size,
uint16_t *count) {
HuffmanCode code; /* current table entry */
HuffmanCode* table; /* next available space in table */
int len; /* current code length */
int symbol; /* symbol index in original or sorted table */
int symbol; /* symbol index in original or sorted table */
int key; /* reversed prefix code */
int step; /* step size to replicate values in current table */
int low; /* low bits for current root entry */
@ -82,29 +83,32 @@ int BrotliBuildHuffmanTable(HuffmanCode* root_table,
int table_bits; /* key length of current table */
int table_size; /* size of current table */
int total_size; /* sum of root table size and 2nd level table sizes */
int sorted[MAX_CODE_LENGTHS_SIZE]; /* symbols sorted by code length */
int count[MAX_LENGTH + 1] = { 0 }; /* number of codes of each length */
int offset[MAX_LENGTH + 1]; /* offsets in sorted table for each length */
/* symbols sorted by code length */
uint16_t sorted[MAX_CODE_LENGTHS_SIZE];
/* offsets in sorted table for each length */
uint16_t offset[MAX_LENGTH + 1];
int max_length = 1;
if (PREDICT_FALSE(code_lengths_size > MAX_CODE_LENGTHS_SIZE)) {
return 0;
}
/* build histogram of code lengths */
for (symbol = 0; symbol < code_lengths_size; symbol++) {
count[code_lengths[symbol]]++;
}
/* generate offsets into sorted symbol table by code length */
offset[1] = 0;
for (len = 1; len < MAX_LENGTH; len++) {
offset[len + 1] = offset[len] + count[len];
{
uint16_t sum = 0;
for (len = 1; len <= MAX_LENGTH; len++) {
offset[len] = sum;
if (count[len]) {
sum = (uint16_t)(sum + count[len]);
max_length = len;
}
}
}
/* sort symbols by length, by symbol order within each length */
for (symbol = 0; symbol < code_lengths_size; symbol++) {
if (code_lengths[symbol] != 0) {
sorted[offset[code_lengths[symbol]]++] = symbol;
sorted[offset[code_lengths[symbol]]++] = (uint16_t)symbol;
}
}
@ -124,22 +128,38 @@ int BrotliBuildHuffmanTable(HuffmanCode* root_table,
}
/* fill in root table */
/* let's reduce the table size to a smaller size if possible, and */
/* create the repetitions by memcpy if possible in the coming loop */
if (table_bits > max_length) {
table_bits = max_length;
table_size = 1 << table_bits;
}
key = 0;
symbol = 0;
for (len = 1, step = 2; len <= root_bits; ++len, step <<= 1) {
for (; count[len] > 0; --count[len]) {
code.bits = (uint8_t)(len);
code.bits = 1;
step = 2;
do {
for (; count[code.bits] != 0; --count[code.bits]) {
code.value = (uint16_t)sorted[symbol++];
ReplicateValue(&table[key], step, table_size, code);
key = GetNextKey(key, len);
key = GetNextKey(key, code.bits);
}
step <<= 1;
} while (++code.bits <= table_bits);
/* if root_bits != table_bits we only created one fraction of the */
/* table, and we need to replicate it now. */
while (total_size != table_size) {
memcpy(&table[table_size], &table[0],
(size_t)table_size * sizeof(table[0]));
table_size <<= 1;
}
/* fill in 2nd level tables and add pointers to root table */
mask = total_size - 1;
low = -1;
for (len = root_bits + 1, step = 2; len <= MAX_LENGTH; ++len, step <<= 1) {
for (; count[len] > 0; --count[len]) {
for (len = root_bits + 1, step = 2; len <= max_length; ++len, step <<= 1) {
for (; count[len] != 0; --count[len]) {
if ((key & mask) != low) {
table += table_size;
table_bits = NextTableBitSize(count, len, root_bits);
@ -159,22 +179,114 @@ int BrotliBuildHuffmanTable(HuffmanCode* root_table,
return total_size;
}
/* Builds a lookup table of (1 << root_bits) entries for a small prefix code
   whose symbols are listed directly in val. The code shape is selected by
   num_symbols:
     0: one symbol, val[0], code length 0;
     1: two symbols, val[0..1], code length 1 each;
     2: three symbols, val[0] with length 1 and val[1..2] with length 2;
     3: four symbols, val[0..3], code length 2 each;
     4: four symbols, val[0] length 1, val[1] length 2, val[2..3] length 3.
   Symbols that share a code length are placed in ascending value order. For
   num_symbols 3 and 4 that ordering is done by rearranging val in place.
   Any other num_symbols writes no pattern, so table[0] is replicated as is.
   The smallest complete pattern is written first and then doubled until the
   table holds (1 << root_bits) entries; root_bits must therefore be large
   enough for that pattern to fit. Returns the final table size. */
int BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
                                  int root_bits,
                                  uint16_t *val,
                                  uint32_t num_symbols) {
  const int goal_size = 1 << root_bits;
  int filled = 1;  /* number of leading entries holding the base pattern */

  if (num_symbols == 0) {
    table[0].bits = 0;
    table[0].value = val[0];
  } else if (num_symbols == 1) {
    const int ascending = val[1] > val[0];
    table[0].value = ascending ? val[0] : val[1];
    table[1].value = ascending ? val[1] : val[0];
    table[0].bits = 1;
    table[1].bits = 1;
    filled = 2;
  } else if (num_symbols == 2) {
    const int ascending = val[2] > val[1];
    /* The 1-bit symbol owns every even slot. */
    table[0].value = val[0];
    table[0].bits = 1;
    table[2].value = val[0];
    table[2].bits = 1;
    /* The two 2-bit symbols share the odd slots, smaller value first. */
    table[1].value = ascending ? val[1] : val[2];
    table[3].value = ascending ? val[2] : val[1];
    table[1].bits = 2;
    table[3].bits = 2;
    filled = 4;
  } else if (num_symbols == 3) {
    int i;
    /* Sort the four symbols ascending, in place. */
    for (i = 1; i < 4; ++i) {
      const uint16_t moving = val[i];
      int j = i;
      while (j > 0 && val[j - 1] > moving) {
        val[j] = val[j - 1];
        --j;
      }
      val[j] = moving;
    }
    /* Table slots are indexed by bit-reversed code, hence the 0,2,1,3 order. */
    table[0].value = val[0];
    table[1].value = val[2];
    table[2].value = val[1];
    table[3].value = val[3];
    for (i = 0; i < 4; ++i) {
      table[i].bits = 2;
    }
    filled = 4;
  } else if (num_symbols == 4) {
    /* Which val[] entry and which code length each of the 8 slots gets. */
    static const uint8_t kSymbolAt[8] = { 0, 1, 0, 2, 0, 1, 0, 3 };
    static const uint8_t kBitsAt[8] = { 1, 2, 1, 3, 1, 2, 1, 3 };
    int i;
    /* Order the two 3-bit symbols ascending, in place. */
    if (val[3] < val[2]) {
      const uint16_t held = val[3];
      val[3] = val[2];
      val[2] = held;
    }
    for (i = 0; i < 8; ++i) {
      table[i].value = val[kSymbolAt[i]];
      table[i].bits = kBitsAt[i];
    }
    filled = 8;
  }

  /* Double the filled prefix until the whole root table is covered. */
  for (; filled != goal_size; filled <<= 1) {
    memcpy(&table[filled], &table[0], (size_t)filled * sizeof(table[0]));
  }
  return goal_size;
}
void BrotliHuffmanTreeGroupInit(HuffmanTreeGroup* group, int alphabet_size,
int ntrees) {
group->alphabet_size = alphabet_size;
group->num_htrees = ntrees;
group->codes = (HuffmanCode*)malloc(
sizeof(HuffmanCode) * (size_t)(ntrees * BROTLI_HUFFMAN_MAX_TABLE_SIZE));
group->htrees = (HuffmanCode**)malloc(sizeof(HuffmanCode*) * (size_t)ntrees);
/* Pack two mallocs into one */
const size_t code_size =
sizeof(HuffmanCode) * (size_t)(ntrees * BROTLI_HUFFMAN_MAX_TABLE_SIZE);
const size_t htree_size = sizeof(HuffmanCode*) * (size_t)ntrees;
char *p = (char*)malloc(code_size + htree_size);
group->alphabet_size = (int16_t)alphabet_size;
group->num_htrees = (int16_t)ntrees;
group->codes = (HuffmanCode*)p;
group->htrees = (HuffmanCode**)(p + code_size);
}
void BrotliHuffmanTreeGroupRelease(HuffmanTreeGroup* group) {
if (group->codes) {
free(group->codes);
}
if (group->htrees) {
free(group->htrees);
}
}
#if defined(__cplusplus) || defined(c_plusplus)

View File

@ -38,14 +38,20 @@ typedef struct {
int BrotliBuildHuffmanTable(HuffmanCode* root_table,
int root_bits,
const uint8_t* const code_lengths,
int code_lengths_size);
int code_lengths_size,
uint16_t *count_arg);
int BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
int root_bits,
uint16_t *symbols,
uint32_t num_symbols);
/* Contains a collection of huffman trees with the same alphabet size. */
typedef struct {
int alphabet_size;
int num_htrees;
HuffmanCode* codes;
HuffmanCode** htrees;
HuffmanCode* codes;
int16_t alphabet_size;
int16_t num_htrees;
} HuffmanTreeGroup;
void BrotliHuffmanTreeGroupInit(HuffmanTreeGroup* group,

View File

@ -25,6 +25,16 @@
#define __has_builtin(x) 0
#endif
#ifndef __has_attribute
#define __has_attribute(x) 0
#endif
#ifndef __has_feature
#define __has_feature(x) 0
#endif
#define BROTLI_ASAN_BUILD __has_feature(address_sanitizer)
/* Define "PREDICT_TRUE" and "PREDICT_FALSE" macros for capable compilers.
To apply compiler hint, enclose the branching condition into macros, like this:
@ -51,10 +61,70 @@ OR:
#define PREDICT_TRUE(x) (x)
#endif
/* IS_CONSTANT macros returns true for compile-time constant expressions. */
#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ > 0) || \
(defined(__llvm__) && __has_builtin(__builtin_constant_p))
#define IS_CONSTANT(x) __builtin_constant_p(x)
#else
#define IS_CONSTANT(x) 0
#endif
#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ > 0) || \
(defined(__llvm__) && __has_attribute(always_inline))
#define ATTRIBUTE_ALWAYS_INLINE __attribute__ ((always_inline))
#else
#define ATTRIBUTE_ALWAYS_INLINE
#endif
#ifndef _MSC_VER
#if defined(__cplusplus) || !defined(__STRICT_ANSI__) \
|| __STDC_VERSION__ >= 199901L
#define BROTLI_INLINE inline ATTRIBUTE_ALWAYS_INLINE
#else
#define BROTLI_INLINE
#endif
#else /* _MSC_VER */
#define BROTLI_INLINE __forceinline
#endif /* _MSC_VER */
#ifdef BROTLI_DECODE_DEBUG
#define BROTLI_DCHECK(x) assert(x)
#else
#define BROTLI_DCHECK(x)
#endif
#if (defined(__x86_64__) || defined(_M_X64) || defined(__aarch64__) || \
defined(__PPC64__))
#define BROTLI_64_BITS 1
#define BROTLI_PRELOAD_SYMBOLS 1
#else
#define BROTLI_64_BITS 0
#define BROTLI_PRELOAD_SYMBOLS 0
#endif
#if (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
#define BROTLI_LITTLE_ENDIAN 1
#else
#define BROTLI_LITTLE_ENDIAN 0
#endif
#if (BROTLI_64_BITS && BROTLI_LITTLE_ENDIAN)
#define BROTLI_64_BITS_LITTLE_ENDIAN 1
#else
#define BROTLI_64_BITS_LITTLE_ENDIAN 0
#endif
#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) || \
(defined(__llvm__) && __has_attribute(noinline))
#define BROTLI_NOINLINE __attribute__ ((noinline))
#else
#define BROTLI_NOINLINE
#endif
#if BROTLI_ASAN_BUILD
#define BROTLI_NO_ASAN __attribute__((no_sanitize("address"))) BROTLI_NOINLINE
#else
#define BROTLI_NO_ASAN
#endif
#endif /* BROTLI_DEC_PORT_H_ */

View File

@ -23,8 +23,8 @@
/* Represents the range of values belonging to a prefix code: */
/* [offset, offset + 2^nbits) */
struct PrefixCodeRange {
int offset;
int nbits;
int16_t offset;
int8_t nbits;
};
static const struct PrefixCodeRange kBlockLengthPrefixCode[] = {
@ -55,12 +55,12 @@ static const struct PrefixCodeRange kCopyLengthPrefixCode[] = {
{326, 8}, { 582, 9}, {1094, 10}, {2118, 24},
};
static const int kInsertRangeLut[9] = {
0, 0, 8, 8, 0, 16, 8, 16, 16,
static const uint8_t kInsertRangeLut[11] = {
0, 0, 0, 0, 8, 8, 0, 16, 8, 16, 16,
};
static const int kCopyRangeLut[9] = {
0, 8, 0, 8, 16, 0, 16, 8, 16,
static const uint8_t kCopyRangeLut[11] = {
0, 8, 0, 8, 0, 8, 16, 0, 16, 8, 16,
};
#endif /* BROTLI_DEC_PREFIX_H_ */

View File

@ -27,7 +27,10 @@ extern "C" {
static int CheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
const uint64_t total_size = nmemb * size;
if (nmemb == 0) return 1;
if ((uint64_t)size > BROTLI_MAX_ALLOCABLE_MEMORY / nmemb) return 0;
if ((nmemb | size) >> 31) {
return 0;
}
if (((size * nmemb) >> BROTLI_MAX_ALLOCABLE_MEMORY_BITS) != 0) return 0;
if (total_size != (size_t)total_size) return 0;
return 1;
}

View File

@ -25,7 +25,7 @@ extern "C" {
#endif
/* This is the maximum memory amount that we will ever try to allocate. */
#define BROTLI_MAX_ALLOCABLE_MEMORY (1 << 30)
#define BROTLI_MAX_ALLOCABLE_MEMORY_BITS 30
/* size-checking safe malloc/calloc: verify that the requested size is not too
large, or return NULL. You don't need to call these for constructs like

View File

@ -13,6 +13,7 @@
limitations under the License.
*/
#include "./huffman.h"
#include "./state.h"
#include <stdlib.h>
@ -23,11 +24,9 @@ extern "C" {
#endif
void BrotliStateInit(BrotliState* s) {
int i;
s->state = BROTLI_STATE_UNINITED;
s->sub_state[0] = BROTLI_STATE_SUB_NONE;
s->sub_state[1] = BROTLI_STATE_SUB_NONE;
s->sub0_state = BROTLI_STATE_SUB0_NONE;
s->sub1_state = BROTLI_STATE_SUB1_NONE;
s->block_type_trees = NULL;
s->block_len_trees = NULL;
@ -39,21 +38,90 @@ void BrotliStateInit(BrotliState* s) {
s->context_map_slice = NULL;
s->dist_context_map_slice = NULL;
for (i = 0; i < 3; ++i) {
s->hgroup[i].codes = NULL;
s->hgroup[i].htrees = NULL;
}
s->literal_hgroup.codes = NULL;
s->literal_hgroup.htrees = NULL;
s->insert_copy_hgroup.codes = NULL;
s->insert_copy_hgroup.htrees = NULL;
s->distance_hgroup.codes = NULL;
s->distance_hgroup.htrees = NULL;
s->code_lengths = NULL;
s->context_map_table = NULL;
s->custom_dict = NULL;
s->custom_dict_size = 0;
s->input_end = 0;
s->window_bits = 0;
s->max_distance = 0;
s->dist_rb[0] = 16;
s->dist_rb[1] = 15;
s->dist_rb[2] = 11;
s->dist_rb[3] = 4;
s->dist_rb_idx = 0;
s->block_type_trees = NULL;
s->block_len_trees = NULL;
s->mtf_upper_bound = 255;
}
/* Resets the per-meta-block fields of the decoder state to their starting
   values: remaining length 0, one block type per category with a block
   length of 1 << 28, the block-type ring buffer set to {1, 0} per category,
   and every per-meta-block pointer and tree group cleared to NULL.
   Nothing is freed here; the pointers are simply overwritten. */
void BrotliStateMetablockBegin(BrotliState* s) {
  int i;

  s->meta_block_remaining_len = 0;

  /* Three block categories, two ring-buffer slots each. */
  for (i = 0; i < 3; ++i) {
    s->block_length[i] = 1 << 28;
    s->num_block_types[i] = 1;
    s->block_type_rb[2 * i] = 1;
    s->block_type_rb[2 * i + 1] = 0;
  }

  s->context_map = NULL;
  s->context_modes = NULL;
  s->dist_context_map = NULL;

  s->context_map_slice = NULL;
  s->dist_context_map_slice = NULL;
  s->literal_htree_index = 0;
  s->dist_htree_index = 0;

  s->context_lookup1 = NULL;
  s->context_lookup2 = NULL;

  s->literal_hgroup.codes = NULL;
  s->literal_hgroup.htrees = NULL;
  s->insert_copy_hgroup.codes = NULL;
  s->insert_copy_hgroup.htrees = NULL;
  s->distance_hgroup.codes = NULL;
  s->distance_hgroup.htrees = NULL;
}
/* Frees the buffers that belong to a single meta-block (context modes, both
   context maps and the three Huffman tree groups) and clears the matching
   pointers to NULL. */
void BrotliStateCleanupAfterMetablock(BrotliState* s) {
  /* free(NULL) is a no-op, so no guard is needed before each call. */
  free(s->context_modes);
  s->context_modes = NULL;
  free(s->context_map);
  s->context_map = NULL;
  free(s->dist_context_map);
  s->dist_context_map = NULL;

  BrotliHuffmanTreeGroupRelease(&s->literal_hgroup);
  s->literal_hgroup.codes = NULL;
  s->literal_hgroup.htrees = NULL;

  BrotliHuffmanTreeGroupRelease(&s->insert_copy_hgroup);
  s->insert_copy_hgroup.codes = NULL;
  s->insert_copy_hgroup.htrees = NULL;

  BrotliHuffmanTreeGroupRelease(&s->distance_hgroup);
  s->distance_hgroup.codes = NULL;
  s->distance_hgroup.htrees = NULL;
}
void BrotliStateCleanup(BrotliState* s) {
int i;
if (s->context_map_table != 0) {
free(s->context_map_table);
}
@ -70,9 +138,9 @@ void BrotliStateCleanup(BrotliState* s) {
if (s->dist_context_map != 0) {
free(s->dist_context_map);
}
for (i = 0; i < 3; ++i) {
BrotliHuffmanTreeGroupRelease(&s->hgroup[i]);
}
BrotliHuffmanTreeGroupRelease(&s->literal_hgroup);
BrotliHuffmanTreeGroupRelease(&s->insert_copy_hgroup);
BrotliHuffmanTreeGroupRelease(&s->distance_hgroup);
if (s->ringbuffer != 0) {
free(s->ringbuffer);

View File

@ -29,52 +29,60 @@ extern "C" {
#endif
typedef enum {
BROTLI_STATE_UNINITED = 0,
BROTLI_STATE_BITREADER_WARMUP = 1,
BROTLI_STATE_METABLOCK_BEGIN = 10,
BROTLI_STATE_METABLOCK_HEADER_1 = 11,
BROTLI_STATE_METABLOCK_HEADER_2 = 12,
BROTLI_STATE_BLOCK_BEGIN = 13,
BROTLI_STATE_BLOCK_INNER = 14,
BROTLI_STATE_BLOCK_DISTANCE = 15,
BROTLI_STATE_BLOCK_POST = 16,
BROTLI_STATE_UNCOMPRESSED = 17,
BROTLI_STATE_METADATA = 18,
BROTLI_STATE_BLOCK_INNER_WRITE = 19,
BROTLI_STATE_METABLOCK_DONE = 20,
BROTLI_STATE_BLOCK_POST_WRITE_1 = 21,
BROTLI_STATE_BLOCK_POST_WRITE_2 = 22,
BROTLI_STATE_BLOCK_POST_CONTINUE = 23,
BROTLI_STATE_HUFFMAN_CODE_0 = 30,
BROTLI_STATE_HUFFMAN_CODE_1 = 31,
BROTLI_STATE_HUFFMAN_CODE_2 = 32,
BROTLI_STATE_CONTEXT_MAP_1 = 33,
BROTLI_STATE_CONTEXT_MAP_2 = 34,
BROTLI_STATE_TREE_GROUP = 35,
BROTLI_STATE_SUB_NONE = 50,
BROTLI_STATE_SUB_UNCOMPRESSED_SHORT = 51,
BROTLI_STATE_SUB_UNCOMPRESSED_FILL = 52,
BROTLI_STATE_SUB_UNCOMPRESSED_COPY = 53,
BROTLI_STATE_SUB_UNCOMPRESSED_WARMUP = 54,
BROTLI_STATE_SUB_UNCOMPRESSED_WRITE_1 = 55,
BROTLI_STATE_SUB_UNCOMPRESSED_WRITE_2 = 56,
BROTLI_STATE_SUB_UNCOMPRESSED_WRITE_3 = 57,
BROTLI_STATE_SUB_HUFFMAN_LENGTH_BEGIN = 60,
BROTLI_STATE_SUB_HUFFMAN_LENGTH_SYMBOLS = 61,
BROTLI_STATE_SUB_HUFFMAN_DONE = 62,
BROTLI_STATE_SUB_TREE_GROUP = 70,
BROTLI_STATE_SUB_CONTEXT_MAP_HUFFMAN = 80,
BROTLI_STATE_SUB_CONTEXT_MAPS = 81,
BROTLI_STATE_DONE = 100
BROTLI_STATE_UNINITED,
BROTLI_STATE_BITREADER_WARMUP,
BROTLI_STATE_METABLOCK_BEGIN,
BROTLI_STATE_METABLOCK_HEADER_1,
BROTLI_STATE_METABLOCK_HEADER_2,
BROTLI_STATE_BLOCK_BEGIN,
BROTLI_STATE_BLOCK_INNER,
BROTLI_STATE_BLOCK_DISTANCE,
BROTLI_STATE_BLOCK_POST,
BROTLI_STATE_UNCOMPRESSED,
BROTLI_STATE_METADATA,
BROTLI_STATE_BLOCK_INNER_WRITE,
BROTLI_STATE_METABLOCK_DONE,
BROTLI_STATE_BLOCK_POST_WRITE_1,
BROTLI_STATE_BLOCK_POST_WRITE_2,
BROTLI_STATE_BLOCK_POST_WRAP_COPY,
BROTLI_STATE_HUFFMAN_CODE_0,
BROTLI_STATE_HUFFMAN_CODE_1,
BROTLI_STATE_HUFFMAN_CODE_2,
BROTLI_STATE_CONTEXT_MAP_1,
BROTLI_STATE_CONTEXT_MAP_2,
BROTLI_STATE_TREE_GROUP,
BROTLI_STATE_DONE
} BrotliRunningState;
/* Values for BrotliState.sub0_state, the first "state inside function call"
   field. Enumerators are numbered implicitly, so BROTLI_STATE_SUB0_NONE is 0.
   NOTE(review): judging by the names, these cover the uncompressed-block copy,
   tree-group decoding and context-map decoding steps; confirm against the
   decoder source. */
typedef enum {
BROTLI_STATE_SUB0_NONE,
BROTLI_STATE_SUB0_UNCOMPRESSED_SHORT,
BROTLI_STATE_SUB0_UNCOMPRESSED_FILL,
BROTLI_STATE_SUB0_UNCOMPRESSED_COPY,
BROTLI_STATE_SUB0_UNCOMPRESSED_WARMUP,
BROTLI_STATE_SUB0_UNCOMPRESSED_WRITE_1,
BROTLI_STATE_SUB0_UNCOMPRESSED_WRITE_2,
BROTLI_STATE_SUB0_UNCOMPRESSED_WRITE_3,
BROTLI_STATE_SUB0_TREE_GROUP,
BROTLI_STATE_SUB0_CONTEXT_MAP_HUFFMAN,
BROTLI_STATE_SUB0_CONTEXT_MAPS
} BrotliRunningSub0State;
/* Values for BrotliState.sub1_state, the second "state inside function call"
   field. Enumerators are numbered implicitly, so BROTLI_STATE_SUB1_NONE is 0.
   NOTE(review): the names suggest these track progress while reading a
   Huffman code's code lengths; confirm against the decoder source. */
typedef enum {
BROTLI_STATE_SUB1_NONE,
BROTLI_STATE_SUB1_HUFFMAN_LENGTH_BEGIN,
BROTLI_STATE_SUB1_HUFFMAN_LENGTH_SYMBOLS,
BROTLI_STATE_SUB1_HUFFMAN_DONE
} BrotliRunningSub1State;
typedef struct {
BrotliRunningState state;
BrotliRunningState sub_state[2]; /* State inside function call */
BrotliRunningSub0State sub0_state; /* State inside function call */
BrotliRunningSub1State sub1_state; /* State inside function call */
int pos;
int input_end;
int window_bits;
uint32_t window_bits;
int max_backward_distance;
int max_distance;
int ringbuffer_size;
@ -85,10 +93,10 @@ typedef struct {
/* some special distance codes. */
int dist_rb[4];
int dist_rb_idx;
/* The previous 2 bytes used for context. */
uint8_t prev_byte1;
uint8_t prev_byte2;
HuffmanTreeGroup hgroup[3];
HuffmanTreeGroup literal_hgroup;
HuffmanTreeGroup insert_copy_hgroup;
HuffmanTreeGroup distance_hgroup;
HuffmanCode* block_type_trees;
HuffmanCode* block_len_trees;
BrotliBitReader br;
@ -102,45 +110,38 @@ typedef struct {
int is_metadata;
int is_uncompressed;
int block_length[3];
int block_type[3];
int num_block_types[3];
int block_type_rb[6];
int block_type_rb_index[3];
int distance_postfix_bits;
int num_direct_distance_codes;
int distance_postfix_mask;
int num_distance_codes;
uint8_t* context_map;
uint8_t* context_modes;
int num_literal_htrees;
uint8_t* dist_context_map;
int num_dist_htrees;
int context_offset;
uint8_t* context_map_slice;
uint8_t literal_htree_index;
int dist_context_offset;
uint8_t* dist_context_map_slice;
uint8_t literal_htree_index;
uint8_t dist_htree_index;
int context_lookup_offset1;
int context_lookup_offset2;
uint8_t context_mode;
uint8_t prev_code_len;
uint8_t repeat_code_len;
const uint8_t* context_lookup1;
const uint8_t* context_lookup2;
HuffmanCode* htree_command;
int cmd_code;
int range_idx;
int insert_code;
int copy_code;
int insert_length;
int copy_length;
int distance_code;
int distance;
const uint8_t* copy_src;
uint8_t* copy_dst;
/* For CopyUncompressedBlockToOutput */
int nbytes;
/* For partial write operations */
int to_write;
int partially_written;
/* For HuffmanTreeGroupDecode */
@ -148,16 +149,17 @@ typedef struct {
/* For ReadHuffmanCodeLengths */
int symbol;
uint8_t prev_code_len;
int repeat;
uint8_t repeat_code_len;
int space;
HuffmanCode table[32];
uint8_t code_length_code_lengths[18];
/* For ReadHuffmanCode */
int simple_code_or_skip;
uint8_t* code_lengths;
/* The maximum non-zero code length index in code lengths */
uint32_t huffman_max_nonzero;
/* Population counts for the code lengths */
uint16_t code_length_histo[16];
/* For HuffmanTreeGroupDecode */
int htree_index;
@ -168,6 +170,10 @@ typedef struct {
int max_run_length_prefix;
HuffmanCode* context_map_table;
/* For InverseMoveToFrontTransform */
int mtf_upper_bound;
uint8_t mtf[256];
/* For custom dictionaries */
const uint8_t* custom_dict;
int custom_dict_size;
@ -175,6 +181,8 @@ typedef struct {
void BrotliStateInit(BrotliState* s);
void BrotliStateCleanup(BrotliState* s);
void BrotliStateMetablockBegin(BrotliState* s);
void BrotliStateCleanupAfterMetablock(BrotliState* s);
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */

View File

@ -71,38 +71,6 @@ BrotliOutput BrotliInitMemOutput(uint8_t* buffer, size_t length,
return output;
}
/* Input callback that reads from standard input.
   data:  unused.
   buf:   destination for the bytes read.
   count: maximum number of bytes to read.
   Returns the result of read() converted to int; when _WIN32 is defined
   nothing is read and -1 is returned. */
int BrotliStdinInputFunction(void* data, uint8_t* buf, size_t count) {
  int result = -1;
  (void) data;  /* Unused; silences LLVM's unused-parameter warning. */
#ifndef _WIN32
  result = (int)read(STDIN_FILENO, buf, count);
#endif
  return result;
}
/* Builds a BrotliInput whose callback is BrotliStdinInputFunction and whose
   user-data pointer is NULL (the callback ignores it).
   Takes no arguments; declared with (void) so that the definition is a real
   prototype in C rather than an unprototyped old-style declarator. */
BrotliInput BrotliStdinInput(void) {
  BrotliInput in;
  in.cb_ = BrotliStdinInputFunction;
  in.data_ = NULL;
  return in;
}
/* Output callback that writes to standard output.
   data:  unused.
   buf:   bytes to write.
   count: number of bytes to write.
   Returns the result of write() converted to int; when _WIN32 is defined
   nothing is written and -1 is returned. */
int BrotliStdoutOutputFunction(void* data, const uint8_t* buf, size_t count) {
  int result = -1;
  (void) data;  /* Unused; silences LLVM's unused-parameter warning. */
#ifndef _WIN32
  result = (int)write(STDOUT_FILENO, buf, count);
#endif
  return result;
}
/* Builds a BrotliOutput whose callback is BrotliStdoutOutputFunction and
   whose user-data pointer is NULL (the callback ignores it).
   Takes no arguments; declared with (void) so that the definition is a real
   prototype in C rather than an unprototyped old-style declarator. */
BrotliOutput BrotliStdoutOutput(void) {
  BrotliOutput out;
  out.cb_ = BrotliStdoutOutputFunction;
  out.data_ = NULL;
  return out;
}
/* Input callback backed by a stdio stream.
   data:  the FILE* to read from, passed as an opaque pointer.
   buf:   destination for the bytes read.
   count: maximum number of bytes to read.
   Returns the number of bytes fread() delivered, converted to int. */
int BrotliFileInputFunction(void* data, uint8_t* buf, size_t count) {
  FILE* stream = (FILE*)data;
  size_t bytes_read = fread(buf, 1, count, stream);
  return (int)bytes_read;
}

View File

@ -19,6 +19,7 @@
#define BROTLI_DEC_STREAMS_H_
#include <stdio.h>
#include "./port.h"
#include "./types.h"
#if defined(__cplusplus) || defined(c_plusplus)
@ -84,14 +85,6 @@ int BrotliMemOutputFunction(void* data, const uint8_t* buf, size_t count);
BrotliOutput BrotliInitMemOutput(uint8_t* buffer, size_t length,
BrotliMemOutput* mem_output);
/* Input callback that reads from standard input. */
int BrotliStdinInputFunction(void* data, uint8_t* buf, size_t count);
BrotliInput BrotliStdinInput();
/* Output callback that writes to standard output. */
int BrotliStdoutOutputFunction(void* data, const uint8_t* buf, size_t count);
BrotliOutput BrotliStdoutOutput();
/* Input callback that reads from a file. */
int BrotliFileInputFunction(void* data, uint8_t* buf, size_t count);
BrotliInput BrotliFileInput(FILE* f);

View File

@ -20,6 +20,7 @@
#include <stdio.h>
#include <ctype.h>
#include "./port.h"
#include "./types.h"
#if defined(__cplusplus) || defined(c_plusplus)
@ -51,133 +52,202 @@ enum WordTransformType {
};
/* One dictionary-word transform: a prefix, a transform kind and a suffix.
   NOTE(review): the body lists both pointer-style members (prefix/suffix as
   C strings, transform as enum WordTransformType) and id-style members
   (uint8_t prefix_id/transform/suffix_id), and `transform` is declared
   twice. This looks like two revisions of the struct interleaved; confirm
   which member set is intended before compiling. */
typedef struct {
const char* prefix;
enum WordTransformType transform;
const char* suffix;
/* prefix_id and suffix_id are used by TransformDictionaryWord as indices
   into kPrefixSuffix. */
const uint8_t prefix_id;
const uint8_t transform;
const uint8_t suffix_id;
} Transform;
/* Every distinct prefix/suffix string packed back to back, each one
   terminated by a NUL byte (the last terminator is the literal's implicit
   one, which is why the array is 208 bytes long). The kPFix_* constants are
   byte offsets into this array; offset 0 is the empty string. */
static const char kPrefixSuffix[208] =
"\0 \0, \0 of the \0 of \0s \0.\0 and \0 in \0\"\0 to \0\">\0\n\0. \0]\0"
" for \0 a \0 that \0\'\0 with \0 from \0 by \0(\0. The \0 on \0 as \0"
" is \0ing \0\n\t\0:\0ed \0=\"\0 at \0ly \0,\0=\'\0.com/\0. This \0"
" not \0er \0al \0ful \0ive \0less \0est \0ize \0\xc2\xa0\0ous ";
/* Byte offsets into kPrefixSuffix at which each NUL-terminated prefix or
   suffix string begins. Every value must stay in sync with the layout of
   that string literal. */
enum {
/* Abbreviations used in the constant names:
EMPTY = ""
SP = " "
DQUOT = "\""
SQUOT = "'"
CLOSEBR = "]"
OPEN = "("
SLASH = "/"
NBSP = non-breaking space "\xc2\xa0"
*/
kPFix_EMPTY = 0,
kPFix_SP = 1,
kPFix_COMMASP = 3,
kPFix_SPofSPtheSP = 6,
/* The next two point into the tail of " of the " (which starts at offset 6)
   instead of having storage of their own. */
kPFix_SPtheSP = 9,
kPFix_eSP = 12,
kPFix_SPofSP = 15,
kPFix_sSP = 20,
kPFix_DOT = 23,
kPFix_SPandSP = 25,
kPFix_SPinSP = 31,
kPFix_DQUOT = 36,
kPFix_SPtoSP = 38,
kPFix_DQUOTGT = 43,
kPFix_NEWLINE = 46,
kPFix_DOTSP = 48,
kPFix_CLOSEBR = 51,
kPFix_SPforSP = 53,
kPFix_SPaSP = 59,
kPFix_SPthatSP = 63,
kPFix_SQUOT = 70,
kPFix_SPwithSP = 72,
kPFix_SPfromSP = 79,
kPFix_SPbySP = 86,
kPFix_OPEN = 91,
kPFix_DOTSPTheSP = 93,
kPFix_SPonSP = 100,
kPFix_SPasSP = 105,
kPFix_SPisSP = 110,
kPFix_ingSP = 115,
kPFix_NEWLINETAB = 120,
kPFix_COLON = 123,
kPFix_edSP = 125,
kPFix_EQDQUOT = 129,
kPFix_SPatSP = 132,
kPFix_lySP = 137,
kPFix_COMMA = 141,
kPFix_EQSQUOT = 143,
kPFix_DOTcomSLASH = 146,
kPFix_DOTSPThisSP = 152,
kPFix_SPnotSP = 160,
kPFix_erSP = 166,
kPFix_alSP = 170,
kPFix_fulSP = 174,
kPFix_iveSP = 179,
kPFix_lessSP = 184,
kPFix_estSP = 190,
kPFix_izeSP = 195,
kPFix_NBSP = 200,
kPFix_ousSP = 203
};
/* Transform table, indexed by transform id. Each entry is
   { prefix, transform kind, suffix }.
   NOTE(review): the initializer holds two parallel lists: entries written
   with string literals, followed by entries written with kPFix_* offsets,
   apparently the same transforms in the same order. This looks like two
   revisions interleaved; confirm which list is intended, because
   kNumTransforms is derived from the total number of entries. */
static const Transform kTransforms[] = {
{ "", kIdentity, "" },
{ "", kIdentity, " " },
{ " ", kIdentity, " " },
{ "", kOmitFirst1, "" },
{ "", kUppercaseFirst, " " },
{ "", kIdentity, " the " },
{ " ", kIdentity, "" },
{ "s ", kIdentity, " " },
{ "", kIdentity, " of " },
{ "", kUppercaseFirst, "" },
{ "", kIdentity, " and " },
{ "", kOmitFirst2, "" },
{ "", kOmitLast1, "" },
{ ", ", kIdentity, " " },
{ "", kIdentity, ", " },
{ " ", kUppercaseFirst, " " },
{ "", kIdentity, " in " },
{ "", kIdentity, " to " },
{ "e ", kIdentity, " " },
{ "", kIdentity, "\"" },
{ "", kIdentity, "." },
{ "", kIdentity, "\">" },
{ "", kIdentity, "\n" },
{ "", kOmitLast3, "" },
{ "", kIdentity, "]" },
{ "", kIdentity, " for " },
{ "", kOmitFirst3, "" },
{ "", kOmitLast2, "" },
{ "", kIdentity, " a " },
{ "", kIdentity, " that " },
{ " ", kUppercaseFirst, "" },
{ "", kIdentity, ". " },
{ ".", kIdentity, "" },
{ " ", kIdentity, ", " },
{ "", kOmitFirst4, "" },
{ "", kIdentity, " with " },
{ "", kIdentity, "'" },
{ "", kIdentity, " from " },
{ "", kIdentity, " by " },
{ "", kOmitFirst5, "" },
{ "", kOmitFirst6, "" },
{ " the ", kIdentity, "" },
{ "", kOmitLast4, "" },
{ "", kIdentity, ". The " },
{ "", kUppercaseAll, "" },
{ "", kIdentity, " on " },
{ "", kIdentity, " as " },
{ "", kIdentity, " is " },
{ "", kOmitLast7, "" },
{ "", kOmitLast1, "ing " },
{ "", kIdentity, "\n\t" },
{ "", kIdentity, ":" },
{ " ", kIdentity, ". " },
{ "", kIdentity, "ed " },
{ "", kOmitFirst9, "" },
{ "", kOmitFirst7, "" },
{ "", kOmitLast6, "" },
{ "", kIdentity, "(" },
{ "", kUppercaseFirst, ", " },
{ "", kOmitLast8, "" },
{ "", kIdentity, " at " },
{ "", kIdentity, "ly " },
{ " the ", kIdentity, " of " },
{ "", kOmitLast5, "" },
{ "", kOmitLast9, "" },
{ " ", kUppercaseFirst, ", " },
{ "", kUppercaseFirst, "\"" },
{ ".", kIdentity, "(" },
{ "", kUppercaseAll, " " },
{ "", kUppercaseFirst, "\">" },
{ "", kIdentity, "=\"" },
{ " ", kIdentity, "." },
{ ".com/", kIdentity, "" },
{ " the ", kIdentity, " of the " },
{ "", kUppercaseFirst, "'" },
{ "", kIdentity, ". This " },
{ "", kIdentity, "," },
{ ".", kIdentity, " " },
{ "", kUppercaseFirst, "(" },
{ "", kUppercaseFirst, "." },
{ "", kIdentity, " not " },
{ " ", kIdentity, "=\"" },
{ "", kIdentity, "er " },
{ " ", kUppercaseAll, " " },
{ "", kIdentity, "al " },
{ " ", kUppercaseAll, "" },
{ "", kIdentity, "='" },
{ "", kUppercaseAll, "\"" },
{ "", kUppercaseFirst, ". " },
{ " ", kIdentity, "(" },
{ "", kIdentity, "ful " },
{ " ", kUppercaseFirst, ". " },
{ "", kIdentity, "ive " },
{ "", kIdentity, "less " },
{ "", kUppercaseAll, "'" },
{ "", kIdentity, "est " },
{ " ", kUppercaseFirst, "." },
{ "", kUppercaseAll, "\">" },
{ " ", kIdentity, "='" },
{ "", kUppercaseFirst, "," },
{ "", kIdentity, "ize " },
{ "", kUppercaseAll, "." },
{ "\xc2\xa0", kIdentity, "" },
{ " ", kIdentity, "," },
{ "", kUppercaseFirst, "=\"" },
{ "", kUppercaseAll, "=\"" },
{ "", kIdentity, "ous " },
{ "", kUppercaseAll, ", " },
{ "", kUppercaseFirst, "='" },
{ " ", kUppercaseFirst, "," },
{ " ", kUppercaseAll, "=\"" },
{ " ", kUppercaseAll, ", " },
{ "", kUppercaseAll, "," },
{ "", kUppercaseAll, "(" },
{ "", kUppercaseAll, ". " },
{ " ", kUppercaseAll, "." },
{ "", kUppercaseAll, "='" },
{ " ", kUppercaseAll, ". " },
{ " ", kUppercaseFirst, "=\"" },
{ " ", kUppercaseAll, "='" },
{ " ", kUppercaseFirst, "='" },
/* From here on, prefix and suffix are given as kPFix_* byte offsets into
   kPrefixSuffix rather than as string literals. */
{ kPFix_EMPTY, kIdentity, kPFix_EMPTY },
{ kPFix_EMPTY, kIdentity, kPFix_SP },
{ kPFix_SP, kIdentity, kPFix_SP },
{ kPFix_EMPTY, kOmitFirst1, kPFix_EMPTY },
{ kPFix_EMPTY, kUppercaseFirst, kPFix_SP },
{ kPFix_EMPTY, kIdentity, kPFix_SPtheSP },
{ kPFix_SP, kIdentity, kPFix_EMPTY },
{ kPFix_sSP, kIdentity, kPFix_SP },
{ kPFix_EMPTY, kIdentity, kPFix_SPofSP },
{ kPFix_EMPTY, kUppercaseFirst, kPFix_EMPTY },
{ kPFix_EMPTY, kIdentity, kPFix_SPandSP },
{ kPFix_EMPTY, kOmitFirst2, kPFix_EMPTY },
{ kPFix_EMPTY, kOmitLast1, kPFix_EMPTY },
{ kPFix_COMMASP, kIdentity, kPFix_SP },
{ kPFix_EMPTY, kIdentity, kPFix_COMMASP },
{ kPFix_SP, kUppercaseFirst, kPFix_SP },
{ kPFix_EMPTY, kIdentity, kPFix_SPinSP },
{ kPFix_EMPTY, kIdentity, kPFix_SPtoSP },
{ kPFix_eSP, kIdentity, kPFix_SP },
{ kPFix_EMPTY, kIdentity, kPFix_DQUOT },
{ kPFix_EMPTY, kIdentity, kPFix_DOT },
{ kPFix_EMPTY, kIdentity, kPFix_DQUOTGT },
{ kPFix_EMPTY, kIdentity, kPFix_NEWLINE },
{ kPFix_EMPTY, kOmitLast3, kPFix_EMPTY },
{ kPFix_EMPTY, kIdentity, kPFix_CLOSEBR },
{ kPFix_EMPTY, kIdentity, kPFix_SPforSP },
{ kPFix_EMPTY, kOmitFirst3, kPFix_EMPTY },
{ kPFix_EMPTY, kOmitLast2, kPFix_EMPTY },
{ kPFix_EMPTY, kIdentity, kPFix_SPaSP },
{ kPFix_EMPTY, kIdentity, kPFix_SPthatSP },
{ kPFix_SP, kUppercaseFirst, kPFix_EMPTY },
{ kPFix_EMPTY, kIdentity, kPFix_DOTSP },
{ kPFix_DOT, kIdentity, kPFix_EMPTY },
{ kPFix_SP, kIdentity, kPFix_COMMASP },
{ kPFix_EMPTY, kOmitFirst4, kPFix_EMPTY },
{ kPFix_EMPTY, kIdentity, kPFix_SPwithSP },
{ kPFix_EMPTY, kIdentity, kPFix_SQUOT },
{ kPFix_EMPTY, kIdentity, kPFix_SPfromSP },
{ kPFix_EMPTY, kIdentity, kPFix_SPbySP },
{ kPFix_EMPTY, kOmitFirst5, kPFix_EMPTY },
{ kPFix_EMPTY, kOmitFirst6, kPFix_EMPTY },
{ kPFix_SPtheSP, kIdentity, kPFix_EMPTY },
{ kPFix_EMPTY, kOmitLast4, kPFix_EMPTY },
{ kPFix_EMPTY, kIdentity, kPFix_DOTSPTheSP },
{ kPFix_EMPTY, kUppercaseAll, kPFix_EMPTY },
{ kPFix_EMPTY, kIdentity, kPFix_SPonSP },
{ kPFix_EMPTY, kIdentity, kPFix_SPasSP },
{ kPFix_EMPTY, kIdentity, kPFix_SPisSP },
{ kPFix_EMPTY, kOmitLast7, kPFix_EMPTY },
{ kPFix_EMPTY, kOmitLast1, kPFix_ingSP },
{ kPFix_EMPTY, kIdentity, kPFix_NEWLINETAB },
{ kPFix_EMPTY, kIdentity, kPFix_COLON },
{ kPFix_SP, kIdentity, kPFix_DOTSP },
{ kPFix_EMPTY, kIdentity, kPFix_edSP },
{ kPFix_EMPTY, kOmitFirst9, kPFix_EMPTY },
{ kPFix_EMPTY, kOmitFirst7, kPFix_EMPTY },
{ kPFix_EMPTY, kOmitLast6, kPFix_EMPTY },
{ kPFix_EMPTY, kIdentity, kPFix_OPEN },
{ kPFix_EMPTY, kUppercaseFirst, kPFix_COMMASP },
{ kPFix_EMPTY, kOmitLast8, kPFix_EMPTY },
{ kPFix_EMPTY, kIdentity, kPFix_SPatSP },
{ kPFix_EMPTY, kIdentity, kPFix_lySP },
{ kPFix_SPtheSP, kIdentity, kPFix_SPofSP },
{ kPFix_EMPTY, kOmitLast5, kPFix_EMPTY },
{ kPFix_EMPTY, kOmitLast9, kPFix_EMPTY },
{ kPFix_SP, kUppercaseFirst, kPFix_COMMASP },
{ kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOT },
{ kPFix_DOT, kIdentity, kPFix_OPEN },
{ kPFix_EMPTY, kUppercaseAll, kPFix_SP },
{ kPFix_EMPTY, kUppercaseFirst, kPFix_DQUOTGT },
{ kPFix_EMPTY, kIdentity, kPFix_EQDQUOT },
{ kPFix_SP, kIdentity, kPFix_DOT },
{ kPFix_DOTcomSLASH, kIdentity, kPFix_EMPTY },
{ kPFix_SPtheSP, kIdentity, kPFix_SPofSPtheSP },
{ kPFix_EMPTY, kUppercaseFirst, kPFix_SQUOT },
{ kPFix_EMPTY, kIdentity, kPFix_DOTSPThisSP },
{ kPFix_EMPTY, kIdentity, kPFix_COMMA },
{ kPFix_DOT, kIdentity, kPFix_SP },
{ kPFix_EMPTY, kUppercaseFirst, kPFix_OPEN },
{ kPFix_EMPTY, kUppercaseFirst, kPFix_DOT },
{ kPFix_EMPTY, kIdentity, kPFix_SPnotSP },
{ kPFix_SP, kIdentity, kPFix_EQDQUOT },
{ kPFix_EMPTY, kIdentity, kPFix_erSP },
{ kPFix_SP, kUppercaseAll, kPFix_SP },
{ kPFix_EMPTY, kIdentity, kPFix_alSP },
{ kPFix_SP, kUppercaseAll, kPFix_EMPTY },
{ kPFix_EMPTY, kIdentity, kPFix_EQSQUOT },
{ kPFix_EMPTY, kUppercaseAll, kPFix_DQUOT },
{ kPFix_EMPTY, kUppercaseFirst, kPFix_DOTSP },
{ kPFix_SP, kIdentity, kPFix_OPEN },
{ kPFix_EMPTY, kIdentity, kPFix_fulSP },
{ kPFix_SP, kUppercaseFirst, kPFix_DOTSP },
{ kPFix_EMPTY, kIdentity, kPFix_iveSP },
{ kPFix_EMPTY, kIdentity, kPFix_lessSP },
{ kPFix_EMPTY, kUppercaseAll, kPFix_SQUOT },
{ kPFix_EMPTY, kIdentity, kPFix_estSP },
{ kPFix_SP, kUppercaseFirst, kPFix_DOT },
{ kPFix_EMPTY, kUppercaseAll, kPFix_DQUOTGT },
{ kPFix_SP, kIdentity, kPFix_EQSQUOT },
{ kPFix_EMPTY, kUppercaseFirst, kPFix_COMMA },
{ kPFix_EMPTY, kIdentity, kPFix_izeSP },
{ kPFix_EMPTY, kUppercaseAll, kPFix_DOT },
{ kPFix_NBSP, kIdentity, kPFix_EMPTY },
{ kPFix_SP, kIdentity, kPFix_COMMA },
{ kPFix_EMPTY, kUppercaseFirst, kPFix_EQDQUOT },
{ kPFix_EMPTY, kUppercaseAll, kPFix_EQDQUOT },
{ kPFix_EMPTY, kIdentity, kPFix_ousSP },
{ kPFix_EMPTY, kUppercaseAll, kPFix_COMMASP },
{ kPFix_EMPTY, kUppercaseFirst, kPFix_EQSQUOT },
{ kPFix_SP, kUppercaseFirst, kPFix_COMMA },
{ kPFix_SP, kUppercaseAll, kPFix_EQDQUOT },
{ kPFix_SP, kUppercaseAll, kPFix_COMMASP },
{ kPFix_EMPTY, kUppercaseAll, kPFix_COMMA },
{ kPFix_EMPTY, kUppercaseAll, kPFix_OPEN },
{ kPFix_EMPTY, kUppercaseAll, kPFix_DOTSP },
{ kPFix_SP, kUppercaseAll, kPFix_DOT },
{ kPFix_EMPTY, kUppercaseAll, kPFix_EQSQUOT },
{ kPFix_SP, kUppercaseAll, kPFix_DOTSP },
{ kPFix_SP, kUppercaseFirst, kPFix_EQDQUOT },
{ kPFix_SP, kUppercaseAll, kPFix_EQSQUOT },
{ kPFix_SP, kUppercaseFirst, kPFix_EQSQUOT },
};
/* Number of entries in kTransforms, computed from the array's own size so
   that it follows the initializer automatically. */
static const int kNumTransforms =
    (int)(sizeof(kTransforms) / sizeof(*kTransforms));
@ -199,37 +269,43 @@ static int ToUpperCase(uint8_t *p) {
return 3;
}
/* Writes the transformed form of a dictionary word to dst: the transform's
   prefix, then the word (possibly shortened and/or upper-cased), then the
   transform's suffix.
   dst:       output buffer; no bounds are checked here, so the caller must
              provide enough room.
   word, len: the source word bytes and their count.
   transform: index into kTransforms.
   Returns the number of bytes written to dst.
   NOTE(review): two variants of the declaration and of the body appear
   interleaved below (BROTLI_INLINE with prefix/suffix string pointers, and
   BROTLI_NOINLINE with prefix_id/suffix_id offsets into kPrefixSuffix).
   Confirm which variant is intended; the code itself is left untouched. */
static BROTLI_INLINE int TransformDictionaryWord(
static BROTLI_NOINLINE int TransformDictionaryWord(
uint8_t* dst, const uint8_t* word, int len, int transform) {
const char* prefix = kTransforms[transform].prefix;
const char* suffix = kTransforms[transform].suffix;
const int t = kTransforms[transform].transform;
/* kOmitFirstN drops the first N bytes of the word: t == kOmitFirst1 yields
   skip == 1, and anything below kOmitFirst1 yields skip == 0. */
int skip = t < kOmitFirst1 ? 0 : t - (kOmitFirst1 - 1);
int idx = 0;
int i = 0;
uint8_t* uppercase;
if (skip > len) {
skip = len;
{
/* Copy the NUL-terminated prefix found at offset prefix_id. */
const char* prefix = &kPrefixSuffix[kTransforms[transform].prefix_id];
while (*prefix) { dst[idx++] = (uint8_t)*prefix++; }
}
while (*prefix) { dst[idx++] = (uint8_t)*prefix++; }
word += skip;
len -= skip;
/* Transform values up to kOmitLast9 are used directly as the number of
   trailing bytes to drop (presumably kIdentity is 0; verify against the
   WordTransformType enum). */
if (t <= kOmitLast9) {
len -= t;
}
while (i < len) { dst[idx++] = word[i++]; }
/* Start of the word bytes just copied into dst. */
uppercase = &dst[idx - len];
if (t == kUppercaseFirst) {
ToUpperCase(uppercase);
} else if (t == kUppercaseAll) {
/* ToUpperCase returns how many bytes it consumed; advance by that amount. */
while (len > 0) {
int step = ToUpperCase(uppercase);
uppercase += step;
len -= step;
{
const int t = kTransforms[transform].transform;
/* Same skip computation as above: number of leading bytes to drop. */
int skip = t < kOmitFirst1 ? 0 : t - (kOmitFirst1 - 1);
int i = 0;
uint8_t* uppercase;
/* Never skip more bytes than the word has. */
if (skip > len) {
skip = len;
}
word += skip;
len -= skip;
if (t <= kOmitLast9) {
len -= t;
}
while (i < len) { dst[idx++] = word[i++]; }
uppercase = &dst[idx - len];
if (t == kUppercaseFirst) {
ToUpperCase(uppercase);
} else if (t == kUppercaseAll) {
while (len > 0) {
int step = ToUpperCase(uppercase);
uppercase += step;
len -= step;
}
}
}
while (*suffix) { dst[idx++] = (uint8_t)*suffix++; }
return idx;
{
/* Copy the NUL-terminated suffix found at offset suffix_id, then report the
   total number of bytes written. */
const char* suffix = &kPrefixSuffix[kTransforms[transform].suffix_id];
while (*suffix) { dst[idx++] = (uint8_t)*suffix++; }
return idx;
}
}
#if defined(__cplusplus) || defined(c_plusplus)

View File

@ -20,15 +20,7 @@
#include <stddef.h> /* for size_t */
#ifndef _MSC_VER
#include <inttypes.h>
#if defined(__cplusplus) || !defined(__STRICT_ANSI__) \
|| __STDC_VERSION__ >= 199901L
#define BROTLI_INLINE inline
#else
#define BROTLI_INLINE
#endif
#else
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef signed short int16_t;
@ -37,7 +29,8 @@ typedef signed int int32_t;
typedef unsigned int uint32_t;
typedef unsigned long long int uint64_t;
typedef long long int int64_t;
#define BROTLI_INLINE __forceinline
#endif /* _MSC_VER */
#else
#include <stdint.h>
#endif /* defined(_MSC_VER) && (_MSC_VER < 1600) */
#endif /* BROTLI_DEC_TYPES_H_ */