Speed up decoder by 0.2%-1.2%

PiperOrigin-RevId: 516754779
This commit is contained in:
Evgenii Kliuchnikov 2023-03-15 08:15:19 +00:00 committed by Evgenii Kliuchnikov
parent 57c36a4f27
commit cb1ced3a25
2 changed files with 58 additions and 42 deletions

View File

@ -22,6 +22,9 @@ extern "C" {
#define BROTLI_SHORT_FILL_BIT_WINDOW_READ (sizeof(brotli_reg_t) >> 1)
/* 162 bits + 7 bytes */
#define BROTLI_FAST_INPUT_SLACK 28
BROTLI_INTERNAL extern const brotli_reg_t kBrotliBitMask[33];
static BROTLI_INLINE brotli_reg_t BitMask(brotli_reg_t n) {
@ -38,7 +41,8 @@ typedef struct {
brotli_reg_t val_; /* pre-fetched bits */
brotli_reg_t bit_pos_; /* current bit-reading position in val_ */
const uint8_t* next_in; /* the byte we're reading from */
size_t avail_in;
const uint8_t* guard_in; /* position from which "fast-path" is prohibited */
const uint8_t* last_in; /* == next_in + avail_in */
} BrotliBitReader;
typedef struct {
@ -64,12 +68,28 @@ BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* br);
BROTLI_INTERNAL BROTLI_NOINLINE BROTLI_BOOL BrotliSafeReadBits32Slow(
BrotliBitReader* br, brotli_reg_t n_bits, brotli_reg_t* val);
/* Returns the number of input bytes lying between |br->next_in| and
   |br->last_in|, computed as the difference of the two pointers. */
static BROTLI_INLINE size_t
BrotliBitReaderGetAvailIn(BrotliBitReader* const br) {
  const uint8_t* const cursor = br->next_in;
  const uint8_t* const end = br->last_in;
  return (size_t)(end - cursor);
}
/* Snapshots the reader position into |to|: copies val_, bit_pos_ and next_in
   from |from| and stores the count of unread input bytes in to->avail_in. */
static BROTLI_INLINE void BrotliBitReaderSaveState(
BrotliBitReader* const from, BrotliBitReaderState* to) {
to->val_ = from->val_;
to->bit_pos_ = from->bit_pos_;
to->next_in = from->next_in;
/* NOTE(review): the next two lines both assign to->avail_in; this looks like
   the before/after pair of a diff shown without +/- markers. Presumably only
   the BrotliBitReaderGetAvailIn() form is kept -- confirm. */
to->avail_in = from->avail_in;
to->avail_in = BrotliBitReaderGetAvailIn(from);
}
/* Attaches the bit reader to the input span [next_in, next_in + avail_in).

   Sets |last_in| to the end of the span and |guard_in| to the first position
   at which fewer than BROTLI_FAST_INPUT_SLACK bytes remain, i.e. the position
   from which the "fast-path" is prohibited. When the span is shorter than
   BROTLI_FAST_INPUT_SLACK bytes, |guard_in| == |next_in|, so the fast-path
   check (next_in < guard_in) fails right away.

   |next_in| may be NULL when |avail_in| is 0; no pointer arithmetic is
   performed in that case, because adding even 0 to a null pointer is
   undefined behavior in C. */
static BROTLI_INLINE void BrotliBitReaderSetInput(
    BrotliBitReader* const br, const uint8_t* next_in, size_t avail_in) {
  br->next_in = next_in;
  /* Avoid evaluating NULL + 0 for an empty input. */
  br->last_in = (avail_in == 0) ? next_in : (next_in + avail_in);
  if (avail_in + 1 > BROTLI_FAST_INPUT_SLACK) {
    /* Here avail_in >= BROTLI_FAST_INPUT_SLACK > 0, so next_in is expected to
       point into a real buffer and the offset stays within it. */
    br->guard_in = next_in + (avail_in + 1 - BROTLI_FAST_INPUT_SLACK);
  } else {
    br->guard_in = next_in;
  }
}
static BROTLI_INLINE void BrotliBitReaderRestoreState(
@ -77,7 +97,7 @@ static BROTLI_INLINE void BrotliBitReaderRestoreState(
to->val_ = from->val_;
to->bit_pos_ = from->bit_pos_;
to->next_in = from->next_in;
to->avail_in = from->avail_in;
BrotliBitReaderSetInput(to, from->next_in, from->avail_in);
}
static BROTLI_INLINE brotli_reg_t BrotliGetAvailableBits(
@ -90,15 +110,16 @@ static BROTLI_INLINE brotli_reg_t BrotliGetAvailableBits(
maximal ring-buffer size (larger number won't be utilized anyway). */
static BROTLI_INLINE size_t BrotliGetRemainingBytes(BrotliBitReader* br) {
/* Upper bound on the reported value: 2^BROTLI_LARGE_MAX_WBITS. */
static const size_t kCap = (size_t)1 << BROTLI_LARGE_MAX_WBITS;
/* NOTE(review): two variants of the body follow (one reading br->avail_in,
   one using BrotliBitReaderGetAvailIn); this looks like the before/after pair
   of a diff shown without +/- markers. Presumably only the second variant is
   kept -- confirm. */
if (br->avail_in > kCap) return kCap;
return br->avail_in + (BrotliGetAvailableBits(br) >> 3);
size_t avail_in = BrotliBitReaderGetAvailIn(br);
if (avail_in > kCap) return kCap;
/* Unread input bytes plus BrotliGetAvailableBits(br) converted to whole
   bytes (>> 3). */
return avail_in + (BrotliGetAvailableBits(br) >> 3);
}
/* Checks whether enough input remains for the "fast-path".
   In the form without the |num| parameter the test is |next_in| < |guard_in|;
   BrotliBitReaderSetInput places |guard_in| so that this holds only while at
   least BROTLI_FAST_INPUT_SLACK bytes lie between |next_in| and |last_in|
   (the bits remaining in br->val_ are not counted).
   NOTE(review): both the |num|-based and the |guard_in|-based signature/body
   appear below; this looks like the before/after pair of a diff shown without
   +/- markers. Presumably only the |guard_in| form is kept -- confirm. */
static BROTLI_INLINE BROTLI_BOOL BrotliCheckInputAmount(
BrotliBitReader* const br, size_t num) {
return TO_BROTLI_BOOL(br->avail_in >= num);
BrotliBitReader* const br) {
return TO_BROTLI_BOOL(br->next_in < br->guard_in);
}
/* Guarantees that there are at least |n_bits| + 1 bits in accumulator.
@ -116,7 +137,6 @@ static BROTLI_INLINE void BrotliFillBitWindow(
(br->val_ >> 56) | (BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 8);
br->bit_pos_ =
bit_pos ^ 56; /* here same as -= 56 because of the if condition */
br->avail_in -= 7;
br->next_in += 7;
}
} else if (BROTLI_UNALIGNED_READ_FAST && BROTLI_IS_CONSTANT(n_bits) &&
@ -127,7 +147,6 @@ static BROTLI_INLINE void BrotliFillBitWindow(
(br->val_ >> 48) | (BROTLI_UNALIGNED_LOAD64LE(br->next_in) << 16);
br->bit_pos_ =
bit_pos ^ 48; /* here same as -= 48 because of the if condition */
br->avail_in -= 6;
br->next_in += 6;
}
} else {
@ -137,7 +156,6 @@ static BROTLI_INLINE void BrotliFillBitWindow(
(((uint64_t)BROTLI_UNALIGNED_LOAD32LE(br->next_in)) << 32);
br->bit_pos_ =
bit_pos ^ 32; /* here same as -= 32 because of the if condition */
br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ;
br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ;
}
}
@ -150,7 +168,6 @@ static BROTLI_INLINE void BrotliFillBitWindow(
(br->val_ >> 24) | (BROTLI_UNALIGNED_LOAD32LE(br->next_in) << 8);
br->bit_pos_ =
bit_pos ^ 24; /* here same as -= 24 because of the if condition */
br->avail_in -= 3;
br->next_in += 3;
}
} else {
@ -160,7 +177,6 @@ static BROTLI_INLINE void BrotliFillBitWindow(
(((brotli_reg_t)BROTLI_UNALIGNED_LOAD16LE(br->next_in)) << 16);
br->bit_pos_ =
bit_pos ^ 16; /* here same as -= 16 because of the if condition */
br->avail_in -= BROTLI_SHORT_FILL_BIT_WINDOW_READ;
br->next_in += BROTLI_SHORT_FILL_BIT_WINDOW_READ;
}
}
@ -176,7 +192,7 @@ static BROTLI_INLINE void BrotliFillBitWindow16(BrotliBitReader* const br) {
/* Tries to pull one byte of input to accumulator.
Returns BROTLI_FALSE if there is no input available. */
static BROTLI_INLINE BROTLI_BOOL BrotliPullByte(BrotliBitReader* const br) {
if (br->avail_in == 0) {
if (br->next_in == br->last_in) {
return BROTLI_FALSE;
}
br->val_ >>= 8;
@ -186,7 +202,6 @@ static BROTLI_INLINE BROTLI_BOOL BrotliPullByte(BrotliBitReader* const br) {
br->val_ |= ((brotli_reg_t)*br->next_in) << 24;
#endif
br->bit_pos_ -= 8;
--br->avail_in;
++br->next_in;
return BROTLI_TRUE;
}
@ -236,7 +251,6 @@ static BROTLI_INLINE void BrotliDropBits(
static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) {
brotli_reg_t unused_bytes = BrotliGetAvailableBits(br) >> 3;
brotli_reg_t unused_bits = unused_bytes << 3;
br->avail_in += unused_bytes;
br->next_in -= unused_bytes;
if (unused_bits == sizeof(br->val_) << 3) {
br->val_ = 0;
@ -248,11 +262,13 @@ static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) {
/* Reads the specified number of bits from |br| and advances the bit pos.
   Precondition: accumulator MUST contain at least |n_bits|.
   The value BrotliGetBitsUnmasked(br) & BitMask(n_bits) is stored in |*val|
   and logged via BROTLI_LOG before BrotliDropBits(br, n_bits) is called.
   NOTE(review): the signature and the BROTLI_LOG argument list each appear
   twice below; this looks like the before/after pair of a diff shown without
   +/- markers. Presumably only the BrotliBitReaderGetAvailIn() variant is
   kept -- confirm. */
static BROTLI_INLINE void BrotliTakeBits(
BrotliBitReader* const br, brotli_reg_t n_bits, brotli_reg_t* val) {
static BROTLI_INLINE void BrotliTakeBits(BrotliBitReader* const br,
brotli_reg_t n_bits,
brotli_reg_t* val) {
*val = BrotliGetBitsUnmasked(br) & BitMask(n_bits);
BROTLI_LOG(("[BrotliTakeBits] %d %d %d val: %6x\n",
(int)br->avail_in, (int)br->bit_pos_, (int)n_bits, (int)*val));
(int)BrotliBitReaderGetAvailIn(br), (int)br->bit_pos_,
(int)n_bits, (int)*val));
BrotliDropBits(br, n_bits);
}
@ -342,7 +358,6 @@ static BROTLI_INLINE BROTLI_BOOL BrotliJumpToByteBoundary(BrotliBitReader* br) {
}
/* Skips |num| input bytes by advancing br->next_in.
   NOTE(review): the br->avail_in decrement below looks like a line removed by
   the diff this view was rendered from (shown without +/- markers) --
   confirm before relying on it. */
static BROTLI_INLINE void BrotliDropBytes(BrotliBitReader* br, size_t num) {
br->avail_in -= num;
br->next_in += num;
}
@ -365,21 +380,24 @@ static BROTLI_INLINE void BrotliCopyBytes(uint8_t* dest,
/* Passes the address of each static inline helper of this header (and of this
   function itself) to BROTLI_UNUSED; presumably this silences
   unused-function warnings in translation units that do not call them all.
   NOTE(review): several helpers are listed twice (e.g. BrotliBitReaderSaveState,
   BrotliBitReaderUnload, BrotliCopyBytes); this looks like the old and the
   new, re-sorted list of a diff shown without +/- markers -- confirm which
   entries are kept. */
BROTLI_UNUSED_FUNCTION void BrotliBitReaderSuppressUnusedFunctions(void) {
BROTLI_UNUSED(&BrotliBitReaderSuppressUnusedFunctions);
BROTLI_UNUSED(&BrotliBitReaderSaveState);
BROTLI_UNUSED(&BrotliBitReaderGetAvailIn);
BROTLI_UNUSED(&BrotliBitReaderRestoreState);
BROTLI_UNUSED(&BrotliGetRemainingBytes);
BROTLI_UNUSED(&BrotliBitReaderSaveState);
BROTLI_UNUSED(&BrotliBitReaderSetInput);
BROTLI_UNUSED(&BrotliBitReaderUnload);
BROTLI_UNUSED(&BrotliCheckInputAmount);
BROTLI_UNUSED(&BrotliCopyBytes);
BROTLI_UNUSED(&BrotliFillBitWindow16);
BROTLI_UNUSED(&BrotliGet16BitsUnmasked);
BROTLI_UNUSED(&BrotliGetBits);
BROTLI_UNUSED(&BrotliSafeGetBits);
BROTLI_UNUSED(&BrotliBitReaderUnload);
BROTLI_UNUSED(&BrotliGetRemainingBytes);
BROTLI_UNUSED(&BrotliJumpToByteBoundary);
BROTLI_UNUSED(&BrotliReadBits24);
BROTLI_UNUSED(&BrotliReadBits32);
BROTLI_UNUSED(&BrotliSafeGetBits);
BROTLI_UNUSED(&BrotliSafeReadBits);
BROTLI_UNUSED(&BrotliSafeReadBits32);
BROTLI_UNUSED(&BrotliJumpToByteBoundary);
BROTLI_UNUSED(&BrotliCopyBytes);
}
#if defined(__cplusplus) || defined(c_plusplus)

View File

@ -611,7 +611,7 @@ static BrotliDecoderErrorCode ReadSymbolCodeLengths(
const HuffmanCode* p = h->table;
brotli_reg_t code_len;
BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(p);
if (!BrotliCheckInputAmount(br, BROTLI_SHORT_FILL_BIT_WINDOW_READ)) {
if (!BrotliCheckInputAmount(br)) {
h->symbol = symbol;
h->repeat = repeat;
h->prev_code_len = prev_code_len;
@ -1876,11 +1876,11 @@ static BROTLI_INLINE BROTLI_BOOL SafeReadCommand(
}
/* Input-amount check that is bypassed in "safe" mode: returns BROTLI_TRUE
   when |safe| is non-zero, otherwise forwards to BrotliCheckInputAmount.
   NOTE(review): the signature and the final return each appear twice below
   (with and without |num|); this looks like the before/after pair of a diff
   shown without +/- markers. Presumably only the form without |num| is
   kept -- confirm. */
static BROTLI_INLINE BROTLI_BOOL CheckInputAmount(
int safe, BrotliBitReader* const br, size_t num) {
int safe, BrotliBitReader* const br) {
if (safe) {
return BROTLI_TRUE;
}
return BrotliCheckInputAmount(br, num);
return BrotliCheckInputAmount(br);
}
#define BROTLI_SAFE(METHOD) \
@ -1903,7 +1903,7 @@ static BROTLI_INLINE BrotliDecoderErrorCode ProcessCommandsInternal(
BrotliBitReader* br = &s->br;
int compound_dictionary_size = GetCompoundDictionarySize(s);
if (!CheckInputAmount(safe, br, 28)) {
if (!CheckInputAmount(safe, br)) {
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
goto saveStateAndReturn;
}
@ -1928,7 +1928,7 @@ CommandBegin:
if (safe) {
s->state = BROTLI_STATE_COMMAND_BEGIN;
}
if (!CheckInputAmount(safe, br, 28)) { /* 156 bits + 7 bytes */
if (!CheckInputAmount(safe, br)) {
s->state = BROTLI_STATE_COMMAND_BEGIN;
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
goto saveStateAndReturn;
@ -1956,7 +1956,7 @@ CommandInner:
brotli_reg_t value;
PreloadSymbol(safe, s->literal_htree, br, &bits, &value);
do {
if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */
if (!CheckInputAmount(safe, br)) {
s->state = BROTLI_STATE_COMMAND_INNER;
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
goto saveStateAndReturn;
@ -1990,7 +1990,7 @@ CommandInner:
do {
const HuffmanCode* hc;
uint8_t context;
if (!CheckInputAmount(safe, br, 28)) { /* 162 bits + 7 bytes */
if (!CheckInputAmount(safe, br)) {
s->state = BROTLI_STATE_COMMAND_INNER;
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
goto saveStateAndReturn;
@ -2315,14 +2315,13 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
}
if (!*available_out) next_out = 0;
if (s->buffer_length == 0) { /* Just connect bit reader to input stream. */
br->avail_in = *available_in;
br->next_in = *next_in;
BrotliBitReaderSetInput(br, *next_in, *available_in);
} else {
/* At least one byte of input is required. More than one byte of input may
be required to complete the transaction -> reading more data must be
done in a loop -> do it in a main loop. */
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
br->next_in = &s->buffer.u8[0];
BrotliBitReaderSetInput(br, &s->buffer.u8[0], s->buffer_length);
}
/* State machine */
for (;;) {
@ -2339,15 +2338,14 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
}
}
if (s->buffer_length != 0) { /* Used with internal buffer. */
if (br->avail_in == 0) {
if (br->next_in == br->last_in) {
/* Successfully finished read transaction.
Accumulator contains less than 8 bits, because internal buffer
is expanded byte-by-byte until it is enough to complete read. */
s->buffer_length = 0;
/* Switch to input stream and restart. */
result = BROTLI_DECODER_SUCCESS;
br->avail_in = *available_in;
br->next_in = *next_in;
BrotliBitReaderSetInput(br, *next_in, *available_in);
continue;
} else if (*available_in != 0) {
/* Not enough data in buffer, but can take one more byte from
@ -2355,7 +2353,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
result = BROTLI_DECODER_SUCCESS;
s->buffer.u8[s->buffer_length] = **next_in;
s->buffer_length++;
br->avail_in = s->buffer_length;
BrotliBitReaderSetInput(br, &s->buffer.u8[0], s->buffer_length);
(*next_in)++;
(*available_in)--;
/* Retry with more data in buffer. */
@ -2366,7 +2364,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
} else { /* Input stream doesn't contain enough input. */
/* Copy tail to internal buffer and return. */
*next_in = br->next_in;
*available_in = br->avail_in;
*available_in = BrotliBitReaderGetAvailIn(br);
while (*available_in) {
s->buffer.u8[s->buffer_length] = **next_in;
s->buffer_length++;
@ -2389,7 +2387,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
stream it has less than 8 bits in accumulator, so it is safe to
return unused accumulator bits there. */
BrotliBitReaderUnload(br);
*available_in = br->avail_in;
*available_in = BrotliBitReaderGetAvailIn(br);
*next_in = br->next_in;
}
break;
@ -2756,7 +2754,7 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
}
if (s->buffer_length == 0) {
BrotliBitReaderUnload(br);
*available_in = br->avail_in;
*available_in = BrotliBitReaderGetAvailIn(br);
*next_in = br->next_in;
}
s->state = BROTLI_STATE_DONE;