Make Brotli decompression faster

Makes decompression ~8% faster on my Skylake desktop.

PiperOrigin-RevId: 689499172
This commit is contained in:
Ilya Tokar 2024-10-24 13:36:16 -07:00 committed by Copybara-Service
parent 350100a5bb
commit 664952333f

View File

@ -2006,35 +2006,72 @@ CommandInner:
brotli_reg_t bits; brotli_reg_t bits;
brotli_reg_t value; brotli_reg_t value;
PreloadSymbol(safe, s->literal_htree, br, &bits, &value); PreloadSymbol(safe, s->literal_htree, br, &bits, &value);
do { if (!safe) {
if (!CheckInputAmount(safe, br)) { // This is the hottest part of the decode, so we copy the loop below
s->state = BROTLI_STATE_COMMAND_INNER; // and optimize it by calculating the number of steps where all checks
result = BROTLI_DECODER_NEEDS_MORE_INPUT; // evaluate to false (ringbuffer size/block size/input size).
goto saveStateAndReturn; // Since all checks are loop invariant, we just need to find
// the minimal number of iterations for a simple loop, and run
// the full version for the remainder.
int num_steps = i - 1;
if (num_steps > 0 && ((brotli_reg_t)(num_steps) > s->block_length[0])) {
// Safe cast, since block_length < steps
num_steps = (int)s->block_length[0];
} }
if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) { if (s->ringbuffer_size >= pos &&
goto NextLiteralBlock; (s->ringbuffer_size - pos) <= num_steps) {
num_steps = s->ringbuffer_size - pos - 1;
} }
if (!safe) { if (num_steps < 0) {
num_steps = 0;
}
num_steps = BrotliCopyPreloadedSymbolsToU8(s->literal_htree, br, &bits,
&value, s->ringbuffer, pos,
num_steps);
pos += num_steps;
s->block_length[0] -= (brotli_reg_t)num_steps;
i -= num_steps;
do {
if (!CheckInputAmount(safe, br)) {
s->state = BROTLI_STATE_COMMAND_INNER;
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
goto saveStateAndReturn;
}
if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) {
goto NextLiteralBlock;
}
BrotliCopyPreloadedSymbolsToU8(s->literal_htree, br, &bits, &value, BrotliCopyPreloadedSymbolsToU8(s->literal_htree, br, &bits, &value,
s->ringbuffer, pos, 1); s->ringbuffer, pos, 1);
} else { --s->block_length[0];
BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos);
++pos;
if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) {
s->state = BROTLI_STATE_COMMAND_INNER_WRITE;
--i;
goto saveStateAndReturn;
}
} while (--i != 0);
} else { /* safe */
do {
if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) {
goto NextLiteralBlock;
}
brotli_reg_t literal; brotli_reg_t literal;
if (!SafeReadSymbol(s->literal_htree, br, &literal)) { if (!SafeReadSymbol(s->literal_htree, br, &literal)) {
result = BROTLI_DECODER_NEEDS_MORE_INPUT; result = BROTLI_DECODER_NEEDS_MORE_INPUT;
goto saveStateAndReturn; goto saveStateAndReturn;
} }
s->ringbuffer[pos] = (uint8_t)literal; s->ringbuffer[pos] = (uint8_t)literal;
} --s->block_length[0];
--s->block_length[0]; BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos);
BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos); ++pos;
++pos; if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) {
if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) { s->state = BROTLI_STATE_COMMAND_INNER_WRITE;
s->state = BROTLI_STATE_COMMAND_INNER_WRITE; --i;
--i; goto saveStateAndReturn;
goto saveStateAndReturn; }
} } while (--i != 0);
} while (--i != 0); }
} else { } else {
uint8_t p1 = s->ringbuffer[(pos - 1) & s->ringbuffer_mask]; uint8_t p1 = s->ringbuffer[(pos - 1) & s->ringbuffer_mask];
uint8_t p2 = s->ringbuffer[(pos - 2) & s->ringbuffer_mask]; uint8_t p2 = s->ringbuffer[(pos - 2) & s->ringbuffer_mask];