mirror of
https://github.com/google/brotli.git
synced 2024-11-21 19:20:09 +00:00
Make Brotli decompression faster
Makes it ~8% faster on my skylake desktop. PiperOrigin-RevId: 689499172
This commit is contained in:
parent
350100a5bb
commit
664952333f
@ -2006,35 +2006,72 @@ CommandInner:
|
|||||||
brotli_reg_t bits;
|
brotli_reg_t bits;
|
||||||
brotli_reg_t value;
|
brotli_reg_t value;
|
||||||
PreloadSymbol(safe, s->literal_htree, br, &bits, &value);
|
PreloadSymbol(safe, s->literal_htree, br, &bits, &value);
|
||||||
do {
|
if (!safe) {
|
||||||
if (!CheckInputAmount(safe, br)) {
|
// This is a hottest part of the decode, so we copy the loop below
|
||||||
s->state = BROTLI_STATE_COMMAND_INNER;
|
// and optimize it by calculating the number of steps where all checks
|
||||||
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
|
// evaluate to false (ringbuffer size/block size/input size).
|
||||||
goto saveStateAndReturn;
|
// Since all checks are loop invariant, we just need to find
|
||||||
|
// minimal number of iterations for a simple loop, and run
|
||||||
|
// the full version for the remainder.
|
||||||
|
int num_steps = i - 1;
|
||||||
|
if (num_steps > 0 && ((brotli_reg_t)(num_steps) > s->block_length[0])) {
|
||||||
|
// Safe cast, since block_length < steps
|
||||||
|
num_steps = (int)s->block_length[0];
|
||||||
}
|
}
|
||||||
if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) {
|
if (s->ringbuffer_size >= pos &&
|
||||||
goto NextLiteralBlock;
|
(s->ringbuffer_size - pos) <= num_steps) {
|
||||||
|
num_steps = s->ringbuffer_size - pos - 1;
|
||||||
}
|
}
|
||||||
if (!safe) {
|
if (num_steps < 0) {
|
||||||
|
num_steps = 0;
|
||||||
|
}
|
||||||
|
num_steps = BrotliCopyPreloadedSymbolsToU8(s->literal_htree, br, &bits,
|
||||||
|
&value, s->ringbuffer, pos,
|
||||||
|
num_steps);
|
||||||
|
pos += num_steps;
|
||||||
|
s->block_length[0] -= (brotli_reg_t)num_steps;
|
||||||
|
i -= num_steps;
|
||||||
|
do {
|
||||||
|
if (!CheckInputAmount(safe, br)) {
|
||||||
|
s->state = BROTLI_STATE_COMMAND_INNER;
|
||||||
|
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
|
||||||
|
goto saveStateAndReturn;
|
||||||
|
}
|
||||||
|
if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) {
|
||||||
|
goto NextLiteralBlock;
|
||||||
|
}
|
||||||
BrotliCopyPreloadedSymbolsToU8(s->literal_htree, br, &bits, &value,
|
BrotliCopyPreloadedSymbolsToU8(s->literal_htree, br, &bits, &value,
|
||||||
s->ringbuffer, pos, 1);
|
s->ringbuffer, pos, 1);
|
||||||
} else {
|
--s->block_length[0];
|
||||||
|
BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos);
|
||||||
|
++pos;
|
||||||
|
if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) {
|
||||||
|
s->state = BROTLI_STATE_COMMAND_INNER_WRITE;
|
||||||
|
--i;
|
||||||
|
goto saveStateAndReturn;
|
||||||
|
}
|
||||||
|
} while (--i != 0);
|
||||||
|
} else { /* safe */
|
||||||
|
do {
|
||||||
|
if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) {
|
||||||
|
goto NextLiteralBlock;
|
||||||
|
}
|
||||||
brotli_reg_t literal;
|
brotli_reg_t literal;
|
||||||
if (!SafeReadSymbol(s->literal_htree, br, &literal)) {
|
if (!SafeReadSymbol(s->literal_htree, br, &literal)) {
|
||||||
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
|
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
|
||||||
goto saveStateAndReturn;
|
goto saveStateAndReturn;
|
||||||
}
|
}
|
||||||
s->ringbuffer[pos] = (uint8_t)literal;
|
s->ringbuffer[pos] = (uint8_t)literal;
|
||||||
}
|
--s->block_length[0];
|
||||||
--s->block_length[0];
|
BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos);
|
||||||
BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos);
|
++pos;
|
||||||
++pos;
|
if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) {
|
||||||
if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) {
|
s->state = BROTLI_STATE_COMMAND_INNER_WRITE;
|
||||||
s->state = BROTLI_STATE_COMMAND_INNER_WRITE;
|
--i;
|
||||||
--i;
|
goto saveStateAndReturn;
|
||||||
goto saveStateAndReturn;
|
}
|
||||||
}
|
} while (--i != 0);
|
||||||
} while (--i != 0);
|
}
|
||||||
} else {
|
} else {
|
||||||
uint8_t p1 = s->ringbuffer[(pos - 1) & s->ringbuffer_mask];
|
uint8_t p1 = s->ringbuffer[(pos - 1) & s->ringbuffer_mask];
|
||||||
uint8_t p2 = s->ringbuffer[(pos - 2) & s->ringbuffer_mask];
|
uint8_t p2 = s->ringbuffer[(pos - 2) & s->ringbuffer_mask];
|
||||||
|
Loading…
Reference in New Issue
Block a user