From 664952333f675eb189b9d8dfefb1d33538cc41f6 Mon Sep 17 00:00:00 2001 From: Ilya Tokar Date: Thu, 24 Oct 2024 13:36:16 -0700 Subject: [PATCH] Make Brotli decompression faster Makes it ~8% faster on my skylake desktop. PiperOrigin-RevId: 689499172 --- c/dec/decode.c | 75 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/c/dec/decode.c b/c/dec/decode.c index 3ed9fe8..921aa4a 100644 --- a/c/dec/decode.c +++ b/c/dec/decode.c @@ -2006,35 +2006,72 @@ CommandInner: brotli_reg_t bits; brotli_reg_t value; PreloadSymbol(safe, s->literal_htree, br, &bits, &value); - do { - if (!CheckInputAmount(safe, br)) { - s->state = BROTLI_STATE_COMMAND_INNER; - result = BROTLI_DECODER_NEEDS_MORE_INPUT; - goto saveStateAndReturn; + if (!safe) { + // This is a hottest part of the decode, so we copy the loop below + // and optimize it by calculating the number of steps where all checks + // evaluate to false (ringbuffer size/block size/input size). + // Since all checks are loop invariant, we just need to find + // minimal number of iterations for a simple loop, and run + // the full version for the remainder. + int num_steps = i - 1; + if (num_steps > 0 && ((brotli_reg_t)(num_steps) > s->block_length[0])) { + // Safe cast, since block_length < steps + num_steps = (int)s->block_length[0]; } - if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) { - goto NextLiteralBlock; + if (s->ringbuffer_size >= pos && + (s->ringbuffer_size - pos) <= num_steps) { + num_steps = s->ringbuffer_size - pos - 1; } - if (!safe) { + if (num_steps < 0) { + num_steps = 0; + } + num_steps = BrotliCopyPreloadedSymbolsToU8(s->literal_htree, br, &bits, + &value, s->ringbuffer, pos, + num_steps); + pos += num_steps; + s->block_length[0] -= (brotli_reg_t)num_steps; + i -= num_steps; + do { + if (!CheckInputAmount(safe, br)) { + s->state = BROTLI_STATE_COMMAND_INNER; + result = BROTLI_DECODER_NEEDS_MORE_INPUT; + goto saveStateAndReturn; + } + if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) { + goto NextLiteralBlock; + } BrotliCopyPreloadedSymbolsToU8(s->literal_htree, br, &bits, &value, s->ringbuffer, pos, 1); - } else { + --s->block_length[0]; + BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos); + ++pos; + if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) { + s->state = BROTLI_STATE_COMMAND_INNER_WRITE; + --i; + goto saveStateAndReturn; + } + } while (--i != 0); + } else { /* safe */ + do { + if (BROTLI_PREDICT_FALSE(s->block_length[0] == 0)) { + goto NextLiteralBlock; + } brotli_reg_t literal; if (!SafeReadSymbol(s->literal_htree, br, &literal)) { result = BROTLI_DECODER_NEEDS_MORE_INPUT; goto saveStateAndReturn; } s->ringbuffer[pos] = (uint8_t)literal; - } - --s->block_length[0]; - BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos); - ++pos; - if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) { - s->state = BROTLI_STATE_COMMAND_INNER_WRITE; - --i; - goto saveStateAndReturn; - } - } while (--i != 0); + --s->block_length[0]; + BROTLI_LOG_ARRAY_INDEX(s->ringbuffer, pos); + ++pos; + if (BROTLI_PREDICT_FALSE(pos == s->ringbuffer_size)) { + s->state = BROTLI_STATE_COMMAND_INNER_WRITE; + --i; + goto saveStateAndReturn; + } + } while (--i != 0); + } } else { uint8_t p1 = s->ringbuffer[(pos - 1) & s->ringbuffer_mask]; uint8_t p2 = s->ringbuffer[(pos - 2) & s->ringbuffer_mask];