Speed and memory usage improvements for the decoder.

* Change order of members of bit reader state structure.

* Remove unused includes for assert. Add BROTLI_DCHECK
  macros and use it instead of assert.

* Do not calculate nbits in common case of ReadSymbol.

* Introduce and use PREDICT_TRUE / PREDICT_FALSE macros.

* Allocate less memory in the brotli decoder if it knows
  the result size beforehand. Before this, the decoder
  would always allocate 16MB if the encoder annotated the
  window size as 22 bit (which is the default), even if the
  file is only a few KB uncompressed. Now, it'll only
  allocate a ringbuffer as large as needed for the result file.
  But only if it can know the filesize, it's not possible
  to know that if there are multiple metablocks or too large
  uncompressed metablock.
This commit is contained in:
Zoltan Szabadka 2015-05-07 16:53:43 +02:00
parent 47ea761869
commit 83aa24dc86
11 changed files with 117 additions and 35 deletions

1
.gitignore vendored
View File

@ -1,4 +1,5 @@
*.o
*.pyc
*.txt.uncompressed
*.bro
*.unbro

View File

@ -15,10 +15,10 @@
/* Bit reading helpers */
#include <assert.h>
#include <stdlib.h>
#include "./bit_reader.h"
#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@ -26,7 +26,7 @@ extern "C" {
void BrotliInitBitReader(BrotliBitReader* const br,
BrotliInput input, int finish) {
assert(br != NULL);
BROTLI_DCHECK(br != NULL);
br->finish_ = finish;
br->tmp_bytes_read_ = 0;

View File

@ -19,6 +19,7 @@
#define BROTLI_DEC_BIT_READER_H_
#include <string.h>
#include "./port.h"
#include "./streams.h"
#include "./types.h"
@ -46,11 +47,6 @@ static const uint32_t kBitMask[BROTLI_MAX_NUM_BIT_READ] = {
};
typedef struct {
/* Input byte buffer, consist of a ringbuffer and a "slack" region where */
/* bytes from the start of the ringbuffer are copied. */
uint8_t buf_[BROTLI_IBUF_SIZE];
uint8_t* buf_ptr_; /* next input will write here */
BrotliInput input_; /* input callback */
#if (BROTLI_USE_64_BITS)
uint64_t val_; /* pre-fetched bits */
#else
@ -60,12 +56,18 @@ typedef struct {
uint32_t bit_pos_; /* current bit-reading position in val_ */
uint32_t bit_end_pos_; /* bit-reading end position from LSB of val_ */
int eos_; /* input stream is finished */
uint8_t* buf_ptr_; /* next input will write here */
BrotliInput input_; /* input callback */
/* Set to 0 to support partial data streaming. Set to 1 to expect full data or
for the last chunk of partial data. */
int finish_;
/* indicates how much bytes already read when reading partial data */
int tmp_bytes_read_;
/* Input byte buffer, consist of a ringbuffer and a "slack" region where */
/* bytes from the start of the ringbuffer are copied. */
uint8_t buf_[BROTLI_IBUF_SIZE];
} BrotliBitReader;
/* Initializes the bitreader fields. After this, BrotliWarmupBitReader must
@ -126,9 +128,9 @@ static BROTLI_INLINE void ShiftBytes32(BrotliBitReader* const br) {
every 32 bytes of input is read.
*/
static BROTLI_INLINE int BrotliReadMoreInput(BrotliBitReader* const br) {
if (br->bit_end_pos_ > 256) {
if (PREDICT_TRUE(br->bit_end_pos_ > 256)) {
return 1;
} else if (br->eos_) {
} else if (PREDICT_FALSE(br->eos_)) {
return br->bit_pos_ <= br->bit_end_pos_;
} else {
uint8_t* dst = br->buf_ptr_;

View File

@ -20,6 +20,7 @@
#include "./context.h"
#include "./decode.h"
#include "./dictionary.h"
#include "./port.h"
#include "./transform.h"
#include "./huffman.h"
#include "./prefix.h"
@ -149,9 +150,9 @@ static BROTLI_INLINE int ReadSymbol(const HuffmanCode* table,
int nbits;
BrotliFillBitWindow(br);
table += (int)(br->val_ >> br->bit_pos_) & HUFFMAN_TABLE_MASK;
nbits = table->bits - HUFFMAN_TABLE_BITS;
if (nbits > 0) {
if (PREDICT_FALSE(table->bits > HUFFMAN_TABLE_BITS)) {
br->bit_pos_ += HUFFMAN_TABLE_BITS;
nbits = table->bits - HUFFMAN_TABLE_BITS;
table += table->value;
table += (int)(br->val_ >> br->bit_pos_) & ((1 << nbits) - 1);
}
@ -938,6 +939,7 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
- flushing the input s->ringbuffer when decoding uncompressed blocks */
static const int kRingBufferWriteAheadSlack = 128 + BROTLI_READ_SIZE;
s->br.input_ = input;
s->br.finish_ = finish;
/* State machine */
@ -979,17 +981,6 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
s->window_bits = DecodeWindowBits(br);
s->max_backward_distance = (1 << s->window_bits) - 16;
s->ringbuffer_size = 1 << s->window_bits;
s->ringbuffer_mask = s->ringbuffer_size - 1;
s->ringbuffer = (uint8_t*)malloc((size_t)(s->ringbuffer_size +
kRingBufferWriteAheadSlack +
kMaxDictionaryWordLength));
if (!s->ringbuffer) {
result = BROTLI_RESULT_ERROR;
break;
}
s->ringbuffer_end = s->ringbuffer + s->ringbuffer_size;
s->block_type_trees = (HuffmanCode*)malloc(
3 * BROTLI_HUFFMAN_MAX_TABLE_SIZE * sizeof(HuffmanCode));
s->block_len_trees = (HuffmanCode*)malloc(
@ -1002,10 +993,6 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
s->state = BROTLI_STATE_METABLOCK_BEGIN;
/* No break, continue to next state */
case BROTLI_STATE_METABLOCK_BEGIN:
if (!BrotliReadMoreInput(br)) {
result = BROTLI_RESULT_NEEDS_MORE_INPUT;
break;
}
if (s->input_end) {
s->partially_written = 0;
s->state = BROTLI_STATE_DONE;
@ -1058,6 +1045,34 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
break;
}
BROTLI_LOG_UINT(s->meta_block_remaining_len);
/* If it is the first metablock, allocate the ringbuffer */
if (s->ringbuffer == NULL) {
size_t known_size = 0;
s->ringbuffer_size = 1 << s->window_bits;
/* If we know the data size is small, do not allocate more ringbuffer
size than needed to reduce memory usage. Since this happens after
the first BrotliReadMoreInput call, we can read the bitreader
buffer at position 0. */
if (BrotliDecompressedSize(BROTLI_READ_SIZE, br->buf_, &known_size)
== BROTLI_RESULT_SUCCESS) {
while (s->ringbuffer_size >= known_size * 2
&& s->ringbuffer_size > 0) {
s->ringbuffer_size /= 2;
}
}
s->ringbuffer_mask = s->ringbuffer_size - 1;
s->ringbuffer = (uint8_t*)malloc((size_t)(s->ringbuffer_size +
kRingBufferWriteAheadSlack +
kMaxDictionaryWordLength));
if (!s->ringbuffer) {
result = BROTLI_RESULT_ERROR;
break;
}
s->ringbuffer_end = s->ringbuffer + s->ringbuffer_size;
}
if (s->is_metadata) {
if (!JumpToByteBoundary(&s->br)) {
result = BROTLI_RESULT_ERROR;
@ -1348,7 +1363,7 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
result = BROTLI_RESULT_NEEDS_MORE_INPUT;
break;
}
assert(s->distance_code < 0);
BROTLI_DCHECK(s->distance_code < 0);
if (s->block_length[2] == 0) {
DecodeBlockType(s->num_block_types[2],

View File

@ -15,7 +15,6 @@
/* Utilities for building Huffman decoding tables. */
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

View File

@ -18,7 +18,6 @@
#ifndef BROTLI_DEC_HUFFMAN_H_
#define BROTLI_DEC_HUFFMAN_H_
#include <assert.h>
#include "./types.h"
#if defined(__cplusplus) || defined(c_plusplus)

60
dec/port.h Normal file
View File

@ -0,0 +1,60 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/* Macros for branch prediction. */
#ifndef BROTLI_DEC_PORT_H_
#define BROTLI_DEC_PORT_H_
#include<assert.h>
/* Compatibility with non-clang compilers. */
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
/* Define "PREDICT_TRUE" and "PREDICT_FALSE" macros for capable compilers.
To apply compiler hint, enclose the branching condition into macros, like this:
if (PREDICT_TRUE(zero == 0)) {
// main execution path
} else {
// compiler should place this code outside of main execution path
}
OR:
if (PREDICT_FALSE(something_rare_or_unexpected_happens)) {
// compiler should place this code outside of main execution path
}
*/
#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ > 95) || \
(defined(__llvm__) && __has_builtin(__builtin_expect))
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
#else
#define PREDICT_FALSE(x) (x)
#define PREDICT_TRUE(x) (x)
#endif
#ifdef BROTLI_DECODE_DEBUG
#define BROTLI_DCHECK(x) assert(x)
#else
#define BROTLI_DCHECK(x)
#endif
#endif /* BROTLI_DEC_PORT_H_ */

View File

@ -16,6 +16,7 @@
/* Size-checked memory allocation. */
#include <stdlib.h>
#include "./port.h"
#include "./safe_malloc.h"
#if defined(__cplusplus) || defined(c_plusplus)
@ -33,7 +34,7 @@ static int CheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
void* BrotliSafeMalloc(uint64_t nmemb, size_t size) {
if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL;
assert(nmemb * size > 0);
BROTLI_DCHECK(nmemb * size > 0);
return malloc((size_t)(nmemb * size));
}

View File

@ -18,8 +18,6 @@
#ifndef BROTLI_DEC_SAFE_MALLOC_H_
#define BROTLI_DEC_SAFE_MALLOC_H_
#include <assert.h>
#include "./types.h"
#if defined(__cplusplus) || defined(c_plusplus)

View File

@ -55,12 +55,18 @@
#define IS_LITTLE_ENDIAN
#endif
#if defined(COMPILER_GCC3)
/* Compatibility with non-clang compilers. */
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ > 95) || \
(defined(__llvm__) && __has_builtin(__builtin_expect))
#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define PREDICT_FALSE(x) x
#define PREDICT_TRUE(x) x
#define PREDICT_FALSE(x) (x)
#define PREDICT_TRUE(x) (x)
#endif
// Portable handling of unaligned loads, stores, and copies.

View File

@ -141,6 +141,7 @@ brotli = Extension("brotli",
"dec/dictionary.h",
"dec/huffman.h",
"dec/prefix.h",
"dec/port.h",
"dec/safe_malloc.h",
"dec/streams.h",
"dec/transform.h",