mirror of
https://github.com/google/brotli.git
synced 2024-11-21 19:20:09 +00:00
Speed and memory usage improvements for the decoder.
* Change order of members of bit reader state structure. * Remove unused includes for assert. Add BROTLI_DCHECK macros and use it instead of assert. * Do not calculate nbits in common case of ReadSymbol. * Introduce and use PREDICT_TRUE / PREDICT_FALSE macros. * Allocate less memory in the brotli decoder if it knows the result size beforehand. Before this, the decoder would always allocate 16MB if the encoder annotated the window size as 22 bit (which is the default), even if the file is only a few KB uncompressed. Now, it'll only allocate a ringbuffer as large as needed for the result file. But only if it can know the filesize, it's not possible to know that if there are multiple metablocks or too large uncompressed metablock.
This commit is contained in:
parent
47ea761869
commit
83aa24dc86
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,4 +1,5 @@
|
||||
*.o
|
||||
*.pyc
|
||||
*.txt.uncompressed
|
||||
*.bro
|
||||
*.unbro
|
||||
|
@ -15,10 +15,10 @@
|
||||
|
||||
/* Bit reading helpers */
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "./bit_reader.h"
|
||||
#include "./port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
@ -26,7 +26,7 @@ extern "C" {
|
||||
|
||||
void BrotliInitBitReader(BrotliBitReader* const br,
|
||||
BrotliInput input, int finish) {
|
||||
assert(br != NULL);
|
||||
BROTLI_DCHECK(br != NULL);
|
||||
|
||||
br->finish_ = finish;
|
||||
br->tmp_bytes_read_ = 0;
|
||||
|
@ -19,6 +19,7 @@
|
||||
#define BROTLI_DEC_BIT_READER_H_
|
||||
|
||||
#include <string.h>
|
||||
#include "./port.h"
|
||||
#include "./streams.h"
|
||||
#include "./types.h"
|
||||
|
||||
@ -46,11 +47,6 @@ static const uint32_t kBitMask[BROTLI_MAX_NUM_BIT_READ] = {
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
/* Input byte buffer, consist of a ringbuffer and a "slack" region where */
|
||||
/* bytes from the start of the ringbuffer are copied. */
|
||||
uint8_t buf_[BROTLI_IBUF_SIZE];
|
||||
uint8_t* buf_ptr_; /* next input will write here */
|
||||
BrotliInput input_; /* input callback */
|
||||
#if (BROTLI_USE_64_BITS)
|
||||
uint64_t val_; /* pre-fetched bits */
|
||||
#else
|
||||
@ -60,12 +56,18 @@ typedef struct {
|
||||
uint32_t bit_pos_; /* current bit-reading position in val_ */
|
||||
uint32_t bit_end_pos_; /* bit-reading end position from LSB of val_ */
|
||||
int eos_; /* input stream is finished */
|
||||
uint8_t* buf_ptr_; /* next input will write here */
|
||||
BrotliInput input_; /* input callback */
|
||||
|
||||
/* Set to 0 to support partial data streaming. Set to 1 to expect full data or
|
||||
for the last chunk of partial data. */
|
||||
int finish_;
|
||||
/* indicates how much bytes already read when reading partial data */
|
||||
int tmp_bytes_read_;
|
||||
|
||||
/* Input byte buffer, consist of a ringbuffer and a "slack" region where */
|
||||
/* bytes from the start of the ringbuffer are copied. */
|
||||
uint8_t buf_[BROTLI_IBUF_SIZE];
|
||||
} BrotliBitReader;
|
||||
|
||||
/* Initializes the bitreader fields. After this, BrotliWarmupBitReader must
|
||||
@ -126,9 +128,9 @@ static BROTLI_INLINE void ShiftBytes32(BrotliBitReader* const br) {
|
||||
every 32 bytes of input is read.
|
||||
*/
|
||||
static BROTLI_INLINE int BrotliReadMoreInput(BrotliBitReader* const br) {
|
||||
if (br->bit_end_pos_ > 256) {
|
||||
if (PREDICT_TRUE(br->bit_end_pos_ > 256)) {
|
||||
return 1;
|
||||
} else if (br->eos_) {
|
||||
} else if (PREDICT_FALSE(br->eos_)) {
|
||||
return br->bit_pos_ <= br->bit_end_pos_;
|
||||
} else {
|
||||
uint8_t* dst = br->buf_ptr_;
|
||||
|
51
dec/decode.c
51
dec/decode.c
@ -20,6 +20,7 @@
|
||||
#include "./context.h"
|
||||
#include "./decode.h"
|
||||
#include "./dictionary.h"
|
||||
#include "./port.h"
|
||||
#include "./transform.h"
|
||||
#include "./huffman.h"
|
||||
#include "./prefix.h"
|
||||
@ -149,9 +150,9 @@ static BROTLI_INLINE int ReadSymbol(const HuffmanCode* table,
|
||||
int nbits;
|
||||
BrotliFillBitWindow(br);
|
||||
table += (int)(br->val_ >> br->bit_pos_) & HUFFMAN_TABLE_MASK;
|
||||
nbits = table->bits - HUFFMAN_TABLE_BITS;
|
||||
if (nbits > 0) {
|
||||
if (PREDICT_FALSE(table->bits > HUFFMAN_TABLE_BITS)) {
|
||||
br->bit_pos_ += HUFFMAN_TABLE_BITS;
|
||||
nbits = table->bits - HUFFMAN_TABLE_BITS;
|
||||
table += table->value;
|
||||
table += (int)(br->val_ >> br->bit_pos_) & ((1 << nbits) - 1);
|
||||
}
|
||||
@ -938,6 +939,7 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
|
||||
- flushing the input s->ringbuffer when decoding uncompressed blocks */
|
||||
static const int kRingBufferWriteAheadSlack = 128 + BROTLI_READ_SIZE;
|
||||
|
||||
s->br.input_ = input;
|
||||
s->br.finish_ = finish;
|
||||
|
||||
/* State machine */
|
||||
@ -979,17 +981,6 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
|
||||
s->window_bits = DecodeWindowBits(br);
|
||||
s->max_backward_distance = (1 << s->window_bits) - 16;
|
||||
|
||||
s->ringbuffer_size = 1 << s->window_bits;
|
||||
s->ringbuffer_mask = s->ringbuffer_size - 1;
|
||||
s->ringbuffer = (uint8_t*)malloc((size_t)(s->ringbuffer_size +
|
||||
kRingBufferWriteAheadSlack +
|
||||
kMaxDictionaryWordLength));
|
||||
if (!s->ringbuffer) {
|
||||
result = BROTLI_RESULT_ERROR;
|
||||
break;
|
||||
}
|
||||
s->ringbuffer_end = s->ringbuffer + s->ringbuffer_size;
|
||||
|
||||
s->block_type_trees = (HuffmanCode*)malloc(
|
||||
3 * BROTLI_HUFFMAN_MAX_TABLE_SIZE * sizeof(HuffmanCode));
|
||||
s->block_len_trees = (HuffmanCode*)malloc(
|
||||
@ -1002,10 +993,6 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
|
||||
s->state = BROTLI_STATE_METABLOCK_BEGIN;
|
||||
/* No break, continue to next state */
|
||||
case BROTLI_STATE_METABLOCK_BEGIN:
|
||||
if (!BrotliReadMoreInput(br)) {
|
||||
result = BROTLI_RESULT_NEEDS_MORE_INPUT;
|
||||
break;
|
||||
}
|
||||
if (s->input_end) {
|
||||
s->partially_written = 0;
|
||||
s->state = BROTLI_STATE_DONE;
|
||||
@ -1058,6 +1045,34 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
|
||||
break;
|
||||
}
|
||||
BROTLI_LOG_UINT(s->meta_block_remaining_len);
|
||||
/* If it is the first metablock, allocate the ringbuffer */
|
||||
if (s->ringbuffer == NULL) {
|
||||
size_t known_size = 0;
|
||||
s->ringbuffer_size = 1 << s->window_bits;
|
||||
|
||||
/* If we know the data size is small, do not allocate more ringbuffer
|
||||
size than needed to reduce memory usage. Since this happens after
|
||||
the first BrotliReadMoreInput call, we can read the bitreader
|
||||
buffer at position 0. */
|
||||
if (BrotliDecompressedSize(BROTLI_READ_SIZE, br->buf_, &known_size)
|
||||
== BROTLI_RESULT_SUCCESS) {
|
||||
while (s->ringbuffer_size >= known_size * 2
|
||||
&& s->ringbuffer_size > 0) {
|
||||
s->ringbuffer_size /= 2;
|
||||
}
|
||||
}
|
||||
|
||||
s->ringbuffer_mask = s->ringbuffer_size - 1;
|
||||
s->ringbuffer = (uint8_t*)malloc((size_t)(s->ringbuffer_size +
|
||||
kRingBufferWriteAheadSlack +
|
||||
kMaxDictionaryWordLength));
|
||||
if (!s->ringbuffer) {
|
||||
result = BROTLI_RESULT_ERROR;
|
||||
break;
|
||||
}
|
||||
s->ringbuffer_end = s->ringbuffer + s->ringbuffer_size;
|
||||
}
|
||||
|
||||
if (s->is_metadata) {
|
||||
if (!JumpToByteBoundary(&s->br)) {
|
||||
result = BROTLI_RESULT_ERROR;
|
||||
@ -1348,7 +1363,7 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
|
||||
result = BROTLI_RESULT_NEEDS_MORE_INPUT;
|
||||
break;
|
||||
}
|
||||
assert(s->distance_code < 0);
|
||||
BROTLI_DCHECK(s->distance_code < 0);
|
||||
|
||||
if (s->block_length[2] == 0) {
|
||||
DecodeBlockType(s->num_block_types[2],
|
||||
|
@ -15,7 +15,6 @@
|
||||
|
||||
/* Utilities for building Huffman decoding tables. */
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
@ -18,7 +18,6 @@
|
||||
#ifndef BROTLI_DEC_HUFFMAN_H_
|
||||
#define BROTLI_DEC_HUFFMAN_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include "./types.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
|
60
dec/port.h
Normal file
60
dec/port.h
Normal file
@ -0,0 +1,60 @@
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
/* Macros for branch prediction. */
|
||||
|
||||
#ifndef BROTLI_DEC_PORT_H_
|
||||
#define BROTLI_DEC_PORT_H_
|
||||
|
||||
#include<assert.h>
|
||||
|
||||
/* Compatibility with non-clang compilers. */
|
||||
#ifndef __has_builtin
|
||||
#define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
/* Define "PREDICT_TRUE" and "PREDICT_FALSE" macros for capable compilers.
|
||||
|
||||
To apply compiler hint, enclose the branching condition into macros, like this:
|
||||
|
||||
if (PREDICT_TRUE(zero == 0)) {
|
||||
// main execution path
|
||||
} else {
|
||||
// compiler should place this code outside of main execution path
|
||||
}
|
||||
|
||||
OR:
|
||||
|
||||
if (PREDICT_FALSE(something_rare_or_unexpected_happens)) {
|
||||
// compiler should place this code outside of main execution path
|
||||
}
|
||||
|
||||
*/
|
||||
#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ > 95) || \
|
||||
(defined(__llvm__) && __has_builtin(__builtin_expect))
|
||||
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
|
||||
#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
|
||||
#else
|
||||
#define PREDICT_FALSE(x) (x)
|
||||
#define PREDICT_TRUE(x) (x)
|
||||
#endif
|
||||
|
||||
#ifdef BROTLI_DECODE_DEBUG
|
||||
#define BROTLI_DCHECK(x) assert(x)
|
||||
#else
|
||||
#define BROTLI_DCHECK(x)
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_DEC_PORT_H_ */
|
@ -16,6 +16,7 @@
|
||||
/* Size-checked memory allocation. */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./port.h"
|
||||
#include "./safe_malloc.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
@ -33,7 +34,7 @@ static int CheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
|
||||
|
||||
void* BrotliSafeMalloc(uint64_t nmemb, size_t size) {
|
||||
if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL;
|
||||
assert(nmemb * size > 0);
|
||||
BROTLI_DCHECK(nmemb * size > 0);
|
||||
return malloc((size_t)(nmemb * size));
|
||||
}
|
||||
|
||||
|
@ -18,8 +18,6 @@
|
||||
#ifndef BROTLI_DEC_SAFE_MALLOC_H_
|
||||
#define BROTLI_DEC_SAFE_MALLOC_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "./types.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
|
12
enc/port.h
12
enc/port.h
@ -55,12 +55,18 @@
|
||||
#define IS_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
#if defined(COMPILER_GCC3)
|
||||
/* Compatibility with non-clang compilers. */
|
||||
#ifndef __has_builtin
|
||||
#define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ > 95) || \
|
||||
(defined(__llvm__) && __has_builtin(__builtin_expect))
|
||||
#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
|
||||
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
|
||||
#else
|
||||
#define PREDICT_FALSE(x) x
|
||||
#define PREDICT_TRUE(x) x
|
||||
#define PREDICT_FALSE(x) (x)
|
||||
#define PREDICT_TRUE(x) (x)
|
||||
#endif
|
||||
|
||||
// Portable handling of unaligned loads, stores, and copies.
|
||||
|
Loading…
Reference in New Issue
Block a user