Bug fixes for the brotli encoder and decoder.

This commit is contained in:
Zoltan Szabadka 2014-01-06 16:01:57 +01:00
parent 1447345cbb
commit d762bc6845
4 changed files with 85 additions and 58 deletions

View File

@ -199,6 +199,9 @@ static int ReadHuffmanCodeLengths(
repeat <<= extra_bits;
}
repeat += BrotliReadBits(br, extra_bits) + 3;
if (repeat + symbol > num_symbols) {
goto End;
}
if (code_len == kCodeLengthRepeatCode) {
repeat_length = prev_code_len;
for (; i < repeat; ++i) {
@ -258,7 +261,7 @@ static int ReadHuffmanCode(int alphabet_size,
}
memset(code_lengths, 0, alphabet_size);
for (i = 0; i < num_symbols; ++i) {
symbols[i] = BrotliReadBits(br, max_bits);
symbols[i] = BrotliReadBits(br, max_bits) % alphabet_size;
code_lengths[symbols[i]] = 2;
}
code_lengths[symbols[0]] = 1;
@ -386,8 +389,8 @@ static void ReadInsertAndCopy(const HuffmanTree* tree,
}
}
static int TranslateShortCodes(int code, int* ringbuffer, size_t index) {
int val;
static size_t TranslateShortCodes(int code, int* ringbuffer, size_t index) {
size_t val;
if (code < NUM_DISTANCE_SHORT_CODES) {
index += kDistanceShortCodeIndexOffset[code];
index &= 3;
@ -427,9 +430,13 @@ typedef struct {
static void HuffmanTreeGroupInit(HuffmanTreeGroup* group, int alphabet_size,
int ntrees) {
int i;
group->alphabet_size = alphabet_size;
group->num_htrees = ntrees;
group->htrees = (HuffmanTree*)malloc(sizeof(HuffmanTree) * ntrees);
for (i = 0; i < ntrees; ++i) {
group->htrees[i].root_ = NULL;
}
}
static void HuffmanTreeGroupRelease(HuffmanTreeGroup* group) {
@ -437,7 +444,9 @@ static void HuffmanTreeGroupRelease(HuffmanTreeGroup* group) {
for (i = 0; i < group->num_htrees; ++i) {
BrotliHuffmanTreeRelease(&group->htrees[i]);
}
free(group->htrees);
if (group->htrees) {
free(group->htrees);
}
}
static int HuffmanTreeGroupDecode(HuffmanTreeGroup* group,
@ -466,6 +475,9 @@ static int DecodeContextMap(int context_map_size,
BROTLI_LOG_UINT(*num_htrees);
*context_map = (uint8_t*)malloc(context_map_size);
if (*context_map == 0) {
return 0;
}
if (*num_htrees <= 1) {
memset(*context_map, 0, context_map_size);
return 1;
@ -497,6 +509,10 @@ static int DecodeContextMap(int context_map_size,
} else if (code <= max_run_length_prefix) {
int reps = 1 + (1 << code) + BrotliReadBits(br, code);
while (--reps) {
if (i >= context_map_size) {
ok = 0;
goto End;
}
(*context_map)[i] = 0;
++i;
}
@ -514,12 +530,13 @@ static int DecodeContextMap(int context_map_size,
return ok;
}
static BROTLI_INLINE void DecodeBlockType(const HuffmanTree* trees,
int tree_type,
int* block_types,
int* ringbuffers,
size_t* indexes,
BrotliBitReader* br) {
static BROTLI_INLINE void DecodeBlockType(const int max_block_type,
const HuffmanTree* trees,
int tree_type,
int* block_types,
int* ringbuffers,
size_t* indexes,
BrotliBitReader* br) {
int* ringbuffer = ringbuffers + tree_type * 2;
size_t* index = indexes + tree_type;
int type_code = ReadSymbol(trees + tree_type, br);
@ -531,6 +548,9 @@ static BROTLI_INLINE void DecodeBlockType(const HuffmanTree* trees,
} else {
block_type = type_code - 2;
}
if (block_type >= max_block_type) {
block_type -= max_block_type;
}
block_types[tree_type] = block_type;
ringbuffer[(*index) & 1] = block_type;
++(*index);
@ -639,7 +659,9 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
HuffmanTreeGroup hgroup[3];
BrotliBitReader br;
static const int kRingBufferWriteAheadSlack = 16;
/* 16 bytes would be enough, but we add some more slack for transforms */
/* to work at the end of the ringbuffer. */
static const int kRingBufferWriteAheadSlack = 128;
static const int kMaxDictionaryWordLength = 0;
@ -656,6 +678,9 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
ringbuffer = (uint8_t*)malloc(ringbuffer_size +
kRingBufferWriteAheadSlack +
kMaxDictionaryWordLength);
if (!ringbuffer) {
ok = 0;
}
ringbuffer_end = ringbuffer + ringbuffer_size;
while (!input_end && ok) {
@ -755,6 +780,10 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
num_distance_codes = (num_direct_distance_codes +
(48 << distance_postfix_bits));
context_modes = (uint8_t*)malloc(num_block_types[0]);
if (context_modes == 0) {
ok = 0;
goto End;
}
for (i = 0; i < num_block_types[0]; ++i) {
context_modes[i] = BrotliReadBits(&br, 2) << 1;
BROTLI_LOG_ARRAY_INDEX(context_modes, i);
@ -792,7 +821,7 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
int insert_length;
int copy_length;
int distance_code;
int distance;
size_t distance;
size_t max_distance;
uint8_t context;
int j;
@ -804,7 +833,8 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
goto End;
}
if (block_length[1] == 0) {
DecodeBlockType(block_type_trees, 1, block_type, block_type_rb,
DecodeBlockType(num_block_types[1],
block_type_trees, 1, block_type, block_type_rb,
block_type_rb_index, &br);
block_length[1] = ReadBlockLength(&block_len_trees[1], &br);
}
@ -821,7 +851,8 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
goto End;
}
if (block_length[0] == 0) {
DecodeBlockType(block_type_trees, 0, block_type, block_type_rb,
DecodeBlockType(num_block_types[0],
block_type_trees, 0, block_type, block_type_rb,
block_type_rb_index, &br);
block_length[0] = ReadBlockLength(&block_len_trees[0], &br);
context_offset = block_type[0] << kLiteralContextBits;
@ -858,7 +889,8 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
goto End;
}
if (block_length[2] == 0) {
DecodeBlockType(block_type_trees, 2, block_type, block_type_rb,
DecodeBlockType(num_block_types[2],
block_type_trees, 2, block_type, block_type_rb,
block_type_rb_index, &br);
block_length[2] = ReadBlockLength(&block_len_trees[2], &br);
dist_htree_index = block_type[2];
@ -891,16 +923,16 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
copy_dst = &ringbuffer[pos & ringbuffer_mask];
if ((size_t)distance > max_distance) {
if (distance > max_distance) {
printf("Invalid backward reference. pos: %lu distance: %d "
"len: %d end: %lu\n", (unsigned long)pos, distance, copy_length,
(unsigned long)meta_block_end_pos);
"len: %d end: %lu\n", (unsigned long)pos, (int)distance,
copy_length, (unsigned long)meta_block_end_pos);
ok = 0;
goto End;
} else {
if (pos + copy_length > meta_block_end_pos) {
printf("Invalid backward reference. pos: %lu distance: %d "
"len: %d end: %lu\n", (unsigned long)pos, distance,
"len: %d end: %lu\n", (unsigned long)pos, (int)distance,
copy_length, (unsigned long)meta_block_end_pos);
ok = 0;
goto End;
@ -942,9 +974,15 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
prev_byte2 = ringbuffer[(pos - 2) & ringbuffer_mask];
}
End:
free(context_modes);
free(context_map);
free(dist_context_map);
if (context_modes != 0) {
free(context_modes);
}
if (context_map != 0) {
free(context_map);
}
if (dist_context_map != 0) {
free(dist_context_map);
}
for (i = 0; i < 3; ++i) {
HuffmanTreeGroupRelease(&hgroup[i]);
BrotliHuffmanTreeRelease(&block_type_trees[i]);
@ -952,10 +990,12 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
}
}
if (BrotliWrite(output, ringbuffer, pos & ringbuffer_mask) < 0) {
ok = 0;
if (ringbuffer != 0) {
if (BrotliWrite(output, ringbuffer, pos & ringbuffer_mask) < 0) {
ok = 0;
}
free(ringbuffer);
}
free(ringbuffer);
return ok;
}

View File

@ -52,6 +52,7 @@ static void AssignChildren(HuffmanTree* const tree,
static int TreeInit(HuffmanTree* const tree, int num_leaves) {
assert(tree != NULL);
tree->root_ = NULL;
if (num_leaves == 0) return 0;
/* We allocate maximum possible nodes in the tree at once. */
/* Note that a Huffman tree is a full binary tree; and in a full binary */
@ -70,7 +71,9 @@ static int TreeInit(HuffmanTree* const tree, int num_leaves) {
void BrotliHuffmanTreeRelease(HuffmanTree* const tree) {
if (tree != NULL) {
free(tree->root_);
if (tree->root_ != NULL) {
free(tree->root_);
}
tree->root_ = NULL;
tree->max_nodes_ = 0;
tree->num_nodes_ = 0;

View File

@ -82,18 +82,6 @@ void CopyCommandsToByteArray(const std::vector<Command>& cmds,
}
}
template<int kSize>
double HistogramAddEval(const Histogram<kSize>& a,
const Histogram<kSize>& b) {
int total = a.total_count_ + b.total_count_;
double retval = total * FastLog2(total);
for (int i = 0; i < kSize; ++i) {
int count = a.data_[i] + b.data_[i];
retval -= count * FastLog2(count);
}
return retval;
}
template<typename HistogramType, typename DataType>
void InitialEntropyCodes(const DataType* data, size_t length,
int literals_per_histogram,
@ -120,28 +108,19 @@ void InitialEntropyCodes(const DataType* data, size_t length,
}
}
template<typename HistogramType>
int FindClosest(const HistogramType& sample,
const std::vector<HistogramType>& vec) {
double best_distance = 1e99;
int best_ix = 0;
for (int i = 0; i < vec.size(); ++i) {
double distance = HistogramAddEval(sample, vec[i]);
if (distance < best_distance) {
best_ix = i;
best_distance = distance;
}
}
return best_ix;
}
template<typename HistogramType, typename DataType>
void RandomSample(unsigned int* seed,
const DataType* data,
size_t length,
size_t stride,
HistogramType* sample) {
size_t pos = rand_r(seed) % (length - stride);
size_t pos = 0;
if (stride >= length) {
pos = 0;
stride = length;
} else {
pos = rand_r(seed) % (length - stride + 1);
}
sample->Add(data + pos, stride);
}
@ -149,13 +128,14 @@ template<typename HistogramType, typename DataType>
void RefineEntropyCodes(const DataType* data, size_t length,
size_t stride,
std::vector<HistogramType>* vec) {
const int iters =
int iters =
kIterMulForRefining * length / stride + kMinItersForRefining;
unsigned int seed = 7;
iters = ((iters + vec->size() - 1) / vec->size()) * vec->size();
for (int iter = 0; iter < iters; ++iter) {
HistogramType sample;
RandomSample(&seed, data, length, stride, &sample);
int ix = FindClosest(sample, *vec);
int ix = iter % vec->size();
(*vec)[ix].AddHistogram(sample);
}
}

View File

@ -205,7 +205,9 @@ class HashLongestMatch {
continue;
}
prev_ix &= ring_buffer_mask;
if (data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len =
@ -277,7 +279,9 @@ class HashLongestMatch {
break;
}
prev_ix &= ring_buffer_mask;
if (data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
if (cur_ix_masked + best_len > ring_buffer_mask ||
prev_ix + best_len > ring_buffer_mask ||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
continue;
}
const size_t len =