Update encoder (#492)

* fix comment position in `context.h`
 * fix typo in internal quality constant name
 * deduplicate `BuildMetaBlockGreedy` code
 * simplify aggregation in `ChooseContextMap`
This commit is contained in:
Eugene Kliuchnikov 2016-12-22 15:55:05 +01:00 committed by GitHub
parent 27d94590a2
commit 7e347a7c84
5 changed files with 97 additions and 138 deletions

View File

@ -127,9 +127,9 @@ static const uint8_t kUTF8ContextLookup[512] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* UTF8 lead byte range. */ /* UTF8 lead byte range. */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
}; };

View File

@ -330,18 +330,13 @@ static void ChooseContextMap(int quality,
uint32_t monogram_histo[3] = { 0 }; uint32_t monogram_histo[3] = { 0 };
uint32_t two_prefix_histo[6] = { 0 }; uint32_t two_prefix_histo[6] = { 0 };
size_t total = 0; size_t total;
size_t i; size_t i;
size_t dummy; size_t dummy;
double entropy[4]; double entropy[4];
for (i = 0; i < 9; ++i) { for (i = 0; i < 9; ++i) {
size_t j = i;
total += bigram_histo[i];
monogram_histo[i % 3] += bigram_histo[i]; monogram_histo[i % 3] += bigram_histo[i];
if (j >= 6) { two_prefix_histo[i % 6] += bigram_histo[i];
j -= 6;
}
two_prefix_histo[j] += bigram_histo[i];
} }
entropy[1] = ShannonEntropy(monogram_histo, 3, &dummy); entropy[1] = ShannonEntropy(monogram_histo, 3, &dummy);
entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &dummy) + entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
@ -351,6 +346,7 @@ static void ChooseContextMap(int quality,
entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &dummy); entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &dummy);
} }
total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2];
assert(total != 0); assert(total != 0);
entropy[0] = 1.0 / (double)total; entropy[0] = 1.0 / (double)total;
entropy[1] *= entropy[0]; entropy[1] *= entropy[0];
@ -480,7 +476,7 @@ static void WriteMetaBlockInternal(MemoryManager* m,
num_direct_distance_codes, num_direct_distance_codes,
distance_postfix_bits); distance_postfix_bits);
} }
if (params->quality <= MAX_QUALITY_FOR_STATIC_ENRTOPY_CODES) { if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos, BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,
bytes, mask, is_last, bytes, mask, is_last,
commands, num_commands, commands, num_commands,
@ -505,22 +501,10 @@ static void WriteMetaBlockInternal(MemoryManager* m,
&literal_context_mode, &literal_context_mode,
&num_literal_contexts, &num_literal_contexts,
&literal_context_map); &literal_context_map);
if (literal_context_map == NULL) {
BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask, BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
commands, num_commands, &mb); prev_byte, prev_byte2, literal_context_mode, num_literal_contexts,
literal_context_map, commands, num_commands, &mb);
if (BROTLI_IS_OOM(m)) return; if (BROTLI_IS_OOM(m)) return;
} else {
BrotliBuildMetaBlockGreedyWithContexts(m, data,
wrapped_last_flush_pos,
mask,
prev_byte, prev_byte2,
literal_context_mode,
num_literal_contexts,
literal_context_map,
commands, num_commands,
&mb);
if (BROTLI_IS_OOM(m)) return;
}
} else { } else {
if (!BrotliIsMostlyUTF8(data, wrapped_last_flush_pos, mask, bytes, if (!BrotliIsMostlyUTF8(data, wrapped_last_flush_pos, mask, bytes,
kMinUTF8Ratio)) { kMinUTF8Ratio)) {

View File

@ -136,53 +136,7 @@ void BrotliBuildMetaBlock(MemoryManager* m,
#include "./metablock_inc.h" /* NOLINT(build/include) */ #include "./metablock_inc.h" /* NOLINT(build/include) */
#undef FN #undef FN
void BrotliBuildMetaBlockGreedy(MemoryManager* m, #define BROTLI_MAX_STATIC_CONTEXTS 3
const uint8_t* ringbuffer,
size_t pos,
size_t mask,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb) {
BlockSplitterLiteral lit_blocks;
BlockSplitterCommand cmd_blocks;
BlockSplitterDistance dist_blocks;
size_t num_literals = 0;
size_t i;
for (i = 0; i < n_commands; ++i) {
num_literals += commands[i].insert_len_;
}
InitBlockSplitterLiteral(m, &lit_blocks, 256, 512, 400.0, num_literals,
&mb->literal_split, &mb->literal_histograms,
&mb->literal_histograms_size);
if (BROTLI_IS_OOM(m)) return;
InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024,
500.0, n_commands, &mb->command_split, &mb->command_histograms,
&mb->command_histograms_size);
if (BROTLI_IS_OOM(m)) return;
InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands,
&mb->distance_split, &mb->distance_histograms,
&mb->distance_histograms_size);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
size_t j;
BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_);
for (j = cmd.insert_len_; j != 0; --j) {
BlockSplitterAddSymbolLiteral(&lit_blocks, ringbuffer[pos & mask]);
++pos;
}
pos += CommandCopyLen(&cmd);
if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) {
BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_);
}
}
BlockSplitterFinishBlockLiteral(&lit_blocks, /* is_final = */ BROTLI_TRUE);
BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ BROTLI_TRUE);
BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ BROTLI_TRUE);
}
/* Greedy block splitter for one block category (literal, command or distance). /* Greedy block splitter for one block category (literal, command or distance).
Gathers histograms for all context buckets. */ Gathers histograms for all context buckets. */
@ -214,7 +168,7 @@ typedef struct ContextBlockSplitter {
/* Offset of the histograms of the previous two block types. */ /* Offset of the histograms of the previous two block types. */
size_t last_histogram_ix_[2]; size_t last_histogram_ix_[2];
/* Entropy of the previous two block types. */ /* Entropy of the previous two block types. */
double* last_entropy_; double last_entropy_[2 * BROTLI_MAX_STATIC_CONTEXTS];
/* The number of times we merged the current block with the last one. */ /* The number of times we merged the current block with the last one. */
size_t merge_last_count_; size_t merge_last_count_;
} ContextBlockSplitter; } ContextBlockSplitter;
@ -226,6 +180,7 @@ static void InitContextBlockSplitter(
size_t* histograms_size) { size_t* histograms_size) {
size_t max_num_blocks = num_symbols / min_block_size + 1; size_t max_num_blocks = num_symbols / min_block_size + 1;
size_t max_num_types; size_t max_num_types;
assert(num_contexts <= BROTLI_MAX_STATIC_CONTEXTS);
self->alphabet_size_ = alphabet_size; self->alphabet_size_ = alphabet_size;
self->num_contexts_ = num_contexts; self->num_contexts_ = num_contexts;
@ -250,7 +205,6 @@ static void InitContextBlockSplitter(
split->lengths, split->lengths_alloc_size, max_num_blocks); split->lengths, split->lengths_alloc_size, max_num_blocks);
if (BROTLI_IS_OOM(m)) return; if (BROTLI_IS_OOM(m)) return;
split->num_blocks = max_num_blocks; split->num_blocks = max_num_blocks;
self->last_entropy_ = BROTLI_ALLOC(m, double, 2 * num_contexts);
if (BROTLI_IS_OOM(m)) return; if (BROTLI_IS_OOM(m)) return;
assert(*histograms == 0); assert(*histograms == 0);
*histograms_size = max_num_types * num_contexts; *histograms_size = max_num_types * num_contexts;
@ -262,17 +216,12 @@ static void InitContextBlockSplitter(
self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0; self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
} }
static void CleanupContextBlockSplitter(
MemoryManager* m, ContextBlockSplitter* self) {
BROTLI_FREE(m, self->last_entropy_);
}
/* Does either of three things: /* Does either of three things:
(1) emits the current block with a new block type; (1) emits the current block with a new block type;
(2) emits the current block with the type of the second last block; (2) emits the current block with the type of the second last block;
(3) merges the current block with the last block. */ (3) merges the current block with the last block. */
static void ContextBlockSplitterFinishBlock( static void ContextBlockSplitterFinishBlock(
MemoryManager* m, ContextBlockSplitter* self, BROTLI_BOOL is_final) { ContextBlockSplitter* self, BROTLI_BOOL is_final) {
BlockSplit* split = self->split_; BlockSplit* split = self->split_;
const size_t num_contexts = self->num_contexts_; const size_t num_contexts = self->num_contexts_;
double* last_entropy = self->last_entropy_; double* last_entropy = self->last_entropy_;
@ -305,13 +254,11 @@ static void ContextBlockSplitterFinishBlock(
respective set of histograms for the last and second last block types. respective set of histograms for the last and second last block types.
Decide over the split based on the total reduction of entropy across Decide over the split based on the total reduction of entropy across
all contexts. */ all contexts. */
double* entropy = BROTLI_ALLOC(m, double, num_contexts); double entropy[BROTLI_MAX_STATIC_CONTEXTS];
HistogramLiteral* combined_histo = HistogramLiteral combined_histo[2 * BROTLI_MAX_STATIC_CONTEXTS]; /* 6KiB */
BROTLI_ALLOC(m, HistogramLiteral, 2 * num_contexts); double combined_entropy[2 * BROTLI_MAX_STATIC_CONTEXTS];
double* combined_entropy = BROTLI_ALLOC(m, double, 2 * num_contexts);
double diff[2] = { 0.0 }; double diff[2] = { 0.0 };
size_t i; size_t i;
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < num_contexts; ++i) { for (i = 0; i < num_contexts; ++i) {
size_t curr_histo_ix = self->curr_histogram_ix_ + i; size_t curr_histo_ix = self->curr_histogram_ix_ + i;
size_t j; size_t j;
@ -383,9 +330,6 @@ static void ContextBlockSplitterFinishBlock(
self->target_block_size_ += self->min_block_size_; self->target_block_size_ += self->min_block_size_;
} }
} }
BROTLI_FREE(m, combined_entropy);
BROTLI_FREE(m, combined_histo);
BROTLI_FREE(m, entropy);
} }
if (is_final) { if (is_final) {
*self->histograms_size_ = split->num_types * num_contexts; *self->histograms_size_ = split->num_types * num_contexts;
@ -395,30 +339,47 @@ static void ContextBlockSplitterFinishBlock(
/* Adds the next symbol to the current block type and context. When the /* Adds the next symbol to the current block type and context. When the
current block reaches the target size, decides on merging the block. */ current block reaches the target size, decides on merging the block. */
static void ContextBlockSplitterAddSymbol(MemoryManager* m, static void ContextBlockSplitterAddSymbol(
ContextBlockSplitter* self, size_t symbol, size_t context) { ContextBlockSplitter* self, size_t symbol, size_t context) {
HistogramAddLiteral(&self->histograms_[self->curr_histogram_ix_ + context], HistogramAddLiteral(&self->histograms_[self->curr_histogram_ix_ + context],
symbol); symbol);
++self->block_size_; ++self->block_size_;
if (self->block_size_ == self->target_block_size_) { if (self->block_size_ == self->target_block_size_) {
ContextBlockSplitterFinishBlock(m, self, /* is_final = */ BROTLI_FALSE); ContextBlockSplitterFinishBlock(self, /* is_final = */ BROTLI_FALSE);
if (BROTLI_IS_OOM(m)) return;
} }
} }
void BrotliBuildMetaBlockGreedyWithContexts(MemoryManager* m, static void MapStaticContexts(MemoryManager* m,
const uint8_t* ringbuffer,
size_t pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
ContextType literal_context_mode,
size_t num_contexts, size_t num_contexts,
const uint32_t* static_context_map, const uint32_t* static_context_map,
const Command *commands,
size_t n_commands,
MetaBlockSplit* mb) { MetaBlockSplit* mb) {
ContextBlockSplitter lit_blocks; size_t i;
assert(mb->literal_context_map == 0);
mb->literal_context_map_size =
mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
mb->literal_context_map =
BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < mb->literal_split.num_types; ++i) {
uint32_t offset = (uint32_t)(i * num_contexts);
size_t j;
for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS); ++j) {
mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
offset + static_context_map[j];
}
}
}
static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode,
const size_t num_contexts, const uint32_t* static_context_map,
const Command *commands, size_t n_commands, MetaBlockSplit* mb) {
union {
BlockSplitterLiteral plain;
ContextBlockSplitter ctx;
} lit_blocks;
BlockSplitterCommand cmd_blocks; BlockSplitterCommand cmd_blocks;
BlockSplitterDistance dist_blocks; BlockSplitterDistance dist_blocks;
size_t num_literals = 0; size_t num_literals = 0;
@ -427,9 +388,15 @@ void BrotliBuildMetaBlockGreedyWithContexts(MemoryManager* m,
num_literals += commands[i].insert_len_; num_literals += commands[i].insert_len_;
} }
InitContextBlockSplitter(m, &lit_blocks, 256, num_contexts, 512, 400.0, if (num_contexts == 1) {
InitBlockSplitterLiteral(m, &lit_blocks.plain, 256, 512, 400.0,
num_literals, &mb->literal_split, &mb->literal_histograms, num_literals, &mb->literal_split, &mb->literal_histograms,
&mb->literal_histograms_size); &mb->literal_histograms_size);
} else {
InitContextBlockSplitter(m, &lit_blocks.ctx, 256, num_contexts, 512, 400.0,
num_literals, &mb->literal_split, &mb->literal_histograms,
&mb->literal_histograms_size);
}
if (BROTLI_IS_OOM(m)) return; if (BROTLI_IS_OOM(m)) return;
InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024, InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024,
500.0, n_commands, &mb->command_split, &mb->command_histograms, 500.0, n_commands, &mb->command_split, &mb->command_histograms,
@ -445,12 +412,15 @@ void BrotliBuildMetaBlockGreedyWithContexts(MemoryManager* m,
size_t j; size_t j;
BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_); BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_);
for (j = cmd.insert_len_; j != 0; --j) { for (j = cmd.insert_len_; j != 0; --j) {
size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
uint8_t literal = ringbuffer[pos & mask]; uint8_t literal = ringbuffer[pos & mask];
ContextBlockSplitterAddSymbol( if (num_contexts == 1) {
m, &lit_blocks, literal, static_context_map[context]); BlockSplitterAddSymbolLiteral(&lit_blocks.plain, literal);
} else {
size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
ContextBlockSplitterAddSymbol(&lit_blocks.ctx, literal,
static_context_map[context]);
}
prev_byte2 = prev_byte; prev_byte2 = prev_byte;
if (BROTLI_IS_OOM(m)) return;
prev_byte = literal; prev_byte = literal;
++pos; ++pos;
} }
@ -464,26 +434,41 @@ void BrotliBuildMetaBlockGreedyWithContexts(MemoryManager* m,
} }
} }
ContextBlockSplitterFinishBlock(m, &lit_blocks, /* is_final = */ BROTLI_TRUE); if (num_contexts == 1) {
if (BROTLI_IS_OOM(m)) return; BlockSplitterFinishBlockLiteral(
CleanupContextBlockSplitter(m, &lit_blocks); &lit_blocks.plain, /* is_final = */ BROTLI_TRUE);
} else {
ContextBlockSplitterFinishBlock(
&lit_blocks.ctx, /* is_final = */ BROTLI_TRUE);
}
BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ BROTLI_TRUE); BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ BROTLI_TRUE);
BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ BROTLI_TRUE); BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ BROTLI_TRUE);
assert(mb->literal_context_map == 0); if (num_contexts > 1) {
mb->literal_context_map_size = MapStaticContexts(m, num_contexts, static_context_map, mb);
mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
mb->literal_context_map =
BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < mb->literal_split.num_types; ++i) {
size_t j;
for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS); ++j) {
mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
(uint32_t)(i * num_contexts) + static_context_map[j];
} }
} }
void BrotliBuildMetaBlockGreedy(MemoryManager* m,
const uint8_t* ringbuffer,
size_t pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
ContextType literal_context_mode,
size_t num_contexts,
const uint32_t* static_context_map,
const Command* commands,
size_t n_commands,
MetaBlockSplit* mb) {
if (num_contexts == 1) {
BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
prev_byte2, literal_context_mode, 1, NULL, commands, n_commands, mb);
} else {
BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
prev_byte2, literal_context_mode, num_contexts, static_context_map,
commands, n_commands, mb);
}
} }
void BrotliOptimizeHistograms(size_t num_direct_distance_codes, void BrotliOptimizeHistograms(size_t num_direct_distance_codes,

View File

@ -80,20 +80,10 @@ BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
ContextType literal_context_mode, ContextType literal_context_mode,
MetaBlockSplit* mb); MetaBlockSplit* mb);
/* Uses a fast greedy block splitter that tries to merge current block with the
last or the second last block and does not do any context modeling. */
BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(MemoryManager* m,
const uint8_t* ringbuffer,
size_t pos,
size_t mask,
const Command* commands,
size_t n_commands,
MetaBlockSplit* mb);
/* Uses a fast greedy block splitter that tries to merge current block with the /* Uses a fast greedy block splitter that tries to merge current block with the
last or the second last block and uses a static context clustering which last or the second last block and uses a static context clustering which
is the same for all block types. */ is the same for all block types. */
BROTLI_INTERNAL void BrotliBuildMetaBlockGreedyWithContexts( BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(
MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask, MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode, uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode,
size_t num_contexts, const uint32_t* static_context_map, size_t num_contexts, const uint32_t* static_context_map,

View File

@ -17,7 +17,7 @@
#define ZOPFLIFICATION_QUALITY 10 #define ZOPFLIFICATION_QUALITY 10
#define HQ_ZOPFLIFICATION_QUALITY 11 #define HQ_ZOPFLIFICATION_QUALITY 11
#define MAX_QUALITY_FOR_STATIC_ENRTOPY_CODES 2 #define MAX_QUALITY_FOR_STATIC_ENTROPY_CODES 2
#define MIN_QUALITY_FOR_BLOCK_SPLIT 4 #define MIN_QUALITY_FOR_BLOCK_SPLIT 4
#define MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS 4 #define MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS 4
#define MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH 5 #define MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH 5