/* NOLINT(build/header_guard) */ /* Copyright 2015 Google Inc. All Rights Reserved. Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ /* template parameters: FN */ #define HistogramType FN(Histogram) /* Greedy block splitter for one block category (literal, command or distance). */ typedef struct FN(BlockSplitter) { /* Alphabet size of particular block category. */ size_t alphabet_size_; /* We collect at least this many symbols for each block. */ size_t min_block_size_; /* We merge histograms A and B if entropy(A+B) < entropy(A) + entropy(B) + split_threshold_, where A is the current histogram and B is the histogram of the last or the second last block type. */ double split_threshold_; size_t num_blocks_; BlockSplit* split_; /* not owned */ HistogramType* histograms_; /* not owned */ size_t* histograms_size_; /* not owned */ /* The number of symbols that we want to collect before deciding on whether or not to merge the block with a previous one or emit a new block. */ size_t target_block_size_; /* The number of symbols in the current histogram. */ size_t block_size_; /* Offset of the current histogram. */ size_t curr_histogram_ix_; /* Offset of the histograms of the previous two block types. */ size_t last_histogram_ix_[2]; /* Entropy of the previous two block types. */ double last_entropy_[2]; /* The number of times we merged the current block with the last one. */ size_t merge_last_count_; } FN(BlockSplitter); static void FN(InitBlockSplitter)( MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size, size_t min_block_size, double split_threshold, size_t num_symbols, BlockSplit* split, HistogramType** histograms, size_t* histograms_size) { size_t max_num_blocks = num_symbols / min_block_size + 1; /* We have to allocate one more histogram than the maximum number of block types for the current histogram when the meta-block is too big. */ size_t max_num_types = BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1); self->alphabet_size_ = alphabet_size; self->min_block_size_ = min_block_size; self->split_threshold_ = split_threshold; self->num_blocks_ = 0; self->split_ = split; self->histograms_size_ = histograms_size; self->target_block_size_ = min_block_size; self->block_size_ = 0; self->curr_histogram_ix_ = 0; self->merge_last_count_ = 0; BROTLI_ENSURE_CAPACITY(m, uint8_t, split->types, split->types_alloc_size, max_num_blocks); BROTLI_ENSURE_CAPACITY(m, uint32_t, split->lengths, split->lengths_alloc_size, max_num_blocks); if (BROTLI_IS_OOM(m)) return; self->split_->num_blocks = max_num_blocks; assert(*histograms == 0); *histograms_size = max_num_types; *histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size); self->histograms_ = *histograms; if (BROTLI_IS_OOM(m)) return; /* Clear only current histogram. */ FN(HistogramClear)(&self->histograms_[0]); self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0; } /* Does either of three things: (1) emits the current block with a new block type; (2) emits the current block with the type of the second last block; (3) merges the current block with the last block. */ static void FN(BlockSplitterFinishBlock)(FN(BlockSplitter)* self, int is_final) { BlockSplit* split = self->split_; double* last_entropy = self->last_entropy_; HistogramType* histograms = self->histograms_; self->block_size_ = BROTLI_MAX(size_t, self->block_size_, self->min_block_size_); if (self->num_blocks_ == 0) { /* Create first block. */ split->lengths[0] = (uint32_t)self->block_size_; split->types[0] = 0; last_entropy[0] = BitsEntropy(histograms[0].data_, self->alphabet_size_); last_entropy[1] = last_entropy[0]; ++self->num_blocks_; ++split->num_types; ++self->curr_histogram_ix_; if (self->curr_histogram_ix_ < *self->histograms_size_) FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); self->block_size_ = 0; } else if (self->block_size_ > 0) { double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_, self->alphabet_size_); HistogramType combined_histo[2]; double combined_entropy[2]; double diff[2]; size_t j; for (j = 0; j < 2; ++j) { size_t last_histogram_ix = self->last_histogram_ix_[j]; combined_histo[j] = histograms[self->curr_histogram_ix_]; FN(HistogramAddHistogram)(&combined_histo[j], &histograms[last_histogram_ix]); combined_entropy[j] = BitsEntropy( &combined_histo[j].data_[0], self->alphabet_size_); diff[j] = combined_entropy[j] - entropy - last_entropy[j]; } if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES && diff[0] > self->split_threshold_ && diff[1] > self->split_threshold_) { /* Create new block. */ split->lengths[self->num_blocks_] = (uint32_t)self->block_size_; split->types[self->num_blocks_] = (uint8_t)split->num_types; self->last_histogram_ix_[1] = self->last_histogram_ix_[0]; self->last_histogram_ix_[0] = (uint8_t)split->num_types; last_entropy[1] = last_entropy[0]; last_entropy[0] = entropy; ++self->num_blocks_; ++split->num_types; ++self->curr_histogram_ix_; if (self->curr_histogram_ix_ < *self->histograms_size_) FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); self->block_size_ = 0; self->merge_last_count_ = 0; self->target_block_size_ = self->min_block_size_; } else if (diff[1] < diff[0] - 20.0) { /* Combine this block with second last block. */ split->lengths[self->num_blocks_] = (uint32_t)self->block_size_; split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2]; BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1); histograms[self->last_histogram_ix_[0]] = combined_histo[1]; last_entropy[1] = last_entropy[0]; last_entropy[0] = combined_entropy[1]; ++self->num_blocks_; self->block_size_ = 0; FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); self->merge_last_count_ = 0; self->target_block_size_ = self->min_block_size_; } else { /* Combine this block with last block. */ split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_; histograms[self->last_histogram_ix_[0]] = combined_histo[0]; last_entropy[0] = combined_entropy[0]; if (split->num_types == 1) { last_entropy[1] = last_entropy[0]; } self->block_size_ = 0; FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); if (++self->merge_last_count_ > 1) { self->target_block_size_ += self->min_block_size_; } } } if (is_final) { *self->histograms_size_ = split->num_types; split->num_blocks = self->num_blocks_; } } /* Adds the next symbol to the current histogram. When the current histogram reaches the target size, decides on merging the block. */ static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) { FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol); ++self->block_size_; if (self->block_size_ == self->target_block_size_) { FN(BlockSplitterFinishBlock)(self, /* is_final = */ 0); } } #undef HistogramType