/* Copyright 2013 Google Inc. All Rights Reserved. Distributed under MIT license. See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ // Block split point selection utilities. #include "./block_splitter.h" #include #include #include #include #include #include "./cluster.h" #include "./command.h" #include "./fast_log.h" #include "./histogram.h" namespace brotli { static const size_t kMaxLiteralHistograms = 100; static const size_t kMaxCommandHistograms = 50; static const double kLiteralBlockSwitchCost = 28.1; static const double kCommandBlockSwitchCost = 13.5; static const double kDistanceBlockSwitchCost = 14.6; static const size_t kLiteralStrideLength = 70; static const size_t kCommandStrideLength = 40; static const size_t kSymbolsPerLiteralHistogram = 544; static const size_t kSymbolsPerCommandHistogram = 530; static const size_t kSymbolsPerDistanceHistogram = 544; static const size_t kMinLengthForBlockSplitting = 128; static const size_t kIterMulForRefining = 2; static const size_t kMinItersForRefining = 100; void CopyLiteralsToByteArray(const Command* cmds, const size_t num_commands, const uint8_t* data, const size_t offset, const size_t mask, std::vector* literals) { // Count how many we have. size_t total_length = 0; for (size_t i = 0; i < num_commands; ++i) { total_length += cmds[i].insert_len_; } if (total_length == 0) { return; } // Allocate. literals->resize(total_length); // Loop again, and copy this time. size_t pos = 0; size_t from_pos = offset & mask; for (size_t i = 0; i < num_commands && pos < total_length; ++i) { size_t insert_len = cmds[i].insert_len_; if (from_pos + insert_len > mask) { size_t head_size = mask + 1 - from_pos; memcpy(&(*literals)[pos], data + from_pos, head_size); from_pos = 0; pos += head_size; insert_len -= head_size; } if (insert_len > 0) { memcpy(&(*literals)[pos], data + from_pos, insert_len); pos += insert_len; } from_pos = (from_pos + insert_len + cmds[i].copy_len()) & mask; } } inline static unsigned int MyRand(unsigned int* seed) { *seed *= 16807U; if (*seed == 0) { *seed = 1; } return *seed; } template void InitialEntropyCodes(const DataType* data, size_t length, size_t stride, size_t num_histograms, HistogramType* histograms) { for (size_t i = 0; i < num_histograms; ++i) { histograms[i].Clear(); } unsigned int seed = 7; size_t block_length = length / num_histograms; for (size_t i = 0; i < num_histograms; ++i) { size_t pos = length * i / num_histograms; if (i != 0) { pos += MyRand(&seed) % block_length; } if (pos + stride >= length) { pos = length - stride - 1; } histograms[i].Add(data + pos, stride); } } template void RandomSample(unsigned int* seed, const DataType* data, size_t length, size_t stride, HistogramType* sample) { size_t pos = 0; if (stride >= length) { pos = 0; stride = length; } else { pos = MyRand(seed) % (length - stride + 1); } sample->Add(data + pos, stride); } template void RefineEntropyCodes(const DataType* data, size_t length, size_t stride, size_t num_histograms, HistogramType* histograms) { size_t iters = kIterMulForRefining * length / stride + kMinItersForRefining; unsigned int seed = 7; iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms; for (size_t iter = 0; iter < iters; ++iter) { HistogramType sample; RandomSample(&seed, data, length, stride, &sample); size_t ix = iter % num_histograms; histograms[ix].AddHistogram(sample); } } inline static double BitCost(size_t count) { return count == 0 ? -2.0 : FastLog2(count); } // Assigns a block id from the range [0, vec.size()) to each data element // in data[0..length) and fills in block_id[0..length) with the assigned values. // Returns the number of blocks, i.e. one plus the number of block switches. template size_t FindBlocks(const DataType* data, const size_t length, const double block_switch_bitcost, const size_t num_histograms, const Histogram* histograms, double* insert_cost, double* cost, uint8_t* switch_signal, uint8_t *block_id) { if (num_histograms <= 1) { for (size_t i = 0; i < length; ++i) { block_id[i] = 0; } return 1; } const size_t bitmaplen = (num_histograms + 7) >> 3; assert(num_histograms <= 256); memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * num_histograms); for (size_t j = 0; j < num_histograms; ++j) { insert_cost[j] = FastLog2(static_cast( histograms[j].total_count_)); } for (size_t i = kSize; i != 0;) { --i; for (size_t j = 0; j < num_histograms; ++j) { insert_cost[i * num_histograms + j] = insert_cost[j] - BitCost(histograms[j].data_[i]); } } memset(cost, 0, sizeof(cost[0]) * num_histograms); memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen); // After each iteration of this loop, cost[k] will contain the difference // between the minimum cost of arriving at the current byte position using // entropy code k, and the minimum cost of arriving at the current byte // position. This difference is capped at the block switch cost, and if it // reaches block switch cost, it means that when we trace back from the last // position, we need to switch here. for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) { size_t ix = byte_ix * bitmaplen; size_t insert_cost_ix = data[byte_ix] * num_histograms; double min_cost = 1e99; for (size_t k = 0; k < num_histograms; ++k) { // We are coding the symbol in data[byte_ix] with entropy code k. cost[k] += insert_cost[insert_cost_ix + k]; if (cost[k] < min_cost) { min_cost = cost[k]; block_id[byte_ix] = static_cast(k); } } double block_switch_cost = block_switch_bitcost; // More blocks for the beginning. if (byte_ix < 2000) { block_switch_cost *= 0.77 + 0.07 * static_cast(byte_ix) / 2000; } for (size_t k = 0; k < num_histograms; ++k) { cost[k] -= min_cost; if (cost[k] >= block_switch_cost) { cost[k] = block_switch_cost; const uint8_t mask = static_cast(1u << (k & 7)); assert((k >> 3) < bitmaplen); switch_signal[ix + (k >> 3)] |= mask; } } } // Now trace back from the last position and switch at the marked places. size_t byte_ix = length - 1; size_t ix = byte_ix * bitmaplen; uint8_t cur_id = block_id[byte_ix]; size_t num_blocks = 1; while (byte_ix > 0) { --byte_ix; ix -= bitmaplen; const uint8_t mask = static_cast(1u << (cur_id & 7)); assert((static_cast(cur_id) >> 3) < bitmaplen); if (switch_signal[ix + (cur_id >> 3)] & mask) { if (cur_id != block_id[byte_ix]) { cur_id = block_id[byte_ix]; ++num_blocks; } } block_id[byte_ix] = cur_id; } return num_blocks; } static size_t RemapBlockIds(uint8_t* block_ids, const size_t length, uint16_t* new_id, const size_t num_histograms) { static const uint16_t kInvalidId = 256; for (size_t i = 0; i < num_histograms; ++i) { new_id[i] = kInvalidId; } uint16_t next_id = 0; for (size_t i = 0; i < length; ++i) { assert(block_ids[i] < num_histograms); if (new_id[block_ids[i]] == kInvalidId) { new_id[block_ids[i]] = next_id++; } } for (size_t i = 0; i < length; ++i) { block_ids[i] = static_cast(new_id[block_ids[i]]); assert(block_ids[i] < num_histograms); } assert(next_id <= num_histograms); return next_id; } template void BuildBlockHistograms(const DataType* data, const size_t length, const uint8_t* block_ids, const size_t num_histograms, HistogramType* histograms) { for (size_t i = 0; i < num_histograms; ++i) { histograms[i].Clear(); } for (size_t i = 0; i < length; ++i) { histograms[block_ids[i]].Add(data[i]); } } template void ClusterBlocks(const DataType* data, const size_t length, const size_t num_blocks, uint8_t* block_ids, BlockSplit* split) { static const size_t kMaxNumberOfBlockTypes = 256; static const size_t kHistogramsPerBatch = 64; static const size_t kClustersPerBatch = 16; std::vector histogram_symbols(num_blocks); std::vector block_lengths(num_blocks); size_t block_idx = 0; for (size_t i = 0; i < length; ++i) { assert(block_idx < num_blocks); ++block_lengths[block_idx]; if (i + 1 == length || block_ids[i] != block_ids[i + 1]) { ++block_idx; } } assert(block_idx == num_blocks); const size_t expected_num_clusters = kClustersPerBatch * (num_blocks + kHistogramsPerBatch - 1) / kHistogramsPerBatch; std::vector all_histograms; std::vector cluster_size; all_histograms.reserve(expected_num_clusters); cluster_size.reserve(expected_num_clusters); size_t num_clusters = 0; std::vector histograms( std::min(num_blocks, kHistogramsPerBatch)); size_t max_num_pairs = kHistogramsPerBatch * kHistogramsPerBatch / 2; std::vector pairs(max_num_pairs + 1); size_t pos = 0; for (size_t i = 0; i < num_blocks; i += kHistogramsPerBatch) { const size_t num_to_combine = std::min(num_blocks - i, kHistogramsPerBatch); uint32_t sizes[kHistogramsPerBatch]; uint32_t clusters[kHistogramsPerBatch]; uint32_t symbols[kHistogramsPerBatch]; uint32_t remap[kHistogramsPerBatch]; for (size_t j = 0; j < num_to_combine; ++j) { histograms[j].Clear(); for (size_t k = 0; k < block_lengths[i + j]; ++k) { histograms[j].Add(data[pos++]); } histograms[j].bit_cost_ = PopulationCost(histograms[j]); symbols[j] = clusters[j] = static_cast(j); sizes[j] = 1; } size_t num_new_clusters = HistogramCombine( &histograms[0], sizes, symbols, clusters, &pairs[0], num_to_combine, num_to_combine, kHistogramsPerBatch, max_num_pairs); for (size_t j = 0; j < num_new_clusters; ++j) { all_histograms.push_back(histograms[clusters[j]]); cluster_size.push_back(sizes[clusters[j]]); remap[clusters[j]] = static_cast(j); } for (size_t j = 0; j < num_to_combine; ++j) { histogram_symbols[i + j] = static_cast(num_clusters) + remap[symbols[j]]; } num_clusters += num_new_clusters; assert(num_clusters == cluster_size.size()); assert(num_clusters == all_histograms.size()); } max_num_pairs = std::min(64 * num_clusters, (num_clusters / 2) * num_clusters); pairs.resize(max_num_pairs + 1); std::vector clusters(num_clusters); for (size_t i = 0; i < num_clusters; ++i) { clusters[i] = static_cast(i); } size_t num_final_clusters = HistogramCombine(&all_histograms[0], &cluster_size[0], &histogram_symbols[0], &clusters[0], &pairs[0], num_clusters, num_blocks, kMaxNumberOfBlockTypes, max_num_pairs); static const uint32_t kInvalidIndex = std::numeric_limits::max(); std::vector new_index(num_clusters, kInvalidIndex); uint32_t next_index = 0; pos = 0; for (size_t i = 0; i < num_blocks; ++i) { HistogramType histo; for (size_t j = 0; j < block_lengths[i]; ++j) { histo.Add(data[pos++]); } uint32_t best_out = i == 0 ? histogram_symbols[0] : histogram_symbols[i - 1]; double best_bits = HistogramBitCostDistance( histo, all_histograms[best_out]); for (size_t j = 0; j < num_final_clusters; ++j) { const double cur_bits = HistogramBitCostDistance( histo, all_histograms[clusters[j]]); if (cur_bits < best_bits) { best_bits = cur_bits; best_out = clusters[j]; } } histogram_symbols[i] = best_out; if (new_index[best_out] == kInvalidIndex) { new_index[best_out] = next_index++; } } uint8_t max_type = 0; uint32_t cur_length = 0; block_idx = 0; split->types.resize(num_blocks); split->lengths.resize(num_blocks); for (size_t i = 0; i < num_blocks; ++i) { cur_length += block_lengths[i]; if (i + 1 == num_blocks || histogram_symbols[i] != histogram_symbols[i + 1]) { const uint8_t id = static_cast(new_index[histogram_symbols[i]]); split->types[block_idx] = id; split->lengths[block_idx] = cur_length; max_type = std::max(max_type, id); cur_length = 0; ++block_idx; } } split->types.resize(block_idx); split->lengths.resize(block_idx); split->num_types = static_cast(max_type) + 1; } template void SplitByteVector(const std::vector& data, const size_t literals_per_histogram, const size_t max_histograms, const size_t sampling_stride_length, const double block_switch_cost, BlockSplit* split) { if (data.empty()) { split->num_types = 1; return; } else if (data.size() < kMinLengthForBlockSplitting) { split->num_types = 1; split->types.push_back(0); split->lengths.push_back(static_cast(data.size())); return; } size_t num_histograms = data.size() / literals_per_histogram + 1; if (num_histograms > max_histograms) { num_histograms = max_histograms; } Histogram* histograms = new Histogram[num_histograms]; // Find good entropy codes. InitialEntropyCodes(&data[0], data.size(), sampling_stride_length, num_histograms, histograms); RefineEntropyCodes(&data[0], data.size(), sampling_stride_length, num_histograms, histograms); // Find a good path through literals with the good entropy codes. std::vector block_ids(data.size()); size_t num_blocks; const size_t bitmaplen = (num_histograms + 7) >> 3; double* insert_cost = new double[kSize * num_histograms]; double *cost = new double[num_histograms]; uint8_t* switch_signal = new uint8_t[data.size() * bitmaplen]; uint16_t* new_id = new uint16_t[num_histograms]; for (size_t i = 0; i < 10; ++i) { num_blocks = FindBlocks(&data[0], data.size(), block_switch_cost, num_histograms, histograms, insert_cost, cost, switch_signal, &block_ids[0]); num_histograms = RemapBlockIds(&block_ids[0], data.size(), new_id, num_histograms); BuildBlockHistograms(&data[0], data.size(), &block_ids[0], num_histograms, histograms); } delete[] insert_cost; delete[] cost; delete[] switch_signal; delete[] new_id; delete[] histograms; ClusterBlocks >(&data[0], data.size(), num_blocks, &block_ids[0], split); } void SplitBlock(const Command* cmds, const size_t num_commands, const uint8_t* data, const size_t pos, const size_t mask, BlockSplit* literal_split, BlockSplit* insert_and_copy_split, BlockSplit* dist_split) { { // Create a continuous array of literals. std::vector literals; CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, &literals); // Create the block split on the array of literals. // Literal histograms have alphabet size 256. SplitByteVector<256>( literals, kSymbolsPerLiteralHistogram, kMaxLiteralHistograms, kLiteralStrideLength, kLiteralBlockSwitchCost, literal_split); } { // Compute prefix codes for commands. std::vector insert_and_copy_codes(num_commands); for (size_t i = 0; i < num_commands; ++i) { insert_and_copy_codes[i] = cmds[i].cmd_prefix_; } // Create the block split on the array of command prefixes. SplitByteVector( insert_and_copy_codes, kSymbolsPerCommandHistogram, kMaxCommandHistograms, kCommandStrideLength, kCommandBlockSwitchCost, insert_and_copy_split); } { // Create a continuous array of distance prefixes. std::vector distance_prefixes(num_commands); size_t pos = 0; for (size_t i = 0; i < num_commands; ++i) { const Command& cmd = cmds[i]; if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) { distance_prefixes[pos++] = cmd.dist_prefix_; } } distance_prefixes.resize(pos); // Create the block split on the array of distance prefixes. SplitByteVector( distance_prefixes, kSymbolsPerDistanceHistogram, kMaxCommandHistograms, kCommandStrideLength, kDistanceBlockSwitchCost, dist_split); } } } // namespace brotli