diff --git a/enc/backward_references.cc b/enc/backward_references.cc index 539c1e7..3900500 100644 --- a/enc/backward_references.cc +++ b/enc/backward_references.cc @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Function to find backward reference copies. +/* Function to find backward reference copies. */ #include "./backward_references.h" @@ -12,16 +12,17 @@ #include #include +#include "../common/types.h" #include "./command.h" #include "./fast_log.h" #include "./literal_cost.h" namespace brotli { -// The maximum length for which the zopflification uses distinct distances. +/* The maximum length for which the zopflification uses distinct distances. */ static const uint16_t kMaxZopfliLen = 325; -// Histogram based cost model for zopflification. +/* Histogram based cost model for zopflification. */ class ZopfliCostModel { public: ZopfliCostModel(void) : min_cost_cmd_(kInfinity) {} @@ -178,9 +179,9 @@ inline size_t ComputeDistanceCode(size_t distance, return distance + 15; } -// REQUIRES: len >= 2, start_pos <= pos -// REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity -// Maintains the "ZopfliNode array invariant". +/* REQUIRES: len >= 2, start_pos <= pos */ +/* REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity */ +/* Maintains the "ZopfliNode array invariant". */ inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos, size_t len, size_t len_code, size_t dist, size_t short_code, float cost) { @@ -191,7 +192,7 @@ inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos, next.cost = cost; } -// Maintains the smallest 2^k cost difference together with their positions +/* Maintains the smallest 8 cost difference together with their positions */ class StartPosQueue { public: struct PosData { @@ -212,8 +213,8 @@ class StartPosQueue { ++idx_; size_t len = size(); q_[offset] = posdata; - /* Restore the sorted order. In the list of |len| items at most |len - 1| - adjacent element comparisons / swaps are required. */ + /* Restore the sorted order. In the list of |len| items at most |len - 1| + adjacent element comparisons / swaps are required. */ for (size_t i = 1; i < len; ++i) { if (q_[offset & mask_].costdiff > q_[(offset + 1) & mask_].costdiff) { std::swap(q_[offset & mask_], q_[(offset + 1) & mask_]); @@ -234,14 +235,14 @@ class StartPosQueue { size_t idx_; }; -// Returns the minimum possible copy length that can improve the cost of any -// future position. +/* Returns the minimum possible copy length that can improve the cost of any */ +/* future position. */ static size_t ComputeMinimumCopyLength(const StartPosQueue& queue, const ZopfliNode* nodes, const ZopfliCostModel& model, const size_t num_bytes, const size_t pos) { - // Compute the minimum possible cost of reaching any future position. + /* Compute the minimum possible cost of reaching any future position. */ const size_t start0 = queue.GetStartPosData(0).pos; float min_cost = (nodes[start0].cost + model.GetLiteralCosts(start0, pos) + @@ -250,13 +251,13 @@ static size_t ComputeMinimumCopyLength(const StartPosQueue& queue, size_t next_len_bucket = 4; size_t next_len_offset = 10; while (pos + len <= num_bytes && nodes[pos + len].cost <= min_cost) { - // We already reached (pos + len) with no more cost than the minimum - // possible cost of reaching anything from this pos, so there is no point in - // looking for lengths <= len. 
+ /* We already reached (pos + len) with no more cost than the minimum
+ possible cost of reaching anything from this pos, so there is no point in
+ looking for lengths <= len. */
 ++len;
 if (len == next_len_offset) {
- // We reached the next copy length code bucket, so we add one more
- // extra bit to the minimum cost.
+ /* We reached the next copy length code bucket, so we add one more
+ extra bit to the minimum cost. */
 min_cost += static_cast<float>(1.0);
 next_len_offset += next_len_bucket;
 next_len_bucket *= 2;
@@ -265,13 +266,13 @@ static size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
 return len;
 }
-// Fills in dist_cache[0..3] with the last four distances (as defined by
-// Section 4. of the Spec) that would be used at (block_start + pos) if we
-// used the shortest path of commands from block_start, computed from
-// nodes[0..pos]. The last four distances at block_start are in
-// starting_dist_cach[0..3].
-// REQUIRES: nodes[pos].cost < kInfinity
-// REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant".
+/* Fills in dist_cache[0..3] with the last four distances (as defined by
+ Section 4. of the Spec) that would be used at (block_start + pos) if we
+ used the shortest path of commands from block_start, computed from
+ nodes[0..pos]. The last four distances at block_start are in
+ starting_dist_cache[0..3].
+ REQUIRES: nodes[pos].cost < kInfinity
+ REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
 static void ComputeDistanceCache(const size_t block_start,
 const size_t pos,
 const size_t max_backward,
@@ -280,21 +281,21 @@ static void ComputeDistanceCache(const size_t block_start,
 int* dist_cache) {
 int idx = 0;
 size_t p = pos;
- // Because of prerequisite, does at most (pos + 1) / 2 iterations.
+ /* Because of prerequisite, does at most (pos + 1) / 2 iterations. */
 while (idx < 4 && p > 0) {
 const size_t clen = nodes[p].copy_length();
 const size_t ilen = nodes[p].insert_length;
 const size_t dist = nodes[p].copy_distance();
- // Since block_start + p is the end position of the command, the copy part
- // starts from block_start + p - clen. Distances that are greater than this
- // or greater than max_backward are static dictionary references, and do
- // not update the last distances. Also distance code 0 (last distance)
- // does not update the last distances.
+ /* Since block_start + p is the end position of the command, the copy part
+ starts from block_start + p - clen. Distances that are greater than this
+ or greater than max_backward are static dictionary references, and do
+ not update the last distances. Also distance code 0 (last distance)
+ does not update the last distances. */
 if (dist + clen <= block_start + p && dist <= max_backward &&
 nodes[p].distance_code() > 0) {
 dist_cache[idx++] = static_cast<int>(dist);
 }
- // Because of prerequisite, p >= clen + ilen >= 2.
+ /* Because of prerequisite, p >= clen + ilen >= 2. */
 p -= clen + ilen;
 }
 for (; idx < 4; ++idx) {
@@ -330,15 +331,15 @@ static void UpdateNodes(const size_t num_bytes,
 const size_t min_len = ComputeMinimumCopyLength(
 *queue, nodes, *model, num_bytes, pos);
- // Go over the command starting positions in order of increasing cost
- // difference.
+ /* Go over the command starting positions in order of increasing cost
+ difference.
*/ for (size_t k = 0; k < 5 && k < queue->size(); ++k) { const StartPosQueue::PosData& posdata = queue->GetStartPosData(k); const size_t start = posdata.pos; const float start_costdiff = posdata.costdiff; - // Look for last distance matches using the distance cache from this - // starting position. + /* Look for last distance matches using the distance cache from this + starting position. */ size_t best_len = min_len - 1; for (size_t j = 0; j < kNumDistanceShortCodes; ++j) { const size_t idx = kDistanceCacheIndex[j]; @@ -374,23 +375,23 @@ static void UpdateNodes(const size_t num_bytes, } } - // At higher iterations look only for new last distance matches, since - // looking only for new command start positions with the same distances - // does not help much. + /* At higher iterations look only for new last distance matches, since + looking only for new command start positions with the same distances + does not help much. */ if (k >= 2) continue; - // Loop through all possible copy lengths at this position. + /* Loop through all possible copy lengths at this position. */ size_t len = min_len; for (size_t j = 0; j < num_matches; ++j) { BackwardMatch match = matches[j]; size_t dist = match.distance; bool is_dictionary_match = dist > max_distance; - // We already tried all possible last distance matches, so we can use - // normal distance code here. + /* We already tried all possible last distance matches, so we can use + normal distance code here. */ size_t dist_code = dist + 15; - // Try all copy lengths up until the maximum copy length corresponding - // to this distance. If the distance refers to the static dictionary, or - // the maximum length is long enough, try only one maximum length. + /* Try all copy lengths up until the maximum copy length corresponding + to this distance. If the distance refers to the static dictionary, or + the maximum length is long enough, try only one maximum length. */ size_t max_len = match.length(); if (len < max_len && (is_dictionary_match || max_len > kMaxZopfliLen)) { len = max_len; @@ -487,8 +488,8 @@ static void ZopfliIterate(size_t num_bytes, max_backward_limit, dist_cache, num_matches[i], &matches[cur_match_pos], &model, &queue, &nodes[0]); cur_match_pos += num_matches[i]; - // The zopflification can be too slow in case of very long lengths, so in - // such case skip it all, it does not cost a lot of compression ratio. + /* The zopflification can be too slow in case of very long lengths, so in + such case skip it all, it does not cost a lot of compression ratio. */ if (num_matches[i] == 1 && matches[cur_match_pos - 1].length() > kMaxZopfliLen) { i += matches[cur_match_pos - 1].length() - 1; diff --git a/enc/backward_references.h b/enc/backward_references.h index 642133c..9619f5f 100644 --- a/enc/backward_references.h +++ b/enc/backward_references.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Function to find backward reference copies. +/* Function to find backward reference copies. */ #ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_ #define BROTLI_ENC_BACKWARD_REFERENCES_H_ @@ -17,10 +17,10 @@ namespace brotli { -// "commands" points to the next output command to write to, "*num_commands" is -// initially the total amount of commands output by previous -// CreateBackwardReferences calls, and must be incremented by the amount written -// by this call. 
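The "ZopfliNode array invariant" spelled out in the header below is what makes backtracking over the node array safe. As a minimal standalone sketch of how a command path is read back out of such an array — Node and BacktrackPath here are simplified stand-ins for ZopfliNode and the path reconstruction in ZopfliComputeShortestPath, not code from this patch:

#include <algorithm>
#include <cstddef>
#include <vector>

struct Node {
  size_t copy_length;
  size_t insert_length;
};

/* Walks nodes[] backwards from the last reachable byte, relying on the
   invariant: command_length() <= i rules out underflow, and the predecessor
   node is always reachable. Returns the command lengths in forward order. */
std::vector<size_t> BacktrackPath(const std::vector<Node>& nodes, size_t pos) {
  std::vector<size_t> lengths;
  while (pos > 0) {
    const size_t len = nodes[pos].copy_length + nodes[pos].insert_length;
    lengths.push_back(len);
    pos -= len;
  }
  std::reverse(lengths.begin(), lengths.end());
  return lengths;
}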
+/* "commands" points to the next output command to write to, "*num_commands" is + initially the total amount of commands output by previous + CreateBackwardReferences calls, and must be incremented by the amount written + by this call. */ void CreateBackwardReferences(size_t num_bytes, size_t position, bool is_last, @@ -66,31 +66,32 @@ struct ZopfliNode { return copy_length() + insert_length; } - // best length to get up to this byte (not including this byte itself) - // highest 8 bit is used to reconstruct the length code + /* best length to get up to this byte (not including this byte itself) + highest 8 bit is used to reconstruct the length code */ uint32_t length; - // distance associated with the length - // highest 7 bit contains distance short code + 1 (or zero if no short code) + /* distance associated with the length + highest 7 bit contains distance short code + 1 (or zero if no short code) + */ uint32_t distance; - // number of literal inserts before this copy + /* number of literal inserts before this copy */ uint32_t insert_length; - // smallest cost to get to this byte from the beginning, as found so far + /* Smallest cost to get to this byte from the beginning, as found so far. */ float cost; }; -// Computes the shortest path of commands from position to at most -// position + num_bytes. -// -// On return, path->size() is the number of commands found and path[i] is the -// length of the ith command (copy length plus insert length). -// Note that the sum of the lengths of all commands can be less than num_bytes. -// -// On return, the nodes[0..num_bytes] array will have the following -// "ZopfliNode array invariant": -// For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then -// (1) nodes[i].copy_length() >= 2 -// (2) nodes[i].command_length() <= i and -// (3) nodes[i - nodes[i].command_length()].cost < kInfinity +/* Computes the shortest path of commands from position to at most + position + num_bytes. + + On return, path->size() is the number of commands found and path[i] is the + length of the ith command (copy length plus insert length). + Note that the sum of the lengths of all commands can be less than num_bytes. + + On return, the nodes[0..num_bytes] array will have the following + "ZopfliNode array invariant": + For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then + (1) nodes[i].copy_length() >= 2 + (2) nodes[i].command_length() <= i and + (3) nodes[i - nodes[i].command_length()].cost < kInfinity */ void ZopfliComputeShortestPath(size_t num_bytes, size_t position, const uint8_t* ringbuffer, @@ -113,4 +114,4 @@ void ZopfliCreateCommands(const size_t num_bytes, } // namespace brotli -#endif // BROTLI_ENC_BACKWARD_REFERENCES_H_ +#endif /* BROTLI_ENC_BACKWARD_REFERENCES_H_ */ diff --git a/enc/bit_cost.h b/enc/bit_cost.h index bed224b..086224f 100644 --- a/enc/bit_cost.h +++ b/enc/bit_cost.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Functions to estimate the bit cost of Huffman trees. +/* Functions to estimate the bit cost of Huffman trees. */ #ifndef BROTLI_ENC_BIT_COST_H_ #define BROTLI_ENC_BIT_COST_H_ @@ -42,7 +42,7 @@ static inline double BitsEntropy(const uint32_t *population, size_t size) { size_t sum; double retval = ShannonEntropy(population, size, &sum); if (retval < sum) { - // At least one bit per literal is needed. + /* At least one bit per literal is needed. 
 */
 retval = static_cast<double>(sum);
 }
 return retval;
 }
@@ -158,4 +158,4 @@ double PopulationCost(const Histogram<kSize>& histogram) {
 } // namespace brotli
-#endif // BROTLI_ENC_BIT_COST_H_
+#endif /* BROTLI_ENC_BIT_COST_H_ */
diff --git a/enc/block_splitter.cc b/enc/block_splitter.cc
index db8d9c6..f98765c 100644
--- a/enc/block_splitter.cc
+++ b/enc/block_splitter.cc
@@ -4,7 +4,7 @@
 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
-// Block split point selection utilities.
+/* Block split point selection utilities. */
 #include "./block_splitter.h"
@@ -42,7 +42,7 @@ void CopyLiteralsToByteArray(const Command* cmds,
 const size_t offset,
 const size_t mask,
 std::vector<uint8_t>* literals) {
- // Count how many we have.
+ /* Count how many we have. */
 size_t total_length = 0;
 for (size_t i = 0; i < num_commands; ++i) {
 total_length += cmds[i].insert_len_;
@@ -456,11 +456,11 @@ void SplitBlock(const Command* cmds,
 BlockSplit* insert_and_copy_split,
 BlockSplit* dist_split) {
 {
- // Create a continuous array of literals.
+ /* Create a continuous array of literals. */
 std::vector<uint8_t> literals;
 CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, &literals);
- // Create the block split on the array of literals.
- // Literal histograms have alphabet size 256.
+ /* Create the block split on the array of literals.
+ Literal histograms have alphabet size 256. */
 SplitByteVector<256>(
 literals,
 kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
@@ -469,12 +469,12 @@
 }
 {
- // Compute prefix codes for commands.
+ /* Compute prefix codes for commands. */
 std::vector<uint16_t> insert_and_copy_codes(num_commands);
 for (size_t i = 0; i < num_commands; ++i) {
 insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
 }
- // Create the block split on the array of command prefixes.
+ /* Create the block split on the array of command prefixes. */
 SplitByteVector<kNumCommandPrefixes>(
 insert_and_copy_codes,
 kSymbolsPerCommandHistogram, kMaxCommandHistograms,
@@ -483,7 +483,7 @@
 }
 {
- // Create a continuous array of distance prefixes.
+ /* Create a continuous array of distance prefixes. */
 std::vector<uint16_t> distance_prefixes(num_commands);
 size_t pos = 0;
 for (size_t i = 0; i < num_commands; ++i) {
@@ -493,7 +493,7 @@
 }
 }
 distance_prefixes.resize(pos);
- // Create the block split on the array of distance prefixes.
+ /* Create the block split on the array of distance prefixes. */
 SplitByteVector<kNumDistancePrefixes>(
 distance_prefixes,
 kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
diff --git a/enc/block_splitter.h b/enc/block_splitter.h
index 3652b04..b9a34b6 100644
--- a/enc/block_splitter.h
+++ b/enc/block_splitter.h
@@ -4,7 +4,7 @@
 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
-// Block split point selection utilities.
+/* Block split point selection utilities. */
 #ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
 #define BROTLI_ENC_BLOCK_SPLITTER_H_
@@ -58,4 +58,4 @@ void SplitBlock(const Command* cmds,
 } // namespace brotli
-#endif // BROTLI_ENC_BLOCK_SPLITTER_H_
+#endif /* BROTLI_ENC_BLOCK_SPLITTER_H_ */
diff --git a/enc/brotli_bit_stream.cc b/enc/brotli_bit_stream.cc
index 43f1210..df5a887 100644
--- a/enc/brotli_bit_stream.cc
+++ b/enc/brotli_bit_stream.cc
@@ -4,9 +4,9 @@
 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
-// Brotli bit stream functions to support the low level format. There are no
-// compression algorithms here, just the right ordering of bits to match the
-// specs.
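As context for the BitsEntropy() clamp converted above: the function computes the Shannon entropy of a histogram and floors it at one bit per symbol. A self-contained sketch of the same computation, assuming plain std::log2 in place of the library's FastLog2 helpers:

#include <cmath>
#include <cstddef>
#include <cstdint>

/* Total bits = S*log2(S) - sum of c*log2(c) over nonzero counts c with
   total S, clamped so no symbol costs less than one bit, as in the code
   above. */
double ClampedBitsEntropy(const uint32_t* population, size_t size) {
  double sum = 0.0;
  double retval = 0.0;
  for (size_t i = 0; i < size; ++i) {
    if (population[i] > 0) {
      const double c = static_cast<double>(population[i]);
      sum += c;
      retval -= c * std::log2(c);
    }
  }
  if (sum > 0.0) retval += sum * std::log2(sum);
  if (retval < sum) retval = sum;  /* at least one bit per literal */
  return retval;
}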
+/* Brotli bit stream functions to support the low level format. There are no + compression algorithms here, just the right ordering of bits to match the + specs. */ #include "./brotli_bit_stream.h" @@ -16,6 +16,7 @@ #include #include +#include "../common/types.h" #include "./bit_cost.h" #include "./context.h" #include "./entropy_encode.h" @@ -34,9 +35,9 @@ static const size_t kContextMapAlphabetSize = 256 + 16; // Block type alphabet has 256 block id symbols plus 2 special symbols. static const size_t kBlockTypeAlphabetSize = 256 + 2; -// nibblesbits represents the 2 bits to encode MNIBBLES (0-3) -// REQUIRES: length > 0 -// REQUIRES: length <= (1 << 24) +/* nibblesbits represents the 2 bits to encode MNIBBLES (0-3) + REQUIRES: length > 0 + REQUIRES: length <= (1 << 24) */ void EncodeMlen(size_t length, uint64_t* bits, size_t* numbits, uint64_t* nibblesbits) { assert(length > 0); @@ -76,13 +77,16 @@ void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) { } } +/* Stores the compressed meta-block header. + REQUIRES: length > 0 + REQUIRES: length <= (1 << 24) */ void StoreCompressedMetaBlockHeader(bool final_block, size_t length, size_t* storage_ix, uint8_t* storage) { - // Write ISLAST bit. + /* Write ISLAST bit. */ WriteBits(1, final_block, storage_ix, storage); - // Write ISEMPTY bit. + /* Write ISEMPTY bit. */ if (final_block) { WriteBits(1, 0, storage_ix, storage); } @@ -95,15 +99,19 @@ void StoreCompressedMetaBlockHeader(bool final_block, WriteBits(nlenbits, lenbits, storage_ix, storage); if (!final_block) { - // Write ISUNCOMPRESSED bit. + /* Write ISUNCOMPRESSED bit. */ WriteBits(1, 0, storage_ix, storage); } } +/* Stores the uncompressed meta-block header. + REQUIRES: length > 0 + REQUIRES: length <= (1 << 24) */ void StoreUncompressedMetaBlockHeader(size_t length, size_t* storage_ix, uint8_t* storage) { - // Write ISLAST bit. Uncompressed block cannot be the last one, so set to 0. + /* Write ISLAST bit. + Uncompressed block cannot be the last one, so set to 0. */ WriteBits(1, 0, storage_ix, storage); uint64_t lenbits; size_t nlenbits; @@ -111,7 +119,7 @@ void StoreUncompressedMetaBlockHeader(size_t length, EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits); WriteBits(2, nibblesbits, storage_ix, storage); WriteBits(nlenbits, lenbits, storage_ix, storage); - // Write ISUNCOMPRESSED bit. + /* Write ISUNCOMPRESSED bit. */ WriteBits(1, 1, storage_ix, storage); } @@ -123,16 +131,16 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask( static const uint8_t kStorageOrder[kCodeLengthCodes] = { 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; - // The bit lengths of the Huffman code over the code length alphabet - // are compressed with the following static Huffman code: - // Symbol Code - // ------ ---- - // 0 00 - // 1 1110 - // 2 110 - // 3 01 - // 4 10 - // 5 1111 + /* The bit lengths of the Huffman code over the code length alphabet + are compressed with the following static Huffman code: + Symbol Code + ------ ---- + 0 00 + 1 1110 + 2 110 + 3 01 + 4 10 + 5 1111 */ static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = { 0, 7, 3, 2, 1, 15 }; @@ -140,7 +148,7 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask( 2, 4, 3, 2, 2, 4 }; - // Throw away trailing zeros: + /* Throw away trailing zeros: */ size_t codes_to_store = kCodeLengthCodes; if (num_codes > 1) { for (; codes_to_store > 0; --codes_to_store) { @@ -149,12 +157,12 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask( } } } - size_t skip_some = 0; // skips none. + size_t skip_some = 0; /* skips none. 
*/ if (code_length_bitdepth[kStorageOrder[0]] == 0 && code_length_bitdepth[kStorageOrder[1]] == 0) { - skip_some = 2; // skips two. + skip_some = 2; /* skips two. */ if (code_length_bitdepth[kStorageOrder[2]] == 0) { - skip_some = 3; // skips three. + skip_some = 3; /* skips three. */ } } WriteBits(2, skip_some, storage_ix, storage); @@ -177,7 +185,7 @@ static void StoreHuffmanTreeToBitMask( size_t ix = huffman_tree[i]; WriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix], storage_ix, storage); - // Extra bits + /* Extra bits */ switch (ix) { case 16: WriteBits(2, huffman_tree_extra_bits[i], storage_ix, storage); @@ -194,11 +202,11 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths, size_t num_symbols, size_t max_bits, size_t *storage_ix, uint8_t *storage) { - // value of 1 indicates a simple Huffman code + /* value of 1 indicates a simple Huffman code */ WriteBits(2, 1, storage_ix, storage); - WriteBits(2, num_symbols - 1, storage_ix, storage); // NSYM - 1 + WriteBits(2, num_symbols - 1, storage_ix, storage); /* NSYM - 1 */ - // Sort + /* Sort */ for (size_t i = 0; i < num_symbols; i++) { for (size_t j = i + 1; j < num_symbols; j++) { if (depths[symbols[j]] < depths[symbols[i]]) { @@ -219,19 +227,19 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths, WriteBits(max_bits, symbols[1], storage_ix, storage); WriteBits(max_bits, symbols[2], storage_ix, storage); WriteBits(max_bits, symbols[3], storage_ix, storage); - // tree-select + /* tree-select */ WriteBits(1, depths[symbols[0]] == 1 ? 1 : 0, storage_ix, storage); } } -// num = alphabet size -// depths = symbol depths +/* num = alphabet size + depths = symbol depths */ void StoreHuffmanTree(const uint8_t* depths, size_t num, HuffmanTree* tree, size_t *storage_ix, uint8_t *storage) { - // Write the Huffman tree into the brotli-representation. - // The command alphabet is the largest, so this allocation will fit all - // alphabets. + /* Write the Huffman tree into the brotli-representation. + The command alphabet is the largest, so this allocation will fit all + alphabets. */ assert(num <= kNumCommandPrefixes); uint8_t huffman_tree[kNumCommandPrefixes]; uint8_t huffman_tree_extra_bits[kNumCommandPrefixes]; @@ -239,7 +247,7 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num, WriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree, huffman_tree_extra_bits); - // Calculate the statistics of the Huffman tree in brotli-representation. + /* Calculate the statistics of the Huffman tree in brotli-representation. */ uint32_t huffman_tree_histogram[kCodeLengthCodes] = { 0 }; for (size_t i = 0; i < huffman_tree_size; ++i) { ++huffman_tree_histogram[huffman_tree[i]]; @@ -259,8 +267,8 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num, } } - // Calculate another Huffman tree to use for compressing both the - // earlier Huffman tree with. + /* Calculate another Huffman tree to use for compressing both the + earlier Huffman tree with. 
 */
 uint8_t code_length_bitdepth[kCodeLengthCodes] = { 0 };
 uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = { 0 };
 CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes,
@@ -268,7 +276,7 @@
 ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes,
 &code_length_bitdepth_symbols[0]);
- // Now, we have all the data, let's start storing it
+ /* Now, we have all the data, let's start storing it */
 StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth,
 storage_ix, storage);
@@ -276,7 +284,7 @@
 code_length_bitdepth[code] = 0;
 }
- // Store the real huffman tree now.
+ /* Store the real huffman tree now. */
 StoreHuffmanTreeToBitMask(huffman_tree_size,
 huffman_tree,
 huffman_tree_extra_bits,
@@ -285,6 +293,8 @@
 storage_ix, storage);
 }
+/* Builds a Huffman tree from histogram[0:length] into depth[0:length] and
+ bits[0:length] and stores the encoded tree to the bit stream. */
 void BuildAndStoreHuffmanTree(const uint32_t *histogram,
 const size_t length,
 HuffmanTree* tree,
@@ -379,13 +389,13 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
 }
 const int n = static_cast<int>(node - tree);
 std::sort(tree, node, SortHuffmanTree);
- // The nodes are:
- // [0, n): the sorted leaf nodes that we start with.
- // [n]: we add a sentinel here.
- // [n + 1, 2n): new parent nodes are added here, starting from
- // (n+1). These are naturally in ascending order.
- // [2n]: we add a sentinel at the end as well.
- // There will be (2n+1) elements at the end.
+ /* The nodes are:
+ [0, n): the sorted leaf nodes that we start with.
+ [n]: we add a sentinel here.
+ [n + 1, 2n): new parent nodes are added here, starting from
+ (n+1). These are naturally in ascending order.
+ [2n]: we add a sentinel at the end as well.
+ There will be (2n+1) elements at the end. */
 const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
 *node++ = sentinel;
 *node++ = sentinel;
@@ -408,18 +418,17 @@
 right = j;
 ++j;
 }
- // The sentinel node becomes the parent node.
+ /* The sentinel node becomes the parent node. */
 node[-1].total_count_ = tree[left].total_count_ + tree[right].total_count_;
 node[-1].index_left_ = static_cast<int16_t>(left);
 node[-1].index_right_or_value_ = static_cast<int16_t>(right);
- // Add back the last sentinel node.
+ /* Add back the last sentinel node. */
 *node++ = sentinel;
 }
 SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
- // We need to pack the Huffman tree in 14 bits.
- // If this was not successful, add fake entities to the lowest values
- // and retry.
+ /* We need to pack the Huffman tree in 14 bits. If this was not
+ successful, add fake entities to the lowest values and retry.
*/ if (PREDICT_TRUE(*std::max_element(&depth[0], &depth[length]) <= 14)) { break; } @@ -427,11 +436,11 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram, free(tree); ConvertBitDepthsToSymbols(depth, length, bits); if (count <= 4) { - // value of 1 indicates a simple Huffman code + /* value of 1 indicates a simple Huffman code */ WriteBits(2, 1, storage_ix, storage); - WriteBits(2, count - 1, storage_ix, storage); // NSYM - 1 + WriteBits(2, count - 1, storage_ix, storage); /* NSYM - 1 */ - // Sort + /* Sort */ for (size_t i = 0; i < count; i++) { for (size_t j = i + 1; j < count; j++) { if (depth[symbols[j]] < depth[symbols[i]]) { @@ -452,14 +461,14 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram, WriteBits(max_bits, symbols[1], storage_ix, storage); WriteBits(max_bits, symbols[2], storage_ix, storage); WriteBits(max_bits, symbols[3], storage_ix, storage); - // tree-select + /* tree-select */ WriteBits(1, depth[symbols[0]] == 1 ? 1 : 0, storage_ix, storage); } } else { - // Complex Huffman Tree + /* Complex Huffman Tree */ StoreStaticCodeLengthCode(storage_ix, storage); - // Actual rle coding. + /* Actual rle coding. */ uint8_t previous_value = 8; for (size_t i = 0; i < length;) { const uint8_t value = depth[i]; @@ -531,12 +540,12 @@ static void MoveToFrontTransform(const uint32_t* __restrict v_in, } } -// Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of -// the run length plus extra bits (lower 9 bits is the prefix code and the rest -// are the extra bits). Non-zero values in v[] are shifted by -// *max_length_prefix. Will not create prefix codes bigger than the initial -// value of *max_run_length_prefix. The prefix code of run length L is simply -// Log2Floor(L) and the number of extra bits is the same as the prefix code. +/* Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of + the run length plus extra bits (lower 9 bits is the prefix code and the rest + are the extra bits). Non-zero values in v[] are shifted by + *max_length_prefix. Will not create prefix codes bigger than the initial + value of *max_run_length_prefix. The prefix code of run length L is simply + Log2Floor(L) and the number of extra bits is the same as the prefix code. */ static void RunLengthCodeZeros(const size_t in_size, uint32_t* __restrict v, size_t* __restrict out_size, @@ -630,6 +639,7 @@ void EncodeContextMap(const std::vector& context_map, delete[] rle_symbols; } +/* Stores the block switch command with index block_ix to the bit stream. */ void StoreBlockSwitch(const BlockSplitCode& code, const size_t block_ix, size_t* storage_ix, @@ -646,6 +656,8 @@ void StoreBlockSwitch(const BlockSplitCode& code, storage_ix, storage); } +/* Builds a BlockSplitCode data structure from the block split given by the + vector of block types and block lengths and stores it to the bit stream. */ static void BuildAndStoreBlockSplitCode(const std::vector& types, const std::vector& lengths, const size_t num_types, @@ -695,6 +707,7 @@ static void BuildAndStoreBlockSplitCode(const std::vector& types, } } +/* Stores a context map where the histogram type is always the block type. */ void StoreTrivialContextMap(size_t num_types, size_t context_bits, HuffmanTree* tree, @@ -711,7 +724,7 @@ void StoreTrivialContextMap(size_t num_types, memset(histogram, 0, alphabet_size * sizeof(histogram[0])); memset(depths, 0, alphabet_size * sizeof(depths[0])); memset(bits, 0, alphabet_size * sizeof(bits[0])); - // Write RLEMAX. + /* Write RLEMAX. 
*/ WriteBits(1, 1, storage_ix, storage); WriteBits(4, repeat_code - 1, storage_ix, storage); histogram[repeat_code] = static_cast(num_types); @@ -728,12 +741,12 @@ void StoreTrivialContextMap(size_t num_types, WriteBits(depths[repeat_code], bits[repeat_code], storage_ix, storage); WriteBits(repeat_code, repeat_bits, storage_ix, storage); } - // Write IMTF (inverse-move-to-front) bit. + /* Write IMTF (inverse-move-to-front) bit. */ WriteBits(1, 1, storage_ix, storage); } } -// Manages the encoding of one block category (literal, command or distance). +/* Manages the encoding of one block category (literal, command or distance). */ class BlockEncoder { public: BlockEncoder(size_t alphabet_size, @@ -748,8 +761,8 @@ class BlockEncoder { block_len_(block_lengths.empty() ? 0 : block_lengths[0]), entropy_ix_(0) {} - // Creates entropy codes of block lengths and block types and stores them - // to the bit stream. +/* Creates entropy codes of block lengths and block types and stores them + to the bit stream. */ void BuildAndStoreBlockSwitchEntropyCodes(HuffmanTree* tree, size_t* storage_ix, uint8_t* storage) { @@ -776,8 +789,8 @@ class BlockEncoder { } } - // Stores the next symbol with the entropy code of the current block type. - // Updates the block type and block length at block boundaries. +/* Stores the next symbol with the entropy code of the current block type. + Updates the block type and block length at block boundaries. */ void StoreSymbol(size_t symbol, size_t* storage_ix, uint8_t* storage) { if (block_len_ == 0) { ++block_ix_; @@ -790,9 +803,9 @@ class BlockEncoder { WriteBits(depths_[ix], bits_[ix], storage_ix, storage); } - // Stores the next symbol with the entropy code of the current block type and - // context value. - // Updates the block type and block length at block boundaries. +/* Stores the next symbol with the entropy code of the current block type and + context value. + Updates the block type and block length at block boundaries. */ template void StoreSymbolWithContext(size_t symbol, size_t context, const std::vector& context_map, @@ -1132,8 +1145,8 @@ void StoreMetaBlockFast(const uint8_t* input, } } -// This is for storing uncompressed blocks (simple raw storage of -// bytes-as-bytes). +/* This is for storing uncompressed blocks (simple raw storage of + bytes-as-bytes). */ void StoreUncompressedMetaBlock(bool final_block, const uint8_t * __restrict input, size_t position, size_t mask, @@ -1154,15 +1167,15 @@ void StoreUncompressedMetaBlock(bool final_block, memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len); *storage_ix += len << 3; - // We need to clear the next 4 bytes to continue to be - // compatible with WriteBits. + /* We need to clear the next 4 bytes to continue to be + compatible with BrotliWriteBits. */ brotli::WriteBitsPrepareStorage(*storage_ix, storage); - // Since the uncompressed block itself may not be the final block, add an - // empty one after this. + /* Since the uncompressed block itself may not be the final block, add an + empty one after this. 
 */
 if (final_block) {
- brotli::WriteBits(1, 1, storage_ix, storage); // islast
- brotli::WriteBits(1, 1, storage_ix, storage); // isempty
+ brotli::WriteBits(1, 1, storage_ix, storage); /* islast */
+ brotli::WriteBits(1, 1, storage_ix, storage); /* isempty */
 JumpToByteBoundary(storage_ix, storage);
 }
 }
diff --git a/enc/brotli_bit_stream.h b/enc/brotli_bit_stream.h
index 9ffd50f..27a6d07 100644
--- a/enc/brotli_bit_stream.h
+++ b/enc/brotli_bit_stream.h
@@ -4,14 +4,14 @@
 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
-// Functions to convert brotli-related data structures into the
-// brotli bit stream. The functions here operate under
-// assumption that there is enough space in the storage, i.e., there are
-// no out-of-range checks anywhere.
-//
-// These functions do bit addressing into a byte array. The byte array
-// is called "storage" and the index to the bit is called storage_ix
-// in function arguments.
+/* Functions to convert brotli-related data structures into the
+ brotli bit stream. The functions here operate under
+ assumption that there is enough space in the storage, i.e., there are
+ no out-of-range checks anywhere.
+
+ These functions do bit addressing into a byte array. The byte array
+ is called "storage" and the index to the bit is called storage_ix
+ in function arguments. */
 #ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
 #define BROTLI_ENC_BROTLI_BIT_STREAM_H_
@@ -24,8 +24,8 @@
 namespace brotli {
-// All Store functions here will use a storage_ix, which is always the bit
-// position for the current storage.
+/* All Store functions here will use a storage_ix, which is always the bit
+ position for the current storage. */
 // Stores a number between 0 and 255.
 void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage);
@@ -114,8 +114,8 @@ void StoreBlockSwitch(const BlockSplitCode& code,
 size_t* storage_ix,
 uint8_t* storage);
-// REQUIRES: length > 0
-// REQUIRES: length <= (1 << 24)
+/* REQUIRES: length > 0 */
+/* REQUIRES: length <= (1 << 24) */
 void StoreMetaBlock(const uint8_t* input,
 size_t start_pos,
 size_t length,
@@ -132,10 +132,10 @@ void StoreMetaBlock(const uint8_t* input,
 size_t *storage_ix,
 uint8_t *storage);
-// Stores the meta-block without doing any block splitting, just collects
-// one histogram per block category and uses that for entropy coding.
-// REQUIRES: length > 0
-// REQUIRES: length <= (1 << 24)
+/* Stores the meta-block without doing any block splitting, just collects
+ one histogram per block category and uses that for entropy coding.
+ REQUIRES: length > 0
+ REQUIRES: length <= (1 << 24) */
 void StoreMetaBlockTrivial(const uint8_t* input,
 size_t start_pos,
 size_t length,
@@ -146,10 +146,10 @@ void StoreMetaBlockTrivial(const uint8_t* input,
 size_t *storage_ix,
 uint8_t *storage);
-// Same as above, but uses static prefix codes for histograms with a only a few
-// symbols, and uses static code length prefix codes for all other histograms.
-// REQUIRES: length > 0
-// REQUIRES: length <= (1 << 24)
+/* Same as above, but uses static prefix codes for histograms with only a few
+ symbols, and uses static code length prefix codes for all other histograms.
+ REQUIRES: length > 0
+ REQUIRES: length <= (1 << 24) */
 void StoreMetaBlockFast(const uint8_t* input,
 size_t start_pos,
 size_t length,
@@ -160,10 +160,10 @@ void StoreMetaBlockFast(const uint8_t* input,
 size_t *storage_ix,
 uint8_t *storage);
-// This is for storing uncompressed blocks (simple raw storage of
-// bytes-as-bytes).
-// REQUIRES: length > 0 -// REQUIRES: length <= (1 << 24) +/* This is for storing uncompressed blocks (simple raw storage of + bytes-as-bytes). + REQUIRES: length > 0 + REQUIRES: length <= (1 << 24) */ void StoreUncompressedMetaBlock(bool final_block, const uint8_t* input, size_t position, size_t mask, @@ -171,9 +171,9 @@ void StoreUncompressedMetaBlock(bool final_block, size_t* storage_ix, uint8_t* storage); -// Stores an empty metadata meta-block and syncs to a byte boundary. +/* Stores an empty metadata meta-block and syncs to a byte boundary. */ void StoreSyncMetaBlock(size_t* storage_ix, uint8_t* storage); } // namespace brotli -#endif // BROTLI_ENC_BROTLI_BIT_STREAM_H_ +#endif /* BROTLI_ENC_BROTLI_BIT_STREAM_H_ */ diff --git a/enc/cluster.h b/enc/cluster.h index 8f24316..166fd36 100644 --- a/enc/cluster.h +++ b/enc/cluster.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Functions for clustering similar histograms together. +/* Functions for clustering similar histograms together. */ #ifndef BROTLI_ENC_CLUSTER_H_ #define BROTLI_ENC_CLUSTER_H_ @@ -328,4 +328,4 @@ void ClusterHistograms(const std::vector& in, } // namespace brotli -#endif // BROTLI_ENC_CLUSTER_H_ +#endif /* BROTLI_ENC_CLUSTER_H_ */ diff --git a/enc/command.h b/enc/command.h index c9e17ba..c601dac 100644 --- a/enc/command.h +++ b/enc/command.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// This class models a sequence of literals and a backward reference copy. +/* This class models a sequence of literals and a backward reference copy. */ #ifndef BROTLI_ENC_COMMAND_H_ #define BROTLI_ENC_COMMAND_H_ @@ -63,8 +63,8 @@ static inline uint16_t CombineLengthCodes( if (use_last_distance && inscode < 8 && copycode < 16) { return (copycode < 8) ? bits64 : (bits64 | 64); } else { - // "To convert an insert-and-copy length code to an insert length code and - // a copy length code, the following table can be used" + /* "To convert an insert-and-copy length code to an insert length code and + a copy length code, the following table can be used" */ static const uint16_t cells[9] = { 128u, 192u, 384u, 256u, 320u, 512u, 448u, 576u, 640u }; return cells[(copycode >> 3) + 3 * (inscode >> 3)] | bits64; @@ -153,4 +153,4 @@ struct Command { } // namespace brotli -#endif // BROTLI_ENC_COMMAND_H_ +#endif /* BROTLI_ENC_COMMAND_H_ */ diff --git a/enc/compress_fragment.cc b/enc/compress_fragment.cc index aee39d7..92951c2 100644 --- a/enc/compress_fragment.cc +++ b/enc/compress_fragment.cc @@ -4,13 +4,13 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Function for fast encoding of an input fragment, independently from the input -// history. This function uses one-pass processing: when we find a backward -// match, we immediately emit the corresponding command and literal codes to -// the bit stream. -// -// Adapted from the CompressFragment() function in -// https://github.com/google/snappy/blob/master/snappy.cc +/* Function for fast encoding of an input fragment, independently from the input + history. This function uses one-pass processing: when we find a backward + match, we immediately emit the corresponding command and literal codes to + the bit stream. 
+ + Adapted from the CompressFragment() function in + https://github.com/google/snappy/blob/master/snappy.cc */ #include "./compress_fragment.h" @@ -27,12 +27,12 @@ namespace brotli { -// kHashMul32 multiplier has these properties: -// * The multiplier must be odd. Otherwise we may lose the highest bit. -// * No long streaks of 1s or 0s. -// * There is no effort to ensure that it is a prime, the oddity is enough -// for this use. -// * The number has been tuned heuristically against compression benchmarks. +/* kHashMul32 multiplier has these properties: + * The multiplier must be odd. Otherwise we may lose the highest bit. + * No long streaks of 1s or 0s. + * There is no effort to ensure that it is a prime, the oddity is enough + for this use. + * The number has been tuned heuristically against compression benchmarks. */ static const uint32_t kHashMul32 = 0x1e35a7bd; static inline uint32_t Hash(const uint8_t* p, size_t shift) { @@ -52,12 +52,12 @@ static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) { p1[4] == p2[4]); } -// Builds a literal prefix code into "depths" and "bits" based on the statistics -// of the "input" string and stores it into the bit stream. -// Note that the prefix code here is built from the pre-LZ77 input, therefore -// we can only approximate the statistics of the actual literal stream. -// Moreover, for long inputs we build a histogram from a sample of the input -// and thus have to assign a non-zero depth for each literal. +/* Builds a literal prefix code into "depths" and "bits" based on the statistics + of the "input" string and stores it into the bit stream. + Note that the prefix code here is built from the pre-LZ77 input, therefore + we can only approximate the statistics of the actual literal stream. + Moreover, for long inputs we build a histogram from a sample of the input + and thus have to assign a non-zero depth for each literal. */ static void BuildAndStoreLiteralPrefixCode(const uint8_t* input, const size_t input_size, uint8_t depths[256], @@ -72,8 +72,8 @@ static void BuildAndStoreLiteralPrefixCode(const uint8_t* input, } histogram_total = input_size; for (size_t i = 0; i < 256; ++i) { - // We weigh the first 11 samples with weight 3 to account for the - // balancing effect of the LZ77 phase on the histogram. + /* We weigh the first 11 samples with weight 3 to account for the + balancing effect of the LZ77 phase on the histogram. */ const uint32_t adjust = 2 * std::min(histogram[i], 11u); histogram[i] += adjust; histogram_total += adjust; @@ -85,11 +85,11 @@ static void BuildAndStoreLiteralPrefixCode(const uint8_t* input, } histogram_total = (input_size + kSampleRate - 1) / kSampleRate; for (size_t i = 0; i < 256; ++i) { - // We add 1 to each population count to avoid 0 bit depths (since this is - // only a sample and we don't know if the symbol appears or not), and we - // weigh the first 11 samples with weight 3 to account for the balancing - // effect of the LZ77 phase on the histogram (more frequent symbols are - // more likely to be in backward references instead as literals). + /* We add 1 to each population count to avoid 0 bit depths (since this is + only a sample and we don't know if the symbol appears or not), and we + weigh the first 11 samples with weight 3 to account for the balancing + effect of the LZ77 phase on the histogram (more frequent symbols are + more likely to be in backward references instead as literals). 
 */
 const uint32_t adjust = 1 + 2 * std::min(histogram[i], 11u);
 histogram[i] += adjust;
 histogram_total += adjust;
 }
@@ -100,23 +100,23 @@
 depths, bits, storage_ix, storage);
 }
-// Builds a command and distance prefix code (each 64 symbols) into "depth" and
-// "bits" based on "histogram" and stores it into the bit stream.
+/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
+ "bits" based on "histogram" and stores it into the bit stream. */
 static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
 uint8_t depth[128], uint16_t bits[128],
 size_t* storage_ix,
 uint8_t* storage) {
- // Tree size for building a tree over 64 symbols is 2 * 64 + 1.
+ /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
 static const size_t kTreeSize = 129;
 HuffmanTree tree[kTreeSize];
 CreateHuffmanTree(histogram, 64, 15, tree, depth);
 CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
- // We have to jump through a few hoopes here in order to compute
- // the command bits because the symbols are in a different order than in
- // the full alphabet. This looks complicated, but having the symbols
- // in this order in the command bits saves a few branches in the Emit*
- // functions.
+ /* We have to jump through a few hoops here in order to compute
+ the command bits because the symbols are in a different order than in
+ the full alphabet. This looks complicated, but having the symbols
+ in this order in the command bits saves a few branches in the Emit*
+ functions. */
 uint8_t cmd_depth[64];
 uint16_t cmd_bits[64];
 memcpy(cmd_depth, depth, 24);
@@ -134,7 +134,7 @@
 memcpy(bits + 56, cmd_bits + 56, 16);
 ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
 {
- // Create the bit length array for the full command alphabet.
+ /* Create the bit length array for the full command alphabet. */
 uint8_t cmd_depth[704] = { 0 };
 memcpy(cmd_depth, depth, 8);
 memcpy(cmd_depth + 64, depth + 8, 8);
@@ -151,7 +151,7 @@
 StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
 }
-// REQUIRES: insertlen < 6210
+/* REQUIRES: insertlen < 6210 */
 inline void EmitInsertLen(size_t insertlen,
 const uint8_t depth[128],
 const uint16_t bits[128],
@@ -299,21 +299,21 @@ inline void EmitLiterals(const uint8_t* input, const size_t len,
 }
 }
-// REQUIRES: len <= 1 << 20.
+/* REQUIRES: len <= 1 << 20. */
 static void StoreMetaBlockHeader(
 size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
- // ISLAST
+ /* ISLAST */
 WriteBits(1, 0, storage_ix, storage);
 if (len <= (1U << 16)) {
- // MNIBBLES is 4
+ /* MNIBBLES is 4 */
 WriteBits(2, 0, storage_ix, storage);
 WriteBits(16, len - 1, storage_ix, storage);
 } else {
- // MNIBBLES is 5
+ /* MNIBBLES is 5 */
 WriteBits(2, 1, storage_ix, storage);
 WriteBits(20, len - 1, storage_ix, storage);
 }
- // ISUNCOMPRESSED
+ /* ISUNCOMPRESSED */
 WriteBits(1, is_uncompressed, storage_ix, storage);
 }
@@ -406,11 +406,12 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
 return;
 }
- // "next_emit" is a pointer to the first byte that is not covered by a
- // previous copy. Bytes between "next_emit" and the start of the next copy or
- // the end of the input will be emitted as literal bytes.
+ /* "next_emit" is a pointer to the first byte that is not covered by a
+ previous copy.
Bytes between "next_emit" and the start of the next copy or + the end of the input will be emitted as literal bytes. */ const uint8_t* next_emit = input; - // Save the start of the first block for position and distance computations. + /* Save the start of the first block for position and distance computations. + */ const uint8_t* base_ip = input; static const size_t kFirstBlockSize = 3 << 15; @@ -419,8 +420,8 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size, const uint8_t* metablock_start = input; size_t block_size = std::min(input_size, kFirstBlockSize); size_t total_block_size = block_size; - // Save the bit position of the MLEN field of the meta-block header, so that - // we can update it later if we decide to extend this meta-block. + /* Save the bit position of the MLEN field of the meta-block header, so that + we can update it later if we decide to extend this meta-block. */ size_t mlen_storage_ix = *storage_ix + 3; StoreMetaBlockHeader(block_size, 0, storage_ix, storage); // No block splits, no contexts. @@ -439,10 +440,10 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size, storage_ix, storage); emit_commands: - // Initialize the command and distance histograms. We will gather - // statistics of command and distance codes during the processing - // of this block and use it to update the command and distance - // prefix codes for the next block. + /* Initialize the command and distance histograms. We will gather + statistics of command and distance codes during the processing + of this block and use it to update the command and distance + prefix codes for the next block. */ uint32_t cmd_histo[128] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, @@ -466,31 +467,31 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size, const size_t kInputMarginBytes = 16; const size_t kMinMatchLen = 5; if (PREDICT_TRUE(block_size >= kInputMarginBytes)) { - // For the last block, we need to keep a 16 bytes margin so that we can be - // sure that all distances are at most window size - 16. - // For all other blocks, we only need to keep a margin of 5 bytes so that - // we don't go over the block size with a copy. + /* For the last block, we need to keep a 16 bytes margin so that we can be + sure that all distances are at most window size - 16. + For all other blocks, we only need to keep a margin of 5 bytes so that + we don't go over the block size with a copy. */ const size_t len_limit = std::min(block_size - kMinMatchLen, input_size - kInputMarginBytes); const uint8_t* ip_limit = input + len_limit; for (uint32_t next_hash = Hash(++ip, shift); ; ) { assert(next_emit < ip); - // Step 1: Scan forward in the input looking for a 5-byte-long match. - // If we get close to exhausting the input then goto emit_remainder. - // - // Heuristic match skipping: If 32 bytes are scanned with no matches - // found, start looking only at every other byte. If 32 more bytes are - // scanned, look at every third byte, etc.. When a match is found, - // immediately go back to looking at every byte. This is a small loss - // (~5% performance, ~0.1% density) for compressible data due to more - // bookkeeping, but for non-compressible data (such as JPEG) it's a huge - // win since the compressor quickly "realizes" the data is incompressible - // and doesn't bother looking for matches everywhere. 
- //
- // The "skip" variable keeps track of how many bytes there are since the
- // last match; dividing it by 32 (ie. right-shifting by five) gives the
- // number of bytes to move ahead for each iteration.
+ /* Step 1: Scan forward in the input looking for a 5-byte-long match.
+ If we get close to exhausting the input then goto emit_remainder.
+
+ Heuristic match skipping: If 32 bytes are scanned with no matches
+ found, start looking only at every other byte. If 32 more bytes are
+ scanned, look at every third byte, etc.. When a match is found,
+ immediately go back to looking at every byte. This is a small loss
+ (~5% performance, ~0.1% density) for compressible data due to more
+ bookkeeping, but for non-compressible data (such as JPEG) it's a huge
+ win since the compressor quickly "realizes" the data is incompressible
+ and doesn't bother looking for matches everywhere.
+
+ The "skip" variable keeps track of how many bytes there are since the
+ last match; dividing it by 32 (ie. right-shifting by five) gives the
+ number of bytes to move ahead for each iteration. */
 uint32_t skip = 32;
 const uint8_t* next_ip = ip;
@@ -519,15 +520,15 @@
 table[hash] = static_cast<int>(ip - base_ip);
 }
 while (PREDICT_TRUE(!IsMatch(ip, candidate)));
- // Step 2: Emit the found match together with the literal bytes from
- // "next_emit" to the bit stream, and then see if we can find a next macth
- // immediately afterwards. Repeat until we find no match for the input
- // without emitting some literal bytes.
+ /* Step 2: Emit the found match together with the literal bytes from
+ "next_emit" to the bit stream, and then see if we can find a next match
+ immediately afterwards. Repeat until we find no match for the input
+ without emitting some literal bytes. */
 uint64_t input_bytes;
 {
- // We have a 5-byte match at ip, and we need to emit bytes in
- // [next_emit, ip).
+ /* We have a 5-byte match at ip, and we need to emit bytes in
+ [next_emit, ip). */
 const uint8_t* base = ip;
 size_t matched = 5 + FindMatchLengthWithLimit(
 candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
@@ -567,9 +568,9 @@
 if (PREDICT_FALSE(ip >= ip_limit)) {
 goto emit_remainder;
 }
- // We could immediately start working at ip now, but to improve
- // compression we first update "table" with the hashes of some positions
- // within the last copy.
+ /* We could immediately start working at ip now, but to improve
+ compression we first update "table" with the hashes of some positions
+ within the last copy. */
 input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
 uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
 table[prev_hash] = static_cast<int>(ip - base_ip - 3);
@@ -584,8 +585,8 @@
 }
 while (IsMatch(ip, candidate)) {
- // We have a 5-byte match at ip, and no need to emit any literal bytes
- // prior to ip.
+ /* We have a 5-byte match at ip, and no need to emit any literal bytes
+ prior to ip.
 */
 const uint8_t* base = ip;
 size_t matched = 5 + FindMatchLengthWithLimit(
 candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
@@ -601,9 +602,9 @@
 if (PREDICT_FALSE(ip >= ip_limit)) {
 goto emit_remainder;
 }
- // We could immediately start working at ip now, but to improve
- // compression we first update "table" with the hashes of some positions
- // within the last copy.
+ /* We could immediately start working at ip now, but to improve
+ compression we first update "table" with the hashes of some positions
+ within the last copy. */
 input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
 uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
 table[prev_hash] = static_cast<int>(ip - base_ip - 3);
@@ -627,22 +628,22 @@
 input_size -= block_size;
 block_size = std::min(input_size, kMergeBlockSize);
- // Decide if we want to continue this meta-block instead of emitting the
- // last insert-only command.
+ /* Decide if we want to continue this meta-block instead of emitting the
+ last insert-only command. */
 if (input_size > 0 &&
 total_block_size + block_size <= (1 << 20) &&
 ShouldMergeBlock(input, block_size, lit_depth)) {
 assert(total_block_size > (1 << 16));
- // Update the size of the current meta-block and continue emitting commands.
- // We can do this because the current size and the new size both have 5
- // nibbles.
+ /* Update the size of the current meta-block and continue emitting commands.
+ We can do this because the current size and the new size both have 5
+ nibbles. */
 total_block_size += block_size;
 UpdateBits(20, static_cast<uint32_t>(total_block_size - 1),
 mlen_storage_ix, storage);
 goto emit_commands;
 }
- // Emit the remaining bytes as literals.
+ /* Emit the remaining bytes as literals. */
 if (next_emit < ip_end) {
 const size_t insert = static_cast<size_t>(ip_end - next_emit);
 if (PREDICT_TRUE(insert < 6210)) {
@@ -663,17 +664,17 @@
 next_emit = ip_end;
 next_block:
- // If we have more data, write a new meta-block header and prefix codes and
- // then continue emitting commands.
+ /* If we have more data, write a new meta-block header and prefix codes and
+ then continue emitting commands. */
 if (input_size > 0) {
 metablock_start = input;
 block_size = std::min(input_size, kFirstBlockSize);
 total_block_size = block_size;
- // Save the bit position of the MLEN field of the meta-block header, so that
- // we can update it later if we decide to extend this meta-block.
+ /* Save the bit position of the MLEN field of the meta-block header, so that
+ we can update it later if we decide to extend this meta-block. */
 mlen_storage_ix = *storage_ix + 3;
 StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
- // No block splits, no contexts.
+ /* No block splits, no contexts. */
 WriteBits(13, 0, storage_ix, storage);
 memset(lit_depth, 0, sizeof(lit_depth));
 memset(lit_bits, 0, sizeof(lit_bits));
@@ -685,12 +686,12 @@
 }
 if (is_last) {
- WriteBits(1, 1, storage_ix, storage); // islast
- WriteBits(1, 1, storage_ix, storage); // isempty
+ WriteBits(1, 1, storage_ix, storage); /* islast */
+ WriteBits(1, 1, storage_ix, storage); /* isempty */
 *storage_ix = (*storage_ix + 7u) & ~7u;
 } else {
- // If this is not the last block, update the command and distance prefix
- // codes for the next block and store the compressed forms.
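The meta-block merging logic above depends on MNIBBLES staying at 5, so the 20-bit MLEN value can be rewritten in place at the saved bit position. A sketch of that kind of in-place bit patch — PatchBits is an illustrative stand-in with the semantics the UpdateBits() call assumes, not the library routine itself:

#include <cstddef>
#include <cstdint>

/* Rewrites n_bits already-written bits, LSB first, at absolute bit
   position pos in storage. The merge trick works because the old and new
   MLEN occupy exactly the same 20 bits. */
static void PatchBits(size_t n_bits, uint32_t bits, size_t pos,
                      uint8_t* storage) {
  for (size_t i = 0; i < n_bits; ++i, ++pos) {
    const uint8_t mask = static_cast<uint8_t>(1u << (pos & 7));
    if ((bits >> i) & 1u) {
      storage[pos >> 3] |= mask;
    } else {
      storage[pos >> 3] &= static_cast<uint8_t>(~mask);
    }
  }
}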
+ /* If this is not the last block, update the command and distance prefix + codes for the next block and store the compressed forms. */ cmd_code[0] = 0; *cmd_code_numbits = 0; BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits, diff --git a/enc/compress_fragment.h b/enc/compress_fragment.h index 1c65c5b..a0aa13b 100644 --- a/enc/compress_fragment.h +++ b/enc/compress_fragment.h @@ -4,10 +4,10 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Function for fast encoding of an input fragment, independently from the input -// history. This function uses one-pass processing: when we find a backward -// match, we immediately emit the corresponding command and literal codes to -// the bit stream. +/* Function for fast encoding of an input fragment, independently from the input + history. This function uses one-pass processing: when we find a backward + match, we immediately emit the corresponding command and literal codes to + the bit stream. */ #ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_ #define BROTLI_ENC_COMPRESS_FRAGMENT_H_ @@ -16,25 +16,25 @@ namespace brotli { -// Compresses "input" string to the "*storage" buffer as one or more complete -// meta-blocks, and updates the "*storage_ix" bit position. -// -// If "is_last" is true, emits an additional empty last meta-block. -// -// "cmd_depth" and "cmd_bits" contain the command and distance prefix codes -// (see comment in encode.h) used for the encoding of this input fragment. -// If "is_last" is false, they are updated to reflect the statistics -// of this input fragment, to be used for the encoding of the next fragment. -// -// "*cmd_code_numbits" is the number of bits of the compressed representation -// of the command and distance prefix codes, and "cmd_code" is an array of -// at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed -// command and distance prefix codes. If "is_last" is false, these are also -// updated to represent the updated "cmd_depth" and "cmd_bits". -// -// REQUIRES: "input_size" is greater than zero, or "is_last" is true. -// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. -// REQUIRES: "table_size" is a power of two +/* Compresses "input" string to the "*storage" buffer as one or more complete + meta-blocks, and updates the "*storage_ix" bit position. + + If "is_last" is 1, emits an additional empty last meta-block. + + "cmd_depth" and "cmd_bits" contain the command and distance prefix codes + (see comment in encode.h) used for the encoding of this input fragment. + If "is_last" is 0, they are updated to reflect the statistics + of this input fragment, to be used for the encoding of the next fragment. + + "*cmd_code_numbits" is the number of bits of the compressed representation + of the command and distance prefix codes, and "cmd_code" is an array of + at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed + command and distance prefix codes. If "is_last" is 0, these are also + updated to represent the updated "cmd_depth" and "cmd_bits". + + REQUIRES: "input_size" is greater than zero, or "is_last" is 1. + REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. 
+ REQUIRES: "table_size" is a power of two */ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size, bool is_last, int* table, size_t table_size, @@ -44,4 +44,4 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size, } // namespace brotli -#endif // BROTLI_ENC_COMPRESS_FRAGMENT_H_ +#endif /* BROTLI_ENC_COMPRESS_FRAGMENT_H_ */ diff --git a/enc/compress_fragment_two_pass.cc b/enc/compress_fragment_two_pass.cc index 7c0ce21..cdba6e4 100644 --- a/enc/compress_fragment_two_pass.cc +++ b/enc/compress_fragment_two_pass.cc @@ -4,11 +4,11 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Function for fast encoding of an input fragment, independently from the input -// history. This function uses two-pass processing: in the first pass we save -// the found backward matches and literal bytes into a buffer, and in the -// second pass we emit them into the bit stream using prefix codes built based -// on the actual command and literal byte histograms. +/* Function for fast encoding of an input fragment, independently from the input + history. This function uses two-pass processing: in the first pass we save + the found backward matches and literal bytes into a buffer, and in the + second pass we emit them into the bit stream using prefix codes built based + on the actual command and literal byte histograms. */ #include "./compress_fragment_two_pass.h" @@ -25,12 +25,12 @@ namespace brotli { -// kHashMul32 multiplier has these properties: -// * The multiplier must be odd. Otherwise we may lose the highest bit. -// * No long streaks of 1s or 0s. -// * There is no effort to ensure that it is a prime, the oddity is enough -// for this use. -// * The number has been tuned heuristically against compression benchmarks. +/* kHashMul32 multiplier has these properties: + * The multiplier must be odd. Otherwise we may lose the highest bit. + * No long streaks of 1s or 0s. + * There is no effort to ensure that it is a prime, the oddity is enough + for this use. + * The number has been tuned heuristically against compression benchmarks. */ static const uint32_t kHashMul32 = 0x1e35a7bd; static inline uint32_t Hash(const uint8_t* p, size_t shift) { @@ -51,22 +51,22 @@ static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) { p1[5] == p2[5]); } -// Builds a command and distance prefix code (each 64 symbols) into "depth" and -// "bits" based on "histogram" and stores it into the bit stream. +/* Builds a command and distance prefix code (each 64 symbols) into "depth" and + "bits" based on "histogram" and stores it into the bit stream. */ static void BuildAndStoreCommandPrefixCode( const uint32_t histogram[128], uint8_t depth[128], uint16_t bits[128], size_t* storage_ix, uint8_t* storage) { - // Tree size for building a tree over 64 symbols is 2 * 64 + 1. + /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */ static const size_t kTreeSize = 129; HuffmanTree tree[kTreeSize]; CreateHuffmanTree(histogram, 64, 15, tree, depth); CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]); - // We have to jump through a few hoopes here in order to compute - // the command bits because the symbols are in a different order than in - // the full alphabet. This looks complicated, but having the symbols - // in this order in the command bits saves a few branches in the Emit* - // functions. 
+    /* We have to jump through a few hoops here in order to compute
+       the command bits because the symbols are in a different order than in
+       the full alphabet. This looks complicated, but having the symbols
+       in this order in the command bits saves a few branches in the Emit*
+       functions. */
  uint8_t cmd_depth[64];
  uint16_t cmd_bits[64];
  memcpy(cmd_depth, depth + 24, 24);
@@ -84,7 +84,7 @@ static void BuildAndStoreCommandPrefixCode(
  memcpy(bits + 56, cmd_bits + 48, 16);
  ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
  {
-    // Create the bit length array for the full command alphabet.
+    /* Create the bit length array for the full command alphabet. */
    uint8_t cmd_depth[704] = { 0 };
    memcpy(cmd_depth, depth + 24, 8);
    memcpy(cmd_depth + 64, depth + 32, 8);
@@ -202,21 +202,21 @@ inline void EmitDistance(uint32_t distance, uint32_t** commands) {
  ++(*commands);
}

-// REQUIRES: len <= 1 << 20.
+/* REQUIRES: len <= 1 << 20. */
static void StoreMetaBlockHeader(
    size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
-  // ISLAST
+  /* ISLAST */
  WriteBits(1, 0, storage_ix, storage);
  if (len <= (1U << 16)) {
-    // MNIBBLES is 4
+    /* MNIBBLES is 4 */
    WriteBits(2, 0, storage_ix, storage);
    WriteBits(16, len - 1, storage_ix, storage);
  } else {
-    // MNIBBLES is 5
+    /* MNIBBLES is 5 */
    WriteBits(2, 1, storage_ix, storage);
    WriteBits(20, len - 1, storage_ix, storage);
  }
-  // ISUNCOMPRESSED
+  /* ISUNCOMPRESSED */
  WriteBits(1, is_uncompressed, storage_ix, storage);
}
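/* Illustration only, not part of the patch: the header bits that
   StoreMetaBlockHeader above emits, spelled out with a toy LSB-first bit
   writer. ToyWriteBits is a hypothetical stand-in for WriteBits from
   write_bits.h and assumes `out` is zero-initialized. */
#include <stdint.h>
#include <stddef.h>

static void ToyWriteBits(unsigned n_bits, uint64_t bits,
                         size_t* pos, uint8_t* out) {
  for (unsigned i = 0; i < n_bits; ++i, ++*pos) {
    out[*pos >> 3] |= (uint8_t)(((bits >> i) & 1) << (*pos & 7));
  }
}

static void ToyMetaBlockHeader(size_t len, int is_uncompressed,
                               size_t* pos, uint8_t* out) {
  ToyWriteBits(1, 0, pos, out);                       /* ISLAST = 0 */
  if (len <= (1u << 16)) {
    ToyWriteBits(2, 0, pos, out);                     /* MNIBBLES code 0 => 4 */
    ToyWriteBits(16, (uint64_t)(len - 1), pos, out);  /* MLEN - 1, 4 nibbles */
  } else {
    ToyWriteBits(2, 1, pos, out);                     /* MNIBBLES code 1 => 5 */
    ToyWriteBits(20, (uint64_t)(len - 1), pos, out);  /* MLEN - 1, 5 nibbles */
  }
  ToyWriteBits(1, (uint64_t)(is_uncompressed != 0), pos, out); /* ISUNCOMPRESSED */
}

@@ -224,7 +224,7 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
                           size_t input_size, const uint8_t* base_ip,
                           int* table, size_t table_size,
                           uint8_t** literals, uint32_t** commands) {
-  // "ip" is the input pointer.
+  /* "ip" is the input pointer. */
  const uint8_t* ip = input;
  assert(table_size);
  assert(table_size <= (1u << 31));
@@ -233,40 +233,40 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
  assert(table_size - 1 == static_cast<size_t>(
      MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
  const uint8_t* ip_end = input + block_size;
-  // "next_emit" is a pointer to the first byte that is not covered by a
-  // previous copy. Bytes between "next_emit" and the start of the next copy or
-  // the end of the input will be emitted as literal bytes.
+  /* "next_emit" is a pointer to the first byte that is not covered by a
+     previous copy. Bytes between "next_emit" and the start of the next copy or
+     the end of the input will be emitted as literal bytes. */
  const uint8_t* next_emit = input;

  int last_distance = -1;
  const size_t kInputMarginBytes = 16;
  const size_t kMinMatchLen = 6;
  if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
-    // For the last block, we need to keep a 16 bytes margin so that we can be
-    // sure that all distances are at most window size - 16.
-    // For all other blocks, we only need to keep a margin of 5 bytes so that
-    // we don't go over the block size with a copy.
+    /* For the last block, we need to keep a 16 bytes margin so that we can be
+       sure that all distances are at most window size - 16.
+       For all other blocks, we only need to keep a margin of 5 bytes so that
+       we don't go over the block size with a copy. */
    const size_t len_limit = std::min(block_size - kMinMatchLen,
                                      input_size - kInputMarginBytes);
    const uint8_t* ip_limit = input + len_limit;

    for (uint32_t next_hash = Hash(++ip, shift); ; ) {
      assert(next_emit < ip);
-      // Step 1: Scan forward in the input looking for a 6-byte-long match.
-      // If we get close to exhausting the input then goto emit_remainder.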
-      //
-      // Heuristic match skipping: If 32 bytes are scanned with no matches
-      // found, start looking only at every other byte. If 32 more bytes are
-      // scanned, look at every third byte, etc.. When a match is found,
-      // immediately go back to looking at every byte. This is a small loss
-      // (~5% performance, ~0.1% density) for compressible data due to more
-      // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
-      // win since the compressor quickly "realizes" the data is incompressible
-      // and doesn't bother looking for matches everywhere.
-      //
-      // The "skip" variable keeps track of how many bytes there are since the
-      // last match; dividing it by 32 (ie. right-shifting by five) gives the
-      // number of bytes to move ahead for each iteration.
+      /* Step 1: Scan forward in the input looking for a 6-byte-long match.
+         If we get close to exhausting the input then goto emit_remainder.
+
+         Heuristic match skipping: If 32 bytes are scanned with no matches
+         found, start looking only at every other byte. If 32 more bytes are
+         scanned, look at every third byte, etc.. When a match is found,
+         immediately go back to looking at every byte. This is a small loss
+         (~5% performance, ~0.1% density) for compressible data due to more
+         bookkeeping, but for non-compressible data (such as JPEG) it's a huge
+         win since the compressor quickly "realizes" the data is incompressible
+         and doesn't bother looking for matches everywhere.
+
+         The "skip" variable keeps track of how many bytes there are since the
+         last match; dividing it by 32 (ie. right-shifting by five) gives the
+         number of bytes to move ahead for each iteration. */
      uint32_t skip = 32;

      const uint8_t* next_ip = ip;
@@ -295,15 +295,15 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
        table[hash] = static_cast<int>(ip - base_ip);
      } while (PREDICT_TRUE(!IsMatch(ip, candidate)));

-      // Step 2: Emit the found match together with the literal bytes from
-      // "next_emit", and then see if we can find a next macth immediately
-      // afterwards. Repeat until we find no match for the input
-      // without emitting some literal bytes.
+      /* Step 2: Emit the found match together with the literal bytes from
+         "next_emit", and then see if we can find a next match immediately
+         afterwards. Repeat until we find no match for the input
+         without emitting some literal bytes. */
      uint64_t input_bytes;

      {
-        // We have a 6-byte match at ip, and we need to emit bytes in
-        // [next_emit, ip).
+        /* We have a 6-byte match at ip, and we need to emit bytes in
+           [next_emit, ip). */
        const uint8_t* base = ip;
        size_t matched = 6 + FindMatchLengthWithLimit(
            candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
@@ -327,9 +327,9 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
        if (PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
-        // We could immediately start working at ip now, but to improve
-        // compression we first update "table" with the hashes of some positions
-        // within the last copy.
+        /* We could immediately start working at ip now, but to improve
+           compression we first update "table" with the hashes of some
+           positions within the last copy. */
        input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
        uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
        table[prev_hash] = static_cast<int>(ip - base_ip - 5);
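/* Illustration only, not part of the patch: how several hash-table slots
   inside the last copy can be refreshed from one 64-bit load, as the comment
   above describes. ToyHashAtOffset mirrors the 6-byte kHashMul32 scheme of
   this file's HashBytesAtOffset; the table type is simplified and `shift`
   is assumed to be at least 32. */
#include <stdint.h>
#include <string.h>

static uint32_t ToyHashAtOffset(uint64_t v, int offset, unsigned shift) {
  /* Select the 6 bytes starting at `offset` within the loaded word. */
  const uint64_t h = ((v >> (8 * offset)) << 16) * 0x1e35a7bdull;
  return (uint32_t)(h >> shift);
}

static void ToyRefreshTable(const uint8_t* ip, const uint8_t* base_ip,
                            uint32_t* table, unsigned shift) {
  uint64_t v;
  memcpy(&v, ip - 5, sizeof(v));  /* one load covers offsets 0..2 */
  table[ToyHashAtOffset(v, 0, shift)] = (uint32_t)(ip - base_ip - 5);
  table[ToyHashAtOffset(v, 1, shift)] = (uint32_t)(ip - base_ip - 4);
  table[ToyHashAtOffset(v, 2, shift)] = (uint32_t)(ip - base_ip - 3);
}

@@ -349,8 +349,8 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
    }

    while (IsMatch(ip, candidate)) {
-      // We have a 6-byte match at ip, and no need to emit any
-      // literal bytes prior to ip.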
+ /* We have a 6-byte match at ip, and no need to emit any + literal bytes prior to ip. */ const uint8_t* base = ip; size_t matched = 6 + FindMatchLengthWithLimit( candidate + 6, ip + 6, static_cast(ip_end - ip) - 6); @@ -364,9 +364,9 @@ static void CreateCommands(const uint8_t* input, size_t block_size, if (PREDICT_FALSE(ip >= ip_limit)) { goto emit_remainder; } - // We could immediately start working at ip now, but to improve - // compression we first update "table" with the hashes of some positions - // within the last copy. + /* We could immediately start working at ip now, but to improve + compression we first update "table" with the hashes of some + positions within the last copy. */ input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5); uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift); table[prev_hash] = static_cast(ip - base_ip - 5); @@ -391,7 +391,7 @@ static void CreateCommands(const uint8_t* input, size_t block_size, emit_remainder: assert(next_emit <= ip_end); - // Emit the remaining bytes as literals. + /* Emit the remaining bytes as literals. */ if (next_emit < ip_end) { const uint32_t insert = static_cast(ip_end - next_emit); EmitInsertLen(insert, commands); @@ -483,7 +483,8 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size, uint32_t* command_buf, uint8_t* literal_buf, int* table, size_t table_size, size_t* storage_ix, uint8_t* storage) { - // Save the start of the first block for position and distance computations. + /* Save the start of the first block for position and distance computations. + */ const uint8_t* base_ip = input; while (input_size > 0) { @@ -496,14 +497,14 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size, const size_t num_commands = static_cast(commands - command_buf); if (ShouldCompress(input, block_size, num_literals)) { StoreMetaBlockHeader(block_size, 0, storage_ix, storage); - // No block splits, no contexts. + /* No block splits, no contexts. */ WriteBits(13, 0, storage_ix, storage); StoreCommands(literal_buf, num_literals, command_buf, num_commands, storage_ix, storage); } else { - // Since we did not find many backward references and the entropy of - // the data is close to 8 bits, we can simply emit an uncompressed block. - // This makes compression speed of uncompressible data about 3x faster. + /* Since we did not find many backward references and the entropy of + the data is close to 8 bits, we can simply emit an uncompressed block. + This makes compression speed of uncompressible data about 3x faster. */ StoreMetaBlockHeader(block_size, 1, storage_ix, storage); *storage_ix = (*storage_ix + 7u) & ~7u; memcpy(&storage[*storage_ix >> 3], input, block_size); @@ -515,8 +516,8 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size, } if (is_last) { - WriteBits(1, 1, storage_ix, storage); // islast - WriteBits(1, 1, storage_ix, storage); // isempty + WriteBits(1, 1, storage_ix, storage); /* islast */ + WriteBits(1, 1, storage_ix, storage); /* isempty */ *storage_ix = (*storage_ix + 7u) & ~7u; } } diff --git a/enc/compress_fragment_two_pass.h b/enc/compress_fragment_two_pass.h index 2bbde77..8efe48a 100644 --- a/enc/compress_fragment_two_pass.h +++ b/enc/compress_fragment_two_pass.h @@ -4,11 +4,11 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Function for fast encoding of an input fragment, independently from the input -// history. 
This function uses two-pass processing: in the first pass we save -// the found backward matches and literal bytes into a buffer, and in the -// second pass we emit them into the bit stream using prefix codes built based -// on the actual command and literal byte histograms. +/* Function for fast encoding of an input fragment, independently from the input + history. This function uses two-pass processing: in the first pass we save + the found backward matches and literal bytes into a buffer, and in the + second pass we emit them into the bit stream using prefix codes built based + on the actual command and literal byte histograms. */ #ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ #define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ @@ -19,16 +19,16 @@ namespace brotli { static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17; -// Compresses "input" string to the "*storage" buffer as one or more complete -// meta-blocks, and updates the "*storage_ix" bit position. -// -// If "is_last" is true, emits an additional empty last meta-block. -// -// REQUIRES: "input_size" is greater than zero, or "is_last" is true. -// REQUIRES: "command_buf" and "literal_buf" point to at least -// kCompressFragmentTwoPassBlockSize long arrays. -// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. -// REQUIRES: "table_size" is a power of two +/* Compresses "input" string to the "*storage" buffer as one or more complete + meta-blocks, and updates the "*storage_ix" bit position. + + If "is_last" is 1, emits an additional empty last meta-block. + + REQUIRES: "input_size" is greater than zero, or "is_last" is 1. + REQUIRES: "command_buf" and "literal_buf" point to at least + kCompressFragmentTwoPassBlockSize long arrays. + REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. + REQUIRES: "table_size" is a power of two */ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size, bool is_last, uint32_t* command_buf, uint8_t* literal_buf, @@ -37,4 +37,4 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size, } // namespace brotli -#endif // BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ +#endif /* BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ */ diff --git a/enc/context.h b/enc/context.h index 0e572bc..1edc9e1 100644 --- a/enc/context.h +++ b/enc/context.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Functions to map previous bytes into a context id. +/* Functions to map previous bytes into a context id. */ #ifndef BROTLI_ENC_CONTEXT_H_ #define BROTLI_ENC_CONTEXT_H_ @@ -13,82 +13,83 @@ namespace brotli { -// Second-order context lookup table for UTF8 byte streams. -// -// If p1 and p2 are the previous two bytes, we calculate the context as -// -// context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256]. -// -// If the previous two bytes are ASCII characters (i.e. < 128), this will be -// equivalent to -// -// context = 4 * context1(p1) + context2(p2), -// -// where context1 is based on the previous byte in the following way: -// -// 0 : non-ASCII control -// 1 : \t, \n, \r -// 2 : space -// 3 : other punctuation -// 4 : " ' -// 5 : % -// 6 : ( < [ { -// 7 : ) > ] } -// 8 : , ; : -// 9 : . 
-// 10 : = -// 11 : number -// 12 : upper-case vowel -// 13 : upper-case consonant -// 14 : lower-case vowel -// 15 : lower-case consonant -// -// and context2 is based on the second last byte: -// -// 0 : control, space -// 1 : punctuation -// 2 : upper-case letter, number -// 3 : lower-case letter -// -// If the last byte is ASCII, and the second last byte is not (in a valid UTF8 -// stream it will be a continuation byte, value between 128 and 191), the -// context is the same as if the second last byte was an ASCII control or space. -// -// If the last byte is a UTF8 lead byte (value >= 192), then the next byte will -// be a continuation byte and the context id is 2 or 3 depending on the LSB of -// the last byte and to a lesser extent on the second last byte if it is ASCII. -// -// If the last byte is a UTF8 continuation byte, the second last byte can be: -// - continuation byte: the next byte is probably ASCII or lead byte (assuming -// 4-byte UTF8 characters are rare) and the context id is 0 or 1. -// - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1 -// - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3 -// -// The possible value combinations of the previous two bytes, the range of -// context ids and the type of the next byte is summarized in the table below: -// -// |--------\-----------------------------------------------------------------| -// | \ Last byte | -// | Second \---------------------------------------------------------------| -// | last byte \ ASCII | cont. byte | lead byte | -// | \ (0-127) | (128-191) | (192-) | -// |=============|===================|=====================|==================| -// | ASCII | next: ASCII/lead | not valid | next: cont. | -// | (0-127) | context: 4 - 63 | | context: 2 - 3 | -// |-------------|-------------------|---------------------|------------------| -// | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. | -// | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 | -// |-------------|-------------------|---------------------|------------------| -// | lead byte | not valid | next: ASCII/lead | not valid | -// | (192-207) | | context: 0 - 1 | | -// |-------------|-------------------|---------------------|------------------| -// | lead byte | not valid | next: cont. | not valid | -// | (208-) | | context: 2 - 3 | | -// |-------------|-------------------|---------------------|------------------| +/* Second-order context lookup table for UTF8 byte streams. + + If p1 and p2 are the previous two bytes, we calculate the context as + + context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256]. + + If the previous two bytes are ASCII characters (i.e. < 128), this will be + equivalent to + + context = 4 * context1(p1) + context2(p2), + + where context1 is based on the previous byte in the following way: + + 0 : non-ASCII control + 1 : \t, \n, \r + 2 : space + 3 : other punctuation + 4 : " ' + 5 : % + 6 : ( < [ { + 7 : ) > ] } + 8 : , ; : + 9 : . + 10 : = + 11 : number + 12 : upper-case vowel + 13 : upper-case consonant + 14 : lower-case vowel + 15 : lower-case consonant + + and context2 is based on the second last byte: + + 0 : control, space + 1 : punctuation + 2 : upper-case letter, number + 3 : lower-case letter + + If the last byte is ASCII, and the second last byte is not (in a valid UTF8 + stream it will be a continuation byte, value between 128 and 191), the + context is the same as if the second last byte was an ASCII control or space. 
+ + If the last byte is a UTF8 lead byte (value >= 192), then the next byte will + be a continuation byte and the context id is 2 or 3 depending on the LSB of + the last byte and to a lesser extent on the second last byte if it is ASCII. + + If the last byte is a UTF8 continuation byte, the second last byte can be: + - continuation byte: the next byte is probably ASCII or lead byte (assuming + 4-byte UTF8 characters are rare) and the context id is 0 or 1. + - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1 + - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3 + + The possible value combinations of the previous two bytes, the range of + context ids and the type of the next byte is summarized in the table below: + + |--------\-----------------------------------------------------------------| + | \ Last byte | + | Second \---------------------------------------------------------------| + | last byte \ ASCII | cont. byte | lead byte | + | \ (0-127) | (128-191) | (192-) | + |=============|===================|=====================|==================| + | ASCII | next: ASCII/lead | not valid | next: cont. | + | (0-127) | context: 4 - 63 | | context: 2 - 3 | + |-------------|-------------------|---------------------|------------------| + | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. | + | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 | + |-------------|-------------------|---------------------|------------------| + | lead byte | not valid | next: ASCII/lead | not valid | + | (192-207) | | context: 0 - 1 | | + |-------------|-------------------|---------------------|------------------| + | lead byte | not valid | next: cont. | not valid | + | (208-) | | context: 2 - 3 | | + |-------------|-------------------|---------------------|------------------| +*/ static const uint8_t kUTF8ContextLookup[512] = { - // Last byte. - // - // ASCII range. + /* Last byte. */ + /* */ + /* ASCII range. */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, @@ -97,19 +98,19 @@ static const uint8_t kUTF8ContextLookup[512] = { 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, - // UTF8 continuation byte range. + /* UTF8 continuation byte range. */ 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, - // UTF8 lead byte range. + /* UTF8 lead byte range. */ 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, - // Second last byte. - // - // ASCII range. + /* Second last byte. */ + /* */ + /* ASCII range. */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -118,19 +119,19 @@ static const uint8_t kUTF8ContextLookup[512] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, - // UTF8 continuation byte range. + /* UTF8 continuation byte range. 
*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - // UTF8 lead byte range. + /* UTF8 lead byte range. */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, }; -// Context lookup table for small signed integers. +/* Context lookup table for small signed integers. */ static const uint8_t kSigned3BitContextLookup[] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -175,4 +176,4 @@ static inline uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) { } // namespace brotli -#endif // BROTLI_ENC_CONTEXT_H_ +#endif /* BROTLI_ENC_CONTEXT_H_ */ diff --git a/enc/dictionary_hash.h b/enc/dictionary_hash.h index b9c50f5..a6b3dd3 100644 --- a/enc/dictionary_hash.h +++ b/enc/dictionary_hash.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Hash table on the 4-byte prefixes of static dictionary words. +/* Hash table on the 4-byte prefixes of static dictionary words. */ #ifndef BROTLI_ENC_DICTIONARY_HASH_H_ #define BROTLI_ENC_DICTIONARY_HASH_H_ @@ -4114,4 +4114,4 @@ static const uint16_t kStaticDictionaryHash[] = { } // namespace brotli -#endif // BROTLI_ENC_DICTIONARY_HASH_H_ +#endif /* BROTLI_ENC_DICTIONARY_HASH_H_ */ diff --git a/enc/encode.cc b/enc/encode.cc index 496d129..86165fd 100644 --- a/enc/encode.cc +++ b/enc/encode.cc @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Implementation of Brotli compressor. +/* Implementation of Brotli compressor. */ #include "./encode.h" @@ -36,8 +36,8 @@ namespace brotli { static const int kMinQualityForBlockSplit = 4; static const int kMinQualityForContextModeling = 5; static const int kMinQualityForOptimizeHistograms = 4; -// For quality 2 there is no block splitting, so we buffer at most this much -// literals and commands. +/* For quality 2 there is no block splitting, so we buffer at most this much + literals and commands. */ static const size_t kMaxNumDelayedSymbols = 0x2fff; #define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src)); @@ -95,10 +95,10 @@ static size_t HashTableSize(size_t max_table_size, size_t input_size) { int* BrotliCompressor::GetHashTable(int quality, size_t input_size, size_t* table_size) { - // Use smaller hash table when input.size() is smaller, since we - // fill the table, incurring O(hash table size) overhead for - // compression, and if the input is short, we won't need that - // many hash table entries anyway. + /* Use smaller hash table when input.size() is smaller, since we + fill the table, incurring O(hash table size) overhead for + compression, and if the input is short, we won't need that + many hash table entries anyway. */ const size_t max_table_size = MaxHashTableSize(quality); assert(max_table_size >= 256); size_t htsize = HashTableSize(max_table_size, input_size); @@ -135,7 +135,7 @@ static void EncodeWindowBits(int lgwin, uint8_t* last_byte, } } -// Initializes the command and distance prefix codes for the first block. +/* Initializes the command and distance prefix codes for the first block. 
 */
static void InitCommandPrefixCodes(uint8_t cmd_depths[128],
                                   uint16_t cmd_bits[128],
                                   uint8_t cmd_code[512],
@@ -167,8 +167,8 @@ static void InitCommandPrefixCodes(uint8_t cmd_depths[128],
  COPY_ARRAY(cmd_depths, kDefaultCommandDepths);
  COPY_ARRAY(cmd_bits, kDefaultCommandBits);

-  // Initialize the pre-compressed form of the command and distance prefix
-  // codes.
+  /* Initialize the pre-compressed form of the command and distance prefix
+     codes. */
  static const uint8_t kDefaultCommandCode[] = {
    0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
    0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
@@ -181,13 +181,13 @@ static void InitCommandPrefixCodes(uint8_t cmd_depths[128],
  *cmd_code_numbits = kDefaultCommandCodeNumBits;
}

-// Decide about the context map based on the ability of the prediction
-// ability of the previous byte UTF8-prefix on the next byte. The
-// prediction ability is calculated as shannon entropy. Here we need
-// shannon entropy instead of 'BitsEntropy' since the prefix will be
-// encoded with the remaining 6 bits of the following byte, and
-// BitsEntropy will assume that symbol to be stored alone using Huffman
-// coding.
+/* Decide about the context map based on the prediction ability of the
+   previous byte UTF8-prefix on the next byte. The prediction ability is
+   calculated as Shannon entropy. Here we need Shannon entropy instead of
+   'BitsEntropy' since the prefix will be encoded with the remaining 6 bits
+   of the following byte, and BitsEntropy will assume that symbol to be
+   stored alone using Huffman coding. */
static void ChooseContextMap(int quality,
                             uint32_t* bigram_histo,
                             size_t* num_literal_contexts,
@@ -232,11 +232,11 @@ static void ChooseContextMap(int quality,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  if (quality < 7) {
-    // 3 context models is a bit slower, don't use it at lower qualities.
+    /* 3 context models is a bit slower, don't use it at lower qualities. */
    entropy3 = entropy1 * 10;
  }
-  // If expected savings by symbol are less than 0.2 bits, skip the
-  // context modeling -- in exchange for faster decoding speed.
+  /* If expected savings by symbol are less than 0.2 bits, skip the
+     context modeling -- in exchange for faster decoding speed. */
  if (entropy1 - entropy2 < 0.2 &&
      entropy1 - entropy3 < 0.2) {
    *num_literal_contexts = 1;
@@ -261,9 +261,9 @@ static void DecideOverLiteralContextModeling(
  if (quality < kMinQualityForContextModeling || length < 64) {
    return;
  }
-  // Gather bigram data of the UTF8 byte prefixes. To make the analysis of
-  // UTF8 data faster we only examine 64 byte long strides at every 4kB
-  // intervals.
+  /* Gather bigram data of the UTF8 byte prefixes. To make the analysis of
+     UTF8 data faster we only examine 64 byte long strides at 4kB
+     intervals. */
  const size_t end_pos = start_pos + length;
  uint32_t bigram_prefix_histo[9] = { 0 };
  for (; start_pos + 64 <= end_pos; start_pos += 4096) {
@@ -325,7 +325,7 @@ static void WriteMetaBlockInternal(const uint8_t* data,
                                   size_t* storage_ix,
                                   uint8_t* storage) {
  if (bytes == 0) {
-    // Write the ISLAST and ISEMPTY bits.
+    /* Write the ISLAST and ISEMPTY bits. */
    WriteBits(2, 3, storage_ix, storage);
    *storage_ix = (*storage_ix + 7u) & ~7u;
    return;
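/* Illustration only, not part of the patch: plain Shannon entropy of a
   histogram, the quantity the ChooseContextMap comment above contrasts with
   BitsEntropy. This generic sketch is not the encoder's ShannonEntropy
   helper; it returns the total ideal code length in bits. */
#include <stddef.h>
#include <stdint.h>
#include <math.h>

static double ToyShannonEntropy(const uint32_t* histo, size_t n) {
  static const double kInvLog2 = 1.4426950408889634;  /* 1 / ln(2) */
  double sum = 0.0, bits = 0.0;
  size_t i;
  for (i = 0; i < n; ++i) sum += histo[i];
  if (sum == 0.0) return 0.0;
  for (i = 0; i < n; ++i) {
    if (histo[i] == 0) continue;
    /* Each of histo[i] occurrences ideally costs -log2(p) bits. */
    bits -= histo[i] * log(histo[i] / sum) * kInvLog2;
  }
  return bits;
}

@@ -333,8 +333,8 @@ static void WriteMetaBlockInternal(const uint8_t* data,

  if (!ShouldCompress(data, mask, last_flush_pos, bytes,
                      num_literals, num_commands)) {
-    // Restore the distance cache, as its last update by
-    // CreateBackwardReferences is now unused.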
+ /* Restore the distance cache, as its last update by + CreateBackwardReferences is now unused. */ memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); StoreUncompressedMetaBlock(is_last, data, WrapPosition(last_flush_pos), mask, bytes, @@ -416,7 +416,7 @@ static void WriteMetaBlockInternal(const uint8_t* data, storage_ix, storage); } if (bytes + 4 < (*storage_ix >> 3)) { - // Restore the distance cache and last byte. + /* Restore the distance cache and last byte. */ memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); storage[0] = last_byte; *storage_ix = last_byte_bits; @@ -444,7 +444,7 @@ BrotliCompressor::BrotliCompressor(BrotliParams params) command_buf_(NULL), literal_buf_(NULL), is_last_block_emitted_(0) { - // Sanitize params. + /* Sanitize params. */ params_.quality = std::max(0, params_.quality); if (params_.lgwin < kMinWindowBits) { params_.lgwin = kMinWindowBits; @@ -465,18 +465,18 @@ BrotliCompressor::BrotliCompressor(BrotliParams params) std::max(kMinInputBlockBits, params_.lgblock)); } - // Initialize input and literal cost ring buffers. - // We allocate at least lgwin + 1 bits for the ring buffer so that the newly - // added block fits there completely and we still get lgwin bits and at least - // read_block_size_bits + 1 bits because the copy tail length needs to be - // smaller than ringbuffer size. + /* Initialize input and literal cost ring buffers. + We allocate at least lgwin + 1 bits for the ring buffer so that the newly + added block fits there completely and we still get lgwin bits and at least + read_block_size_bits + 1 bits because the copy tail length needs to be + smaller than ringbuffer size. */ int ringbuffer_bits = std::max(params_.lgwin + 1, params_.lgblock + 1); ringbuffer_ = new RingBuffer(ringbuffer_bits, params_.lgblock); commands_ = 0; cmd_alloc_size_ = 0; - // Initialize last byte with stream header. + /* Initialize last byte with stream header. */ EncodeWindowBits(params_.lgwin, &last_byte_, &last_byte_bits_); // Initialize distance cache. @@ -496,7 +496,7 @@ BrotliCompressor::BrotliCompressor(BrotliParams params) literal_buf_ = new uint8_t[kCompressFragmentTwoPassBlockSize]; } - // Initialize hashers. + /* Initialize hashers. */ hash_type_ = std::min(10, params_.quality); hashers_->Init(hash_type_); } @@ -516,48 +516,49 @@ void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size, ringbuffer_->Write(input_buffer, input_size); input_pos_ += input_size; - // TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the - // hashing not depend on uninitialized data. This makes compression - // deterministic and it prevents uninitialized memory warnings in Valgrind. - // Even without erasing, the output would be valid (but nondeterministic). - // - // Background information: The compressor stores short (at most 8 bytes) - // substrings of the input already read in a hash table, and detects - // repetitions by looking up such substrings in the hash table. If it - // can find a substring, it checks whether the substring is really there - // in the ring buffer (or it's just a hash collision). Should the hash - // table become corrupt, this check makes sure that the output is - // still valid, albeit the compression ratio would be bad. - // - // The compressor populates the hash table from the ring buffer as it's - // reading new bytes from the input. However, at the last few indexes of - // the ring buffer, there are not enough bytes to build full-length - // substrings from. 
Since the hash table always contains full-length
-  // substrings, we erase with dummy 0s here to make sure that those
-  // substrings will contain 0s at the end instead of uninitialized
-  // data.
-  //
-  // Please note that erasing is not necessary (because the
-  // memory region is already initialized since he ring buffer
-  // has a `tail' that holds a copy of the beginning,) so we
-  // skip erasing if we have already gone around at least once in
-  // the ring buffer.
+  /* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
+     hashing not depend on uninitialized data. This makes compression
+     deterministic and it prevents uninitialized memory warnings in Valgrind.
+     Even without erasing, the output would be valid (but nondeterministic).
+
+     Background information: The compressor stores short (at most 8 bytes)
+     substrings of the input already read in a hash table, and detects
+     repetitions by looking up such substrings in the hash table. If it
+     can find a substring, it checks whether the substring is really there
+     in the ring buffer (or it's just a hash collision). Should the hash
+     table become corrupt, this check makes sure that the output is
+     still valid, albeit the compression ratio would be bad.
+
+     The compressor populates the hash table from the ring buffer as it's
+     reading new bytes from the input. However, at the last few indexes of
+     the ring buffer, there are not enough bytes to build full-length
+     substrings from. Since the hash table always contains full-length
+     substrings, we erase with dummy 0s here to make sure that those
+     substrings will contain 0s at the end instead of uninitialized
+     data.
+
+     Please note that erasing is not necessary (because the
+     memory region is already initialized since the ring buffer
+     has a `tail' that holds a copy of the beginning), so we
+     skip erasing if we have already gone around at least once in
+     the ring buffer.
+
+     Only clear during the first round of ringbuffer writes. On
+     subsequent rounds data in the ringbuffer would be affected. */
  size_t pos = ringbuffer_->position();
-  // Only clear during the first round of ringbuffer writes. On
-  // subsequent rounds data in the ringbuffer would be affected.
  if (pos <= ringbuffer_->mask()) {
-    // This is the first time when the ring buffer is being written.
-    // We clear 7 bytes just after the bytes that have been copied from
-    // the input buffer.
-    //
-    // The ringbuffer has a "tail" that holds a copy of the beginning,
-    // but only once the ring buffer has been fully written once, i.e.,
-    // pos <= mask. For the first time, we need to write values
-    // in this tail (where index may be larger than mask), so that
-    // we have exactly defined behavior and don't read un-initialized
-    // memory. Due to performance reasons, hashing reads data using a
-    // LOAD64, which can go 7 bytes beyond the bytes written in the
-    // ringbuffer.
+    /* This is the first time when the ring buffer is being written.
+       We clear 7 bytes just after the bytes that have been copied from
+       the input buffer.
+
+       The ringbuffer has a "tail" that holds a copy of the beginning,
+       but only once the ring buffer has been fully written once, i.e.,
+       pos <= mask. For the first time, we need to write values
+       in this tail (where index may be larger than mask), so that
+       we have exactly defined behavior and don't read un-initialized
+       memory. Due to performance reasons, hashing reads data using a
+       LOAD64, which can go 7 bytes beyond the bytes written in the
+       ringbuffer. */
    memset(ringbuffer_->start() + pos, 0, 7);
  }
}
@@ -595,8 +596,8 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
  if (params_.quality <= 1) {
    if (delta == 0 && !is_last) {
-      // We have no new input data and we don't have to finish the stream, so
-      // nothing to do.
+      /* We have no new input data and we don't have to finish the stream, so
+         nothing to do. */
      *out_size = 0;
      return true;
    }
@@ -630,11 +631,11 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
    return true;
  }

-  // Theoretical max number of commands is 1 per 2 bytes.
+  /* Theoretical max number of commands is 1 per 2 bytes. */
  size_t newsize = num_commands_ + bytes / 2 + 1;
  if (newsize > cmd_alloc_size_) {
-    // Reserve a bit more memory to allow merging with a next block
-    // without realloc: that would impact speed.
+    /* Reserve a bit more memory to allow merging with a next block
+       without realloc: that would impact speed. */
    newsize += (bytes / 4) + 16;
    cmd_alloc_size_ = newsize;
    commands_ =
@@ -662,13 +663,13 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
      num_literals_ < max_literals &&
      num_commands_ < max_commands &&
      input_pos_ + input_block_size() <= last_flush_pos_ + max_length) {
-    // Merge with next input block. Everything will happen later.
+    /* Merge with next input block. Everything will happen later. */
    last_processed_pos_ = input_pos_;
    *out_size = 0;
    return true;
  }

-  // Create the last insert-only command.
+  /* Create the last insert-only command. */
  if (last_insert_len_ > 0) {
    brotli::Command cmd(last_insert_len_);
    commands_[num_commands_++] = cmd;
@@ -677,8 +678,8 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
  }

  if (!is_last && input_pos_ == last_flush_pos_) {
-    // We have no new input data and we don't have to finish the stream, so
-    // nothing to do.
+    /* We have no new input data and we don't have to finish the stream, so
+       nothing to do. */
    *out_size = 0;
    return true;
  }
@@ -708,8 +709,8 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
  }
  num_commands_ = 0;
  num_literals_ = 0;
-  // Save the state of the distance cache in case we need to restore it for
-  // emitting an uncompressed block.
+  /* Save the state of the distance cache in case we need to restore it for
+     emitting an uncompressed block. */
  memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
  *output = &storage[0];
  *out_size = storage_ix >> 3;
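/* Illustration only, not part of the patch: the growth policy sketched by the
   two comments above. A command consumes at least 2 input bytes, so `bytes`
   new bytes can add at most bytes / 2 + 1 commands; growing by an extra
   bytes / 4 + 16 leaves headroom to merge the next block without another
   realloc. EnsureCommandCapacity and ToyCommand are hypothetical names. */
#include <stdlib.h>
#include <stddef.h>

typedef struct { size_t insert_len_; size_t copy_len_; size_t dist_; } ToyCommand;

static ToyCommand* EnsureCommandCapacity(ToyCommand* commands,
                                         size_t num_commands, size_t bytes,
                                         size_t* alloc_size) {
  size_t newsize = num_commands + bytes / 2 + 1;  /* worst case: 1 per 2 bytes */
  if (newsize > *alloc_size) {
    newsize += bytes / 4 + 16;                    /* headroom for merging */
    *alloc_size = newsize;
    commands = (ToyCommand*)realloc(commands, newsize * sizeof(*commands));
    /* error handling for a NULL result elided in this sketch */
  }
  return commands;
}

@@ -829,14 +830,14 @@ static int BrotliCompressBufferQuality10(int lgwin,
    ZopfliComputeShortestPath(block_size, block_start, input_buffer, mask,
                              max_backward_limit, dist_cache, hasher, nodes,
                              &path);
-    // We allocate a command buffer in the first iteration of this loop that
-    // will be likely big enough for the whole metablock, so that for most
-    // inputs we will not have to reallocate in later iterations. We do the
-    // allocation here and not before the loop, because if the input is small,
-    // this will be allocated after the zopfli cost model is freed, so this
-    // will not increase peak memory usage.
-    // TODO: If the first allocation is too small, increase command
-    // buffer size exponentially.
+    /* We allocate a command buffer in the first iteration of this loop that
+       will likely be big enough for the whole metablock, so that for most
+       inputs we will not have to reallocate in later iterations. We do the
+       allocation here and not before the loop, because if the input is small,
+       this will be allocated after the zopfli cost model is freed, so this
+       will not increase peak memory usage.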
+ TODO: If the first allocation is too small, increase command + buffer size exponentially. */ size_t new_cmd_alloc_size = std::max(expected_num_commands, num_commands + path.size() + 1); if (cmd_alloc_size != new_cmd_alloc_size) { @@ -868,15 +869,15 @@ static int BrotliCompressBufferQuality10(int lgwin, size_t storage_ix = last_byte_bits; if (metablock_size == 0) { - // Write the ISLAST and ISEMPTY bits. + /* Write the ISLAST and ISEMPTY bits. */ storage = new uint8_t[16]; storage[0] = last_byte; WriteBits(2, 3, &storage_ix, storage); storage_ix = (storage_ix + 7u) & ~7u; } else if (!ShouldCompress(input_buffer, mask, metablock_start, metablock_size, num_literals, num_commands)) { - // Restore the distance cache, as its last update by - // CreateBackwardReferences is now unused. + /* Restore the distance cache, as its last update by + CreateBackwardReferences is now unused. */ memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); storage = new uint8_t[metablock_size + 16]; storage[0] = last_byte; @@ -914,7 +915,7 @@ static int BrotliCompressBufferQuality10(int lgwin, mb, &storage_ix, storage); if (metablock_size + 4 < (storage_ix >> 3)) { - // Restore the distance cache and last byte. + /* Restore the distance cache and last byte. */ memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); storage[0] = last_byte; storage_ix = last_byte_bits; @@ -928,8 +929,8 @@ static int BrotliCompressBufferQuality10(int lgwin, metablock_start += metablock_size; prev_byte = input_buffer[metablock_start - 1]; prev_byte2 = input_buffer[metablock_start - 2]; - // Save the state of the distance cache in case we need to restore it for - // emitting an uncompressed block. + /* Save the state of the distance cache in case we need to restore it for + emitting an uncompressed block. */ memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0])); const size_t out_size = storage_ix >> 3; @@ -955,17 +956,17 @@ int BrotliCompressBuffer(BrotliParams params, size_t* encoded_size, uint8_t* encoded_buffer) { if (*encoded_size == 0) { - // Output buffer needs at least one byte. + /* Output buffer needs at least one byte. */ return 0; } if (input_size == 0) { - // Handle the special case of empty input. + /* Handle the special case of empty input. */ *encoded_size = 1; *encoded_buffer = 6; return 1; } if (params.quality == 10) { - // TODO: Implement this direct path for all quality levels. + /* TODO: Implement this direct path for all quality levels. */ const int lgwin = std::min(24, std::max(16, params.lgwin)); return BrotliCompressBufferQuality10(lgwin, input_size, input_buffer, encoded_size, encoded_buffer); diff --git a/enc/encode.h b/enc/encode.h index 917b174..e319f3f 100644 --- a/enc/encode.h +++ b/enc/encode.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// API for Brotli compression +/* API for Brotli compression. */ #ifndef BROTLI_ENC_ENCODE_H_ #define BROTLI_ENC_ENCODE_H_ @@ -38,23 +38,23 @@ struct BrotliParams { enable_context_modeling(true) {} enum Mode { - // Default compression mode. The compressor does not know anything in - // advance about the properties of the input. + /* Default compression mode. The compressor does not know anything in + advance about the properties of the input. */ MODE_GENERIC = 0, - // Compression mode for UTF-8 format text input. + /* Compression mode for UTF-8 format text input. */ MODE_TEXT = 1, - // Compression mode used in WOFF 2.0. + /* Compression mode used in WOFF 2.0. 
*/ MODE_FONT = 2 }; Mode mode; - // Controls the compression-speed vs compression-density tradeoffs. The higher - // the quality, the slower the compression. Range is 0 to 11. + /* Controls the compression-speed vs compression-density tradeoffs. The higher + the |quality|, the slower the compression. Range is 0 to 11. */ int quality; - // Base 2 logarithm of the sliding window size. Range is 10 to 24. + /* Base 2 logarithm of the sliding window size. Range is 10 to 24. */ int lgwin; - // Base 2 logarithm of the maximum input block size. Range is 16 to 24. - // If set to 0, the value will be set based on the quality. + /* Base 2 logarithm of the maximum input block size. Range is 16 to 24. + If set to 0, the value will be set based on the quality. */ int lgblock; // These settings are deprecated and will be ignored. @@ -207,4 +207,4 @@ int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict, } // namespace brotli -#endif // BROTLI_ENC_ENCODE_H_ +#endif /* BROTLI_ENC_ENCODE_H_ */ diff --git a/enc/encode_parallel.cc b/enc/encode_parallel.cc index b0ddb87..6e4e8d5 100644 --- a/enc/encode_parallel.cc +++ b/enc/encode_parallel.cc @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Implementation of parallel Brotli compressor. +/* Implementation of parallel Brotli compressor. */ #include "./encode_parallel.h" @@ -63,33 +63,33 @@ bool WriteMetaBlockParallel(const BrotliParams& params, return false; } - // Copy prefix + next input block into a continuous area. + /* Copy prefix + next input block into a continuous area. */ uint32_t input_pos = prefix_size; - // CreateBackwardReferences reads up to 3 bytes past the end of input if the - // mask points past the end of input. - // FindMatchLengthWithLimit could do another 8 bytes look-forward. + /* CreateBackwardReferences reads up to 3 bytes past the end of input if the + mask points past the end of input. + FindMatchLengthWithLimit could do another 8 bytes look-forward. */ std::vector input(prefix_size + input_size + 4 + 8); memcpy(&input[0], prefix_buffer, prefix_size); memcpy(&input[input_pos], input_buffer, input_size); - // Since we don't have a ringbuffer, masking is a no-op. - // We use one less bit than the full range because some of the code uses - // mask + 1 as the size of the ringbuffer. + /* Since we don't have a ringbuffer, masking is a no-op. + We use one less bit than the full range because some of the code uses + mask + 1 as the size of the ringbuffer. */ const uint32_t mask = std::numeric_limits::max() >> 1; uint8_t prev_byte = input_pos > 0 ? input[(input_pos - 1) & mask] : 0; uint8_t prev_byte2 = input_pos > 1 ? input[(input_pos - 2) & mask] : 0; - // Decide about UTF8 mode. + /* Decide about UTF8 mode. */ static const double kMinUTF8Ratio = 0.75; bool utf8_mode = IsMostlyUTF8(&input[0], input_pos, mask, input_size, kMinUTF8Ratio); - // Initialize hashers. + /* Initialize hashers. */ int hash_type = std::min(10, params.quality); Hashers* hashers = new Hashers(); hashers->Init(hash_type); - // Compute backward references. + /* Compute backward references. */ size_t last_insert_len = 0; size_t num_commands = 0; size_t num_literals = 0; @@ -119,7 +119,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params, } assert(num_commands != 0); - // Build the meta-block. + /* Build the meta-block. */ MetaBlockSplit mb; uint32_t num_direct_distance_codes = params.mode == BrotliParams::MODE_FONT ? 
12 : 0; @@ -141,7 +141,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params, &mb); } - // Set up the temporary output storage. + /* Set up the temporary output storage. */ const size_t max_out_size = 2 * input_size + 500; std::vector storage(max_out_size); uint8_t first_byte = 0; @@ -161,7 +161,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params, storage[0] = static_cast(first_byte); size_t storage_ix = first_byte_bits; - // Store the meta-block to the temporary output. + /* Store the meta-block to the temporary output. */ StoreMetaBlock(&input[0], input_pos, input_size, mask, prev_byte, prev_byte2, is_last, @@ -173,14 +173,14 @@ bool WriteMetaBlockParallel(const BrotliParams& params, &storage_ix, &storage[0]); free(commands); - // If this is not the last meta-block, store an empty metadata - // meta-block so that the meta-block will end at a byte boundary. + /* If this is not the last meta-block, store an empty metadata + meta-block so that the meta-block will end at a byte boundary. */ if (!is_last) { StoreSyncMetaBlock(&storage_ix, &storage[0]); } - // If the compressed data is too large, fall back to an uncompressed - // meta-block. + /* If the compressed data is too large, fall back to an uncompressed + meta-block. */ size_t output_size = storage_ix >> 3; if (input_size + 4 < output_size) { storage[0] = static_cast(first_byte); @@ -191,7 +191,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params, output_size = storage_ix >> 3; } - // Copy the temporary output with size-check to the output. + /* Copy the temporary output with size-check to the output. */ if (output_size > *encoded_size) { return false; } @@ -200,7 +200,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params, return true; } -} // namespace +} /* namespace */ int BrotliCompressBufferParallel(BrotliParams params, size_t input_size, @@ -208,15 +208,15 @@ int BrotliCompressBufferParallel(BrotliParams params, size_t* encoded_size, uint8_t* encoded_buffer) { if (*encoded_size == 0) { - // Output buffer needs at least one byte. + /* Output buffer needs at least one byte. */ return 0; - } else if (input_size == 0) { + } else if (input_size == 0) { encoded_buffer[0] = 6; *encoded_size = 1; return 1; } - // Sanitize params. + /* Sanitize params. */ if (params.lgwin < kMinWindowBits) { params.lgwin = kMinWindowBits; } else if (params.lgwin > kMaxWindowBits) { @@ -237,7 +237,7 @@ int BrotliCompressBufferParallel(BrotliParams params, std::vector > compressed_pieces; - // Compress block-by-block independently. + /* Compress block-by-block independently. */ for (size_t pos = 0; pos < input_size; ) { uint32_t input_block_size = static_cast(std::min(max_input_block_size, input_size - pos)); @@ -261,7 +261,7 @@ int BrotliCompressBufferParallel(BrotliParams params, pos += input_block_size; } - // Piece together the output. + /* Piece together the output. */ size_t out_pos = 0; for (size_t i = 0; i < compressed_pieces.size(); ++i) { const std::vector& out = compressed_pieces[i]; @@ -276,4 +276,4 @@ int BrotliCompressBufferParallel(BrotliParams params, return true; } -} // namespace brotli +} /* namespace brotli */ diff --git a/enc/encode_parallel.h b/enc/encode_parallel.h index f2caa2e..b85d961 100644 --- a/enc/encode_parallel.h +++ b/enc/encode_parallel.h @@ -4,9 +4,9 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// API for parallel Brotli compression -// Note that this is only a proof of concept currently and not part of the -// final API yet. 
+/* API for parallel Brotli compression
+   Note that this is only a proof of concept currently and not part of the
+   final API yet. */

 #ifndef BROTLI_ENC_ENCODE_PARALLEL_H_
 #define BROTLI_ENC_ENCODE_PARALLEL_H_
@@ -22,6 +22,6 @@ int BrotliCompressBufferParallel(BrotliParams params,
                                 size_t* encoded_size,
                                 uint8_t* encoded_buffer);

-} // namespace brotli
+} /* namespace brotli */

-#endif // BROTLI_ENC_ENCODE_PARALLEL_H_
+#endif /* BROTLI_ENC_ENCODE_PARALLEL_H_ */
diff --git a/enc/entropy_encode.cc b/enc/entropy_encode.cc
index b07e111..ef7361c 100644
--- a/enc/entropy_encode.cc
+++ b/enc/entropy_encode.cc
@@ -4,7 +4,7 @@
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

-// Entropy encoding (Huffman) utilities.
+/* Entropy encoding (Huffman) utilities. */

#include "./entropy_encode.h"
@@ -31,7 +31,7 @@ void SetDepth(const HuffmanTree &p,
  }
}

-// Sort the root nodes, least popular first.
+/* Sort the root nodes, least popular first. */
static inline bool SortHuffmanTree(const HuffmanTree& v0,
                                   const HuffmanTree& v1) {
  if (v0.total_count_ != v1.total_count_) {
@@ -40,30 +40,30 @@ static inline bool SortHuffmanTree(const HuffmanTree& v0,
  return v0.index_right_or_value_ > v1.index_right_or_value_;
}

-// This function will create a Huffman tree.
-//
-// The catch here is that the tree cannot be arbitrarily deep.
-// Brotli specifies a maximum depth of 15 bits for "code trees"
-// and 7 bits for "code length code trees."
-//
-// count_limit is the value that is to be faked as the minimum value
-// and this minimum value is raised until the tree matches the
-// maximum length requirement.
-//
-// This algorithm is not of excellent performance for very long data blocks,
-// especially when population counts are longer than 2**tree_limit, but
-// we are not planning to use this with extremely long blocks.
-//
-// See http://en.wikipedia.org/wiki/Huffman_coding
+/* This function will create a Huffman tree.
+
+   The catch here is that the tree cannot be arbitrarily deep.
+   Brotli specifies a maximum depth of 15 bits for "code trees"
+   and 7 bits for "code length code trees."
+
+   count_limit is the value that is to be faked as the minimum value
+   and this minimum value is raised until the tree matches the
+   maximum length requirement.
+
+   This algorithm does not perform well for very long data blocks,
+   especially when population counts are larger than 2**tree_limit, but
+   we are not planning to use this with extremely long blocks.
+
+   See http://en.wikipedia.org/wiki/Huffman_coding */
void CreateHuffmanTree(const uint32_t *data,
                       const size_t length,
                       const int tree_limit,
                       HuffmanTree* tree,
                       uint8_t *depth) {
-  // For block sizes below 64 kB, we never need to do a second iteration
-  // of this loop. Probably all of our block sizes will be smaller than
-  // that, so this loop is mostly of academic interest. If we actually
-  // would need this, we would be better off with the Katajainen algorithm.
+  /* For block sizes below 64 kB, we never need to do a second iteration
+     of this loop. Probably all of our block sizes will be smaller than
+     that, so this loop is mostly of academic interest. If we actually
+     would need this, we would be better off with the Katajainen algorithm. */
  for (uint32_t count_limit = 1; ; count_limit *= 2) {
    size_t n = 0;
    for (size_t i = length; i != 0;) {
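/* Illustration only, not part of the patch: the depth-limiting strategy that
   CreateHuffmanTree's comment describes, using an ordinary priority-queue
   Huffman construction instead of this file's sorted-array one. All names
   here are hypothetical. Termination assumes tree_limit is at least
   ceil(log2(#nonzero symbols)), which holds for Brotli's 15-bit limit. */
#include <cstdint>
#include <cstddef>
#include <algorithm>
#include <queue>
#include <vector>

struct ToyGroup {
  uint64_t weight;
  std::vector<size_t> symbols;  /* leaves merged into this subtree */
};
struct ToyHeavier {
  bool operator()(const ToyGroup& a, const ToyGroup& b) const {
    return a.weight > b.weight;  /* makes the priority queue a min-heap */
  }
};

/* Builds code depths treating every nonzero count as at least `floor`;
   returns the maximum depth produced. */
static int ToyBuildDepths(const std::vector<uint32_t>& counts, uint32_t floor,
                          std::vector<uint8_t>* depth) {
  std::priority_queue<ToyGroup, std::vector<ToyGroup>, ToyHeavier> q;
  for (size_t i = 0; i < counts.size(); ++i) {
    if (counts[i] == 0) continue;
    ToyGroup g;
    g.weight = std::max(counts[i], floor);
    g.symbols.push_back(i);
    q.push(g);
  }
  depth->assign(counts.size(), 0);
  if (q.size() == 1) { (*depth)[q.top().symbols[0]] = 1; return 1; }
  int max_depth = 0;
  while (q.size() > 1) {
    ToyGroup a = q.top(); q.pop();
    ToyGroup b = q.top(); q.pop();
    ToyGroup m;
    m.weight = a.weight + b.weight;
    m.symbols = a.symbols;
    m.symbols.insert(m.symbols.end(), b.symbols.begin(), b.symbols.end());
    for (size_t i = 0; i < m.symbols.size(); ++i) {
      if (++(*depth)[m.symbols[i]] > max_depth) max_depth = (*depth)[m.symbols[i]];
    }
    q.push(m);
  }
  return max_depth;
}

/* Raising the count floor flattens the distribution, which can only make the
   tree shallower, so doubling it must eventually satisfy tree_limit. */
static void ToyCreateLimitedTree(const std::vector<uint32_t>& counts,
                                 int tree_limit, std::vector<uint8_t>* depth) {
  for (uint32_t floor = 1; ; floor *= 2) {
    if (ToyBuildDepths(counts, floor, depth) <= tree_limit) break;
  }
}

@@ -81,19 +81,19 @@ void CreateHuffmanTree(const uint32_t *data,

    std::sort(tree, tree + n, SortHuffmanTree);

-    // The nodes are:
-    // [0, n): the sorted leaf nodes that we start with.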
- // [n]: we add a sentinel here. - // [n + 1, 2n): new parent nodes are added here, starting from - // (n+1). These are naturally in ascending order. - // [2n]: we add a sentinel at the end as well. - // There will be (2n+1) elements at the end. + /* The nodes are: + [0, n): the sorted leaf nodes that we start with. + [n]: we add a sentinel here. + [n + 1, 2n): new parent nodes are added here, starting from + (n+1). These are naturally in ascending order. + [2n]: we add a sentinel at the end as well. + There will be (2n+1) elements at the end. */ const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1); tree[n] = sentinel; tree[n + 1] = sentinel; - size_t i = 0; // Points to the next leaf node. - size_t j = n + 1; // Points to the next non-leaf node. + size_t i = 0; /* Points to the next leaf node. */ + size_t j = n + 1; /* Points to the next non-leaf node. */ for (size_t k = n - 1; k != 0; --k) { size_t left, right; if (tree[i].total_count_ <= tree[j].total_count_) { @@ -111,21 +111,20 @@ void CreateHuffmanTree(const uint32_t *data, ++j; } - // The sentinel node becomes the parent node. + /* The sentinel node becomes the parent node. */ size_t j_end = 2 * n - k; tree[j_end].total_count_ = tree[left].total_count_ + tree[right].total_count_; tree[j_end].index_left_ = static_cast<int16_t>(left); tree[j_end].index_right_or_value_ = static_cast<int16_t>(right); - // Add back the last sentinel node. + /* Add back the last sentinel node. */ tree[j_end + 1] = sentinel; } SetDepth(tree[2 * n - 1], &tree[0], depth, 0); - // We need to pack the Huffman tree in tree_limit bits. - // If this was not successful, add fake entities to the lowest values - // and retry. + /* We need to pack the Huffman tree in tree_limit bits. If this was not + successful, add fake entities to the lowest values and retry. */ if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) { break; } @@ -229,7 +228,7 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts, size_t limit; size_t sum; const size_t streak_limit = 1240; - // Let's make the Huffman code more compatible with rle encoding. + /* Let's make the Huffman code more compatible with rle encoding. */ size_t i; for (i = 0; i < length; i++) { if (counts[i]) { @@ -243,9 +242,9 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts, --length; } if (length == 0) { - return; // All zeros. + return; /* All zeros. */ } - // Now counts[0..length - 1] does not have trailing zeros. + /* Now counts[0..length - 1] does not have trailing zeros. */ { size_t nonzeros = 0; uint32_t smallest_nonzero = 1 << 30; @@ -258,7 +257,7 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts, } } if (nonzeros < 5) { - // Small histogram will model it well. + /* Small histogram will model it well. */ return; } size_t zeros = length - nonzeros; @@ -275,13 +274,13 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts, return; } } - // 2) Let's mark all population counts that already can be encoded - // with an rle code. + /* 2) Let's mark all population counts that already can be encoded + with an rle code. */ memset(good_for_rle, 0, length); { - // Let's not spoil any of the existing good rle codes. - // Mark any seq of 0's that is longer as 5 as a good_for_rle. - // Mark any seq of non-0's that is longer as 7 as a good_for_rle. + /* Let's not spoil any of the existing good rle codes. + Mark any seq of 0's that is longer than 5 as a good_for_rle. + Mark any seq of non-0's that is longer than 7 as a good_for_rle.
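The retry loop just shown is the whole depth-limiting strategy: whenever some leaf ends up deeper than tree_limit, every population count is clamped from below by a doubled count_limit, which flattens the histogram and therefore the tree. A standalone sketch of that idea, with simplified stand-in names rather than the encoder's own helpers:

```cpp
// Sketch of the count-clamping retry used by CreateHuffmanTree above.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <queue>
#include <utility>
#include <vector>

typedef std::pair<uint64_t, std::vector<int> > Node;  /* (weight, leaf ids) */

/* Huffman depths for max(count, count_limit) over all nonzero counts. */
static std::vector<int> BuildDepths(const std::vector<uint32_t>& counts,
                                    uint32_t count_limit) {
  std::priority_queue<Node, std::vector<Node>, std::greater<Node> > q;
  for (size_t i = 0; i < counts.size(); ++i) {
    if (counts[i] == 0) continue;
    q.push(Node(std::max(counts[i], count_limit),
                std::vector<int>(1, static_cast<int>(i))));
  }
  std::vector<int> depth(counts.size(), 0);
  while (q.size() > 1) {
    Node a = q.top(); q.pop();
    Node b = q.top(); q.pop();
    /* Merging two subtrees deepens every leaf inside them by one bit. */
    for (size_t i = 0; i < a.second.size(); ++i) ++depth[a.second[i]];
    for (size_t i = 0; i < b.second.size(); ++i) ++depth[b.second[i]];
    a.second.insert(a.second.end(), b.second.begin(), b.second.end());
    q.push(Node(a.first + b.first, a.second));
  }
  return depth;
}

int main() {
  /* A very skewed histogram forces a deep tree; clamping flattens it. */
  static const uint32_t raw[] = { 1000, 1, 1, 1, 1, 1, 1, 1 };
  std::vector<uint32_t> counts(raw, raw + 8);
  const int tree_limit = 3;
  for (uint32_t count_limit = 1; ; count_limit *= 2) {
    std::vector<int> depth = BuildDepths(counts, count_limit);
    int max_depth = *std::max_element(depth.begin(), depth.end());
    printf("count_limit=%u max_depth=%d\n", count_limit, max_depth);
    if (max_depth <= tree_limit) break;
  }
  return 0;
}
```

The clamping always converges: once count_limit dominates every count, all weights are equal and the tree is balanced, whose depth over n symbols is only ceil(log2(n)).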
*/ uint32_t symbol = counts[0]; size_t step = 0; for (i = 0; i <= length; ++i) { @@ -302,8 +301,8 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts, } } } - // 3) Let's replace those population counts that lead to more rle codes. - // Math here is in 24.8 fixed point representation. + /* 3) Let's replace those population counts that lead to more rle codes. + Math here is in 24.8 fixed point representation. */ stride = 0; limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420; sum = 0; @@ -313,26 +312,26 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts, (256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) { if (stride >= 4 || (stride >= 3 && sum == 0)) { size_t k; - // The stride must end, collapse what we have, if we have enough (4). + /* The stride must end, collapse what we have, if we have enough (4). */ size_t count = (sum + stride / 2) / stride; if (count == 0) { count = 1; } if (sum == 0) { - // Don't make an all zeros stride to be upgraded to ones. + /* Don't make an all zeros stride to be upgraded to ones. */ count = 0; } for (k = 0; k < stride; ++k) { - // We don't want to change value at counts[i], - // that is already belonging to the next stride. Thus - 1. + /* We don't want to change value at counts[i], + that already belongs to the next stride. Thus - 1. */ counts[i - k - 1] = static_cast<uint32_t>(count); } } stride = 0; sum = 0; if (i < length - 2) { - // All interesting strides have a count of at least 4, - // at least when non-zeros. + /* All interesting strides have a count of at least 4, */ + /* at least when non-zeros. */ limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420; } else if (i < length) { limit = 256 * counts[i]; @@ -387,7 +386,7 @@ void WriteHuffmanTree(const uint8_t* depth, uint8_t* extra_bits_data) { uint8_t previous_value = 8; - // Throw away trailing zeros. + /* Throw away trailing zeros. */ size_t new_length = length; for (size_t i = 0; i < length; ++i) { if (depth[length - i - 1] == 0) { @@ -397,17 +396,17 @@ void WriteHuffmanTree(const uint8_t* depth, } } - // First gather statistics on if it is a good idea to do rle. + /* First gather statistics on if it is a good idea to do rle. */ bool use_rle_for_non_zero = false; bool use_rle_for_zero = false; if (length > 50) { - // Find rle coding for longer codes. - // Shorter codes seem not to benefit from rle. + /* Find rle coding for longer codes. + Shorter codes seem not to benefit from rle. */ DecideOverRleUse(depth, new_length, &use_rle_for_non_zero, &use_rle_for_zero); } - // Actual rle coding. + /* Actual rle coding. */ for (size_t i = 0; i < new_length;) { const uint8_t value = depth[i]; size_t reps = 1; @@ -432,7 +431,7 @@ void WriteHuffmanTree(const uint8_t* depth, namespace { uint16_t ReverseBits(int num_bits, uint16_t bits) { - static const size_t kLut[16] = { // Pre-reversed 4-bit values. + static const size_t kLut[16] = { /* Pre-reversed 4-bit values. */ 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf }; @@ -451,8 +450,8 @@ uint16_t ReverseBits(int num_bits, uint16_t bits) { void ConvertBitDepthsToSymbols(const uint8_t *depth, size_t len, uint16_t *bits) { - // In Brotli, all bit depths are [1..15] - // 0 bit depth means that the symbol does not exist. + /* In Brotli, all bit depths are [1..15] + 0 bit depth means that the symbol does not exist.
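For reference, the kLut table above drives a nibble-at-a-time bit reversal: emit pre-reversed 4-bit groups in the opposite order, then drop the low bits that were never requested. A self-contained sketch mirroring that scheme:

```cpp
#include <cstdint>
#include <cstdio>

static uint16_t ReverseBitsSketch(int num_bits, uint16_t bits) {
  static const uint16_t kLut[16] = {  /* pre-reversed 4-bit values */
    0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
    0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
  };
  uint16_t retval = kLut[bits & 0xf];
  for (int i = 4; i < 16; i += 4) {
    retval = static_cast<uint16_t>(retval << 4);
    bits = static_cast<uint16_t>(bits >> 4);
    retval = static_cast<uint16_t>(retval | kLut[bits & 0xf]);
  }
  /* A full 16 bits were reversed; shift away the excess. */
  return static_cast<uint16_t>(retval >> (16 - num_bits));
}

int main() {
  printf("0x%x\n", ReverseBitsSketch(5, 0x13));  /* 10011 -> 11001 = 0x19 */
  return 0;
}
```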
*/ const int kMaxBits = 16; // 0..15 are values for bits uint16_t bl_count[kMaxBits] = { 0 }; { diff --git a/enc/entropy_encode.h b/enc/entropy_encode.h index 1bc46d4..9757930 100644 --- a/enc/entropy_encode.h +++ b/enc/entropy_encode.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Entropy encoding (Huffman) utilities. +/* Entropy encoding (Huffman) utilities. */ #ifndef BROTLI_ENC_ENTROPY_ENCODE_H_ #define BROTLI_ENC_ENTROPY_ENCODE_H_ @@ -17,7 +17,7 @@ namespace brotli { -// A node of a Huffman tree. +/* A node of a Huffman tree. */ struct HuffmanTree { HuffmanTree() {} HuffmanTree(uint32_t count, int16_t left, int16_t right) @@ -33,44 +33,44 @@ struct HuffmanTree { void SetDepth(const HuffmanTree &p, HuffmanTree *pool, uint8_t *depth, uint8_t level); -// This function will create a Huffman tree. -// -// The (data,length) contains the population counts. -// The tree_limit is the maximum bit depth of the Huffman codes. -// -// The depth contains the tree, i.e., how many bits are used for -// the symbol. -// -// The actual Huffman tree is constructed in the tree[] array, which has to -// be at least 2 * length + 1 long. -// -// See http://en.wikipedia.org/wiki/Huffman_coding +/* This function will create a Huffman tree. + + The (data,length) contains the population counts. + The tree_limit is the maximum bit depth of the Huffman codes. + + The depth contains the tree, i.e., how many bits are used for + the symbol. + + The actual Huffman tree is constructed in the tree[] array, which has to + be at least 2 * length + 1 long. + + See http://en.wikipedia.org/wiki/Huffman_coding */ void CreateHuffmanTree(const uint32_t *data, const size_t length, const int tree_limit, HuffmanTree* tree, uint8_t *depth); -// Change the population counts in a way that the consequent -// Huffman tree compression, especially its rle-part will be more -// likely to compress this data more efficiently. -// -// length contains the size of the histogram. -// counts contains the population counts. -// good_for_rle is a buffer of at least length size +/* Change the population counts in a way that the consequent + Huffman tree compression, especially its rle-part will be more + likely to compress this data more efficiently. + + length contains the size of the histogram. + counts contains the population counts. + good_for_rle is a buffer of at least length size */ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts, uint8_t* good_for_rle); -// Write a Huffman tree from bit depths into the bitstream representation -// of a Huffman tree. The generated Huffman tree is to be compressed once -// more using a Huffman tree +/* Write a Huffman tree from bit depths into the bitstream representation + of a Huffman tree. The generated Huffman tree is to be compressed once + more using a Huffman tree */ void WriteHuffmanTree(const uint8_t* depth, size_t num, size_t* tree_size, uint8_t* tree, uint8_t* extra_bits_data); -// Get the actual bit values for a tree of bit depths. +/* Get the actual bit values for a tree of bit depths. 
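The depth-to-code step works like any canonical Huffman assignment: count how many symbols sit at each depth, derive the first code per depth, then hand out consecutive codes. A minimal sketch; the final bit reversal is left out here, although Brotli needs it because codes are written least-significant-bit first:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  const int kMaxBits = 16;
  const uint8_t depth[4] = { 2, 1, 3, 3 };  /* example bit depths */
  uint16_t bl_count[kMaxBits] = { 0 };
  for (int i = 0; i < 4; ++i) ++bl_count[depth[i]];
  uint16_t next_code[kMaxBits] = { 0 };
  uint16_t code = 0;
  for (int bits = 1; bits < kMaxBits; ++bits) {
    code = static_cast<uint16_t>((code + bl_count[bits - 1]) << 1);
    next_code[bits] = code;  /* first canonical code of this depth */
  }
  for (int i = 0; i < 4; ++i) {
    printf("symbol %d: depth %d code %u\n",
           i, depth[i], next_code[depth[i]]++);
  }
  return 0;  /* prints codes 2, 0, 6, 7: a valid prefix code */
}
```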
*/ void ConvertBitDepthsToSymbols(const uint8_t *depth, size_t len, uint16_t *bits); @@ -102,4 +102,4 @@ typedef EntropyCode<258> EntropyCodeBlockType; } // namespace brotli -#endif // BROTLI_ENC_ENTROPY_ENCODE_H_ +#endif /* BROTLI_ENC_ENTROPY_ENCODE_H_ */ diff --git a/enc/entropy_encode_static.h b/enc/entropy_encode_static.h index 33e102f..0e1cfa7 100644 --- a/enc/entropy_encode_static.h +++ b/enc/entropy_encode_static.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Static entropy codes used for faster meta-block encoding. +/* Static entropy codes used for faster meta-block encoding. */ #ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ #define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ @@ -569,4 +569,4 @@ inline void StoreStaticDistanceHuffmanTree(size_t* storage_ix, } // namespace brotli -#endif // BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ +#endif /* BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ */ diff --git a/enc/fast_log.h b/enc/fast_log.h index a054eca..6b2d8ff 100644 --- a/enc/fast_log.h +++ b/enc/fast_log.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Utilities for fast computation of logarithms. +/* Utilities for fast computation of logarithms. */ #ifndef BROTLI_ENC_FAST_LOG_H_ #define BROTLI_ENC_FAST_LOG_H_ @@ -26,10 +26,10 @@ static inline uint32_t Log2FloorNonZero(size_t n) { #endif } -// A lookup table for small values of log2(int) to be used in entropy -// computation. -// -// ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]]) +/* A lookup table for small values of log2(int) to be used in entropy + computation. + + ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]]) */ static const float kLog2Table[] = { 0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f, 1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f, @@ -119,14 +119,15 @@ static const float kLog2Table[] = { 7.9943534368588578f }; -// Faster logarithm for small integers, with the property of log2(0) == 0. +/* Faster logarithm for small integers, with the property of log2(0) == 0. */ static inline double FastLog2(size_t v) { if (v < sizeof(kLog2Table) / sizeof(kLog2Table[0])) { return kLog2Table[v]; } -#if defined(_MSC_VER) && _MSC_VER <= 1700 - // Visual Studio 2012 does not have the log2() function defined, so we use - // log() and a multiplication instead. +#if (defined(_MSC_VER) && _MSC_VER <= 1600) || \ (defined(__ANDROID_API__) && __ANDROID_API__ < 18) + /* Visual Studio 2010 and Android API levels < 18 do not have the log2() + * function defined, so we use log() and a multiplication instead. */ static const double kLog2Inv = 1.4426950408889634f; return log(static_cast<double>(v)) * kLog2Inv; #else @@ -136,4 +137,4 @@ static inline double FastLog2(size_t v) { } // namespace brotli -#endif // BROTLI_ENC_FAST_LOG_H_ +#endif /* BROTLI_ENC_FAST_LOG_H_ */ diff --git a/enc/find_match_length.h b/enc/find_match_length.h index dbad574..ed143d1 100644 --- a/enc/find_match_length.h +++ b/enc/find_match_length.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Function to find maximal matching prefixes of strings. +/* Function to find maximal matching prefixes of strings. */ #ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_ #define BROTLI_ENC_FIND_MATCH_LENGTH_H_ @@ -14,14 +14,14 @@ namespace brotli { -// Separate implementation for little-endian 64-bit targets, for speed. +/* Separate implementation for little-endian 64-bit targets, for speed.
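A toy version of FastLog2's table-then-fallback split, with the table abbreviated to four entries so the sketch stays short:

```cpp
#include <cmath>
#include <cstdio>

static double FastLog2Sketch(unsigned v) {
  static const double kLog2Table[] = { 0.0, 0.0, 1.0, 1.5849625007211563 };
  if (v < sizeof(kLog2Table) / sizeof(kLog2Table[0])) {
    return kLog2Table[v];  /* covers the hot small values; log2(0) == 0 */
  }
  static const double kLog2Inv = 1.4426950408889634;  /* 1 / ln(2) */
  return log(static_cast<double>(v)) * kLog2Inv;      /* portable fallback */
}

int main() {
  printf("%f %f %f\n", FastLog2Sketch(0), FastLog2Sketch(3),
         FastLog2Sketch(1024));  /* 0.000000 1.584963 10.000000 */
  return 0;
}
```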
*/ #if defined(__GNUC__) && defined(_LP64) && defined(IS_LITTLE_ENDIAN) static inline size_t FindMatchLengthWithLimit(const uint8_t* s1, const uint8_t* s2, size_t limit) { size_t matched = 0; - size_t limit2 = (limit >> 3) + 1; // + 1 is for pre-decrement in while + size_t limit2 = (limit >> 3) + 1; /* + 1 is for pre-decrement in while */ while (PREDICT_TRUE(--limit2)) { if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) == BROTLI_UNALIGNED_LOAD64(s1 + matched))) { @@ -35,7 +35,7 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1, return matched; } } - limit = (limit & 7) + 1; // + 1 is for pre-decrement in while + limit = (limit & 7) + 1; /* + 1 is for pre-decrement in while */ while (--limit) { if (PREDICT_TRUE(s1[matched] == *s2)) { ++s2; @@ -48,15 +48,15 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1, } #else static inline size_t FindMatchLengthWithLimit(const uint8_t* s1, - const uint8_t* s2, - size_t limit) { + const uint8_t* s2, + size_t limit) { size_t matched = 0; const uint8_t* s2_limit = s2 + limit; const uint8_t* s2_ptr = s2; - // Find out how long the match is. We loop over the data 32 bits at a - // time until we find a 32-bit block that doesn't match; then we find - // the first non-matching bit and use that to calculate the total - // length of the match. + /* Find out how long the match is. We loop over the data 32 bits at a + time until we find a 32-bit block that doesn't match; then we find + the first non-matching bit and use that to calculate the total + length of the match. */ while (s2_ptr <= s2_limit - 4 && BROTLI_UNALIGNED_LOAD32(s2_ptr) == BROTLI_UNALIGNED_LOAD32(s1 + matched)) { @@ -73,4 +73,4 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1, } // namespace brotli -#endif // BROTLI_ENC_FIND_MATCH_LENGTH_H_ +#endif /* BROTLI_ENC_FIND_MATCH_LENGTH_H_ */ diff --git a/enc/hash.h b/enc/hash.h index e459941..1b34f08 100644 --- a/enc/hash.h +++ b/enc/hash.h @@ -4,8 +4,8 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// A (forgetful) hash table to the data seen by the compressor, to -// help create backward references to previous data. +/* A (forgetful) hash table to the data seen by the compressor, to + help create backward references to previous data. */ #ifndef BROTLI_ENC_HASH_H_ #define BROTLI_ENC_HASH_H_ @@ -42,38 +42,38 @@ static const uint8_t kCutoffTransforms[] = { 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 }; -// kHashMul32 multiplier has these properties: -// * The multiplier must be odd. Otherwise we may lose the highest bit. -// * No long streaks of 1s or 0s. -// * There is no effort to ensure that it is a prime, the oddity is enough -// for this use. -// * The number has been tuned heuristically against compression benchmarks. +/* kHashMul32 multiplier has these properties: + * The multiplier must be odd. Otherwise we may lose the highest bit. + * No long streaks of 1s or 0s. + * There is no effort to ensure that it is a prime, the oddity is enough + for this use. + * The number has been tuned heuristically against compression benchmarks. */ static const uint32_t kHashMul32 = 0x1e35a7bd; template<int kShiftBits> inline uint32_t Hash(const uint8_t *data) { uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32; - // The higher bits contain more mixture from the multiplication, - // so we take our results from there. + /* The higher bits contain more mixture from the multiplication, + so we take our results from there.
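On the little-endian fast path just shown, once an 8-byte block differs, the index of the first mismatching byte equals the trailing-zero count of the XOR divided by eight. A compressed sketch of that trick, assuming a GCC-style __builtin_ctzll and a little-endian target, with memcpy standing in for BROTLI_UNALIGNED_LOAD64:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

static size_t MatchLengthSketch(const uint8_t* s1, const uint8_t* s2,
                                size_t limit) {
  size_t matched = 0;
  while (matched + 8 <= limit) {
    uint64_t w1, w2;
    memcpy(&w1, s1 + matched, 8);  /* unaligned-safe loads */
    memcpy(&w2, s2 + matched, 8);
    uint64_t x = w1 ^ w2;
    if (x != 0) {
      /* Little-endian: the lowest-addressed byte is least significant,
         so trailing zero bits / 8 equals the number of matching bytes. */
      return matched + (static_cast<size_t>(__builtin_ctzll(x)) >> 3);
    }
    matched += 8;
  }
  while (matched < limit && s1[matched] == s2[matched]) ++matched;
  return matched;
}

int main() {
  const uint8_t a[] = "abcdefghijXlmnop";
  const uint8_t b[] = "abcdefghijklmnop";
  printf("%d\n", static_cast<int>(MatchLengthSketch(a, b, 16)));  /* 10 */
  return 0;
}
```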
*/ return h >> (32 - kShiftBits); } -// Usually, we always choose the longest backward reference. This function -// allows for the exception of that rule. -// -// If we choose a backward reference that is further away, it will -// usually be coded with more bits. We approximate this by assuming -// log2(distance). If the distance can be expressed in terms of the -// last four distances, we use some heuristic constants to estimate -// the bits cost. For the first up to four literals we use the bit -// cost of the literals from the literal cost model, after that we -// use the average bit cost of the cost model. -// -// This function is used to sometimes discard a longer backward reference -// when it is not much longer and the bit cost for encoding it is more -// than the saved literals. -// -// backward_reference_offset MUST be positive. +/* Usually, we always choose the longest backward reference. This function + allows for the exception of that rule. + + If we choose a backward reference that is further away, it will + usually be coded with more bits. We approximate this by assuming + log2(distance). If the distance can be expressed in terms of the + last four distances, we use some heuristic constants to estimate + the bits cost. For the first up to four literals we use the bit + cost of the literals from the literal cost model, after that we + use the average bit cost of the cost model. + + This function is used to sometimes discard a longer backward reference + when it is not much longer and the bit cost for encoding it is more + than the saved literals. + + backward_reference_offset MUST be positive. */ inline double BackwardReferenceScore(size_t copy_length, size_t backward_reference_offset) { return 5.4 * static_cast<double>(copy_length) - @@ -511,13 +511,13 @@ class HashLongestMatch { return match_found; } - // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the - // length of max_length and stores the position cur_ix in the hash table. - // - // Sets *num_matches to the number of matches found, and stores the found - // matches in matches[0] to matches[*num_matches - 1]. The matches will be - // sorted by strictly increasing length and (non-strictly) increasing - // distance. +/* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the + length of max_length and stores the position cur_ix in the hash table. + + Sets *num_matches to the number of matches found, and stores the found + matches in matches[0] to matches[*num_matches - 1]. The matches will be + sorted by strictly increasing length and (non-strictly) increasing + distance. */ size_t FindAllMatches(const uint8_t* data, const size_t ring_buffer_mask, const size_t cur_ix, @@ -936,7 +936,7 @@ struct Hashers { } } - // Custom LZ77 window. +/* Custom LZ77 window. */ void PrependCustomDictionary( int type, int lgwin, const size_t size, const uint8_t* dict) { switch (type) { @@ -972,4 +972,4 @@ struct Hashers { } // namespace brotli -#endif // BROTLI_ENC_HASH_H_ +#endif /* BROTLI_ENC_HASH_H_ */ diff --git a/enc/histogram.cc b/enc/histogram.cc index 9d733d8..537a275 100644 --- a/enc/histogram.cc +++ b/enc/histogram.cc @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Build per-context histograms of literals, commands and distance codes. +/* Build per-context histograms of literals, commands and distance codes.
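The reason the hash keeps the high bits: after multiplying by a large odd constant, the top of the product mixes contributions from all input bits, so the bucket index is taken from that end. A sketch of the same recipe with the document's constant; HashSketch is a hypothetical stand-in, not the encoder's function:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

static const uint32_t kHashMul32 = 0x1e35a7bd;

template<int kShiftBits>
static uint32_t HashSketch(const uint8_t* data) {
  uint32_t v;
  memcpy(&v, data, sizeof(v));    /* 4-byte key, unaligned-safe */
  uint32_t h = v * kHashMul32;
  return h >> (32 - kShiftBits);  /* keep the best-mixed bits */
}

int main() {
  const uint8_t text[] = "the quick brown fox";
  /* A 14-bit bucket index, as a table with 2^14 buckets would use. */
  printf("%u\n", HashSketch<14>(text));
  return 0;
}
```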
*/ #include "./histogram.h" diff --git a/enc/histogram.h b/enc/histogram.h index a7e4835..2287b59 100644 --- a/enc/histogram.h +++ b/enc/histogram.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Models the histograms of literals, commands and distance codes. +/* Models the histograms of literals, commands and distance codes. */ #ifndef BROTLI_ENC_HISTOGRAM_H_ #define BROTLI_ENC_HISTOGRAM_H_ @@ -92,4 +92,4 @@ void BuildHistograms( } // namespace brotli -#endif // BROTLI_ENC_HISTOGRAM_H_ +#endif /* BROTLI_ENC_HISTOGRAM_H_ */ diff --git a/enc/literal_cost.cc b/enc/literal_cost.cc index 4293fbf..2560ee7 100644 --- a/enc/literal_cost.cc +++ b/enc/literal_cost.cc @@ -4,7 +4,8 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Literal cost model to allow backward reference replacement to be efficient. +/* Literal cost model to allow backward reference replacement to be efficient. +*/ #include "./literal_cost.h" @@ -20,14 +21,14 @@ namespace brotli { static size_t UTF8Position(size_t last, size_t c, size_t clamp) { if (c < 128) { - return 0; // Next one is the 'Byte 1' again. - } else if (c >= 192) { // Next one is the 'Byte 2' of utf-8 encoding. + return 0; /* Next one is the 'Byte 1' again. */ + } else if (c >= 192) { /* Next one is the 'Byte 2' of utf-8 encoding. */ return std::min(1, clamp); } else { - // Let's decide over the last byte if this ends the sequence. + /* Let's decide over the last byte if this ends the sequence. */ if (last < 0xe0) { - return 0; // Completed two or three byte coding. - } else { // Next one is the 'Byte 3' of utf-8 encoding. + return 0; /* Completed two or three byte coding. */ + } else { /* Next one is the 'Byte 3' of utf-8 encoding. */ return std::min(2, clamp); } } @@ -36,7 +37,7 @@ static size_t UTF8Position(size_t last, size_t c, size_t clamp) { static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask, const uint8_t *data) { size_t counts[3] = { 0 }; - size_t max_utf8 = 1; // should be 2, but 1 compresses better. + size_t max_utf8 = 1; /* should be 2, but 1 compresses better. */ size_t last_c = 0; size_t utf8_pos = 0; for (size_t i = 0; i < len; ++i) { @@ -56,16 +57,15 @@ static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask, static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask, const uint8_t *data, float *cost) { - - // max_utf8 is 0 (normal ascii single byte modeling), - // 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling). + /* max_utf8 is 0 (normal ascii single byte modeling), + 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling). */ const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data); size_t histogram[3][256] = { { 0 } }; size_t window_half = 495; size_t in_window = std::min(window_half, len); size_t in_window_utf8[3] = { 0 }; - // Bootstrap histograms. + /* Bootstrap histograms. */ size_t last_c = 0; size_t utf8_pos = 0; for (size_t i = 0; i < in_window; ++i) { @@ -76,10 +76,10 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask, last_c = c; } - // Compute bit costs with sliding window. + /* Compute bit costs with sliding window. */ for (size_t i = 0; i < len; ++i) { if (i >= window_half) { - // Remove a byte in the past. + /* Remove a byte in the past. */ size_t c = i < window_half + 1 ? 0 : data[(pos + i - window_half - 1) & mask]; size_t last_c = i < window_half + 2 ? 
@@ -89,7 +89,7 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask, --in_window_utf8[utf8_pos2]; } if (i + window_half < len) { - // Add a byte in the future. + /* Add a byte in the future. */ size_t c = data[(pos + i + window_half - 1) & mask]; size_t last_c = data[(pos + i + window_half - 2) & mask]; size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8); @@ -110,10 +110,10 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask, lit_cost *= 0.5; lit_cost += 0.5; } - // Make the first bytes more expensive -- seems to help, not sure why. - // Perhaps because the entropy source is changing its properties - // rapidly in the beginning of the file, perhaps because the beginning - // of the data is a statistical "anomaly". + /* Make the first bytes more expensive -- seems to help, not sure why. + Perhaps because the entropy source is changing its properties + rapidly in the beginning of the file, perhaps because the beginning + of the data is a statistical "anomaly". */ if (i < 2000) { lit_cost += 0.7 - (static_cast<double>(2000 - i) / 2000.0 * 0.35); } @@ -131,20 +131,20 @@ void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask, size_t window_half = 2000; size_t in_window = std::min(window_half, len); - // Bootstrap histogram. + /* Bootstrap histogram. */ for (size_t i = 0; i < in_window; ++i) { ++histogram[data[(pos + i) & mask]]; } - // Compute bit costs with sliding window. + /* Compute bit costs with sliding window. */ for (size_t i = 0; i < len; ++i) { if (i >= window_half) { - // Remove a byte in the past. + /* Remove a byte in the past. */ --histogram[data[(pos + i - window_half) & mask]]; --in_window; } if (i + window_half < len) { - // Add a byte in the future. + /* Add a byte in the future. */ ++histogram[data[(pos + i + window_half) & mask]]; ++in_window; } diff --git a/enc/literal_cost.h b/enc/literal_cost.h index 9614baf..c00f83d 100644 --- a/enc/literal_cost.h +++ b/enc/literal_cost.h @@ -4,7 +4,8 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Literal cost model to allow backward reference replacement to be efficient. +/* Literal cost model to allow backward reference replacement to be efficient. +*/ #ifndef BROTLI_ENC_LITERAL_COST_H_ #define BROTLI_ENC_LITERAL_COST_H_ @@ -13,12 +14,12 @@ namespace brotli { -// Estimates how many bits the literals in the interval [pos, pos + len) in the -// ringbuffer (data, mask) will take entropy coded and writes these estimates -// to the cost[0..len) array. +/* Estimates how many bits the literals in the interval [pos, pos + len) in the + ringbuffer (data, mask) will take entropy coded and writes these estimates + to the cost[0..len) array. */ void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask, const uint8_t *data, float *cost); } // namespace brotli -#endif // BROTLI_ENC_LITERAL_COST_H_ +#endif /* BROTLI_ENC_LITERAL_COST_H_ */ diff --git a/enc/metablock.cc b/enc/metablock.cc index 5990e42..1391eb7 100644 --- a/enc/metablock.cc +++ b/enc/metablock.cc @@ -4,11 +4,12 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Algorithms for distributing the literals and commands of a metablock between -// block types and contexts. +/* Algorithms for distributing the literals and commands of a metablock between + block types and contexts.
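Stripped of the UTF-8 modes, the cost model is a sliding-window entropy estimate: a byte costs roughly log2(in_window / histogram[byte]) bits, with the histogram covering about window_half bytes on each side. A toy version; window size and input here are arbitrary demo values:

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  static const double kLog2Inv = 1.4426950408889634;
  const uint8_t data[] = "aaaaabaaaaabaaaaab";
  const size_t len = strlen(reinterpret_cast<const char*>(data));
  const size_t window_half = 6;
  size_t histogram[256] = { 0 };
  size_t in_window = std::min(window_half, len);
  for (size_t i = 0; i < in_window; ++i) ++histogram[data[i]]; /* bootstrap */
  for (size_t i = 0; i < len; ++i) {
    if (i >= window_half) {       /* remove the byte leaving the window */
      --histogram[data[i - window_half]];
      --in_window;
    }
    if (i + window_half < len) {  /* add the byte entering the window */
      ++histogram[data[i + window_half]];
      ++in_window;
    }
    size_t count = std::max<size_t>(histogram[data[i]], 1);
    double cost = log(static_cast<double>(in_window) /
                      static_cast<double>(count)) * kLog2Inv;
    printf("%c %.2f\n", data[i], cost);  /* rare 'b' costs more than 'a' */
  }
  return 0;
}
```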
*/ #include "./metablock.h" +#include "../common/types.h" #include "./block_splitter.h" #include "./cluster.h" #include "./context.h" @@ -55,7 +56,7 @@ void BuildMetaBlock(const uint8_t* ringbuffer, &mb->command_histograms, &distance_histograms); - // Histogram ids need to fit in one byte. + /* Histogram ids need to fit in one byte. */ static const size_t kMaxNumberOfHistograms = 256; ClusterHistograms(literal_histograms, @@ -201,32 +202,32 @@ class BlockSplitter { private: static const uint16_t kMaxBlockTypes = 256; - // Alphabet size of particular block category. + /* Alphabet size of particular block category. */ const size_t alphabet_size_; - // We collect at least this many symbols for each block. + /* We collect at least this many symbols for each block. */ const size_t min_block_size_; - // We merge histograms A and B if - // entropy(A+B) < entropy(A) + entropy(B) + split_threshold_, - // where A is the current histogram and B is the histogram of the last or the - // second last block type. + /* We merge histograms A and B if + entropy(A+B) < entropy(A) + entropy(B) + split_threshold_, + where A is the current histogram and B is the histogram of the last or the + second last block type. */ const double split_threshold_; size_t num_blocks_; - BlockSplit* split_; // not owned - std::vector* histograms_; // not owned + BlockSplit* split_; /* not owned */ + std::vector* histograms_; /* not owned */ - // The number of symbols that we want to collect before deciding on whether - // or not to merge the block with a previous one or emit a new block. + /* The number of symbols that we want to collect before deciding on whether + or not to merge the block with a previous one or emit a new block. */ size_t target_block_size_; - // The number of symbols in the current histogram. + /* The number of symbols in the current histogram. */ size_t block_size_; - // Offset of the current histogram. + /* Offset of the current histogram. */ size_t curr_histogram_ix_; - // Offset of the histograms of the previous two block types. + /* Offset of the histograms of the previous two block types. */ size_t last_histogram_ix_[2]; - // Entropy of the previous two block types. + /* Entropy of the previous two block types. */ double last_entropy_[2]; - // The number of times we merged the current block with the last one. + /* The number of times we merged the current block with the last one. */ size_t merge_last_count_; }; @@ -314,10 +315,10 @@ class ContextBlockSplitter { } } - // Does either of three things: - // (1) emits the current block with a new block type; - // (2) emits the current block with the type of the second last block; - // (3) merges the current block with the last block. +/* Does either of three things: + (1) emits the current block with a new block type; + (2) emits the current block with the type of the second last block; + (3) merges the current block with the last block. */ void FinishBlock(bool is_final) { if (block_size_ < min_block_size_) { block_size_ = min_block_size_; @@ -336,10 +337,10 @@ class ContextBlockSplitter { curr_histogram_ix_ += num_contexts_; block_size_ = 0; } else if (block_size_ > 0) { - // Try merging the set of histograms for the current block type with the - // respective set of histograms for the last and second last block types. - // Decide over the split based on the total reduction of entropy across - // all contexts. + /* Try merging the set of histograms for the current block type with the + respective set of histograms for the last and second last block types. 
+ Decide over the split based on the total reduction of entropy across + all contexts. */ std::vector<double> entropy(num_contexts_); std::vector<HistogramLiteral> combined_histo(2 * num_contexts_); std::vector<double> combined_entropy(2 * num_contexts_); diff --git a/enc/metablock.h b/enc/metablock.h index d192885..35f2c87 100644 --- a/enc/metablock.h +++ b/enc/metablock.h @@ -4,14 +4,15 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Algorithms for distributing the literals and commands of a metablock between -// block types and contexts. +/* Algorithms for distributing the literals and commands of a metablock between + block types and contexts. */ #ifndef BROTLI_ENC_METABLOCK_H_ #define BROTLI_ENC_METABLOCK_H_ #include <vector> +#include "../common/types.h" #include "./command.h" #include "./histogram.h" namespace brotli { @@ -36,7 +37,7 @@ struct MetaBlockSplit { std::vector<HistogramDistance> distance_histograms; }; -// Uses the slow shortest-path block splitter and does context clustering. +/* Uses the slow shortest-path block splitter and does context clustering. */ void BuildMetaBlock(const uint8_t* ringbuffer, const size_t pos, const size_t mask, @@ -47,8 +48,8 @@ void BuildMetaBlock(const uint8_t* ringbuffer, ContextType literal_context_mode, MetaBlockSplit* mb); -// Uses a fast greedy block splitter that tries to merge current block with the -// last or the second last block and does not do any context modeling. +/* Uses a fast greedy block splitter that tries to merge current block with the + last or the second last block and does not do any context modeling. */ void BuildMetaBlockGreedy(const uint8_t* ringbuffer, size_t pos, size_t mask, @@ -56,9 +57,9 @@ void BuildMetaBlockGreedy(const uint8_t* ringbuffer, size_t n_commands, MetaBlockSplit* mb); -// Uses a fast greedy block splitter that tries to merge current block with the -// last or the second last block and uses a static context clustering which -// is the same for all block types. +/* Uses a fast greedy block splitter that tries to merge current block with the + last or the second last block and uses a static context clustering which + is the same for all block types. */ void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer, size_t pos, size_t mask, @@ -77,4 +78,4 @@ void OptimizeHistograms(size_t num_direct_distance_codes, } // namespace brotli -#endif // BROTLI_ENC_METABLOCK_H_ +#endif /* BROTLI_ENC_METABLOCK_H_ */ diff --git a/enc/port.h b/enc/port.h index 377ce99..a9c9ffe 100644 --- a/enc/port.h +++ b/enc/port.h @@ -4,14 +4,15 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Macros for endianness, branch prediction and unaligned loads and stores. +/* Macros for endianness, branch prediction and unaligned loads and stores.
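The merge criterion quoted above fits in one function: merge two blocks when coding them with a shared histogram costs at most split_threshold more bits than coding them separately. A sketch with a plain Shannon estimate standing in for the encoder's entropy function, and an arbitrary demo threshold:

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

static double BitsEntropy(const std::vector<unsigned>& h) {
  static const double kLog2Inv = 1.4426950408889634;
  double total = 0.0, bits = 0.0;
  for (size_t i = 0; i < h.size(); ++i) total += h[i];
  for (size_t i = 0; i < h.size(); ++i) {
    if (h[i] > 0) bits -= h[i] * log(h[i] / total) * kLog2Inv;
  }
  return bits;  /* estimated bits to code all symbols in h */
}

static bool ShouldMerge(const std::vector<unsigned>& a,
                        const std::vector<unsigned>& b,
                        double split_threshold) {
  std::vector<unsigned> combined(a);
  for (size_t i = 0; i < b.size(); ++i) combined[i] += b[i];
  return BitsEntropy(combined) <
         BitsEntropy(a) + BitsEntropy(b) + split_threshold;
}

int main() {
  std::vector<unsigned> a(4, 0), b(4, 0), c(4, 0);
  a[0] = 90; a[1] = 10;  /* mostly symbol 0 */
  b[0] = 85; b[1] = 15;  /* similar distribution: merging is cheap */
  c[2] = 90; c[3] = 10;  /* disjoint symbols: merging is expensive */
  printf("%d %d\n", ShouldMerge(a, b, 50.0), ShouldMerge(a, c, 50.0));
  return 0;  /* prints 1 0 */
}
```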
*/ #ifndef BROTLI_ENC_PORT_H_ #define BROTLI_ENC_PORT_H_ #include <assert.h> -#include <string.h> +#include <string.h> /* memcpy */ +#include "../common/port.h" #include "../common/types.h" #if defined OS_LINUX || defined OS_CYGWIN @@ -25,9 +26,9 @@ #define __LITTLE_ENDIAN LITTLE_ENDIAN #endif -// define the macro IS_LITTLE_ENDIAN -// using the above endian definitions from endian.h if -// endian.h was included +/* define the macro IS_LITTLE_ENDIAN + using the above endian definitions from endian.h if + endian.h was included */ #ifdef __BYTE_ORDER #if __BYTE_ORDER == __LITTLE_ENDIAN #define IS_LITTLE_ENDIAN #endif @@ -38,41 +39,28 @@ #if defined(__LITTLE_ENDIAN__) #define IS_LITTLE_ENDIAN #endif -#endif // __BYTE_ORDER +#endif /* __BYTE_ORDER */ #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) #define IS_LITTLE_ENDIAN #endif -// Enable little-endian optimization for x64 architecture on Windows. +/* Enable little-endian optimization for x64 architecture on Windows. */ #if (defined(_WIN32) || defined(_WIN64)) && defined(_M_X64) #define IS_LITTLE_ENDIAN #endif -/* Compatibility with non-clang compilers. */ -#ifndef __has_builtin -#define __has_builtin(x) 0 -#endif -#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ > 95) || \ (defined(__llvm__) && __has_builtin(__builtin_expect)) -#define PREDICT_FALSE(x) (__builtin_expect(x, 0)) -#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) -#else -#define PREDICT_FALSE(x) (x) -#define PREDICT_TRUE(x) (x) -#endif -// Portable handling of unaligned loads, stores, and copies. -// On some platforms, like ARM, the copy functions can be more efficient -// then a load and a store. +/* Portable handling of unaligned loads, stores, and copies. + On some platforms, like ARM, the copy functions can be more efficient + than a load and a store. */ #if defined(ARCH_PIII) || \ defined(ARCH_ATHLON) || defined(ARCH_K8) || defined(_ARCH_PPC) -// x86 and x86-64 can perform unaligned loads/stores directly; -// modern PowerPC hardware can also do unaligned integer loads and stores; -// but note: the FPU still sends unaligned loads and stores to a trap handler! +/* x86 and x86-64 can perform unaligned loads/stores directly; + modern PowerPC hardware can also do unaligned integer loads and stores; + but note: the FPU still sends unaligned loads and stores to a trap handler! +*/ #define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p)) #define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p)) @@ -94,50 +82,50 @@ !defined(__ARM_ARCH_6ZK__) && \ !defined(__ARM_ARCH_6T2__) -// ARMv7 and newer support native unaligned accesses, but only of 16-bit -// and 32-bit values (not 64-bit); older versions either raise a fatal signal, -// do an unaligned read and rotate the words around a bit, or do the reads very -// slowly (trip through kernel mode). +/* ARMv7 and newer support native unaligned accesses, but only of 16-bit + and 32-bit values (not 64-bit); older versions either raise a fatal signal, + do an unaligned read and rotate the words around a bit, or do the reads very + slowly (trip through kernel mode).
*/ #define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p)) #define BROTLI_UNALIGNED_STORE32(_p, _val) \ (*reinterpret_cast<uint32_t *>(_p) = (_val)) -inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) { +static inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) { uint64_t t; memcpy(&t, p, sizeof t); return t; } -inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) { +static inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) { memcpy(p, &v, sizeof v); } #else -// These functions are provided for architectures that don't support -// unaligned loads and stores. +/* These functions are provided for architectures that don't support */ +/* unaligned loads and stores. */ -inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) { +static inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) { uint32_t t; memcpy(&t, p, sizeof t); return t; } -inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) { +static inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) { uint64_t t; memcpy(&t, p, sizeof t); return t; } -inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) { +static inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) { memcpy(p, &v, sizeof v); } -inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) { +static inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) { memcpy(p, &v, sizeof v); } #endif -#endif // BROTLI_ENC_PORT_H_ +#endif /* BROTLI_ENC_PORT_H_ */ diff --git a/enc/prefix.h b/enc/prefix.h index e50fd2e..237e68f 100644 --- a/enc/prefix.h +++ b/enc/prefix.h @@ -4,8 +4,8 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Functions for encoding of integers into prefix codes the amount of extra -// bits, and the actual values of the extra bits. +/* Functions for encoding of integers into prefix codes the amount of extra + bits, and the actual values of the extra bits. */ #ifndef BROTLI_ENC_PREFIX_H_ #define BROTLI_ENC_PREFIX_H_ @@ -76,4 +76,4 @@ inline void PrefixEncodeCopyDistance(size_t distance_code, } // namespace brotli -#endif // BROTLI_ENC_PREFIX_H_ +#endif /* BROTLI_ENC_PREFIX_H_ */ diff --git a/enc/ringbuffer.h b/enc/ringbuffer.h index 46bf3ad..5c4b569 100644 --- a/enc/ringbuffer.h +++ b/enc/ringbuffer.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Sliding window over the input data. +/* Sliding window over the input data. */ #ifndef BROTLI_ENC_RINGBUFFER_H_ #define BROTLI_ENC_RINGBUFFER_H_ @@ -16,15 +16,15 @@ namespace brotli { -// A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of -// data in a circular manner: writing a byte writes it to: -// `position() % (1 << window_bits)'. -// For convenience, the RingBuffer array contains another copy of the -// first `1 << tail_bits' bytes: -// buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits), -// and another copy of the last two bytes: -// buffer_[-1] == buffer_[(1 << window_bits) - 1] and -// buffer_[-2] == buffer_[(1 << window_bits) - 2]. +/* A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of + data in a circular manner: writing a byte writes it to: + `position() % (1 << window_bits)'. + For convenience, the RingBuffer array contains another copy of the + first `1 << tail_bits' bytes: + buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits), + and another copy of the last two bytes: + buffer_[-1] == buffer_[(1 << window_bits) - 1] and + buffer_[-2] == buffer_[(1 << window_bits) - 2].
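The RingBuffer invariant just described, in miniature: a power-of-two ring addressed with pos & mask, plus a mirrored copy of the first `tail` bytes past the end so short reads near the wrap point stay contiguous. A toy demonstration with arbitrary sizes:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const size_t kWindowBits = 4;                  /* 16-byte ring */
  const size_t size = 1u << kWindowBits;
  const size_t mask = size - 1;
  const size_t tail = 4;                         /* mirrored prefix length */
  std::vector<uint8_t> buffer(size + tail, 0);
  const char* input = "abcdefghijklmnopqrstuv";  /* 22 bytes: wraps once */
  size_t pos = 0;
  for (const char* p = input; *p; ++p, ++pos) {
    buffer[pos & mask] = static_cast<uint8_t>(*p);
    if ((pos & mask) < tail) {                   /* keep the mirror in sync */
      buffer[size + (pos & mask)] = static_cast<uint8_t>(*p);
    }
  }
  /* A 4-byte read starting at ring offset 14 runs straight into the
     mirrored tail instead of wrapping: prints "opqr". */
  printf("%.4s\n", reinterpret_cast<const char*>(&buffer[14]));
  return 0;
}
```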
*/ class RingBuffer { public: RingBuffer(int window_bits, int tail_bits) @@ -41,8 +41,8 @@ class RingBuffer { free(data_); } - // Allocates or re-allocates data_ to the given length + plus some slack - // region before and after. Fills the slack regions with zeros. +/* Allocates or re-allocates data_ to the given length + plus some slack + region before and after. Fills the slack regions with zeros. */ inline void InitBuffer(const uint32_t buflen) { static const size_t kSlackForEightByteHashingEverywhere = 7; cur_size_ = buflen; @@ -55,41 +55,41 @@ class RingBuffer { } } - // Push bytes into the ring buffer. +/* Push bytes into the ring buffer. */ void Write(const uint8_t *bytes, size_t n) { if (pos_ == 0 && n < tail_size_) { - // Special case for the first write: to process the first block, we don't - // need to allocate the whole ringbuffer and we don't need the tail - // either. However, we do this memory usage optimization only if the - // first write is less than the tail size, which is also the input block - // size, otherwise it is likely that other blocks will follow and we - // will need to reallocate to the full size anyway. + /* Special case for the first write: to process the first block, we don't + need to allocate the whole ringbuffer and we don't need the tail + either. However, we do this memory usage optimization only if the + first write is less than the tail size, which is also the input block + size, otherwise it is likely that other blocks will follow and we + will need to reallocate to the full size anyway. */ pos_ = static_cast(n); InitBuffer(pos_); memcpy(buffer_, bytes, n); return; } if (cur_size_ < total_size_) { - // Lazily allocate the full buffer. + /* Lazily allocate the full buffer. */ InitBuffer(total_size_); - // Initialize the last two bytes to zero, so that we don't have to worry - // later when we copy the last two bytes to the first two positions. + /* Initialize the last two bytes to zero, so that we don't have to worry + later when we copy the last two bytes to the first two positions. */ buffer_[size_ - 2] = 0; buffer_[size_ - 1] = 0; } const size_t masked_pos = pos_ & mask_; - // The length of the writes is limited so that we do not need to worry - // about a write + /* The length of the writes is limited so that we do not need to worry + about a write */ WriteTail(bytes, n); if (PREDICT_TRUE(masked_pos + n <= size_)) { - // A single write fits. + /* A single write fits. */ memcpy(&buffer_[masked_pos], bytes, n); } else { - // Split into two writes. - // Copy into the end of the buffer, including the tail buffer. + /* Split into two writes. + Copy into the end of the buffer, including the tail buffer. */ memcpy(&buffer_[masked_pos], bytes, std::min(n, total_size_ - masked_pos)); - // Copy into the beginning of the buffer + /* Copy into the beginning of the buffer */ memcpy(&buffer_[0], bytes + (size_ - masked_pos), n - (size_ - masked_pos)); } @@ -142,4 +142,4 @@ class RingBuffer { } // namespace brotli -#endif // BROTLI_ENC_RINGBUFFER_H_ +#endif /* BROTLI_ENC_RINGBUFFER_H_ */ diff --git a/enc/static_dict.cc b/enc/static_dict.cc index bc29166..6a34b05 100644 --- a/enc/static_dict.cc +++ b/enc/static_dict.cc @@ -17,8 +17,8 @@ namespace brotli { inline uint32_t Hash(const uint8_t *data) { uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32; - // The higher bits contain more mixture from the multiplication, - // so we take our results from there. 
+ /* The higher bits contain more mixture from the multiplication, + so we take our results from there. */ return h >> (32 - kDictNumBits); } @@ -42,18 +42,18 @@ inline bool IsMatch(DictWord w, const uint8_t* data, size_t max_length) { const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx; const uint8_t* dict = &kBrotliDictionary[offset]; if (w.transform == 0) { - // Match against base dictionary word. + /* Match against base dictionary word. */ return FindMatchLengthWithLimit(dict, data, w.len) == w.len; } else if (w.transform == 10) { - // Match against uppercase first transform. - // Note that there are only ASCII uppercase words in the lookup table. + /* Match against uppercase first transform. + Note that there are only ASCII uppercase words in the lookup table. */ return (dict[0] >= 'a' && dict[0] <= 'z' && (dict[0] ^ 32) == data[0] && FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) == w.len - 1u); } else { - // Match against uppercase all transform. - // Note that there are only ASCII uppercase words in the lookup table. + /* Match against uppercase all transform. + Note that there are only ASCII uppercase words in the lookup table. */ for (size_t i = 0; i < w.len; ++i) { if (dict[i] >= 'a' && dict[i] <= 'z') { if ((dict[i] ^ 32) != data[i]) return false; @@ -82,12 +82,12 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data, const size_t id = w.idx; if (w.transform == 0) { const size_t matchlen = DictMatchLength(data, id, l, max_length); - // Transform "" + kIdentity + "" + /* Transform "" + kIdentity + "" */ if (matchlen == l) { AddMatch(id, l, l, matches); found_match = true; } - // Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " + /* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */ if (matchlen >= l - 1) { AddMatch(id + 12 * n, l - 1, l, matches); if (l + 2 < max_length && @@ -97,7 +97,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data, } found_match = true; } - // Transform "" + kOmitLastN + "" (N = 2 .. 9) + /* Transform "" + kOmitLastN + "" (N = 2 .. 9) */ size_t minlen = min_length; if (l > 9) minlen = std::max(minlen, l - 9); size_t maxlen = std::min(matchlen, l - 2); @@ -109,7 +109,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data, continue; } const uint8_t* s = &data[l]; - // Transforms "" + kIdentity + <suffix> + /* Transforms "" + kIdentity + <suffix> */ if (s[0] == ' ') { AddMatch(id + n, l + 1, l, matches); if (s[1] == 'a') { @@ -127,7 +127,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data, } else if (s[1] == 'b') { if (s[2] == 'y' && s[3] == ' ') { AddMatch(id + 38 * n, l + 4, l, matches); - } + } } else if (s[1] == 'i') { if (s[2] == 'n') { if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches); @@ -235,7 +235,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data, } else if (s[0] == 'i') { if (s[1] == 'v') { if (s[2] == 'e' && s[3] == ' ') { - AddMatch(id + 92 * n, l + 4, l, matches); + AddMatch(id + 92 * n, l + 4, l, matches); } } else if (s[1] == 'z') { if (s[2] == 'e' && s[3] == ' ') { @@ -256,19 +256,19 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data, } } } else { - // Set t=false for kUppercaseFirst and - // t=true otherwise (kUppercaseAll) transform. + /* Set is_all_caps=0 for kUppercaseFirst and + is_all_caps=1 otherwise (kUppercaseAll) transform. */ const bool t = w.transform != kUppercaseFirst; if (!IsMatch(w, data, max_length)) { continue; } - // Transform "" + kUppercase{First,All} + "" + /* Transform "" + kUppercase{First,All} + "" */ AddMatch(id + (t ?
44 : 9) * n, l, l, matches); found_match = true; if (l + 1 >= max_length) { continue; } - // Transforms "" + kUppercase{First,All} + <suffix> + /* Transforms "" + kUppercase{First,All} + <suffix> */ const uint8_t* s = &data[l]; if (s[0] == ' ') { AddMatch(id + (t ? 68 : 4) * n, l + 1, l, matches); @@ -301,7 +301,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data, } } } - // Transforms with prefixes " " and "." + /* Transforms with prefixes " " and "." */ if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) { bool is_space = (data[0] == ' '); key = Hash(&data[1]); @@ -317,13 +317,14 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data, if (!IsMatch(w, &data[1], max_length - 1)) { continue; } - // Transforms " " + kIdentity + "" and "." + kIdentity + "" + /* Transforms " " + kIdentity + "" and "." + kIdentity + "" */ AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches); found_match = true; if (l + 2 >= max_length) { continue; } - // Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix> + /* Transforms " " + kIdentity + <suffix> and "." + kIdentity + + <suffix> */ const uint8_t* s = &data[l + 1]; if (s[0] == ' ') { AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches); @@ -349,19 +350,19 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data, } } } else if (is_space) { - // Set t=false for kUppercaseFirst and - // t=true otherwise (kUppercaseAll) transform. + /* Set is_all_caps=0 for kUppercaseFirst and + is_all_caps=1 otherwise (kUppercaseAll) transform. */ const bool t = w.transform != kUppercaseFirst; if (!IsMatch(w, &data[1], max_length - 1)) { continue; } - // Transforms " " + kUppercase{First,All} + "" + /* Transforms " " + kUppercase{First,All} + "" */ AddMatch(id + (t ? 85 : 30) * n, l + 1, l, matches); found_match = true; if (l + 2 >= max_length) { continue; } - // Transforms " " + kUppercase{First,All} + <suffix> + /* Transforms " " + kUppercase{First,All} + <suffix> */ const uint8_t* s = &data[l + 1]; if (s[0] == ' ') { AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches); @@ -388,7 +389,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data, } } if (max_length >= 6) { - // Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" + /* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */ if ((data[1] == ' ' && (data[0] == 'e' || data[0] == 's' || data[0] == ',')) || (data[0] == 0xc2 && data[1] == 0xa0)) { @@ -415,7 +416,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data, } } if (max_length >= 9) { - // Transforms with prefixes " the " and ".com/" + /* Transforms with prefixes " the " and ".com/" */ if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' && data[3] == 'e' && data[4] == ' ') || (data[0] == '.' && data[1] == 'c' && data[2] == 'o' && diff --git a/enc/static_dict.h b/enc/static_dict.h index e9bee32..7891186 100644 --- a/enc/static_dict.h +++ b/enc/static_dict.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Class to model the static dictionary. +/* Class to model the static dictionary. */ #ifndef BROTLI_ENC_STATIC_DICT_H_ #define BROTLI_ENC_STATIC_DICT_H_ @@ -16,12 +16,13 @@ namespace brotli { static const size_t kMaxDictionaryMatchLen = 37; static const uint32_t kInvalidMatch = 0xfffffff; -// Matches data against static dictionary words, and for each length l, -// for which a match is found, updates matches[l] to be the minimum possible -// (distance << 5) + len_code.
-// Prerequisites: -// matches array is at least kMaxDictionaryMatchLen + 1 long -// all elements are initialized to kInvalidMatch +/* Matches data against static dictionary words, and for each length l, + for which a match is found, updates matches[l] to be the minimum possible + (distance << 5) + len_code. + Returns 1 if matches have been found, otherwise 0. + Prerequisites: + matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long + all elements are initialized to kInvalidMatch */ bool FindAllStaticDictionaryMatches(const uint8_t* data, size_t min_length, size_t max_length, @@ -29,4 +30,4 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data, } // namespace brotli -#endif // BROTLI_ENC_STATIC_DICT_H_ +#endif /* BROTLI_ENC_STATIC_DICT_H_ */ diff --git a/enc/static_dict_lut.h b/enc/static_dict_lut.h index 0c33f1f..b00a5f7 100644 --- a/enc/static_dict_lut.h +++ b/enc/static_dict_lut.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Lookup table for static dictionary and transforms. +/* Lookup table for static dictionary and transforms. */ #ifndef BROTLI_ENC_DICTIONARY_LUT_H_ #define BROTLI_ENC_DICTIONARY_LUT_H_ @@ -13,8 +13,8 @@ namespace brotli { -static const int kDictNumBits = 15 -;static const uint32_t kDictHashMul32 = 0x1e35a7bd; +static const int kDictNumBits = 15; +static const uint32_t kDictHashMul32 = 0x1e35a7bd; struct DictWord { uint8_t len; @@ -12052,4 +12052,4 @@ static const DictWord kStaticDictionaryWords[] = { } // namespace brotli -#endif // BROTLI_ENC_DICTIONARY_LUT_H_ +#endif /* BROTLI_ENC_STATIC_DICT_LUT_H_ */ diff --git a/enc/streams.cc b/enc/streams.cc index 17eda2d..abdcc2a 100644 --- a/enc/streams.cc +++ b/enc/streams.cc @@ -4,14 +4,14 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Convience routines to make Brotli I/O classes from some memory containers and -// files. +/* Convenience routines to make Brotli I/O classes from some memory containers and + files. */ #include "./streams.h" #include #include -#include <string.h> +#include <string.h> /* memcpy */ namespace brotli { @@ -26,7 +26,7 @@ void BrotliMemOut::Reset(void* buf, size_t len) { pos_ = 0; } -// Brotli output routine: copy n bytes to the output buffer. +/* Brotli output routine: copy n bytes to the output buffer. */ bool BrotliMemOut::Write(const void *buf, size_t n) { if (n + pos_ > len_) return false; @@ -47,7 +47,7 @@ void BrotliStringOut::Reset(std::string* buf, size_t max_size) { max_size_ = max_size; } -// Brotli output routine: add n bytes to a string. +/* Brotli output routine: add n bytes to a string. */ bool BrotliStringOut::Write(const void *buf, size_t n) { if (buf_->size() + n > max_size_) return false; @@ -66,7 +66,7 @@ void BrotliMemIn::Reset(const void* buf, size_t len) { pos_ = 0; } -// Brotli input routine: read the next chunk of memory. +/* Brotli input routine: read the next chunk of memory. */ const void* BrotliMemIn::Read(size_t n, size_t* output) { if (pos_ == len_) { return NULL; } @@ -111,4 +111,4 @@ bool BrotliFileOut::Write(const void* buf, size_t n) { return true; } -} // namespace brotli +} /* namespace brotli */ diff --git a/enc/streams.h b/enc/streams.h index 351473f..7a595ea 100644 --- a/enc/streams.h +++ b/enc/streams.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Input and output classes for streaming brotli compression. +/* Input and output classes for streaming brotli compression.
*/ #ifndef BROTLI_ENC_STREAMS_H_ #define BROTLI_ENC_STREAMS_H_ @@ -17,71 +17,71 @@ namespace brotli { -// Input interface for the compression routines. +/* Input interface for the compression routines. */ class BrotliIn { public: virtual ~BrotliIn(void) {} - // Return a pointer to the next block of input of at most n bytes. - // Return the actual length in *nread. - // At end of data, return NULL. Don't return NULL if there is more data - // to read, even if called with n == 0. - // Read will only be called if some of its bytes are needed. + /* Return a pointer to the next block of input of at most n bytes. + Return the actual length in *nread. + At end of data, return NULL. Don't return NULL if there is more data + to read, even if called with n == 0. + Read will only be called if some of its bytes are needed. */ virtual const void* Read(size_t n, size_t* nread) = 0; }; -// Output interface for the compression routines. +/* Output interface for the compression routines. */ class BrotliOut { public: virtual ~BrotliOut(void) {} - // Write n bytes of data from buf. - // Return true if all written, false otherwise. + /* Write n bytes of data from buf. + Return true if all written, false otherwise. */ virtual bool Write(const void *buf, size_t n) = 0; }; -// Adapter class to make BrotliIn objects from raw memory. +/* Adapter class to make BrotliIn objects from raw memory. */ class BrotliMemIn : public BrotliIn { public: BrotliMemIn(const void* buf, size_t len); void Reset(const void* buf, size_t len); - // returns the amount of data consumed + /* returns the amount of data consumed */ size_t position(void) const { return pos_; } const void* Read(size_t n, size_t* OUTPUT); private: - const void* buf_; // start of input buffer - size_t len_; // length of input - size_t pos_; // current read position within input + const void* buf_; /* start of input buffer */ + size_t len_; /* length of input */ + size_t pos_; /* current read position within input */ }; -// Adapter class to make BrotliOut objects from raw memory. +/* Adapter class to make BrotliOut objects from raw memory. */ class BrotliMemOut : public BrotliOut { public: BrotliMemOut(void* buf, size_t len); void Reset(void* buf, size_t len); - // returns the amount of data written + /* returns the amount of data written */ size_t position(void) const { return pos_; } bool Write(const void* buf, size_t n); private: - void* buf_; // start of output buffer - size_t len_; // length of output - size_t pos_; // current write position within output + void* buf_; /* start of output buffer */ + size_t len_; /* length of output */ + size_t pos_; /* current write position within output */ }; -// Adapter class to make BrotliOut objects from a string. +/* Adapter class to make BrotliOut objects from a string. */ class BrotliStringOut : public BrotliOut { public: - // Create a writer that appends its data to buf. - // buf->size() will grow to at most max_size - // buf is expected to be empty when constructing BrotliStringOut. + /* Create a writer that appends its data to buf. + buf->size() will grow to at most max_size + buf is expected to be empty when constructing BrotliStringOut. 
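The Read contract above (hand out at most n of the remaining bytes, return NULL only at the true end of data) is easiest to see in a memory-backed reader. MiniMemIn below is a hypothetical, stripped-down analogue of BrotliMemIn, not the library class itself:

```cpp
#include <cstddef>
#include <cstdio>

class MiniMemIn {
 public:
  MiniMemIn(const void* buf, size_t len) : buf_(buf), len_(len), pos_(0) {}
  const void* Read(size_t n, size_t* nread) {
    if (pos_ == len_) return NULL;          /* end of data */
    if (n > len_ - pos_) n = len_ - pos_;   /* clamp to what is left */
    const void* p = static_cast<const char*>(buf_) + pos_;
    pos_ += n;
    *nread = n;
    return p;
  }
 private:
  const void* buf_;
  size_t len_;
  size_t pos_;
};

int main() {
  MiniMemIn in("hello world", 11);
  size_t nread;
  const void* chunk;
  while ((chunk = in.Read(4, &nread)) != NULL) { /* "hell", "o wo", "rld" */
    printf("%.*s\n", static_cast<int>(nread),
           static_cast<const char*>(chunk));
  }
  return 0;
}
```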
*/ BrotliStringOut(std::string* buf, size_t max_size); void Reset(std::string* buf, size_t max_len); @@ -89,11 +89,11 @@ class BrotliStringOut : public BrotliOut { bool Write(const void* buf, size_t n); private: - std::string* buf_; // start of output buffer - size_t max_size_; // max length of output + std::string* buf_; /* start of output buffer */ + size_t max_size_; /* max length of output */ }; -// Adapter class to make BrotliIn object from a file. +/* Adapter class to make BrotliIn object from a file. */ class BrotliFileIn : public BrotliIn { public: BrotliFileIn(FILE* f, size_t max_read_size); @@ -107,7 +107,7 @@ class BrotliFileIn : public BrotliIn { size_t buf_size_; }; -// Adapter class to make BrotliOut object from a file. +/* Adapter class to make BrotliOut object from a file. */ class BrotliFileOut : public BrotliOut { public: explicit BrotliFileOut(FILE* f); @@ -117,6 +117,6 @@ class BrotliFileOut : public BrotliOut { FILE* f_; }; -} // namespace brotli +} /* namespace brotli */ -#endif // BROTLI_ENC_STREAMS_H_ +#endif /* BROTLI_ENC_STREAMS_H_ */ diff --git a/enc/utf8_util.cc b/enc/utf8_util.cc index 0010ca3..a5b0d2c 100644 --- a/enc/utf8_util.cc +++ b/enc/utf8_util.cc @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Heuristics for deciding about the UTF8-ness of strings. +/* Heuristics for deciding about the UTF8-ness of strings. */ #include "./utf8_util.h" @@ -15,14 +15,14 @@ namespace brotli { namespace { size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) { - // ASCII + /* ASCII */ if ((input[0] & 0x80) == 0) { *symbol = input[0]; if (*symbol > 0) { return 1; } } - // 2-byte UTF8 + /* 2-byte UTF8 */ if (size > 1u && (input[0] & 0xe0) == 0xc0 && (input[1] & 0xc0) == 0x80) { @@ -32,7 +32,7 @@ size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) { return 2; } } - // 3-byte UFT8 + /* 3-byte UTF8 */ if (size > 2u && (input[0] & 0xf0) == 0xe0 && (input[1] & 0xc0) == 0x80 && @@ -44,7 +44,7 @@ size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) { return 3; } } - // 4-byte UFT8 + /* 4-byte UTF8 */ if (size > 3u && (input[0] & 0xf8) == 0xf0 && (input[1] & 0xc0) == 0x80 && @@ -58,14 +58,14 @@ size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) { return 4; } } - // Not UTF8, emit a special symbol above the UTF8-code space + /* Not UTF8, emit a special symbol above the UTF8-code space */ *symbol = 0x110000 | input[0]; return 1; } } // namespace -// Returns true if at least min_fraction of the data is UTF8-encoded. +/* Returns 1 if at least min_fraction of the data is UTF8-encoded. */ bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask, const size_t length, const double min_fraction) { size_t size_utf8 = 0; diff --git a/enc/utf8_util.h b/enc/utf8_util.h index e8478db..e5ed876 100644 --- a/enc/utf8_util.h +++ b/enc/utf8_util.h @@ -4,7 +4,7 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Heuristics for deciding about the UTF8-ness of strings. +/* Heuristics for deciding about the UTF8-ness of strings. */ #ifndef BROTLI_ENC_UTF8_UTIL_H_ #define BROTLI_ENC_UTF8_UTIL_H_ @@ -15,11 +15,12 @@ namespace brotli { static const double kMinUTF8Ratio = 0.75; -// Returns true if at least min_fraction of the bytes between pos and -// pos + length in the (data, mask) ringbuffer is UTF8-encoded. 
+/* Returns 1 if at least min_fraction of the bytes between pos and + pos + length in the (data, mask) ringbuffer is UTF8-encoded, otherwise + returns 0. */ bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask, const size_t length, const double min_fraction); } // namespace brotli -#endif // BROTLI_ENC_UTF8_UTIL_H_ +#endif /* BROTLI_ENC_UTF8_UTIL_H_ */ diff --git a/enc/write_bits.h b/enc/write_bits.h index 9d699c1..1358da4 100644 --- a/enc/write_bits.h +++ b/enc/write_bits.h @@ -4,36 +4,36 @@ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT */ -// Write bits into a byte array. +/* Write bits into a byte array. */ #ifndef BROTLI_ENC_WRITE_BITS_H_ #define BROTLI_ENC_WRITE_BITS_H_ #include <assert.h> -#include <stdio.h> +#include <stdio.h> /* printf */ #include "../common/types.h" #include "./port.h" namespace brotli { -//#define BIT_WRITER_DEBUG +/*#define BIT_WRITER_DEBUG */ -// This function writes bits into bytes in increasing addresses, and within -// a byte least-significant-bit first. -// -// The function can write up to 56 bits in one go with WriteBits -// Example: let's assume that 3 bits (Rs below) have been written already: -// -// BYTE-0 BYTE+1 BYTE+2 -// -// 0000 0RRR 0000 0000 0000 0000 -// -// Now, we could write 5 or less bits in MSB by just sifting by 3 -// and OR'ing to BYTE-0. -// -// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0, -// and locate the rest in BYTE+1, BYTE+2, etc. +/* This function writes bits into bytes in increasing addresses, and within + a byte least-significant-bit first. + + The function can write up to 56 bits in one go with WriteBits. + Example: let's assume that 3 bits (Rs below) have been written already: + + BYTE-0 BYTE+1 BYTE+2 + + 0000 0RRR 0000 0000 0000 0000 + + Now, we could write 5 or fewer bits in MSB by just shifting by 3 + and OR'ing to BYTE-0. + + For n bits, we take the last 5 bits, OR that with high bits in BYTE-0, + and locate the rest in BYTE+1, BYTE+2, etc. */ inline void WriteBits(size_t n_bits, uint64_t bits, size_t * __restrict pos, @@ -44,18 +44,18 @@ inline void WriteBits(size_t n_bits, assert((bits >> n_bits) == 0); assert(n_bits <= 56); #ifdef IS_LITTLE_ENDIAN - // This branch of the code can write up to 56 bits at a time, - // 7 bits are lost by being perhaps already in *p and at least - // 1 bit is needed to initialize the bit-stream ahead (i.e. if 7 - // bits are in *p and we write 57 bits, then the next write will - // access a byte that was never initialized). + /* This branch of the code can write up to 56 bits at a time, + 7 bits are lost by being perhaps already in *p and at least + 1 bit is needed to initialize the bit-stream ahead (i.e. if 7 + bits are in *p and we write 57 bits, then the next write will + access a byte that was never initialized). */ uint8_t *p = &array[*pos >> 3]; uint64_t v = *p; v |= bits << (*pos & 7); - BROTLI_UNALIGNED_STORE64(p, v); // Set some bits. + BROTLI_UNALIGNED_STORE64(p, v); /* Set some bits. */ *pos += n_bits; #else - // implicit & 0xff is assumed for uint8_t arithmetics + /* implicit & 0xff is assumed for uint8_t arithmetic */ uint8_t *array_pos = &array[*pos >> 3]; const size_t bits_reserved_in_first_byte = (*pos & 7); bits <<= bits_reserved_in_first_byte; @@ -81,4 +81,4 @@ inline void WriteBitsPrepareStorage(size_t pos, uint8_t *array) { } // namespace brotli -#endif // BROTLI_ENC_WRITE_BITS_H_ +#endif /* BROTLI_ENC_WRITE_BITS_H_ */
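
To make the FindAllStaticDictionaryMatches contract above concrete: each matches[] entry packs a dictionary distance and a 5-bit length code as (distance << 5) + len_code, so both fields can be recovered with a shift and a mask. The following is a minimal sketch of that encoding, not part of the patch; PackDictionaryMatch is a hypothetical helper name, not a function in the Brotli sources.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper mirroring the documented matches[] encoding: the
   length code occupies the low 5 bits, the distance the bits above them. */
static inline uint32_t PackDictionaryMatch(uint32_t distance,
                                           uint32_t len_code) {
  return (distance << 5) + len_code;
}

int main(void) {
  uint32_t m = PackDictionaryMatch(1234, 7);
  /* Recover both fields; prints "distance=1234 len_code=7". */
  printf("distance=%u len_code=%u\n", m >> 5, m & 31);
  return 0;
}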
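
Similarly, the WriteBits comment block can be illustrated with a small standalone sketch of its little-endian branch, under the precondition the comment states: the bytes ahead of the write position are zero-initialized. WriteBitsSketch is a hypothetical name, and the memcpy pair stands in for the BROTLI_UNALIGNED_STORE64 macro used by the real header.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Sketch of the little-endian WriteBits technique: bits go into bytes at
   increasing addresses, least-significant-bit first within a byte, with one
   unaligned 64-bit store per call. */
static void WriteBitsSketch(size_t n_bits, uint64_t bits,
                            size_t* pos, uint8_t* array) {
  assert(n_bits <= 56);
  assert((bits >> n_bits) == 0);
  uint8_t* p = &array[*pos >> 3];  /* byte holding the next free bit */
  uint64_t v;
  memcpy(&v, p, sizeof v);         /* load 8 bytes; the bytes ahead are 0 */
  v |= bits << (*pos & 7);         /* splice new bits above the used ones */
  memcpy(p, &v, sizeof v);         /* single 64-bit store sets them all */
  *pos += n_bits;
}

int main(void) {
  uint8_t buf[16] = { 0 };         /* stream ahead must be zero-initialized */
  size_t pos = 0;
  WriteBitsSketch(3, 5, &pos, buf);    /* the 3 "R" bits from the example */
  WriteBitsSketch(5, 0x1f, &pos, buf); /* 5 more bits OR'ed into BYTE-0 */
  /* Prints "buf[0] = 0xfd, pos = 8": 101 in the low bits, 11111 above. */
  printf("buf[0] = 0x%02x, pos = %u\n", (unsigned)buf[0], (unsigned)pos);
  return 0;
}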