Merge pull request #361 from eustas/to-v0.4

Step 2: update comments
This commit is contained in:
eustas 2016-06-03 11:32:32 +02:00
commit 6e356105b5
42 changed files with 1046 additions and 1033 deletions

enc/backward_references.cc View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function to find backward reference copies.
/* Function to find backward reference copies. */
#include "./backward_references.h"
@ -12,16 +12,17 @@
#include <limits>
#include <vector>
#include "../common/types.h"
#include "./command.h"
#include "./fast_log.h"
#include "./literal_cost.h"
namespace brotli {
// The maximum length for which the zopflification uses distinct distances.
/* The maximum length for which the zopflification uses distinct distances. */
static const uint16_t kMaxZopfliLen = 325;
// Histogram based cost model for zopflification.
/* Histogram based cost model for zopflification. */
class ZopfliCostModel {
public:
ZopfliCostModel(void) : min_cost_cmd_(kInfinity) {}
@ -178,9 +179,9 @@ inline size_t ComputeDistanceCode(size_t distance,
return distance + 15;
}
// REQUIRES: len >= 2, start_pos <= pos
// REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity
// Maintains the "ZopfliNode array invariant".
/* REQUIRES: len >= 2, start_pos <= pos */
/* REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity */
/* Maintains the "ZopfliNode array invariant". */
inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
size_t len, size_t len_code, size_t dist,
size_t short_code, float cost) {
@ -191,7 +192,7 @@ inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
next.cost = cost;
}
// Maintains the smallest 2^k cost difference together with their positions
/* Maintains the smallest 8 cost differences together with their positions */
class StartPosQueue {
public:
struct PosData {
@ -212,8 +213,8 @@ class StartPosQueue {
++idx_;
size_t len = size();
q_[offset] = posdata;
/* Restore the sorted order. In the list of |len| items at most |len - 1|
adjacent element comparisons / swaps are required. */
for (size_t i = 1; i < len; ++i) {
if (q_[offset & mask_].costdiff > q_[(offset + 1) & mask_].costdiff) {
std::swap(q_[offset & mask_], q_[(offset + 1) & mask_]);
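The comment above relies on only the newly inserted element being out of place, so one pass of at most |len - 1| adjacent comparisons/swaps re-sorts the queue. A minimal standalone sketch of that property (not part of this commit; names are hypothetical and the ring-buffer masking is omitted):

#include <algorithm>
#include <cassert>
#include <cstddef>

/* q[1..len-1] is already sorted ascending, the new value goes to q[0], and a
   single bubble pass restores the order. */
static void InsertFrontAndRestore(float* q, size_t len, float value) {
  q[0] = value;
  for (size_t i = 1; i < len; ++i) {
    if (q[i - 1] > q[i]) std::swap(q[i - 1], q[i]);
  }
  assert(std::is_sorted(q, q + len));
}

int main(void) {
  float q[5] = { 0.0f, 1.5f, 2.0f, 4.0f, 9.0f };  /* q[1..4] sorted */
  InsertFrontAndRestore(q, 5, 3.25f);             /* at most 4 swaps */
  return 0;
}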
@ -234,14 +235,14 @@ class StartPosQueue {
size_t idx_;
};
// Returns the minimum possible copy length that can improve the cost of any
// future position.
/* Returns the minimum possible copy length that can improve the cost of any
future position. */
static size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
const ZopfliNode* nodes,
const ZopfliCostModel& model,
const size_t num_bytes,
const size_t pos) {
// Compute the minimum possible cost of reaching any future position.
/* Compute the minimum possible cost of reaching any future position. */
const size_t start0 = queue.GetStartPosData(0).pos;
float min_cost = (nodes[start0].cost +
model.GetLiteralCosts(start0, pos) +
@ -250,13 +251,13 @@ static size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
size_t next_len_bucket = 4;
size_t next_len_offset = 10;
while (pos + len <= num_bytes && nodes[pos + len].cost <= min_cost) {
// We already reached (pos + len) with no more cost than the minimum
// possible cost of reaching anything from this pos, so there is no point in
// looking for lengths <= len.
/* We already reached (pos + len) with no more cost than the minimum
possible cost of reaching anything from this pos, so there is no point in
looking for lengths <= len. */
++len;
if (len == next_len_offset) {
// We reached the next copy length code bucket, so we add one more
// extra bit to the minimum cost.
/* We reached the next copy length code bucket, so we add one more
extra bit to the minimum cost. */
min_cost += static_cast<float>(1.0);
next_len_offset += next_len_bucket;
next_len_bucket *= 2;
@ -265,13 +266,13 @@ static size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
return len;
}
// Fills in dist_cache[0..3] with the last four distances (as defined by
// Section 4. of the Spec) that would be used at (block_start + pos) if we
// used the shortest path of commands from block_start, computed from
// nodes[0..pos]. The last four distances at block_start are in
// starting_dist_cach[0..3].
// REQUIRES: nodes[pos].cost < kInfinity
// REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant".
/* Fills in dist_cache[0..3] with the last four distances (as defined by
Section 4. of the Spec) that would be used at (block_start + pos) if we
used the shortest path of commands from block_start, computed from
nodes[0..pos]. The last four distances at block_start are in
starting_dist_cache[0..3].
REQUIRES: nodes[pos].cost < kInfinity
REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
static void ComputeDistanceCache(const size_t block_start,
const size_t pos,
const size_t max_backward,
@ -280,21 +281,21 @@ static void ComputeDistanceCache(const size_t block_start,
int* dist_cache) {
int idx = 0;
size_t p = pos;
// Because of prerequisite, does at most (pos + 1) / 2 iterations.
/* Because of prerequisite, does at most (pos + 1) / 2 iterations. */
while (idx < 4 && p > 0) {
const size_t clen = nodes[p].copy_length();
const size_t ilen = nodes[p].insert_length;
const size_t dist = nodes[p].copy_distance();
// Since block_start + p is the end position of the command, the copy part
// starts from block_start + p - clen. Distances that are greater than this
// or greater than max_backward are static dictionary references, and do
// not update the last distances. Also distance code 0 (last distance)
// does not update the last distances.
/* Since block_start + p is the end position of the command, the copy part
starts from block_start + p - clen. Distances that are greater than this
or greater than max_backward are static dictionary references, and do
not update the last distances. Also distance code 0 (last distance)
does not update the last distances. */
if (dist + clen <= block_start + p && dist <= max_backward &&
nodes[p].distance_code() > 0) {
dist_cache[idx++] = static_cast<int>(dist);
}
// Because of prerequisite, p >= clen + ilen >= 2.
/* Because of prerequisite, p >= clen + ilen >= 2. */
p -= clen + ilen;
}
for (; idx < 4; ++idx) {
@ -330,15 +331,15 @@ static void UpdateNodes(const size_t num_bytes,
const size_t min_len = ComputeMinimumCopyLength(
*queue, nodes, *model, num_bytes, pos);
// Go over the command starting positions in order of increasing cost
// difference.
/* Go over the command starting positions in order of increasing cost
difference. */
for (size_t k = 0; k < 5 && k < queue->size(); ++k) {
const StartPosQueue::PosData& posdata = queue->GetStartPosData(k);
const size_t start = posdata.pos;
const float start_costdiff = posdata.costdiff;
// Look for last distance matches using the distance cache from this
// starting position.
/* Look for last distance matches using the distance cache from this
starting position. */
size_t best_len = min_len - 1;
for (size_t j = 0; j < kNumDistanceShortCodes; ++j) {
const size_t idx = kDistanceCacheIndex[j];
@ -374,23 +375,23 @@ static void UpdateNodes(const size_t num_bytes,
}
}
// At higher iterations look only for new last distance matches, since
// looking only for new command start positions with the same distances
// does not help much.
/* At higher iterations look only for new last distance matches, since
looking only for new command start positions with the same distances
does not help much. */
if (k >= 2) continue;
// Loop through all possible copy lengths at this position.
/* Loop through all possible copy lengths at this position. */
size_t len = min_len;
for (size_t j = 0; j < num_matches; ++j) {
BackwardMatch match = matches[j];
size_t dist = match.distance;
bool is_dictionary_match = dist > max_distance;
// We already tried all possible last distance matches, so we can use
// normal distance code here.
/* We already tried all possible last distance matches, so we can use
normal distance code here. */
size_t dist_code = dist + 15;
// Try all copy lengths up until the maximum copy length corresponding
// to this distance. If the distance refers to the static dictionary, or
// the maximum length is long enough, try only one maximum length.
/* Try all copy lengths up until the maximum copy length corresponding
to this distance. If the distance refers to the static dictionary, or
the maximum length is long enough, try only one maximum length. */
size_t max_len = match.length();
if (len < max_len && (is_dictionary_match || max_len > kMaxZopfliLen)) {
len = max_len;
@ -487,8 +488,8 @@ static void ZopfliIterate(size_t num_bytes,
max_backward_limit, dist_cache, num_matches[i],
&matches[cur_match_pos], &model, &queue, &nodes[0]);
cur_match_pos += num_matches[i];
// The zopflification can be too slow in case of very long lengths, so in
// such case skip it all, it does not cost a lot of compression ratio.
/* The zopflification can be too slow in case of very long lengths, so in
such case skip it all, it does not cost a lot of compression ratio. */
if (num_matches[i] == 1 &&
matches[cur_match_pos - 1].length() > kMaxZopfliLen) {
i += matches[cur_match_pos - 1].length() - 1;

enc/backward_references.h View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function to find backward reference copies.
/* Function to find backward reference copies. */
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
@ -17,10 +17,10 @@
namespace brotli {
// "commands" points to the next output command to write to, "*num_commands" is
// initially the total amount of commands output by previous
// CreateBackwardReferences calls, and must be incremented by the amount written
// by this call.
/* "commands" points to the next output command to write to, "*num_commands" is
initially the total amount of commands output by previous
CreateBackwardReferences calls, and must be incremented by the amount written
by this call. */
void CreateBackwardReferences(size_t num_bytes,
size_t position,
bool is_last,
@ -66,31 +66,32 @@ struct ZopfliNode {
return copy_length() + insert_length;
}
// best length to get up to this byte (not including this byte itself)
// highest 8 bit is used to reconstruct the length code
/* best length to get up to this byte (not including this byte itself);
the highest 8 bits are used to reconstruct the length code */
uint32_t length;
// distance associated with the length
// highest 7 bit contains distance short code + 1 (or zero if no short code)
/* distance associated with the length;
the highest 7 bits contain distance short code + 1 (or zero if no short code)
*/
uint32_t distance;
// number of literal inserts before this copy
/* number of literal inserts before this copy */
uint32_t insert_length;
// smallest cost to get to this byte from the beginning, as found so far
/* Smallest cost to get to this byte from the beginning, as found so far. */
float cost;
};
// Computes the shortest path of commands from position to at most
// position + num_bytes.
//
// On return, path->size() is the number of commands found and path[i] is the
// length of the ith command (copy length plus insert length).
// Note that the sum of the lengths of all commands can be less than num_bytes.
//
// On return, the nodes[0..num_bytes] array will have the following
// "ZopfliNode array invariant":
// For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
// (1) nodes[i].copy_length() >= 2
// (2) nodes[i].command_length() <= i and
// (3) nodes[i - nodes[i].command_length()].cost < kInfinity
/* Computes the shortest path of commands from position to at most
position + num_bytes.
On return, path->size() is the number of commands found and path[i] is the
length of the ith command (copy length plus insert length).
Note that the sum of the lengths of all commands can be less than num_bytes.
On return, the nodes[0..num_bytes] array will have the following
"ZopfliNode array invariant":
For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
(1) nodes[i].copy_length() >= 2
(2) nodes[i].command_length() <= i and
(3) nodes[i - nodes[i].command_length()].cost < kInfinity */
void ZopfliComputeShortestPath(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
@ -113,4 +114,4 @@ void ZopfliCreateCommands(const size_t num_bytes,
} // namespace brotli
#endif // BROTLI_ENC_BACKWARD_REFERENCES_H_
#endif /* BROTLI_ENC_BACKWARD_REFERENCES_H_ */
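The "ZopfliNode array invariant" spelled out above can be expressed as a small check. A sketch (not from this commit; it uses a simplified node with plain fields instead of the packed length/distance encoding, and treats kInfinity as a float infinity):

#include <cstddef>
#include <limits>

struct SimpleZopfliNode {
  size_t copy_length;    /* copy part of the command ending at this byte */
  size_t insert_length;  /* literals inserted before the copy */
  float cost;            /* smallest cost found so far */
};

static bool CheckZopfliInvariant(const SimpleZopfliNode* nodes,
                                 size_t num_bytes) {
  const float kInf = std::numeric_limits<float>::infinity();
  for (size_t i = 1; i <= num_bytes; ++i) {
    if (!(nodes[i].cost < kInf)) continue;  /* unreachable positions exempt */
    const size_t cmd_len = nodes[i].copy_length + nodes[i].insert_length;
    if (nodes[i].copy_length < 2) return false;           /* condition (1) */
    if (cmd_len > i) return false;                        /* condition (2) */
    if (!(nodes[i - cmd_len].cost < kInf)) return false;  /* condition (3) */
  }
  return true;
}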

enc/bit_cost.h View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Functions to estimate the bit cost of Huffman trees.
/* Functions to estimate the bit cost of Huffman trees. */
#ifndef BROTLI_ENC_BIT_COST_H_
#define BROTLI_ENC_BIT_COST_H_
@ -42,7 +42,7 @@ static inline double BitsEntropy(const uint32_t *population, size_t size) {
size_t sum;
double retval = ShannonEntropy(population, size, &sum);
if (retval < sum) {
// At least one bit per literal is needed.
/* At least one bit per literal is needed. */
retval = static_cast<double>(sum);
}
return retval;
@ -158,4 +158,4 @@ double PopulationCost(const Histogram<kSize>& histogram) {
} // namespace brotli
#endif // BROTLI_ENC_BIT_COST_H_
#endif /* BROTLI_ENC_BIT_COST_H_ */

enc/block_splitter.cc View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Block split point selection utilities.
/* Block split point selection utilities. */
#include "./block_splitter.h"
@ -42,7 +42,7 @@ void CopyLiteralsToByteArray(const Command* cmds,
const size_t offset,
const size_t mask,
std::vector<uint8_t>* literals) {
// Count how many we have.
/* Count how many we have. */
size_t total_length = 0;
for (size_t i = 0; i < num_commands; ++i) {
total_length += cmds[i].insert_len_;
@ -456,11 +456,11 @@ void SplitBlock(const Command* cmds,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split) {
{
// Create a continuous array of literals.
/* Create a continuous array of literals. */
std::vector<uint8_t> literals;
CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, &literals);
// Create the block split on the array of literals.
// Literal histograms have alphabet size 256.
/* Create the block split on the array of literals.
Literal histograms have alphabet size 256. */
SplitByteVector<256>(
literals,
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
@ -469,12 +469,12 @@ void SplitBlock(const Command* cmds,
}
{
// Compute prefix codes for commands.
/* Compute prefix codes for commands. */
std::vector<uint16_t> insert_and_copy_codes(num_commands);
for (size_t i = 0; i < num_commands; ++i) {
insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
}
// Create the block split on the array of command prefixes.
/* Create the block split on the array of command prefixes. */
SplitByteVector<kNumCommandPrefixes>(
insert_and_copy_codes,
kSymbolsPerCommandHistogram, kMaxCommandHistograms,
@ -483,7 +483,7 @@ void SplitBlock(const Command* cmds,
}
{
// Create a continuous array of distance prefixes.
/* Create a continuous array of distance prefixes. */
std::vector<uint16_t> distance_prefixes(num_commands);
size_t pos = 0;
for (size_t i = 0; i < num_commands; ++i) {
@ -493,7 +493,7 @@ void SplitBlock(const Command* cmds,
}
}
distance_prefixes.resize(pos);
// Create the block split on the array of distance prefixes.
/* Create the block split on the array of distance prefixes. */
SplitByteVector<kNumDistancePrefixes>(
distance_prefixes,
kSymbolsPerDistanceHistogram, kMaxCommandHistograms,

enc/block_splitter.h View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Block split point selection utilities.
/* Block split point selection utilities. */
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
#define BROTLI_ENC_BLOCK_SPLITTER_H_
@ -58,4 +58,4 @@ void SplitBlock(const Command* cmds,
} // namespace brotli
#endif // BROTLI_ENC_BLOCK_SPLITTER_H_
#endif /* BROTLI_ENC_BLOCK_SPLITTER_H_ */

enc/brotli_bit_stream.cc View File

@ -4,9 +4,9 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Brotli bit stream functions to support the low level format. There are no
// compression algorithms here, just the right ordering of bits to match the
// specs.
/* Brotli bit stream functions to support the low level format. There are no
compression algorithms here, just the right ordering of bits to match the
specs. */
#include "./brotli_bit_stream.h"
@ -16,6 +16,7 @@
#include <limits>
#include <vector>
#include "../common/types.h"
#include "./bit_cost.h"
#include "./context.h"
#include "./entropy_encode.h"
@ -34,9 +35,9 @@ static const size_t kContextMapAlphabetSize = 256 + 16;
// Block type alphabet has 256 block id symbols plus 2 special symbols.
static const size_t kBlockTypeAlphabetSize = 256 + 2;
// nibblesbits represents the 2 bits to encode MNIBBLES (0-3)
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
/* nibblesbits represents the 2 bits to encode MNIBBLES (0-3)
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
void EncodeMlen(size_t length, uint64_t* bits,
size_t* numbits, uint64_t* nibblesbits) {
assert(length > 0);
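For reference, the MNIBBLES/MLEN encoding that EncodeMlen implements can be sketched as follows (illustrative only, not this commit's code; it picks 4, 5 or 6 nibbles so that MLEN - 1 fits in 16, 20 or 24 bits):

#include <cassert>
#include <cstdint>
#include <cstdio>

static void SketchEncodeMlen(size_t length, uint64_t* bits,
                             size_t* numbits, uint64_t* nibblesbits) {
  assert(length > 0 && length <= (1u << 24));
  const uint64_t lenm1 = length - 1;
  const size_t nibbles = (lenm1 > 0xFFFF) ? ((lenm1 > 0xFFFFF) ? 6 : 5) : 4;
  *nibblesbits = nibbles - 4;  /* 2-bit MNIBBLES field stores MNIBBLES - 4 */
  *numbits = nibbles * 4;      /* MLEN - 1 uses one nibble per MNIBBLES */
  *bits = lenm1;
}

int main(void) {
  uint64_t bits, nibblesbits;
  size_t numbits;
  SketchEncodeMlen(100000, &bits, &numbits, &nibblesbits);  /* needs 5 nibbles */
  std::printf("MNIBBLES-4=%llu, MLEN-1=%llu in %zu bits\n",
              (unsigned long long)nibblesbits, (unsigned long long)bits,
              numbits);
  return 0;
}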
@ -76,13 +77,16 @@ void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) {
}
}
/* Stores the compressed meta-block header.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
void StoreCompressedMetaBlockHeader(bool final_block,
size_t length,
size_t* storage_ix,
uint8_t* storage) {
// Write ISLAST bit.
/* Write ISLAST bit. */
WriteBits(1, final_block, storage_ix, storage);
// Write ISEMPTY bit.
/* Write ISEMPTY bit. */
if (final_block) {
WriteBits(1, 0, storage_ix, storage);
}
@ -95,15 +99,19 @@ void StoreCompressedMetaBlockHeader(bool final_block,
WriteBits(nlenbits, lenbits, storage_ix, storage);
if (!final_block) {
// Write ISUNCOMPRESSED bit.
/* Write ISUNCOMPRESSED bit. */
WriteBits(1, 0, storage_ix, storage);
}
}
/* Stores the uncompressed meta-block header.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
void StoreUncompressedMetaBlockHeader(size_t length,
size_t* storage_ix,
uint8_t* storage) {
// Write ISLAST bit. Uncompressed block cannot be the last one, so set to 0.
/* Write ISLAST bit.
Uncompressed block cannot be the last one, so set to 0. */
WriteBits(1, 0, storage_ix, storage);
uint64_t lenbits;
size_t nlenbits;
@ -111,7 +119,7 @@ void StoreUncompressedMetaBlockHeader(size_t length,
EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
WriteBits(2, nibblesbits, storage_ix, storage);
WriteBits(nlenbits, lenbits, storage_ix, storage);
// Write ISUNCOMPRESSED bit.
/* Write ISUNCOMPRESSED bit. */
WriteBits(1, 1, storage_ix, storage);
}
@ -123,16 +131,16 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
static const uint8_t kStorageOrder[kCodeLengthCodes] = {
1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
// The bit lengths of the Huffman code over the code length alphabet
// are compressed with the following static Huffman code:
// Symbol Code
// ------ ----
// 0 00
// 1 1110
// 2 110
// 3 01
// 4 10
// 5 1111
/* The bit lengths of the Huffman code over the code length alphabet
are compressed with the following static Huffman code:
Symbol Code
------ ----
0 00
1 1110
2 110
3 01
4 10
5 1111 */
static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = {
0, 7, 3, 2, 1, 15
};
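The symbol values above are the bit-reversed forms of the codes listed in the comment, since the encoder writes the low bit of each stored value first. A small sketch (not part of this commit) that reproduces the Symbol/Code table from the symbol array above and the bit-length array that follows:

#include <cstdint>
#include <cstdio>

int main(void) {
  static const uint8_t kSymbols[6] = { 0, 7, 3, 2, 1, 15 };  /* as above */
  static const uint8_t kDepths[6] = { 2, 4, 3, 2, 2, 4 };    /* bit lengths */
  for (int s = 0; s < 6; ++s) {
    std::printf("symbol %d -> ", s);
    for (int bit = 0; bit < kDepths[s]; ++bit) {
      std::printf("%d", (kSymbols[s] >> bit) & 1);  /* stream order */
    }
    std::printf("\n");  /* e.g. symbol 1 prints 1110, matching the table */
  }
  return 0;
}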
@ -140,7 +148,7 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
2, 4, 3, 2, 2, 4
};
// Throw away trailing zeros:
/* Throw away trailing zeros: */
size_t codes_to_store = kCodeLengthCodes;
if (num_codes > 1) {
for (; codes_to_store > 0; --codes_to_store) {
@ -149,12 +157,12 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
}
}
}
size_t skip_some = 0; // skips none.
size_t skip_some = 0; /* skips none. */
if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
code_length_bitdepth[kStorageOrder[1]] == 0) {
skip_some = 2; // skips two.
skip_some = 2; /* skips two. */
if (code_length_bitdepth[kStorageOrder[2]] == 0) {
skip_some = 3; // skips three.
skip_some = 3; /* skips three. */
}
}
WriteBits(2, skip_some, storage_ix, storage);
@ -177,7 +185,7 @@ static void StoreHuffmanTreeToBitMask(
size_t ix = huffman_tree[i];
WriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix],
storage_ix, storage);
// Extra bits
/* Extra bits */
switch (ix) {
case 16:
WriteBits(2, huffman_tree_extra_bits[i], storage_ix, storage);
@ -194,11 +202,11 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths,
size_t num_symbols,
size_t max_bits,
size_t *storage_ix, uint8_t *storage) {
// value of 1 indicates a simple Huffman code
/* value of 1 indicates a simple Huffman code */
WriteBits(2, 1, storage_ix, storage);
WriteBits(2, num_symbols - 1, storage_ix, storage); // NSYM - 1
WriteBits(2, num_symbols - 1, storage_ix, storage); /* NSYM - 1 */
// Sort
/* Sort */
for (size_t i = 0; i < num_symbols; i++) {
for (size_t j = i + 1; j < num_symbols; j++) {
if (depths[symbols[j]] < depths[symbols[i]]) {
@ -219,19 +227,19 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths,
WriteBits(max_bits, symbols[1], storage_ix, storage);
WriteBits(max_bits, symbols[2], storage_ix, storage);
WriteBits(max_bits, symbols[3], storage_ix, storage);
// tree-select
/* tree-select */
WriteBits(1, depths[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
}
}
// num = alphabet size
// depths = symbol depths
/* num = alphabet size
depths = symbol depths */
void StoreHuffmanTree(const uint8_t* depths, size_t num,
HuffmanTree* tree,
size_t *storage_ix, uint8_t *storage) {
// Write the Huffman tree into the brotli-representation.
// The command alphabet is the largest, so this allocation will fit all
// alphabets.
/* Write the Huffman tree into the brotli-representation.
The command alphabet is the largest, so this allocation will fit all
alphabets. */
assert(num <= kNumCommandPrefixes);
uint8_t huffman_tree[kNumCommandPrefixes];
uint8_t huffman_tree_extra_bits[kNumCommandPrefixes];
@ -239,7 +247,7 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
WriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree,
huffman_tree_extra_bits);
// Calculate the statistics of the Huffman tree in brotli-representation.
/* Calculate the statistics of the Huffman tree in brotli-representation. */
uint32_t huffman_tree_histogram[kCodeLengthCodes] = { 0 };
for (size_t i = 0; i < huffman_tree_size; ++i) {
++huffman_tree_histogram[huffman_tree[i]];
@ -259,8 +267,8 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
}
}
// Calculate another Huffman tree to use for compressing both the
// earlier Huffman tree with.
/* Calculate another Huffman tree to use for compressing the
earlier Huffman tree. */
uint8_t code_length_bitdepth[kCodeLengthCodes] = { 0 };
uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = { 0 };
CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes,
@ -268,7 +276,7 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes,
&code_length_bitdepth_symbols[0]);
// Now, we have all the data, let's start storing it
/* Now that we have all the data, let's start storing it. */
StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth,
storage_ix, storage);
@ -276,7 +284,7 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
code_length_bitdepth[code] = 0;
}
// Store the real huffman tree now.
/* Store the real huffman tree now. */
StoreHuffmanTreeToBitMask(huffman_tree_size,
huffman_tree,
huffman_tree_extra_bits,
@ -285,6 +293,8 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
storage_ix, storage);
}
/* Builds a Huffman tree from histogram[0:length] into depth[0:length] and
bits[0:length] and stores the encoded tree to the bit stream. */
void BuildAndStoreHuffmanTree(const uint32_t *histogram,
const size_t length,
HuffmanTree* tree,
@ -379,13 +389,13 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
}
const int n = static_cast<int>(node - tree);
std::sort(tree, node, SortHuffmanTree);
// The nodes are:
// [0, n): the sorted leaf nodes that we start with.
// [n]: we add a sentinel here.
// [n + 1, 2n): new parent nodes are added here, starting from
// (n+1). These are naturally in ascending order.
// [2n]: we add a sentinel at the end as well.
// There will be (2n+1) elements at the end.
/* The nodes are:
[0, n): the sorted leaf nodes that we start with.
[n]: we add a sentinel here.
[n + 1, 2n): new parent nodes are added here, starting from
(n+1). These are naturally in ascending order.
[2n]: we add a sentinel at the end as well.
There will be (2n+1) elements at the end. */
const HuffmanTree sentinel(std::numeric_limits<int>::max(), -1, -1);
*node++ = sentinel;
*node++ = sentinel;
@ -408,18 +418,17 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
right = j;
++j;
}
// The sentinel node becomes the parent node.
/* The sentinel node becomes the parent node. */
node[-1].total_count_ =
tree[left].total_count_ + tree[right].total_count_;
node[-1].index_left_ = static_cast<int16_t>(left);
node[-1].index_right_or_value_ = static_cast<int16_t>(right);
// Add back the last sentinel node.
/* Add back the last sentinel node. */
*node++ = sentinel;
}
SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
// We need to pack the Huffman tree in 14 bits.
// If this was not successful, add fake entities to the lowest values
// and retry.
/* We need to pack the Huffman tree in 14 bits. If this was not
successful, add fake entities to the lowest values and retry. */
if (PREDICT_TRUE(*std::max_element(&depth[0], &depth[length]) <= 14)) {
break;
}
@ -427,11 +436,11 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
free(tree);
ConvertBitDepthsToSymbols(depth, length, bits);
if (count <= 4) {
// value of 1 indicates a simple Huffman code
/* value of 1 indicates a simple Huffman code */
WriteBits(2, 1, storage_ix, storage);
WriteBits(2, count - 1, storage_ix, storage); // NSYM - 1
WriteBits(2, count - 1, storage_ix, storage); /* NSYM - 1 */
// Sort
/* Sort */
for (size_t i = 0; i < count; i++) {
for (size_t j = i + 1; j < count; j++) {
if (depth[symbols[j]] < depth[symbols[i]]) {
@ -452,14 +461,14 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
WriteBits(max_bits, symbols[1], storage_ix, storage);
WriteBits(max_bits, symbols[2], storage_ix, storage);
WriteBits(max_bits, symbols[3], storage_ix, storage);
// tree-select
/* tree-select */
WriteBits(1, depth[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
}
} else {
// Complex Huffman Tree
/* Complex Huffman Tree */
StoreStaticCodeLengthCode(storage_ix, storage);
// Actual rle coding.
/* Actual rle coding. */
uint8_t previous_value = 8;
for (size_t i = 0; i < length;) {
const uint8_t value = depth[i];
@ -531,12 +540,12 @@ static void MoveToFrontTransform(const uint32_t* __restrict v_in,
}
}
// Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of
// the run length plus extra bits (lower 9 bits is the prefix code and the rest
// are the extra bits). Non-zero values in v[] are shifted by
// *max_length_prefix. Will not create prefix codes bigger than the initial
// value of *max_run_length_prefix. The prefix code of run length L is simply
// Log2Floor(L) and the number of extra bits is the same as the prefix code.
/* Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of
the run length plus extra bits (lower 9 bits is the prefix code and the rest
are the extra bits). Non-zero values in v[] are shifted by
*max_length_prefix. Will not create prefix codes bigger than the initial
value of *max_run_length_prefix. The prefix code of run length L is simply
Log2Floor(L) and the number of extra bits is the same as the prefix code. */
static void RunLengthCodeZeros(const size_t in_size,
uint32_t* __restrict v,
size_t* __restrict out_size,
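A worked example of the packing described above (illustrative, not this commit's code, and assuming the run is short enough for the chosen max_run_length_prefix): a run of 21 zeros gets prefix code Log2Floor(21) = 4, hence 4 extra bits with value 21 - 16 = 5, packed as prefix + (extra << 9):

#include <cassert>
#include <cstdint>
#include <cstdio>

static uint32_t Log2Floor(uint32_t n) {
  uint32_t r = 0;
  while (n > 1) { n >>= 1; ++r; }
  return r;
}

int main(void) {
  const uint32_t reps = 21;                       /* run of 21 zeros */
  const uint32_t prefix = Log2Floor(reps);        /* 4 */
  const uint32_t extra = reps - (1u << prefix);   /* 5, sent in 4 extra bits */
  const uint32_t packed = prefix + (extra << 9);  /* low 9 bits: prefix code */
  assert((packed & 511u) == prefix && (packed >> 9) == extra);
  std::printf("reps=%u -> packed symbol %u\n", reps, packed);
  return 0;
}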
@ -630,6 +639,7 @@ void EncodeContextMap(const std::vector<uint32_t>& context_map,
delete[] rle_symbols;
}
/* Stores the block switch command with index block_ix to the bit stream. */
void StoreBlockSwitch(const BlockSplitCode& code,
const size_t block_ix,
size_t* storage_ix,
@ -646,6 +656,8 @@ void StoreBlockSwitch(const BlockSplitCode& code,
storage_ix, storage);
}
/* Builds a BlockSplitCode data structure from the block split given by the
vector of block types and block lengths and stores it to the bit stream. */
static void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
const std::vector<uint32_t>& lengths,
const size_t num_types,
@ -695,6 +707,7 @@ static void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
}
}
/* Stores a context map where the histogram type is always the block type. */
void StoreTrivialContextMap(size_t num_types,
size_t context_bits,
HuffmanTree* tree,
@ -711,7 +724,7 @@ void StoreTrivialContextMap(size_t num_types,
memset(histogram, 0, alphabet_size * sizeof(histogram[0]));
memset(depths, 0, alphabet_size * sizeof(depths[0]));
memset(bits, 0, alphabet_size * sizeof(bits[0]));
// Write RLEMAX.
/* Write RLEMAX. */
WriteBits(1, 1, storage_ix, storage);
WriteBits(4, repeat_code - 1, storage_ix, storage);
histogram[repeat_code] = static_cast<uint32_t>(num_types);
@ -728,12 +741,12 @@ void StoreTrivialContextMap(size_t num_types,
WriteBits(depths[repeat_code], bits[repeat_code], storage_ix, storage);
WriteBits(repeat_code, repeat_bits, storage_ix, storage);
}
// Write IMTF (inverse-move-to-front) bit.
/* Write IMTF (inverse-move-to-front) bit. */
WriteBits(1, 1, storage_ix, storage);
}
}
// Manages the encoding of one block category (literal, command or distance).
/* Manages the encoding of one block category (literal, command or distance). */
class BlockEncoder {
public:
BlockEncoder(size_t alphabet_size,
@ -748,8 +761,8 @@ class BlockEncoder {
block_len_(block_lengths.empty() ? 0 : block_lengths[0]),
entropy_ix_(0) {}
// Creates entropy codes of block lengths and block types and stores them
// to the bit stream.
/* Creates entropy codes of block lengths and block types and stores them
to the bit stream. */
void BuildAndStoreBlockSwitchEntropyCodes(HuffmanTree* tree,
size_t* storage_ix,
uint8_t* storage) {
@ -776,8 +789,8 @@ class BlockEncoder {
}
}
// Stores the next symbol with the entropy code of the current block type.
// Updates the block type and block length at block boundaries.
/* Stores the next symbol with the entropy code of the current block type.
Updates the block type and block length at block boundaries. */
void StoreSymbol(size_t symbol, size_t* storage_ix, uint8_t* storage) {
if (block_len_ == 0) {
++block_ix_;
@ -790,9 +803,9 @@ class BlockEncoder {
WriteBits(depths_[ix], bits_[ix], storage_ix, storage);
}
// Stores the next symbol with the entropy code of the current block type and
// context value.
// Updates the block type and block length at block boundaries.
/* Stores the next symbol with the entropy code of the current block type and
context value.
Updates the block type and block length at block boundaries. */
template<int kContextBits>
void StoreSymbolWithContext(size_t symbol, size_t context,
const std::vector<uint32_t>& context_map,
@ -1132,8 +1145,8 @@ void StoreMetaBlockFast(const uint8_t* input,
}
}
// This is for storing uncompressed blocks (simple raw storage of
// bytes-as-bytes).
/* This is for storing uncompressed blocks (simple raw storage of
bytes-as-bytes). */
void StoreUncompressedMetaBlock(bool final_block,
const uint8_t * __restrict input,
size_t position, size_t mask,
@ -1154,15 +1167,15 @@ void StoreUncompressedMetaBlock(bool final_block,
memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len);
*storage_ix += len << 3;
// We need to clear the next 4 bytes to continue to be
// compatible with WriteBits.
/* We need to clear the next 4 bytes to continue to be
compatible with BrotliWriteBits. */
brotli::WriteBitsPrepareStorage(*storage_ix, storage);
// Since the uncompressed block itself may not be the final block, add an
// empty one after this.
/* Since the uncompressed block itself may not be the final block, add an
empty one after this. */
if (final_block) {
brotli::WriteBits(1, 1, storage_ix, storage); // islast
brotli::WriteBits(1, 1, storage_ix, storage); // isempty
brotli::WriteBits(1, 1, storage_ix, storage); /* islast */
brotli::WriteBits(1, 1, storage_ix, storage); /* isempty */
JumpToByteBoundary(storage_ix, storage);
}
}

enc/brotli_bit_stream.h View File

@ -4,14 +4,14 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Functions to convert brotli-related data structures into the
// brotli bit stream. The functions here operate under
// assumption that there is enough space in the storage, i.e., there are
// no out-of-range checks anywhere.
//
// These functions do bit addressing into a byte array. The byte array
// is called "storage" and the index to the bit is called storage_ix
// in function arguments.
/* Functions to convert brotli-related data structures into the
brotli bit stream. The functions here operate under
assumption that there is enough space in the storage, i.e., there are
no out-of-range checks anywhere.
These functions do bit addressing into a byte array. The byte array
is called "storage" and the index to the bit is called storage_ix
in function arguments. */
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
@ -24,8 +24,8 @@
namespace brotli {
// All Store functions here will use a storage_ix, which is always the bit
// position for the current storage.
/* All Store functions here will use a storage_ix, which is always the bit
position for the current storage. */
// Stores a number between 0 and 255.
void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage);
@ -114,8 +114,8 @@ void StoreBlockSwitch(const BlockSplitCode& code,
size_t* storage_ix,
uint8_t* storage);
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
/* REQUIRES: length > 0 */
/* REQUIRES: length <= (1 << 24) */
void StoreMetaBlock(const uint8_t* input,
size_t start_pos,
size_t length,
@ -132,10 +132,10 @@ void StoreMetaBlock(const uint8_t* input,
size_t *storage_ix,
uint8_t *storage);
// Stores the meta-block without doing any block splitting, just collects
// one histogram per block category and uses that for entropy coding.
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
/* Stores the meta-block without doing any block splitting, just collects
one histogram per block category and uses that for entropy coding.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
void StoreMetaBlockTrivial(const uint8_t* input,
size_t start_pos,
size_t length,
@ -146,10 +146,10 @@ void StoreMetaBlockTrivial(const uint8_t* input,
size_t *storage_ix,
uint8_t *storage);
// Same as above, but uses static prefix codes for histograms with a only a few
// symbols, and uses static code length prefix codes for all other histograms.
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
/* Same as above, but uses static prefix codes for histograms with only a few
symbols, and uses static code length prefix codes for all other histograms.
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
void StoreMetaBlockFast(const uint8_t* input,
size_t start_pos,
size_t length,
@ -160,10 +160,10 @@ void StoreMetaBlockFast(const uint8_t* input,
size_t *storage_ix,
uint8_t *storage);
// This is for storing uncompressed blocks (simple raw storage of
// bytes-as-bytes).
// REQUIRES: length > 0
// REQUIRES: length <= (1 << 24)
/* This is for storing uncompressed blocks (simple raw storage of
bytes-as-bytes).
REQUIRES: length > 0
REQUIRES: length <= (1 << 24) */
void StoreUncompressedMetaBlock(bool final_block,
const uint8_t* input,
size_t position, size_t mask,
@ -171,9 +171,9 @@ void StoreUncompressedMetaBlock(bool final_block,
size_t* storage_ix,
uint8_t* storage);
// Stores an empty metadata meta-block and syncs to a byte boundary.
/* Stores an empty metadata meta-block and syncs to a byte boundary. */
void StoreSyncMetaBlock(size_t* storage_ix, uint8_t* storage);
} // namespace brotli
#endif // BROTLI_ENC_BROTLI_BIT_STREAM_H_
#endif /* BROTLI_ENC_BROTLI_BIT_STREAM_H_ */

enc/cluster.h View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Functions for clustering similar histograms together.
/* Functions for clustering similar histograms together. */
#ifndef BROTLI_ENC_CLUSTER_H_
#define BROTLI_ENC_CLUSTER_H_
@ -328,4 +328,4 @@ void ClusterHistograms(const std::vector<HistogramType>& in,
} // namespace brotli
#endif // BROTLI_ENC_CLUSTER_H_
#endif /* BROTLI_ENC_CLUSTER_H_ */

enc/command.h View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// This class models a sequence of literals and a backward reference copy.
/* This class models a sequence of literals and a backward reference copy. */
#ifndef BROTLI_ENC_COMMAND_H_
#define BROTLI_ENC_COMMAND_H_
@ -63,8 +63,8 @@ static inline uint16_t CombineLengthCodes(
if (use_last_distance && inscode < 8 && copycode < 16) {
return (copycode < 8) ? bits64 : (bits64 | 64);
} else {
// "To convert an insert-and-copy length code to an insert length code and
// a copy length code, the following table can be used"
/* "To convert an insert-and-copy length code to an insert length code and
a copy length code, the following table can be used" */
static const uint16_t cells[9] = { 128u, 192u, 384u, 256u, 320u, 512u,
448u, 576u, 640u };
return cells[(copycode >> 3) + 3 * (inscode >> 3)] | bits64;
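A quick evaluation of the cells[] branch above (illustrative, not part of this commit; bits64 is computed earlier in the real function and is assumed here to pack the low three bits of each code):

#include <cstdint>
#include <cstdio>

int main(void) {
  static const uint16_t cells[9] = { 128u, 192u, 384u, 256u, 320u, 512u,
                                     448u, 576u, 640u };
  const uint16_t inscode = 10, copycode = 4;  /* inscode >= 8: cells[] branch */
  const uint16_t bits64 =                     /* assumed low-bit packing */
      static_cast<uint16_t>((copycode & 0x7u) | ((inscode & 0x7u) << 3));
  const uint16_t combined =
      cells[(copycode >> 3) + 3 * (inscode >> 3)] | bits64;
  std::printf("inscode %u, copycode %u -> %u\n", inscode, copycode, combined);
  return 0;  /* prints 276 = cells[3] (256) | 20 */
}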
@ -153,4 +153,4 @@ struct Command {
} // namespace brotli
#endif // BROTLI_ENC_COMMAND_H_
#endif /* BROTLI_ENC_COMMAND_H_ */

enc/compress_fragment.cc View File

@ -4,13 +4,13 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function for fast encoding of an input fragment, independently from the input
// history. This function uses one-pass processing: when we find a backward
// match, we immediately emit the corresponding command and literal codes to
// the bit stream.
//
// Adapted from the CompressFragment() function in
// https://github.com/google/snappy/blob/master/snappy.cc
/* Function for fast encoding of an input fragment, independently from the input
history. This function uses one-pass processing: when we find a backward
match, we immediately emit the corresponding command and literal codes to
the bit stream.
Adapted from the CompressFragment() function in
https://github.com/google/snappy/blob/master/snappy.cc */
#include "./compress_fragment.h"
@ -27,12 +27,12 @@
namespace brotli {
// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime, the oddity is enough
// for this use.
// * The number has been tuned heuristically against compression benchmarks.
/* kHashMul32 multiplier has these properties:
* The multiplier must be odd. Otherwise we may lose the highest bit.
* No long streaks of 1s or 0s.
* There is no effort to ensure that it is a prime, the oddity is enough
for this use.
* The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;
static inline uint32_t Hash(const uint8_t* p, size_t shift) {
@ -52,12 +52,12 @@ static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
p1[4] == p2[4]);
}
// Builds a literal prefix code into "depths" and "bits" based on the statistics
// of the "input" string and stores it into the bit stream.
// Note that the prefix code here is built from the pre-LZ77 input, therefore
// we can only approximate the statistics of the actual literal stream.
// Moreover, for long inputs we build a histogram from a sample of the input
// and thus have to assign a non-zero depth for each literal.
/* Builds a literal prefix code into "depths" and "bits" based on the statistics
of the "input" string and stores it into the bit stream.
Note that the prefix code here is built from the pre-LZ77 input, therefore
we can only approximate the statistics of the actual literal stream.
Moreover, for long inputs we build a histogram from a sample of the input
and thus have to assign a non-zero depth for each literal. */
static void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
const size_t input_size,
uint8_t depths[256],
@ -72,8 +72,8 @@ static void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
}
histogram_total = input_size;
for (size_t i = 0; i < 256; ++i) {
// We weigh the first 11 samples with weight 3 to account for the
// balancing effect of the LZ77 phase on the histogram.
/* We weigh the first 11 samples with weight 3 to account for the
balancing effect of the LZ77 phase on the histogram. */
const uint32_t adjust = 2 * std::min(histogram[i], 11u);
histogram[i] += adjust;
histogram_total += adjust;
@ -85,11 +85,11 @@ static void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
}
histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
for (size_t i = 0; i < 256; ++i) {
// We add 1 to each population count to avoid 0 bit depths (since this is
// only a sample and we don't know if the symbol appears or not), and we
// weigh the first 11 samples with weight 3 to account for the balancing
// effect of the LZ77 phase on the histogram (more frequent symbols are
// more likely to be in backward references instead as literals).
/* We add 1 to each population count to avoid 0 bit depths (since this is
only a sample and we don't know if the symbol appears or not), and we
weigh the first 11 samples with weight 3 to account for the balancing
effect of the LZ77 phase on the histogram (more frequent symbols are
more likely to be in backward references instead as literals). */
const uint32_t adjust = 1 + 2 * std::min(histogram[i], 11u);
histogram[i] += adjust;
histogram_total += adjust;
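The weighting rule described in the two comments above boils down to: counts of at most 11 are effectively tripled, and in the sampled case an extra +1 keeps every count nonzero. A tiny sketch (not part of this commit) of the adjusted counts:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main(void) {
  const uint32_t counts[3] = { 0, 5, 40 };
  for (int i = 0; i < 3; ++i) {
    const uint32_t h = counts[i];
    const uint32_t full = h + 2 * std::min(h, 11u);        /* whole input */
    const uint32_t sampled = h + 1 + 2 * std::min(h, 11u); /* sampled input */
    std::printf("count %2u -> adjusted %2u (full), %2u (sampled)\n",
                h, full, sampled);
  }
  return 0;
}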
@ -100,23 +100,23 @@ static void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
depths, bits, storage_ix, storage);
}
// Builds a command and distance prefix code (each 64 symbols) into "depth" and
// "bits" based on "histogram" and stores it into the bit stream.
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
"bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
uint8_t depth[128],
uint16_t bits[128],
size_t* storage_ix,
uint8_t* storage) {
// Tree size for building a tree over 64 symbols is 2 * 64 + 1.
/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
static const size_t kTreeSize = 129;
HuffmanTree tree[kTreeSize];
CreateHuffmanTree(histogram, 64, 15, tree, depth);
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
// We have to jump through a few hoopes here in order to compute
// the command bits because the symbols are in a different order than in
// the full alphabet. This looks complicated, but having the symbols
// in this order in the command bits saves a few branches in the Emit*
// functions.
/* We have to jump through a few hoops here in order to compute
the command bits because the symbols are in a different order than in
the full alphabet. This looks complicated, but having the symbols
in this order in the command bits saves a few branches in the Emit*
functions. */
uint8_t cmd_depth[64];
uint16_t cmd_bits[64];
memcpy(cmd_depth, depth, 24);
@ -134,7 +134,7 @@ static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
memcpy(bits + 56, cmd_bits + 56, 16);
ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
{
// Create the bit length array for the full command alphabet.
/* Create the bit length array for the full command alphabet. */
uint8_t cmd_depth[704] = { 0 };
memcpy(cmd_depth, depth, 8);
memcpy(cmd_depth + 64, depth + 8, 8);
@ -151,7 +151,7 @@ static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}
// REQUIRES: insertlen < 6210
/* REQUIRES: insertlen < 6210 */
inline void EmitInsertLen(size_t insertlen,
const uint8_t depth[128],
const uint16_t bits[128],
@ -299,21 +299,21 @@ inline void EmitLiterals(const uint8_t* input, const size_t len,
}
}
// REQUIRES: len <= 1 << 20.
/* REQUIRES: len <= 1 << 20. */
static void StoreMetaBlockHeader(
size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
// ISLAST
/* ISLAST */
WriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
// MNIBBLES is 4
/* MNIBBLES is 4 */
WriteBits(2, 0, storage_ix, storage);
WriteBits(16, len - 1, storage_ix, storage);
} else {
// MNIBBLES is 5
/* MNIBBLES is 5 */
WriteBits(2, 1, storage_ix, storage);
WriteBits(20, len - 1, storage_ix, storage);
}
// ISUNCOMPRESSED
/* ISUNCOMPRESSED */
WriteBits(1, is_uncompressed, storage_ix, storage);
}
@ -406,11 +406,12 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
return;
}
// "next_emit" is a pointer to the first byte that is not covered by a
// previous copy. Bytes between "next_emit" and the start of the next copy or
// the end of the input will be emitted as literal bytes.
/* "next_emit" is a pointer to the first byte that is not covered by a
previous copy. Bytes between "next_emit" and the start of the next copy or
the end of the input will be emitted as literal bytes. */
const uint8_t* next_emit = input;
// Save the start of the first block for position and distance computations.
/* Save the start of the first block for position and distance computations.
*/
const uint8_t* base_ip = input;
static const size_t kFirstBlockSize = 3 << 15;
@ -419,8 +420,8 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
const uint8_t* metablock_start = input;
size_t block_size = std::min(input_size, kFirstBlockSize);
size_t total_block_size = block_size;
// Save the bit position of the MLEN field of the meta-block header, so that
// we can update it later if we decide to extend this meta-block.
/* Save the bit position of the MLEN field of the meta-block header, so that
we can update it later if we decide to extend this meta-block. */
size_t mlen_storage_ix = *storage_ix + 3;
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
// No block splits, no contexts.
@ -439,10 +440,10 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
storage_ix, storage);
emit_commands:
// Initialize the command and distance histograms. We will gather
// statistics of command and distance codes during the processing
// of this block and use it to update the command and distance
// prefix codes for the next block.
/* Initialize the command and distance histograms. We will gather
statistics of command and distance codes during the processing
of this block and use it to update the command and distance
prefix codes for the next block. */
uint32_t cmd_histo[128] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
@ -466,31 +467,31 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
const size_t kInputMarginBytes = 16;
const size_t kMinMatchLen = 5;
if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
// For the last block, we need to keep a 16 bytes margin so that we can be
// sure that all distances are at most window size - 16.
// For all other blocks, we only need to keep a margin of 5 bytes so that
// we don't go over the block size with a copy.
/* For the last block, we need to keep a 16 bytes margin so that we can be
sure that all distances are at most window size - 16.
For all other blocks, we only need to keep a margin of 5 bytes so that
we don't go over the block size with a copy. */
const size_t len_limit = std::min(block_size - kMinMatchLen,
input_size - kInputMarginBytes);
const uint8_t* ip_limit = input + len_limit;
for (uint32_t next_hash = Hash(++ip, shift); ; ) {
assert(next_emit < ip);
// Step 1: Scan forward in the input looking for a 5-byte-long match.
// If we get close to exhausting the input then goto emit_remainder.
//
// Heuristic match skipping: If 32 bytes are scanned with no matches
// found, start looking only at every other byte. If 32 more bytes are
// scanned, look at every third byte, etc.. When a match is found,
// immediately go back to looking at every byte. This is a small loss
// (~5% performance, ~0.1% density) for compressible data due to more
// bookkeeping, but for non-compressible data (such as JPEG) it's a huge
// win since the compressor quickly "realizes" the data is incompressible
// and doesn't bother looking for matches everywhere.
//
// The "skip" variable keeps track of how many bytes there are since the
// last match; dividing it by 32 (ie. right-shifting by five) gives the
// number of bytes to move ahead for each iteration.
/* Step 1: Scan forward in the input looking for a 5-byte-long match.
If we get close to exhausting the input then goto emit_remainder.
Heuristic match skipping: If 32 bytes are scanned with no matches
found, start looking only at every other byte. If 32 more bytes are
scanned, look at every third byte, etc. When a match is found,
immediately go back to looking at every byte. This is a small loss
(~5% performance, ~0.1% density) for compressible data due to more
bookkeeping, but for non-compressible data (such as JPEG) it's a huge
win since the compressor quickly "realizes" the data is incompressible
and doesn't bother looking for matches everywhere.
The "skip" variable keeps track of how many bytes there are since the
last match; dividing it by 32 (i.e. right-shifting by five) gives the
number of bytes to move ahead for each iteration. */
uint32_t skip = 32;
const uint8_t* next_ip = ip;
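The skipping schedule described above advances 1 byte per probe for the first 32 unsuccessful probes, 2 bytes per probe for the next 32, and so on, since skip starts at 32 as shown. A standalone sketch (not part of this commit) of just that schedule:

#include <cstdint>
#include <cstdio>

int main(void) {
  uint32_t skip = 32;  /* grows by one per unsuccessful probe */
  uint32_t advanced = 0;
  for (int probe = 0; probe < 96; ++probe) {
    const uint32_t step = skip++ >> 5;  /* bytes to move ahead: skip / 32 */
    advanced += step;
  }
  /* 32*1 + 32*2 + 32*3 = 192 bytes covered by 96 hash probes. */
  std::printf("advanced %u bytes in 96 probes\n", advanced);
  return 0;
}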
@ -519,15 +520,15 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
table[hash] = static_cast<int>(ip - base_ip);
} while (PREDICT_TRUE(!IsMatch(ip, candidate)));
// Step 2: Emit the found match together with the literal bytes from
// "next_emit" to the bit stream, and then see if we can find a next macth
// immediately afterwards. Repeat until we find no match for the input
// without emitting some literal bytes.
/* Step 2: Emit the found match together with the literal bytes from
"next_emit" to the bit stream, and then see if we can find a next macth
immediately afterwards. Repeat until we find no match for the input
without emitting some literal bytes. */
uint64_t input_bytes;
{
// We have a 5-byte match at ip, and we need to emit bytes in
// [next_emit, ip).
/* We have a 5-byte match at ip, and we need to emit bytes in
[next_emit, ip). */
const uint8_t* base = ip;
size_t matched = 5 + FindMatchLengthWithLimit(
candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
@ -567,9 +568,9 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We could immediately start working at ip now, but to improve
// compression we first update "table" with the hashes of some positions
// within the last copy.
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some positions
within the last copy. */
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
@ -584,8 +585,8 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
}
while (IsMatch(ip, candidate)) {
// We have a 5-byte match at ip, and no need to emit any literal bytes
// prior to ip.
/* We have a 5-byte match at ip, and no need to emit any literal bytes
prior to ip. */
const uint8_t* base = ip;
size_t matched = 5 + FindMatchLengthWithLimit(
candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
@ -601,9 +602,9 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We could immediately start working at ip now, but to improve
// compression we first update "table" with the hashes of some positions
// within the last copy.
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some positions
within the last copy. */
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
@ -627,22 +628,22 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
input_size -= block_size;
block_size = std::min(input_size, kMergeBlockSize);
// Decide if we want to continue this meta-block instead of emitting the
// last insert-only command.
/* Decide if we want to continue this meta-block instead of emitting the
last insert-only command. */
if (input_size > 0 &&
total_block_size + block_size <= (1 << 20) &&
ShouldMergeBlock(input, block_size, lit_depth)) {
assert(total_block_size > (1 << 16));
// Update the size of the current meta-block and continue emitting commands.
// We can do this because the current size and the new size both have 5
// nibbles.
/* Update the size of the current meta-block and continue emitting commands.
We can do this because the current size and the new size both have 5
nibbles. */
total_block_size += block_size;
UpdateBits(20, static_cast<uint32_t>(total_block_size - 1),
mlen_storage_ix, storage);
goto emit_commands;
}
// Emit the remaining bytes as literals.
/* Emit the remaining bytes as literals. */
if (next_emit < ip_end) {
const size_t insert = static_cast<size_t>(ip_end - next_emit);
if (PREDICT_TRUE(insert < 6210)) {
@ -663,17 +664,17 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
next_emit = ip_end;
next_block:
// If we have more data, write a new meta-block header and prefix codes and
// then continue emitting commands.
/* If we have more data, write a new meta-block header and prefix codes and
then continue emitting commands. */
if (input_size > 0) {
metablock_start = input;
block_size = std::min(input_size, kFirstBlockSize);
total_block_size = block_size;
// Save the bit position of the MLEN field of the meta-block header, so that
// we can update it later if we decide to extend this meta-block.
/* Save the bit position of the MLEN field of the meta-block header, so that
we can update it later if we decide to extend this meta-block. */
mlen_storage_ix = *storage_ix + 3;
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
// No block splits, no contexts.
/* No block splits, no contexts. */
WriteBits(13, 0, storage_ix, storage);
memset(lit_depth, 0, sizeof(lit_depth));
memset(lit_bits, 0, sizeof(lit_bits));
@ -685,12 +686,12 @@ next_block:
}
if (is_last) {
WriteBits(1, 1, storage_ix, storage); // islast
WriteBits(1, 1, storage_ix, storage); // isempty
WriteBits(1, 1, storage_ix, storage); /* islast */
WriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
} else {
// If this is not the last block, update the command and distance prefix
// codes for the next block and store the compressed forms.
/* If this is not the last block, update the command and distance prefix
codes for the next block and store the compressed forms. */
cmd_code[0] = 0;
*cmd_code_numbits = 0;
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,

enc/compress_fragment.h View File

@ -4,10 +4,10 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function for fast encoding of an input fragment, independently from the input
// history. This function uses one-pass processing: when we find a backward
// match, we immediately emit the corresponding command and literal codes to
// the bit stream.
/* Function for fast encoding of an input fragment, independently from the input
history. This function uses one-pass processing: when we find a backward
match, we immediately emit the corresponding command and literal codes to
the bit stream. */
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
#define BROTLI_ENC_COMPRESS_FRAGMENT_H_
@ -16,25 +16,25 @@
namespace brotli {
// Compresses "input" string to the "*storage" buffer as one or more complete
// meta-blocks, and updates the "*storage_ix" bit position.
//
// If "is_last" is true, emits an additional empty last meta-block.
//
// "cmd_depth" and "cmd_bits" contain the command and distance prefix codes
// (see comment in encode.h) used for the encoding of this input fragment.
// If "is_last" is false, they are updated to reflect the statistics
// of this input fragment, to be used for the encoding of the next fragment.
//
// "*cmd_code_numbits" is the number of bits of the compressed representation
// of the command and distance prefix codes, and "cmd_code" is an array of
// at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed
// command and distance prefix codes. If "is_last" is false, these are also
// updated to represent the updated "cmd_depth" and "cmd_bits".
//
// REQUIRES: "input_size" is greater than zero, or "is_last" is true.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two
/* Compresses "input" string to the "*storage" buffer as one or more complete
meta-blocks, and updates the "*storage_ix" bit position.
If "is_last" is 1, emits an additional empty last meta-block.
"cmd_depth" and "cmd_bits" contain the command and distance prefix codes
(see comment in encode.h) used for the encoding of this input fragment.
If "is_last" is 0, they are updated to reflect the statistics
of this input fragment, to be used for the encoding of the next fragment.
"*cmd_code_numbits" is the number of bits of the compressed representation
of the command and distance prefix codes, and "cmd_code" is an array of
at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed
command and distance prefix codes. If "is_last" is 0, these are also
updated to represent the updated "cmd_depth" and "cmd_bits".
REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
REQUIRES: "table_size" is a power of two */
void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
bool is_last,
int* table, size_t table_size,
@ -44,4 +44,4 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
} // namespace brotli
#endif // BROTLI_ENC_COMPRESS_FRAGMENT_H_
#endif /* BROTLI_ENC_COMPRESS_FRAGMENT_H_ */

View File

@ -4,11 +4,11 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function for fast encoding of an input fragment, independently from the input
// history. This function uses two-pass processing: in the first pass we save
// the found backward matches and literal bytes into a buffer, and in the
// second pass we emit them into the bit stream using prefix codes built based
// on the actual command and literal byte histograms.
/* Function for fast encoding of an input fragment, independently from the input
history. This function uses two-pass processing: in the first pass we save
the found backward matches and literal bytes into a buffer, and in the
second pass we emit them into the bit stream using prefix codes built based
on the actual command and literal byte histograms. */
#include "./compress_fragment_two_pass.h"
@ -25,12 +25,12 @@
namespace brotli {
// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime; the oddity is enough
// for this use.
// * The number has been tuned heuristically against compression benchmarks.
/* kHashMul32 multiplier has these properties:
* The multiplier must be odd. Otherwise we may lose the highest bit.
* No long streaks of 1s or 0s.
* There is no effort to ensure that it is a prime; the oddity is enough
for this use.
* The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;
static inline uint32_t Hash(const uint8_t* p, size_t shift) {
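To see why those multiplier properties matter in practice, here is a minimal standalone sketch of a multiplicative hash in the same style; the 4-byte load width, the helper name and the exact use of the shift are assumptions made for this sketch, not necessarily what the Hash() above does.

#include <cstdint>
#include <cstring>

static const uint32_t kIllustrativeHashMul32 = 0x1e35a7bd;  /* odd constant */

/* Sketch: multiply the next 4 input bytes by the odd constant and keep the
   top bits as the table index (shift = 32 - log2(table_size)). Multiplying
   by an odd number is a bijection on 32-bit values, so no input information
   is lost, and keeping the high bits mixes every input bit into the index. */
static inline uint32_t HashSketch(const uint8_t* p, size_t shift) {
  uint32_t v;
  std::memcpy(&v, p, sizeof(v));   /* unaligned-safe 32-bit load */
  return (v * kIllustrativeHashMul32) >> shift;
}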
@ -51,22 +51,22 @@ static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
p1[5] == p2[5]);
}
// Builds a command and distance prefix code (each 64 symbols) into "depth" and
// "bits" based on "histogram" and stores it into the bit stream.
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
"bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(
const uint32_t histogram[128],
uint8_t depth[128], uint16_t bits[128],
size_t* storage_ix, uint8_t* storage) {
// Tree size for building a tree over 64 symbols is 2 * 64 + 1.
/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
static const size_t kTreeSize = 129;
HuffmanTree tree[kTreeSize];
CreateHuffmanTree(histogram, 64, 15, tree, depth);
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
// We have to jump through a few hoops here in order to compute
// the command bits because the symbols are in a different order than in
// the full alphabet. This looks complicated, but having the symbols
// in this order in the command bits saves a few branches in the Emit*
// functions.
/* We have to jump through a few hoops here in order to compute
the command bits because the symbols are in a different order than in
the full alphabet. This looks complicated, but having the symbols
in this order in the command bits saves a few branches in the Emit*
functions. */
uint8_t cmd_depth[64];
uint16_t cmd_bits[64];
memcpy(cmd_depth, depth + 24, 24);
@ -84,7 +84,7 @@ static void BuildAndStoreCommandPrefixCode(
memcpy(bits + 56, cmd_bits + 48, 16);
ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
{
// Create the bit length array for the full command alphabet.
/* Create the bit length array for the full command alphabet. */
uint8_t cmd_depth[704] = { 0 };
memcpy(cmd_depth, depth + 24, 8);
memcpy(cmd_depth + 64, depth + 32, 8);
@ -202,21 +202,21 @@ inline void EmitDistance(uint32_t distance, uint32_t** commands) {
++(*commands);
}
// REQUIRES: len <= 1 << 20.
/* REQUIRES: len <= 1 << 20. */
static void StoreMetaBlockHeader(
size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
// ISLAST
/* ISLAST */
WriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
// MNIBBLES is 4
/* MNIBBLES is 4 */
WriteBits(2, 0, storage_ix, storage);
WriteBits(16, len - 1, storage_ix, storage);
} else {
// MNIBBLES is 5
/* MNIBBLES is 5 */
WriteBits(2, 1, storage_ix, storage);
WriteBits(20, len - 1, storage_ix, storage);
}
// ISUNCOMPRESSED
/* ISUNCOMPRESSED */
WriteBits(1, is_uncompressed, storage_ix, storage);
}
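As a worked illustration of the header layout written above (ISLAST, MNIBBLES, MLEN - 1, ISUNCOMPRESSED), here is a self-contained sketch that packs the same fields with a hypothetical LSB-first bit writer; PutBits below stands in for brotli's WriteBits and is not the real implementation.

#include <cstddef>
#include <cstdint>
#include <vector>

/* Hypothetical LSB-first bit writer, used only for this sketch. */
static void PutBits(size_t n_bits, uint64_t bits,
                    size_t* bit_pos, std::vector<uint8_t>* out) {
  for (size_t i = 0; i < n_bits; ++i, ++*bit_pos) {
    if ((*bit_pos >> 3) >= out->size()) out->push_back(0);
    (*out)[*bit_pos >> 3] |=
        static_cast<uint8_t>(((bits >> i) & 1) << (*bit_pos & 7));
  }
}

/* Same field layout as StoreMetaBlockHeader above (requires len <= 1 << 20):
   ISLAST (1 bit), MNIBBLES (2 bits), MLEN - 1 (16 or 20 bits), then
   ISUNCOMPRESSED (1 bit). */
static void MetaBlockHeaderSketch(size_t len, bool is_uncompressed,
                                  size_t* bit_pos, std::vector<uint8_t>* out) {
  PutBits(1, 0, bit_pos, out);                 /* ISLAST = 0 */
  if (len <= (1u << 16)) {
    PutBits(2, 0, bit_pos, out);               /* MNIBBLES = 4 */
    PutBits(16, len - 1, bit_pos, out);        /* MLEN - 1 in 16 bits */
  } else {
    PutBits(2, 1, bit_pos, out);               /* MNIBBLES = 5 */
    PutBits(20, len - 1, bit_pos, out);        /* MLEN - 1 in 20 bits */
  }
  PutBits(1, is_uncompressed ? 1 : 0, bit_pos, out);  /* ISUNCOMPRESSED */
}
/* E.g. len = 4096 fits the MNIBBLES = 4 branch (1 + 2 + 16 + 1 = 20 bits),
   while len = 70000 needs MNIBBLES = 5 (24 bits in total). */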
@ -224,7 +224,7 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
size_t input_size, const uint8_t* base_ip,
int* table, size_t table_size,
uint8_t** literals, uint32_t** commands) {
// "ip" is the input pointer.
/* "ip" is the input pointer. */
const uint8_t* ip = input;
assert(table_size);
assert(table_size <= (1u << 31));
@ -233,40 +233,40 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
assert(table_size - 1 == static_cast<size_t>(
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
const uint8_t* ip_end = input + block_size;
// "next_emit" is a pointer to the first byte that is not covered by a
// previous copy. Bytes between "next_emit" and the start of the next copy or
// the end of the input will be emitted as literal bytes.
/* "next_emit" is a pointer to the first byte that is not covered by a
previous copy. Bytes between "next_emit" and the start of the next copy or
the end of the input will be emitted as literal bytes. */
const uint8_t* next_emit = input;
int last_distance = -1;
const size_t kInputMarginBytes = 16;
const size_t kMinMatchLen = 6;
if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
// For the last block, we need to keep a 16 bytes margin so that we can be
// sure that all distances are at most window size - 16.
// For all other blocks, we only need to keep a margin of 5 bytes so that
// we don't go over the block size with a copy.
/* For the last block, we need to keep a 16 bytes margin so that we can be
sure that all distances are at most window size - 16.
For all other blocks, we only need to keep a margin of 5 bytes so that
we don't go over the block size with a copy. */
const size_t len_limit = std::min(block_size - kMinMatchLen,
input_size - kInputMarginBytes);
const uint8_t* ip_limit = input + len_limit;
for (uint32_t next_hash = Hash(++ip, shift); ; ) {
assert(next_emit < ip);
// Step 1: Scan forward in the input looking for a 6-byte-long match.
// If we get close to exhausting the input then goto emit_remainder.
//
// Heuristic match skipping: If 32 bytes are scanned with no matches
// found, start looking only at every other byte. If 32 more bytes are
// scanned, look at every third byte, etc. When a match is found,
// immediately go back to looking at every byte. This is a small loss
// (~5% performance, ~0.1% density) for compressible data due to more
// bookkeeping, but for non-compressible data (such as JPEG) it's a huge
// win since the compressor quickly "realizes" the data is incompressible
// and doesn't bother looking for matches everywhere.
//
// The "skip" variable keeps track of how many bytes there are since the
// last match; dividing it by 32 (i.e. right-shifting by five) gives the
// number of bytes to move ahead for each iteration.
/* Step 1: Scan forward in the input looking for a 6-byte-long match.
If we get close to exhausting the input then goto emit_remainder.
Heuristic match skipping: If 32 bytes are scanned with no matches
found, start looking only at every other byte. If 32 more bytes are
scanned, look at every third byte, etc. When a match is found,
immediately go back to looking at every byte. This is a small loss
(~5% performance, ~0.1% density) for compressible data due to more
bookkeeping, but for non-compressible data (such as JPEG) it's a huge
win since the compressor quickly "realizes" the data is incompressible
and doesn't bother looking for matches everywhere.
The "skip" variable keeps track of how many bytes there are since the
last match; dividing it by 32 (i.e. right-shifting by five) gives the
number of bytes to move ahead for each iteration. */
uint32_t skip = 32;
const uint8_t* next_ip = ip;
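A compact sketch of the skipping schedule described in the comment above: the stride follows the divide-by-32 rule, staying at 1 for the first 32 misses, 2 for the next 32, and so on. The exact increment pattern (skip++ >> 5) is an assumption of this sketch, and the loop only demonstrates the stride arithmetic, not the real hash-table probing.

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t skip = 32;   /* assumed schedule: stride = skip++ >> 5 */
  uint32_t pos = 0;
  for (unsigned probe = 0; probe < 128; ++probe) {
    uint32_t bytes_between_hash_lookups = skip++ >> 5;
    pos += bytes_between_hash_lookups;
    if (probe % 32 == 0) {
      std::printf("probe %3u: stride %u, scanned up to byte %u\n",
                  probe, bytes_between_hash_lookups, pos);
    }
  }
  return 0;
}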
@ -295,15 +295,15 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
table[hash] = static_cast<int>(ip - base_ip);
} while (PREDICT_TRUE(!IsMatch(ip, candidate)));
// Step 2: Emit the found match together with the literal bytes from
// "next_emit", and then see if we can find a next macth immediately
// afterwards. Repeat until we find no match for the input
// without emitting some literal bytes.
/* Step 2: Emit the found match together with the literal bytes from
"next_emit", and then see if we can find a next macth immediately
afterwards. Repeat until we find no match for the input
without emitting some literal bytes. */
uint64_t input_bytes;
{
// We have a 6-byte match at ip, and we need to emit bytes in
// [next_emit, ip).
/* We have a 6-byte match at ip, and we need to emit bytes in
[next_emit, ip). */
const uint8_t* base = ip;
size_t matched = 6 + FindMatchLengthWithLimit(
candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
@ -327,9 +327,9 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We could immediately start working at ip now, but to improve
// compression we first update "table" with the hashes of some positions
// within the last copy.
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some
positions within the last copy. */
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 5);
@ -349,8 +349,8 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
}
while (IsMatch(ip, candidate)) {
// We have a 6-byte match at ip, and no need to emit any
// literal bytes prior to ip.
/* We have a 6-byte match at ip, and no need to emit any
literal bytes prior to ip. */
const uint8_t* base = ip;
size_t matched = 6 + FindMatchLengthWithLimit(
candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
@ -364,9 +364,9 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We could immediately start working at ip now, but to improve
// compression we first update "table" with the hashes of some positions
// within the last copy.
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some
positions within the last copy. */
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 5);
@ -391,7 +391,7 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
emit_remainder:
assert(next_emit <= ip_end);
// Emit the remaining bytes as literals.
/* Emit the remaining bytes as literals. */
if (next_emit < ip_end) {
const uint32_t insert = static_cast<uint32_t>(ip_end - next_emit);
EmitInsertLen(insert, commands);
@ -483,7 +483,8 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
uint32_t* command_buf, uint8_t* literal_buf,
int* table, size_t table_size,
size_t* storage_ix, uint8_t* storage) {
// Save the start of the first block for position and distance computations.
/* Save the start of the first block for position and distance computations.
*/
const uint8_t* base_ip = input;
while (input_size > 0) {
@ -496,14 +497,14 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
const size_t num_commands = static_cast<size_t>(commands - command_buf);
if (ShouldCompress(input, block_size, num_literals)) {
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
// No block splits, no contexts.
/* No block splits, no contexts. */
WriteBits(13, 0, storage_ix, storage);
StoreCommands(literal_buf, num_literals, command_buf, num_commands,
storage_ix, storage);
} else {
// Since we did not find many backward references and the entropy of
// the data is close to 8 bits, we can simply emit an uncompressed block.
// This makes compression speed of incompressible data about 3x faster.
/* Since we did not find many backward references and the entropy of
the data is close to 8 bits, we can simply emit an uncompressed block.
This makes compression speed of incompressible data about 3x faster. */
StoreMetaBlockHeader(block_size, 1, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
memcpy(&storage[*storage_ix >> 3], input, block_size);
@ -515,8 +516,8 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
}
if (is_last) {
WriteBits(1, 1, storage_ix, storage); // islast
WriteBits(1, 1, storage_ix, storage); // isempty
WriteBits(1, 1, storage_ix, storage); /* islast */
WriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
}
}

View File

@ -4,11 +4,11 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function for fast encoding of an input fragment, independently from the input
// history. This function uses two-pass processing: in the first pass we save
// the found backward matches and literal bytes into a buffer, and in the
// second pass we emit them into the bit stream using prefix codes built based
// on the actual command and literal byte histograms.
/* Function for fast encoding of an input fragment, independently from the input
history. This function uses two-pass processing: in the first pass we save
the found backward matches and literal bytes into a buffer, and in the
second pass we emit them into the bit stream using prefix codes built based
on the actual command and literal byte histograms. */
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
@ -19,16 +19,16 @@ namespace brotli {
static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
// Compresses "input" string to the "*storage" buffer as one or more complete
// meta-blocks, and updates the "*storage_ix" bit position.
//
// If "is_last" is true, emits an additional empty last meta-block.
//
// REQUIRES: "input_size" is greater than zero, or "is_last" is true.
// REQUIRES: "command_buf" and "literal_buf" point to at least
// kCompressFragmentTwoPassBlockSize long arrays.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two
/* Compresses "input" string to the "*storage" buffer as one or more complete
meta-blocks, and updates the "*storage_ix" bit position.
If "is_last" is 1, emits an additional empty last meta-block.
REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
REQUIRES: "command_buf" and "literal_buf" point to at least
kCompressFragmentTwoPassBlockSize long arrays.
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
REQUIRES: "table_size" is a power of two */
void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
bool is_last,
uint32_t* command_buf, uint8_t* literal_buf,
@ -37,4 +37,4 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
} // namespace brotli
#endif // BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
#endif /* BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ */

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Functions to map previous bytes into a context id.
/* Functions to map previous bytes into a context id. */
#ifndef BROTLI_ENC_CONTEXT_H_
#define BROTLI_ENC_CONTEXT_H_
@ -13,82 +13,83 @@
namespace brotli {
// Second-order context lookup table for UTF8 byte streams.
//
// If p1 and p2 are the previous two bytes, we calculate the context as
//
// context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
//
// If the previous two bytes are ASCII characters (i.e. < 128), this will be
// equivalent to
//
// context = 4 * context1(p1) + context2(p2),
//
// where context1 is based on the previous byte in the following way:
//
// 0 : non-ASCII control
// 1 : \t, \n, \r
// 2 : space
// 3 : other punctuation
// 4 : " '
// 5 : %
// 6 : ( < [ {
// 7 : ) > ] }
// 8 : , ; :
// 9 : .
// 10 : =
// 11 : number
// 12 : upper-case vowel
// 13 : upper-case consonant
// 14 : lower-case vowel
// 15 : lower-case consonant
//
// and context2 is based on the second last byte:
//
// 0 : control, space
// 1 : punctuation
// 2 : upper-case letter, number
// 3 : lower-case letter
//
// If the last byte is ASCII, and the second last byte is not (in a valid UTF8
// stream it will be a continuation byte, value between 128 and 191), the
// context is the same as if the second last byte was an ASCII control or space.
//
// If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
// be a continuation byte and the context id is 2 or 3 depending on the LSB of
// the last byte and to a lesser extent on the second last byte if it is ASCII.
//
// If the last byte is a UTF8 continuation byte, the second last byte can be:
// - continuation byte: the next byte is probably ASCII or lead byte (assuming
// 4-byte UTF8 characters are rare) and the context id is 0 or 1.
// - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
// - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
//
// The possible value combinations of the previous two bytes, the range of
// context ids and the type of the next byte is summarized in the table below:
//
// |--------\-----------------------------------------------------------------|
// | \ Last byte |
// | Second \---------------------------------------------------------------|
// | last byte \ ASCII | cont. byte | lead byte |
// | \ (0-127) | (128-191) | (192-) |
// |=============|===================|=====================|==================|
// | ASCII | next: ASCII/lead | not valid | next: cont. |
// | (0-127) | context: 4 - 63 | | context: 2 - 3 |
// |-------------|-------------------|---------------------|------------------|
// | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
// | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
// |-------------|-------------------|---------------------|------------------|
// | lead byte | not valid | next: ASCII/lead | not valid |
// | (192-207) | | context: 0 - 1 | |
// |-------------|-------------------|---------------------|------------------|
// | lead byte | not valid | next: cont. | not valid |
// | (208-) | | context: 2 - 3 | |
// |-------------|-------------------|---------------------|------------------|
/* Second-order context lookup table for UTF8 byte streams.
If p1 and p2 are the previous two bytes, we calculate the context as
context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
If the previous two bytes are ASCII characters (i.e. < 128), this will be
equivalent to
context = 4 * context1(p1) + context2(p2),
where context1 is based on the previous byte in the following way:
0 : non-ASCII control
1 : \t, \n, \r
2 : space
3 : other punctuation
4 : " '
5 : %
6 : ( < [ {
7 : ) > ] }
8 : , ; :
9 : .
10 : =
11 : number
12 : upper-case vowel
13 : upper-case consonant
14 : lower-case vowel
15 : lower-case consonant
and context2 is based on the second last byte:
0 : control, space
1 : punctuation
2 : upper-case letter, number
3 : lower-case letter
If the last byte is ASCII, and the second last byte is not (in a valid UTF8
stream it will be a continuation byte, value between 128 and 191), the
context is the same as if the second last byte was an ASCII control or space.
If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
be a continuation byte and the context id is 2 or 3 depending on the LSB of
the last byte and to a lesser extent on the second last byte if it is ASCII.
If the last byte is a UTF8 continuation byte, the second last byte can be:
- continuation byte: the next byte is probably ASCII or lead byte (assuming
4-byte UTF8 characters are rare) and the context id is 0 or 1.
- lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
- lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
The possible value combinations of the previous two bytes, the range of
context ids and the type of the next byte is summarized in the table below:
|--------\-----------------------------------------------------------------|
| \ Last byte |
| Second \---------------------------------------------------------------|
| last byte \ ASCII | cont. byte | lead byte |
| \ (0-127) | (128-191) | (192-) |
|=============|===================|=====================|==================|
| ASCII | next: ASCII/lead | not valid | next: cont. |
| (0-127) | context: 4 - 63 | | context: 2 - 3 |
|-------------|-------------------|---------------------|------------------|
| cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
| (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
|-------------|-------------------|---------------------|------------------|
| lead byte | not valid | next: ASCII/lead | not valid |
| (192-207) | | context: 0 - 1 | |
|-------------|-------------------|---------------------|------------------|
| lead byte | not valid | next: cont. | not valid |
| (208-) | | context: 2 - 3 | |
|-------------|-------------------|---------------------|------------------|
*/
static const uint8_t kUTF8ContextLookup[512] = {
// Last byte.
//
// ASCII range.
/* Last byte. */
/* */
/* ASCII range. */
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
@ -97,19 +98,19 @@ static const uint8_t kUTF8ContextLookup[512] = {
52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
// UTF8 continuation byte range.
/* UTF8 continuation byte range. */
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
// UTF8 lead byte range.
/* UTF8 lead byte range. */
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
// Second last byte.
//
// ASCII range.
/* Second last byte. */
/* */
/* ASCII range. */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -118,19 +119,19 @@ static const uint8_t kUTF8ContextLookup[512] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
// UTF8 continuation byte range.
/* UTF8 continuation byte range. */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// UTF8 lead byte range.
/* UTF8 lead byte range. */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};
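To make the combination rule from the comment above concrete, here is a small sketch that assumes the kUTF8ContextLookup table just defined is in scope; for two ASCII bytes it reproduces the 4 * context1(p1) + context2(p2) decomposition.

/* Sketch only; p1 is the last byte, p2 the second-last byte. */
static inline uint8_t UTF8ContextSketch(uint8_t p1, uint8_t p2) {
  /* The first 256 entries are indexed by the last byte, the next 256 by the
     second-last byte; OR-ing merges the two partial contexts. */
  return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
}
/* Worked example from the table values above: p1 = 'e' (0x65) maps to
   56 = 4 * 14 (context1 = 14, lower-case vowel) and p2 = ' ' (0x20) maps to
   0 (context2 = 0, control or space), so the context is 56 | 0 = 56. */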
// Context lookup table for small signed integers.
/* Context lookup table for small signed integers. */
static const uint8_t kSigned3BitContextLookup[] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@ -175,4 +176,4 @@ static inline uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
} // namespace brotli
#endif // BROTLI_ENC_CONTEXT_H_
#endif /* BROTLI_ENC_CONTEXT_H_ */

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Hash table on the 4-byte prefixes of static dictionary words.
/* Hash table on the 4-byte prefixes of static dictionary words. */
#ifndef BROTLI_ENC_DICTIONARY_HASH_H_
#define BROTLI_ENC_DICTIONARY_HASH_H_
@ -4114,4 +4114,4 @@ static const uint16_t kStaticDictionaryHash[] = {
} // namespace brotli
#endif // BROTLI_ENC_DICTIONARY_HASH_H_
#endif /* BROTLI_ENC_DICTIONARY_HASH_H_ */

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Implementation of Brotli compressor.
/* Implementation of Brotli compressor. */
#include "./encode.h"
@ -36,8 +36,8 @@ namespace brotli {
static const int kMinQualityForBlockSplit = 4;
static const int kMinQualityForContextModeling = 5;
static const int kMinQualityForOptimizeHistograms = 4;
// For quality 2 there is no block splitting, so we buffer at most this much
// literals and commands.
/* For quality 2 there is no block splitting, so we buffer at most this much
literals and commands. */
static const size_t kMaxNumDelayedSymbols = 0x2fff;
#define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));
@ -95,10 +95,10 @@ static size_t HashTableSize(size_t max_table_size, size_t input_size) {
int* BrotliCompressor::GetHashTable(int quality,
size_t input_size,
size_t* table_size) {
// Use smaller hash table when input.size() is smaller, since we
// fill the table, incurring O(hash table size) overhead for
// compression, and if the input is short, we won't need that
// many hash table entries anyway.
/* Use smaller hash table when input.size() is smaller, since we
fill the table, incurring O(hash table size) overhead for
compression, and if the input is short, we won't need that
many hash table entries anyway. */
const size_t max_table_size = MaxHashTableSize(quality);
assert(max_table_size >= 256);
size_t htsize = HashTableSize(max_table_size, input_size);
@ -135,7 +135,7 @@ static void EncodeWindowBits(int lgwin, uint8_t* last_byte,
}
}
// Initializes the command and distance prefix codes for the first block.
/* Initializes the command and distance prefix codes for the first block. */
static void InitCommandPrefixCodes(uint8_t cmd_depths[128],
uint16_t cmd_bits[128],
uint8_t cmd_code[512],
@ -167,8 +167,8 @@ static void InitCommandPrefixCodes(uint8_t cmd_depths[128],
COPY_ARRAY(cmd_depths, kDefaultCommandDepths);
COPY_ARRAY(cmd_bits, kDefaultCommandBits);
// Initialize the pre-compressed form of the command and distance prefix
// codes.
/* Initialize the pre-compressed form of the command and distance prefix
codes. */
static const uint8_t kDefaultCommandCode[] = {
0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
@ -181,13 +181,13 @@ static void InitCommandPrefixCodes(uint8_t cmd_depths[128],
*cmd_code_numbits = kDefaultCommandCodeNumBits;
}
// Decide about the context map based on the prediction ability of the
// previous byte's UTF8-prefix on the next byte. The prediction ability
// is calculated as Shannon entropy. Here we need Shannon entropy instead
// of 'BitsEntropy' since the prefix will be encoded with the remaining
// 6 bits of the following byte, and BitsEntropy assumes that the symbol
// is stored alone using Huffman coding.
/* Decide about the context map based on the prediction ability of the
previous byte's UTF8-prefix on the next byte. The prediction ability
is calculated as Shannon entropy. Here we need Shannon entropy instead
of 'BitsEntropy' since the prefix will be encoded with the remaining
6 bits of the following byte, and BitsEntropy assumes that the symbol
is stored alone using Huffman coding. */
static void ChooseContextMap(int quality,
uint32_t* bigram_histo,
size_t* num_literal_contexts,
@ -232,11 +232,11 @@ static void ChooseContextMap(int quality,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
if (quality < 7) {
// 3 context models is a bit slower, don't use it at lower qualities.
/* 3 context models is a bit slower, don't use it at lower qualities. */
entropy3 = entropy1 * 10;
}
// If expected savings by symbol are less than 0.2 bits, skip the
// context modeling -- in exchange for faster decoding speed.
/* If expected savings by symbol are less than 0.2 bits, skip the
context modeling -- in exchange for faster decoding speed. */
if (entropy1 - entropy2 < 0.2 &&
entropy1 - entropy3 < 0.2) {
*num_literal_contexts = 1;
@ -261,9 +261,9 @@ static void DecideOverLiteralContextModeling(
if (quality < kMinQualityForContextModeling || length < 64) {
return;
}
// Gather bigram data of the UTF8 byte prefixes. To make the analysis of
// UTF8 data faster we only examine 64 byte long strides at every 4kB
// intervals.
/* Gather bigram data of the UTF8 byte prefixes. To make the analysis of
UTF8 data faster we only examine 64 byte long strides at every 4kB
intervals. */
const size_t end_pos = start_pos + length;
uint32_t bigram_prefix_histo[9] = { 0 };
for (; start_pos + 64 <= end_pos; start_pos += 4096) {
@ -325,7 +325,7 @@ static void WriteMetaBlockInternal(const uint8_t* data,
size_t* storage_ix,
uint8_t* storage) {
if (bytes == 0) {
// Write the ISLAST and ISEMPTY bits.
/* Write the ISLAST and ISEMPTY bits. */
WriteBits(2, 3, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
return;
@ -333,8 +333,8 @@ static void WriteMetaBlockInternal(const uint8_t* data,
if (!ShouldCompress(data, mask, last_flush_pos, bytes,
num_literals, num_commands)) {
// Restore the distance cache, as its last update by
// CreateBackwardReferences is now unused.
/* Restore the distance cache, as its last update by
CreateBackwardReferences is now unused. */
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
StoreUncompressedMetaBlock(is_last, data,
WrapPosition(last_flush_pos), mask, bytes,
@ -416,7 +416,7 @@ static void WriteMetaBlockInternal(const uint8_t* data,
storage_ix, storage);
}
if (bytes + 4 < (*storage_ix >> 3)) {
// Restore the distance cache and last byte.
/* Restore the distance cache and last byte. */
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
storage[0] = last_byte;
*storage_ix = last_byte_bits;
@ -444,7 +444,7 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
command_buf_(NULL),
literal_buf_(NULL),
is_last_block_emitted_(0) {
// Sanitize params.
/* Sanitize params. */
params_.quality = std::max(0, params_.quality);
if (params_.lgwin < kMinWindowBits) {
params_.lgwin = kMinWindowBits;
@ -465,18 +465,18 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
std::max(kMinInputBlockBits, params_.lgblock));
}
// Initialize input and literal cost ring buffers.
// We allocate at least lgwin + 1 bits for the ring buffer so that the newly
// added block fits there completely and we still get lgwin bits and at least
// read_block_size_bits + 1 bits because the copy tail length needs to be
// smaller than ringbuffer size.
/* Initialize input and literal cost ring buffers.
We allocate at least lgwin + 1 bits for the ring buffer so that the newly
added block fits there completely and we still get lgwin bits and at least
read_block_size_bits + 1 bits because the copy tail length needs to be
smaller than ringbuffer size. */
int ringbuffer_bits = std::max(params_.lgwin + 1, params_.lgblock + 1);
ringbuffer_ = new RingBuffer(ringbuffer_bits, params_.lgblock);
commands_ = 0;
cmd_alloc_size_ = 0;
// Initialize last byte with stream header.
/* Initialize last byte with stream header. */
EncodeWindowBits(params_.lgwin, &last_byte_, &last_byte_bits_);
// Initialize distance cache.
@ -496,7 +496,7 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
literal_buf_ = new uint8_t[kCompressFragmentTwoPassBlockSize];
}
// Initialize hashers.
/* Initialize hashers. */
hash_type_ = std::min(10, params_.quality);
hashers_->Init(hash_type_);
}
@ -516,48 +516,49 @@ void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
ringbuffer_->Write(input_buffer, input_size);
input_pos_ += input_size;
// TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
// hashing not depend on uninitialized data. This makes compression
// deterministic and it prevents uninitialized memory warnings in Valgrind.
// Even without erasing, the output would be valid (but nondeterministic).
//
// Background information: The compressor stores short (at most 8 bytes)
// substrings of the input already read in a hash table, and detects
// repetitions by looking up such substrings in the hash table. If it
// can find a substring, it checks whether the substring is really there
// in the ring buffer (or it's just a hash collision). Should the hash
// table become corrupt, this check makes sure that the output is
// still valid, albeit the compression ratio would be bad.
//
// The compressor populates the hash table from the ring buffer as it's
// reading new bytes from the input. However, at the last few indexes of
// the ring buffer, there are not enough bytes to build full-length
// substrings from. Since the hash table always contains full-length
// substrings, we erase with dummy 0s here to make sure that those
// substrings will contain 0s at the end instead of uninitialized
// data.
//
// Please note that erasing is not necessary (because the
// memory region is already initialized since the ring buffer
// has a `tail' that holds a copy of the beginning), so we
// skip erasing if we have already gone around at least once in
// the ring buffer.
/* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
hashing not depend on uninitialized data. This makes compression
deterministic and it prevents uninitialized memory warnings in Valgrind.
Even without erasing, the output would be valid (but nondeterministic).
Background information: The compressor stores short (at most 8 bytes)
substrings of the input already read in a hash table, and detects
repetitions by looking up such substrings in the hash table. If it
can find a substring, it checks whether the substring is really there
in the ring buffer (or it's just a hash collision). Should the hash
table become corrupt, this check makes sure that the output is
still valid, albeit the compression ratio would be bad.
The compressor populates the hash table from the ring buffer as it's
reading new bytes from the input. However, at the last few indexes of
the ring buffer, there are not enough bytes to build full-length
substrings from. Since the hash table always contains full-length
substrings, we erase with dummy 0s here to make sure that those
substrings will contain 0s at the end instead of uninitialized
data.
Please note that erasing is not necessary (because the
memory region is already initialized since the ring buffer
has a `tail' that holds a copy of the beginning), so we
skip erasing if we have already gone around at least once in
the ring buffer.
Only clear during the first round of ringbuffer writes. On
subsequent rounds data in the ringbuffer would be affected. */
size_t pos = ringbuffer_->position();
// Only clear during the first round of ringbuffer writes. On
// subsequent rounds data in the ringbuffer would be affected.
if (pos <= ringbuffer_->mask()) {
// This is the first time when the ring buffer is being written.
// We clear 7 bytes just after the bytes that have been copied from
// the input buffer.
//
// The ringbuffer has a "tail" that holds a copy of the beginning,
// but only once the ring buffer has been fully written once, i.e.,
// pos <= mask. For the first time, we need to write values
// in this tail (where index may be larger than mask), so that
// we have exactly defined behavior and don't read un-initialized
// memory. Due to performance reasons, hashing reads data using a
// LOAD64, which can go 7 bytes beyond the bytes written in the
// ringbuffer.
/* This is the first time when the ring buffer is being written.
We clear 7 bytes just after the bytes that have been copied from
the input buffer.
The ringbuffer has a "tail" that holds a copy of the beginning,
but only once the ring buffer has been fully written once, i.e.,
pos <= mask. For the first time, we need to write values
in this tail (where index may be larger than mask), so that
we have exactly defined behavior and don't read un-initialized
memory. Due to performance reasons, hashing reads data using a
LOAD64, which can go 7 bytes beyond the bytes written in the
ringbuffer. */
memset(ringbuffer_->start() + pos, 0, 7);
}
}
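A standalone sketch of why exactly 7 extra bytes are cleared: hashing reads 8 bytes at a time, so a probe anchored at the very last written byte can pull in up to 7 bytes past the data. The buffer handling below is illustrative only and is not the RingBuffer class used here.

#include <cstdint>
#include <cstring>
#include <vector>

static uint64_t Load64(const uint8_t* p) {
  uint64_t v;
  std::memcpy(&v, p, sizeof(v));  /* the kind of wide load hashing performs */
  return v;
}

static uint64_t LastProbe(const uint8_t* data, size_t len) {
  /* Copy into a buffer with 7 zeroed bytes of slack, mirroring the memset
     above, so the 8-byte load at the final position is fully defined
     (assumes len >= 1). */
  std::vector<uint8_t> buf(len + 7, 0);
  std::memcpy(buf.data(), data, len);
  return Load64(buf.data() + len - 1);  /* reads 1 real byte + 7 zeros */
}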
@ -595,8 +596,8 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
if (params_.quality <= 1) {
if (delta == 0 && !is_last) {
// We have no new input data and we don't have to finish the stream, so
// nothing to do.
/* We have no new input data and we don't have to finish the stream, so
nothing to do. */
*out_size = 0;
return true;
}
@ -630,11 +631,11 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
return true;
}
// Theoretical max number of commands is 1 per 2 bytes.
/* Theoretical max number of commands is 1 per 2 bytes. */
size_t newsize = num_commands_ + bytes / 2 + 1;
if (newsize > cmd_alloc_size_) {
// Reserve a bit more memory to allow merging with a next block
// without realloc: that would impact speed.
/* Reserve a bit more memory to allow merging with a next block
without realloc: that would impact speed. */
newsize += (bytes / 4) + 16;
cmd_alloc_size_ = newsize;
commands_ =
@ -662,13 +663,13 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
num_literals_ < max_literals &&
num_commands_ < max_commands &&
input_pos_ + input_block_size() <= last_flush_pos_ + max_length) {
// Merge with next input block. Everything will happen later.
/* Merge with next input block. Everything will happen later. */
last_processed_pos_ = input_pos_;
*out_size = 0;
return true;
}
// Create the last insert-only command.
/* Create the last insert-only command. */
if (last_insert_len_ > 0) {
brotli::Command cmd(last_insert_len_);
commands_[num_commands_++] = cmd;
@ -677,8 +678,8 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
}
if (!is_last && input_pos_ == last_flush_pos_) {
// We have no new input data and we don't have to finish the stream, so
// nothing to do.
/* We have no new input data and we don't have to finish the stream, so
nothing to do. */
*out_size = 0;
return true;
}
@ -708,8 +709,8 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
}
num_commands_ = 0;
num_literals_ = 0;
// Save the state of the distance cache in case we need to restore it for
// emitting an uncompressed block.
/* Save the state of the distance cache in case we need to restore it for
emitting an uncompressed block. */
memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
*output = &storage[0];
*out_size = storage_ix >> 3;
@ -829,14 +830,14 @@ static int BrotliCompressBufferQuality10(int lgwin,
ZopfliComputeShortestPath(block_size, block_start, input_buffer, mask,
max_backward_limit, dist_cache,
hasher, nodes, &path);
// We allocate a command buffer in the first iteration of this loop that
// will be likely big enough for the whole metablock, so that for most
// inputs we will not have to reallocate in later iterations. We do the
// allocation here and not before the loop, because if the input is small,
// this will be allocated after the zopfli cost model is freed, so this
// will not increase peak memory usage.
// TODO: If the first allocation is too small, increase command
// buffer size exponentially.
/* We allocate a command buffer in the first iteration of this loop that
will be likely big enough for the whole metablock, so that for most
inputs we will not have to reallocate in later iterations. We do the
allocation here and not before the loop, because if the input is small,
this will be allocated after the zopfli cost model is freed, so this
will not increase peak memory usage.
TODO: If the first allocation is too small, increase command
buffer size exponentially. */
size_t new_cmd_alloc_size = std::max(expected_num_commands,
num_commands + path.size() + 1);
if (cmd_alloc_size != new_cmd_alloc_size) {
@ -868,15 +869,15 @@ static int BrotliCompressBufferQuality10(int lgwin,
size_t storage_ix = last_byte_bits;
if (metablock_size == 0) {
// Write the ISLAST and ISEMPTY bits.
/* Write the ISLAST and ISEMPTY bits. */
storage = new uint8_t[16];
storage[0] = last_byte;
WriteBits(2, 3, &storage_ix, storage);
storage_ix = (storage_ix + 7u) & ~7u;
} else if (!ShouldCompress(input_buffer, mask, metablock_start,
metablock_size, num_literals, num_commands)) {
// Restore the distance cache, as its last update by
// CreateBackwardReferences is now unused.
/* Restore the distance cache, as its last update by
CreateBackwardReferences is now unused. */
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
storage = new uint8_t[metablock_size + 16];
storage[0] = last_byte;
@ -914,7 +915,7 @@ static int BrotliCompressBufferQuality10(int lgwin,
mb,
&storage_ix, storage);
if (metablock_size + 4 < (storage_ix >> 3)) {
// Restore the distance cache and last byte.
/* Restore the distance cache and last byte. */
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
storage[0] = last_byte;
storage_ix = last_byte_bits;
@ -928,8 +929,8 @@ static int BrotliCompressBufferQuality10(int lgwin,
metablock_start += metablock_size;
prev_byte = input_buffer[metablock_start - 1];
prev_byte2 = input_buffer[metablock_start - 2];
// Save the state of the distance cache in case we need to restore it for
// emitting an uncompressed block.
/* Save the state of the distance cache in case we need to restore it for
emitting an uncompressed block. */
memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
const size_t out_size = storage_ix >> 3;
@ -955,17 +956,17 @@ int BrotliCompressBuffer(BrotliParams params,
size_t* encoded_size,
uint8_t* encoded_buffer) {
if (*encoded_size == 0) {
// Output buffer needs at least one byte.
/* Output buffer needs at least one byte. */
return 0;
}
if (input_size == 0) {
// Handle the special case of empty input.
/* Handle the special case of empty input. */
*encoded_size = 1;
*encoded_buffer = 6;
return 1;
}
if (params.quality == 10) {
// TODO: Implement this direct path for all quality levels.
/* TODO: Implement this direct path for all quality levels. */
const int lgwin = std::min(24, std::max(16, params.lgwin));
return BrotliCompressBufferQuality10(lgwin, input_size, input_buffer,
encoded_size, encoded_buffer);

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// API for Brotli compression
/* API for Brotli compression. */
#ifndef BROTLI_ENC_ENCODE_H_
#define BROTLI_ENC_ENCODE_H_
@ -38,23 +38,23 @@ struct BrotliParams {
enable_context_modeling(true) {}
enum Mode {
// Default compression mode. The compressor does not know anything in
// advance about the properties of the input.
/* Default compression mode. The compressor does not know anything in
advance about the properties of the input. */
MODE_GENERIC = 0,
// Compression mode for UTF-8 format text input.
/* Compression mode for UTF-8 format text input. */
MODE_TEXT = 1,
// Compression mode used in WOFF 2.0.
/* Compression mode used in WOFF 2.0. */
MODE_FONT = 2
};
Mode mode;
// Controls the compression-speed vs compression-density tradeoffs. The higher
// the quality, the slower the compression. Range is 0 to 11.
/* Controls the compression-speed vs compression-density tradeoffs. The higher
the |quality|, the slower the compression. Range is 0 to 11. */
int quality;
// Base 2 logarithm of the sliding window size. Range is 10 to 24.
/* Base 2 logarithm of the sliding window size. Range is 10 to 24. */
int lgwin;
// Base 2 logarithm of the maximum input block size. Range is 16 to 24.
// If set to 0, the value will be set based on the quality.
/* Base 2 logarithm of the maximum input block size. Range is 16 to 24.
If set to 0, the value will be set based on the quality. */
int lgblock;
// These settings are deprecated and will be ignored.
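A minimal usage sketch for this API, assuming the BrotliCompressBuffer(params, input_size, input_buffer, encoded_size, encoded_buffer) entry point whose definition appears in encode.cc above; the output-buffer bound chosen here is a conservative assumption for the example, not a documented guarantee.

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

#include "./encode.h"   /* brotli::BrotliParams, brotli::BrotliCompressBuffer */

int main() {
  const std::string input = "Lorem ipsum dolor sit amet, consectetur.";

  brotli::BrotliParams params;
  params.mode = brotli::BrotliParams::MODE_TEXT;  /* UTF-8 text input */
  params.quality = 9;                             /* 0 (fast) .. 11 (dense) */
  params.lgwin = 22;                              /* 4 MiB sliding window */
  /* params.lgblock left at 0: picked automatically from the quality. */

  /* Conservative output bound for this sketch; the encoder reports failure
     by returning 0 if the buffer turns out to be too small. */
  std::vector<uint8_t> encoded(2 * input.size() + 500);
  size_t encoded_size = encoded.size();
  int ok = brotli::BrotliCompressBuffer(
      params, input.size(),
      reinterpret_cast<const uint8_t*>(input.data()),
      &encoded_size, encoded.data());
  if (!ok) {
    std::fprintf(stderr, "compression failed\n");
    return 1;
  }
  std::printf("%zu -> %zu bytes\n", input.size(), encoded_size);
  return 0;
}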
@ -207,4 +207,4 @@ int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
} // namespace brotli
#endif // BROTLI_ENC_ENCODE_H_
#endif /* BROTLI_ENC_ENCODE_H_ */

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Implementation of parallel Brotli compressor.
/* Implementation of parallel Brotli compressor. */
#include "./encode_parallel.h"
@ -63,33 +63,33 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
return false;
}
// Copy prefix + next input block into a continuous area.
/* Copy prefix + next input block into a continuous area. */
uint32_t input_pos = prefix_size;
// CreateBackwardReferences reads up to 3 bytes past the end of input if the
// mask points past the end of input.
// FindMatchLengthWithLimit could do another 8 bytes look-forward.
/* CreateBackwardReferences reads up to 3 bytes past the end of input if the
mask points past the end of input.
FindMatchLengthWithLimit could do another 8 bytes look-forward. */
std::vector<uint8_t> input(prefix_size + input_size + 4 + 8);
memcpy(&input[0], prefix_buffer, prefix_size);
memcpy(&input[input_pos], input_buffer, input_size);
// Since we don't have a ringbuffer, masking is a no-op.
// We use one less bit than the full range because some of the code uses
// mask + 1 as the size of the ringbuffer.
/* Since we don't have a ringbuffer, masking is a no-op.
We use one less bit than the full range because some of the code uses
mask + 1 as the size of the ringbuffer. */
const uint32_t mask = std::numeric_limits<uint32_t>::max() >> 1;
uint8_t prev_byte = input_pos > 0 ? input[(input_pos - 1) & mask] : 0;
uint8_t prev_byte2 = input_pos > 1 ? input[(input_pos - 2) & mask] : 0;
// Decide about UTF8 mode.
/* Decide about UTF8 mode. */
static const double kMinUTF8Ratio = 0.75;
bool utf8_mode = IsMostlyUTF8(&input[0], input_pos, mask, input_size,
kMinUTF8Ratio);
// Initialize hashers.
/* Initialize hashers. */
int hash_type = std::min(10, params.quality);
Hashers* hashers = new Hashers();
hashers->Init(hash_type);
// Compute backward references.
/* Compute backward references. */
size_t last_insert_len = 0;
size_t num_commands = 0;
size_t num_literals = 0;
@ -119,7 +119,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
}
assert(num_commands != 0);
// Build the meta-block.
/* Build the meta-block. */
MetaBlockSplit mb;
uint32_t num_direct_distance_codes =
params.mode == BrotliParams::MODE_FONT ? 12 : 0;
@ -141,7 +141,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
&mb);
}
// Set up the temporary output storage.
/* Set up the temporary output storage. */
const size_t max_out_size = 2 * input_size + 500;
std::vector<uint8_t> storage(max_out_size);
uint8_t first_byte = 0;
@ -161,7 +161,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
storage[0] = static_cast<uint8_t>(first_byte);
size_t storage_ix = first_byte_bits;
// Store the meta-block to the temporary output.
/* Store the meta-block to the temporary output. */
StoreMetaBlock(&input[0], input_pos, input_size, mask,
prev_byte, prev_byte2,
is_last,
@ -173,14 +173,14 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
&storage_ix, &storage[0]);
free(commands);
// If this is not the last meta-block, store an empty metadata
// meta-block so that the meta-block will end at a byte boundary.
/* If this is not the last meta-block, store an empty metadata
meta-block so that the meta-block will end at a byte boundary. */
if (!is_last) {
StoreSyncMetaBlock(&storage_ix, &storage[0]);
}
// If the compressed data is too large, fall back to an uncompressed
// meta-block.
/* If the compressed data is too large, fall back to an uncompressed
meta-block. */
size_t output_size = storage_ix >> 3;
if (input_size + 4 < output_size) {
storage[0] = static_cast<uint8_t>(first_byte);
@ -191,7 +191,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
output_size = storage_ix >> 3;
}
// Copy the temporary output with size-check to the output.
/* Copy the temporary output with size-check to the output. */
if (output_size > *encoded_size) {
return false;
}
@ -200,7 +200,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
return true;
}
} // namespace
} /* namespace */
int BrotliCompressBufferParallel(BrotliParams params,
size_t input_size,
@ -208,15 +208,15 @@ int BrotliCompressBufferParallel(BrotliParams params,
size_t* encoded_size,
uint8_t* encoded_buffer) {
if (*encoded_size == 0) {
// Output buffer needs at least one byte.
/* Output buffer needs at least one byte. */
return 0;
} else if (input_size == 0) {
} else if (input_size == 0) {
encoded_buffer[0] = 6;
*encoded_size = 1;
return 1;
}
// Sanitize params.
/* Sanitize params. */
if (params.lgwin < kMinWindowBits) {
params.lgwin = kMinWindowBits;
} else if (params.lgwin > kMaxWindowBits) {
@ -237,7 +237,7 @@ int BrotliCompressBufferParallel(BrotliParams params,
std::vector<std::vector<uint8_t> > compressed_pieces;
// Compress block-by-block independently.
/* Compress block-by-block independently. */
for (size_t pos = 0; pos < input_size; ) {
uint32_t input_block_size =
static_cast<uint32_t>(std::min(max_input_block_size, input_size - pos));
@ -261,7 +261,7 @@ int BrotliCompressBufferParallel(BrotliParams params,
pos += input_block_size;
}
// Piece together the output.
/* Piece together the output. */
size_t out_pos = 0;
for (size_t i = 0; i < compressed_pieces.size(); ++i) {
const std::vector<uint8_t>& out = compressed_pieces[i];
@ -276,4 +276,4 @@ int BrotliCompressBufferParallel(BrotliParams params,
return true;
}
} // namespace brotli
} /* namespace brotli */

View File

@ -4,9 +4,9 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// API for parallel Brotli compression
// Note that this is only a proof of concept currently and not part of the
// final API yet.
/* API for parallel Brotli compression.
Note that this is only a proof of concept currently and not part of the
final API yet. */
#ifndef BROTLI_ENC_ENCODE_PARALLEL_H_
#define BROTLI_ENC_ENCODE_PARALLEL_H_
@ -22,6 +22,6 @@ int BrotliCompressBufferParallel(BrotliParams params,
size_t* encoded_size,
uint8_t* encoded_buffer);
} // namespace brotli
} /* namespace brotli */
#endif // BROTLI_ENC_ENCODE_PARALLEL_H_
#endif /* BROTLI_ENC_ENCODE_PARALLEL_H_ */

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Entropy encoding (Huffman) utilities.
/* Entropy encoding (Huffman) utilities. */
#include "./entropy_encode.h"
@ -31,7 +31,7 @@ void SetDepth(const HuffmanTree &p,
}
}
// Sort the root nodes, least popular first.
/* Sort the root nodes, least popular first. */
static inline bool SortHuffmanTree(const HuffmanTree& v0,
const HuffmanTree& v1) {
if (v0.total_count_ != v1.total_count_) {
@ -40,30 +40,30 @@ static inline bool SortHuffmanTree(const HuffmanTree& v0,
return v0.index_right_or_value_ > v1.index_right_or_value_;
}
// This function will create a Huffman tree.
//
// The catch here is that the tree cannot be arbitrarily deep.
// Brotli specifies a maximum depth of 15 bits for "code trees"
// and 7 bits for "code length code trees."
//
// count_limit is the value that is to be faked as the minimum value
// and this minimum value is raised until the tree matches the
// maximum length requirement.
//
// This algorithm is not of excellent performance for very long data blocks,
// especially when population counts are longer than 2**tree_limit, but
// we are not planning to use this with extremely long blocks.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
/* This function will create a Huffman tree.
The catch here is that the tree cannot be arbitrarily deep.
Brotli specifies a maximum depth of 15 bits for "code trees"
and 7 bits for "code length code trees."
count_limit is the value that is to be faked as the minimum value
and this minimum value is raised until the tree matches the
maximum length requirement.
This algorithm is not of excellent performance for very long data blocks,
especially when population counts are longer than 2**tree_limit, but
we are not planning to use this with extremely long blocks.
See http://en.wikipedia.org/wiki/Huffman_coding */
void CreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
uint8_t *depth) {
// For block sizes below 64 kB, we never need to do a second iteration
// of this loop. Probably all of our block sizes will be smaller than
// that, so this loop is mostly of academic interest. If we actually
// would need this, we would be better off with the Katajainen algorithm.
/* For block sizes below 64 kB, we never need to do a second iteration
of this loop. Probably all of our block sizes will be smaller than
that, so this loop is mostly of academic interest. If we actually
would need this, we would be better off with the Katajainen algorithm. */
for (uint32_t count_limit = 1; ; count_limit *= 2) {
size_t n = 0;
for (size_t i = length; i != 0;) {
@ -81,19 +81,19 @@ void CreateHuffmanTree(const uint32_t *data,
std::sort(tree, tree + n, SortHuffmanTree);
// The nodes are:
// [0, n): the sorted leaf nodes that we start with.
// [n]: we add a sentinel here.
// [n + 1, 2n): new parent nodes are added here, starting from
// (n+1). These are naturally in ascending order.
// [2n]: we add a sentinel at the end as well.
// There will be (2n+1) elements at the end.
/* The nodes are:
[0, n): the sorted leaf nodes that we start with.
[n]: we add a sentinel here.
[n + 1, 2n): new parent nodes are added here, starting from
(n+1). These are naturally in ascending order.
[2n]: we add a sentinel at the end as well.
There will be (2n+1) elements at the end. */
const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
tree[n] = sentinel;
tree[n + 1] = sentinel;
size_t i = 0; // Points to the next leaf node.
size_t j = n + 1; // Points to the next non-leaf node.
size_t i = 0; /* Points to the next leaf node. */
size_t j = n + 1; /* Points to the next non-leaf node. */
for (size_t k = n - 1; k != 0; --k) {
size_t left, right;
if (tree[i].total_count_ <= tree[j].total_count_) {
@ -111,21 +111,20 @@ void CreateHuffmanTree(const uint32_t *data,
++j;
}
// The sentinel node becomes the parent node.
/* The sentinel node becomes the parent node. */
size_t j_end = 2 * n - k;
tree[j_end].total_count_ =
tree[left].total_count_ + tree[right].total_count_;
tree[j_end].index_left_ = static_cast<int16_t>(left);
tree[j_end].index_right_or_value_ = static_cast<int16_t>(right);
// Add back the last sentinel node.
/* Add back the last sentinel node. */
tree[j_end + 1] = sentinel;
}
SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
// We need to pack the Huffman tree in tree_limit bits.
// If this was not successful, add fake entities to the lowest values
// and retry.
/* We need to pack the Huffman tree in tree_limit bits. If this was not
successful, add fake entities to the lowest values and retry. */
if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
break;
}
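A self-contained sketch of the retry strategy described above: compute code lengths, and if the deepest code exceeds tree_limit, raise the faked minimum population count and rebuild. The queue-based builder below is a plain textbook Huffman construction used only for this sketch, not the sentinel-based one in CreateHuffmanTree.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <queue>
#include <vector>

struct Group {
  uint64_t count;
  std::vector<size_t> symbols;
  bool operator>(const Group& other) const { return count > other.count; }
};

/* Textbook Huffman: repeatedly merge the two lightest groups; every merge
   deepens all leaves contained in the merged groups by one. */
static void ComputeDepths(const std::vector<uint32_t>& counts,
                          std::vector<uint8_t>* depth) {
  std::priority_queue<Group, std::vector<Group>, std::greater<Group> > q;
  for (size_t i = 0; i < counts.size(); ++i) {
    if (counts[i] != 0) q.push(Group{counts[i], std::vector<size_t>(1, i)});
  }
  std::fill(depth->begin(), depth->end(), 0);
  while (q.size() > 1) {
    Group a = q.top(); q.pop();
    Group b = q.top(); q.pop();
    for (size_t k = 0; k < a.symbols.size(); ++k) ++(*depth)[a.symbols[k]];
    for (size_t k = 0; k < b.symbols.size(); ++k) ++(*depth)[b.symbols[k]];
    a.count += b.count;
    a.symbols.insert(a.symbols.end(), b.symbols.begin(), b.symbols.end());
    q.push(a);
  }
}

int main() {
  /* Skewed histogram: the unconstrained tree is deeper than tree_limit. */
  std::vector<uint32_t> counts = {1, 1, 2, 4, 8, 16, 32, 64, 128, 100000};
  const uint8_t tree_limit = 6;
  std::vector<uint8_t> depth(counts.size(), 0);
  for (uint32_t count_limit = 1; ; count_limit *= 2) {
    std::vector<uint32_t> clamped(counts);
    for (size_t i = 0; i < clamped.size(); ++i) {
      if (clamped[i] != 0) clamped[i] = std::max(clamped[i], count_limit);
    }
    ComputeDepths(clamped, &depth);
    if (*std::max_element(depth.begin(), depth.end()) <= tree_limit) break;
    /* Too deep: double the faked minimum count and rebuild. */
  }
  for (size_t i = 0; i < counts.size(); ++i) {
    std::printf("symbol %2u: count %6u -> depth %u\n",
                static_cast<unsigned>(i), static_cast<unsigned>(counts[i]),
                static_cast<unsigned>(depth[i]));
  }
  return 0;
}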
@ -229,7 +228,7 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
size_t limit;
size_t sum;
const size_t streak_limit = 1240;
// Let's make the Huffman code more compatible with rle encoding.
/* Let's make the Huffman code more compatible with rle encoding. */
size_t i;
for (i = 0; i < length; i++) {
if (counts[i]) {
@ -243,9 +242,9 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
--length;
}
if (length == 0) {
return; // All zeros.
return; /* All zeros. */
}
// Now counts[0..length - 1] does not have trailing zeros.
/* Now counts[0..length - 1] does not have trailing zeros. */
{
size_t nonzeros = 0;
uint32_t smallest_nonzero = 1 << 30;
@ -258,7 +257,7 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
}
}
if (nonzeros < 5) {
// Small histogram will model it well.
/* Small histogram will model it well. */
return;
}
size_t zeros = length - nonzeros;
@ -275,13 +274,13 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
return;
}
}
// 2) Let's mark all population counts that already can be encoded
// with an rle code.
/* 2) Let's mark all population counts that already can be encoded
with an rle code. */
memset(good_for_rle, 0, length);
{
// Let's not spoil any of the existing good rle codes.
// Mark any seq of 0's that is longer than 5 as a good_for_rle.
// Mark any seq of non-0's that is longer than 7 as a good_for_rle.
/* Let's not spoil any of the existing good rle codes.
Mark any seq of 0's that is longer than 5 as a good_for_rle.
Mark any seq of non-0's that is longer than 7 as a good_for_rle. */
uint32_t symbol = counts[0];
size_t step = 0;
for (i = 0; i <= length; ++i) {
@ -302,8 +301,8 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
}
}
}
// 3) Let's replace those population counts that lead to more rle codes.
// Math here is in 24.8 fixed point representation.
/* 3) Let's replace those population counts that lead to more rle codes.
Math here is in 24.8 fixed point representation. */
stride = 0;
limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
sum = 0;
@ -313,26 +312,26 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
(256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) {
if (stride >= 4 || (stride >= 3 && sum == 0)) {
size_t k;
// The stride must end, collapse what we have, if we have enough (4).
/* The stride must end, collapse what we have, if we have enough (4). */
size_t count = (sum + stride / 2) / stride;
if (count == 0) {
count = 1;
}
if (sum == 0) {
// Don't make an all zeros stride to be upgraded to ones.
/* Don't make an all zeros stride to be upgraded to ones. */
count = 0;
}
for (k = 0; k < stride; ++k) {
// We don't want to change value at counts[i],
// that is already belonging to the next stride. Thus - 1.
/* We don't want to change value at counts[i],
that is already belonging to the next stride. Thus - 1. */
counts[i - k - 1] = static_cast<uint32_t>(count);
}
}
stride = 0;
sum = 0;
if (i < length - 2) {
// All interesting strides have a count of at least 4,
// at least when non-zeros.
/* All interesting strides have a count of at least 4, */
/* at least when non-zeros. */
limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420;
} else if (i < length) {
limit = 256 * counts[i];
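The stride-collapsing pass above works in 24.8 fixed point: limit is 256 times a local average plus a slack of 420 (about 1.64 counts), and a count stays in the current run while it is within streak_limit (1240, about 4.84 counts) of that limit. A standalone sketch of the same idea; the adaptive limit update of the real function is omitted, and CollapseNearlyEqualRuns is an illustrative name:

#include <stddef.h>
#include <stdint.h>

/* Replace runs of nearly equal population counts with their rounded average,
   so the later RLE pass over the code lengths sees longer repetitions. */
static void CollapseNearlyEqualRuns(uint32_t* counts, size_t length) {
  const size_t streak_limit = 1240;                 /* 24.8 fixed point */
  size_t stride = 0;
  size_t sum = 0;
  size_t limit = length ? 256 * (size_t)counts[0] + 420 : 0;
  for (size_t i = 0; i <= length; ++i) {
    const bool run_ends = i == length ||
        256 * (size_t)counts[i] + streak_limit < limit ||
        256 * (size_t)counts[i] >= limit + streak_limit;
    if (run_ends) {
      if (stride >= 4) {
        uint32_t avg = (uint32_t)((sum + stride / 2) / stride);
        if (avg == 0 && sum > 0) avg = 1;           /* keep nonzero runs nonzero */
        for (size_t k = 0; k < stride; ++k) {
          counts[i - k - 1] = avg;  /* counts[i] already belongs to the next run */
        }
      }
      stride = 0;
      sum = 0;
      if (i < length) limit = 256 * (size_t)counts[i] + 420;
    }
    if (i < length) {
      ++stride;
      sum += counts[i];
    }
  }
}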
@ -387,7 +386,7 @@ void WriteHuffmanTree(const uint8_t* depth,
uint8_t* extra_bits_data) {
uint8_t previous_value = 8;
// Throw away trailing zeros.
/* Throw away trailing zeros. */
size_t new_length = length;
for (size_t i = 0; i < length; ++i) {
if (depth[length - i - 1] == 0) {
@ -397,17 +396,17 @@ void WriteHuffmanTree(const uint8_t* depth,
}
}
// First gather statistics on if it is a good idea to do rle.
/* First gather statistics on if it is a good idea to do rle. */
bool use_rle_for_non_zero = false;
bool use_rle_for_zero = false;
if (length > 50) {
// Find rle coding for longer codes.
// Shorter codes seem not to benefit from rle.
/* Find rle coding for longer codes.
Shorter codes seem not to benefit from rle. */
DecideOverRleUse(depth, new_length,
&use_rle_for_non_zero, &use_rle_for_zero);
}
// Actual rle coding.
/* Actual rle coding. */
for (size_t i = 0; i < new_length;) {
const uint8_t value = depth[i];
size_t reps = 1;
@ -432,7 +431,7 @@ void WriteHuffmanTree(const uint8_t* depth,
namespace {
uint16_t ReverseBits(int num_bits, uint16_t bits) {
static const size_t kLut[16] = { // Pre-reversed 4-bit values.
static const size_t kLut[16] = { /* Pre-reversed 4-bit values. */
0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
};
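The table holds every 4-bit value with its bits reversed, so an up-to-16-bit code can be reversed one nibble at a time. The loop that uses it lies outside this hunk; the following standalone sketch shows the usual pattern (ReverseBitsSketch is an illustrative name, not the file's exact body):

#include <stdint.h>

static uint16_t ReverseBitsSketch(int num_bits, uint16_t bits) {
  static const uint16_t kLut[16] = {   /* pre-reversed 4-bit values */
    0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
    0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
  };
  uint16_t result = 0;
  int processed = 0;
  while (processed < num_bits) {        /* reverse one nibble at a time */
    result = (uint16_t)((result << 4) | kLut[bits & 0xf]);
    bits = (uint16_t)(bits >> 4);
    processed += 4;
  }
  /* num_bits need not be a multiple of 4; drop the surplus low bits. */
  return (uint16_t)(result >> (processed - num_bits));
}
/* ReverseBitsSketch(3, 0x6) == 0x3, ReverseBitsSketch(8, 0xb1) == 0x8d. */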
@ -451,8 +450,8 @@ uint16_t ReverseBits(int num_bits, uint16_t bits) {
void ConvertBitDepthsToSymbols(const uint8_t *depth,
size_t len,
uint16_t *bits) {
// In Brotli, all bit depths are [1..15]
// 0 bit depth means that the symbol does not exist.
/* In Brotli, all bit depths are [1..15]
0 bit depth means that the symbol does not exist. */
const int kMaxBits = 16; // 0..15 are values for bits
uint16_t bl_count[kMaxBits] = { 0 };
{
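The rest of the routine falls outside this hunk. For reference, the standard canonical-code construction such a routine performs looks like the self-contained sketch below (an assumption-labeled reconstruction, not the file's exact body): count symbols per bit length, derive the first code of each length, then hand out codes in symbol order; Brotli then stores the codes bit-reversed, since bits are written least-significant-bit first (see ReverseBits above).

#include <stddef.h>
#include <stdint.h>

static void BitDepthsToCanonicalCodes(const uint8_t* depth, size_t len,
                                      uint16_t* bits) {
  const int kMaxBits = 16;                  /* 0..15 are valid code lengths */
  uint16_t bl_count[kMaxBits] = { 0 };
  uint16_t next_code[kMaxBits] = { 0 };
  for (size_t i = 0; i < len; ++i) ++bl_count[depth[i]];
  bl_count[0] = 0;                          /* depth 0 means "symbol unused" */
  uint16_t code = 0;
  for (int length = 1; length < kMaxBits; ++length) {
    code = (uint16_t)((code + bl_count[length - 1]) << 1);
    next_code[length] = code;               /* first code of this length */
  }
  for (size_t i = 0; i < len; ++i) {
    bits[i] = depth[i] ? next_code[depth[i]]++ : 0;
  }
}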

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Entropy encoding (Huffman) utilities.
/* Entropy encoding (Huffman) utilities. */
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
#define BROTLI_ENC_ENTROPY_ENCODE_H_
@ -17,7 +17,7 @@
namespace brotli {
// A node of a Huffman tree.
/* A node of a Huffman tree. */
struct HuffmanTree {
HuffmanTree() {}
HuffmanTree(uint32_t count, int16_t left, int16_t right)
@ -33,44 +33,44 @@ struct HuffmanTree {
void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
uint8_t *depth, uint8_t level);
// This function will create a Huffman tree.
//
// The (data,length) contains the population counts.
// The tree_limit is the maximum bit depth of the Huffman codes.
//
// The depth contains the tree, i.e., how many bits are used for
// the symbol.
//
// The actual Huffman tree is constructed in the tree[] array, which has to
// be at least 2 * length + 1 long.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
/* This function will create a Huffman tree.
The (data,length) contains the population counts.
The tree_limit is the maximum bit depth of the Huffman codes.
The depth contains the tree, i.e., how many bits are used for
the symbol.
The actual Huffman tree is constructed in the tree[] array, which has to
be at least 2 * length + 1 long.
See http://en.wikipedia.org/wiki/Huffman_coding */
void CreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
uint8_t *depth);
// Change the population counts in a way that the consequent
// Huffman tree compression, especially its rle-part will be more
// likely to compress this data more efficiently.
//
// length contains the size of the histogram.
// counts contains the population counts.
// good_for_rle is a buffer of at least length size
/* Change the population counts in a way that the consequent
Huffman tree compression, especially its rle-part will be more
likely to compress this data more efficiently.
length contains the size of the histogram.
counts contains the population counts.
good_for_rle is a buffer of at least length size */
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
uint8_t* good_for_rle);
// Write a Huffman tree from bit depths into the bitstream representation
// of a Huffman tree. The generated Huffman tree is to be compressed once
// more using a Huffman tree
/* Write a Huffman tree from bit depths into the bitstream representation
of a Huffman tree. The generated Huffman tree is to be compressed once
more using a Huffman tree */
void WriteHuffmanTree(const uint8_t* depth,
size_t num,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data);
// Get the actual bit values for a tree of bit depths.
/* Get the actual bit values for a tree of bit depths. */
void ConvertBitDepthsToSymbols(const uint8_t *depth,
size_t len,
uint16_t *bits);
@ -102,4 +102,4 @@ typedef EntropyCode<258> EntropyCodeBlockType;
} // namespace brotli
#endif // BROTLI_ENC_ENTROPY_ENCODE_H_
#endif /* BROTLI_ENC_ENTROPY_ENCODE_H_ */

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Static entropy codes used for faster meta-block encoding.
/* Static entropy codes used for faster meta-block encoding. */
#ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
@ -569,4 +569,4 @@ inline void StoreStaticDistanceHuffmanTree(size_t* storage_ix,
} // namespace brotli
#endif // BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
#endif /* BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ */

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Utilities for fast computation of logarithms.
/* Utilities for fast computation of logarithms. */
#ifndef BROTLI_ENC_FAST_LOG_H_
#define BROTLI_ENC_FAST_LOG_H_
@ -26,10 +26,10 @@ static inline uint32_t Log2FloorNonZero(size_t n) {
#endif
}
// A lookup table for small values of log2(int) to be used in entropy
// computation.
//
// ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
/* A lookup table for small values of log2(int) to be used in entropy
computation.
", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]]) */
static const float kLog2Table[] = {
0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
@ -119,14 +119,15 @@ static const float kLog2Table[] = {
7.9943534368588578f
};
// Faster logarithm for small integers, with the property of log2(0) == 0.
/* Faster logarithm for small integers, with the property of log2(0) == 0. */
static inline double FastLog2(size_t v) {
if (v < sizeof(kLog2Table) / sizeof(kLog2Table[0])) {
return kLog2Table[v];
}
#if defined(_MSC_VER) && _MSC_VER <= 1700
// Visual Studio 2012 does not have the log2() function defined, so we use
// log() and a multiplication instead.
#if (defined(_MSC_VER) && _MSC_VER <= 1600) || \
(defined(__ANDROID_API__) && __ANDROID_API__ < 18)
/* Visual Studio 2010 and Android API levels < 18 do not have the log2()
* function defined, so we use log() and a multiplication instead. */
static const double kLog2Inv = 1.4426950408889634f;
return log(static_cast<double>(v)) * kLog2Inv;
#else
@ -136,4 +137,4 @@ static inline double FastLog2(size_t v) {
} // namespace brotli
#endif // BROTLI_ENC_FAST_LOG_H_
#endif /* BROTLI_ENC_FAST_LOG_H_ */
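The log2(0) == 0 convention matters for entropy sums over histograms: zero counts then contribute nothing without a special case. A small illustrative helper built on this header (HistogramBits is a hypothetical name, not part of fast_log.h):

#include <stddef.h>
#include <stdint.h>
#include "./fast_log.h"

static double HistogramBits(const uint32_t* counts, size_t length) {
  size_t total = 0;
  double bits = 0.0;
  for (size_t i = 0; i < length; ++i) total += counts[i];
  for (size_t i = 0; i < length; ++i) {
    /* A zero count contributes nothing because FastLog2(0) == 0. */
    bits -= (double)counts[i] * brotli::FastLog2(counts[i]);
  }
  bits += (double)total * brotli::FastLog2(total);
  return bits;   /* Shannon lower bound, in bits, for coding the histogram */
}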

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Function to find maximal matching prefixes of strings.
/* Function to find maximal matching prefixes of strings. */
#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
@ -14,14 +14,14 @@
namespace brotli {
// Separate implementation for little-endian 64-bit targets, for speed.
/* Separate implementation for little-endian 64-bit targets, for speed. */
#if defined(__GNUC__) && defined(_LP64) && defined(IS_LITTLE_ENDIAN)
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
size_t matched = 0;
size_t limit2 = (limit >> 3) + 1; // + 1 is for pre-decrement in while
size_t limit2 = (limit >> 3) + 1; /* + 1 is for pre-decrement in while */
while (PREDICT_TRUE(--limit2)) {
if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
@ -35,7 +35,7 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
return matched;
}
}
limit = (limit & 7) + 1; // + 1 is for pre-decrement in while
limit = (limit & 7) + 1; /* + 1 is for pre-decrement in while */
while (--limit) {
if (PREDICT_TRUE(s1[matched] == *s2)) {
++s2;
@ -48,15 +48,15 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
}
#else
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
const uint8_t* s2,
size_t limit) {
const uint8_t* s2,
size_t limit) {
size_t matched = 0;
const uint8_t* s2_limit = s2 + limit;
const uint8_t* s2_ptr = s2;
// Find out how long the match is. We loop over the data 32 bits at a
// time until we find a 32-bit block that doesn't match; then we find
// the first non-matching bit and use that to calculate the total
// length of the match.
/* Find out how long the match is. We loop over the data 32 bits at a
time until we find a 32-bit block that doesn't match; then we find
the first non-matching bit and use that to calculate the total
length of the match. */
while (s2_ptr <= s2_limit - 4 &&
BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
@ -73,4 +73,4 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
} // namespace brotli
#endif // BROTLI_ENC_FIND_MATCH_LENGTH_H_
#endif /* BROTLI_ENC_FIND_MATCH_LENGTH_H_ */
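A standalone little-endian sketch of the word-at-a-time idea used in the fast path above: compare 8 bytes per iteration and, on the first mismatching word, count the trailing zero bits of the XOR to locate the first differing byte. __builtin_ctzll is a GCC/Clang intrinsic; the real header uses BROTLI_UNALIGNED_LOAD64 and the PREDICT_* hints instead.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static size_t MatchLengthLE64(const uint8_t* s1, const uint8_t* s2,
                              size_t limit) {
  size_t matched = 0;
  while (matched + 8 <= limit) {
    uint64_t w1, w2;
    memcpy(&w1, s1 + matched, 8);        /* portable unaligned loads */
    memcpy(&w2, s2 + matched, 8);
    const uint64_t x = w1 ^ w2;
    if (x != 0) {
      /* The lowest set bit of x marks the first differing byte. */
      return matched + ((size_t)__builtin_ctzll(x) >> 3);
    }
    matched += 8;
  }
  while (matched < limit && s1[matched] == s2[matched]) ++matched;
  return matched;
}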

View File

@ -4,8 +4,8 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// A (forgetful) hash table to the data seen by the compressor, to
// help create backward references to previous data.
/* A (forgetful) hash table to the data seen by the compressor, to
help create backward references to previous data. */
#ifndef BROTLI_ENC_HASH_H_
#define BROTLI_ENC_HASH_H_
@ -42,38 +42,38 @@ static const uint8_t kCutoffTransforms[] = {
0, 12, 27, 23, 42, 63, 56, 48, 59, 64
};
// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime, the oddity is enough
// for this use.
// * The number has been tuned heuristically against compression benchmarks.
/* kHashMul32 multiplier has these properties:
* The multiplier must be odd. Otherwise we may lose the highest bit.
* No long streaks of 1s or 0s.
* There is no effort to ensure that it is a prime, the oddity is enough
for this use.
* The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;
template<int kShiftBits>
inline uint32_t Hash(const uint8_t *data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
// The higher bits contain more mixture from the multiplication,
// so we take our results from there.
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
return h >> (32 - kShiftBits);
}
// Usually, we always choose the longest backward reference. This function
// allows for the exception of that rule.
//
// If we choose a backward reference that is further away, it will
// usually be coded with more bits. We approximate this by assuming
// log2(distance). If the distance can be expressed in terms of the
// last four distances, we use some heuristic constants to estimate
// the bits cost. For the first up to four literals we use the bit
// cost of the literals from the literal cost model, after that we
// use the average bit cost of the cost model.
//
// This function is used to sometimes discard a longer backward reference
// when it is not much longer and the bit cost for encoding it is more
// than the saved literals.
//
// backward_reference_offset MUST be positive.
/* Usually, we always choose the longest backward reference. This function
allows for the exception of that rule.
If we choose a backward reference that is further away, it will
usually be coded with more bits. We approximate this by assuming
log2(distance). If the distance can be expressed in terms of the
last four distances, we use some heuristic constants to estimate
the bits cost. For the first up to four literals we use the bit
cost of the literals from the literal cost model, after that we
use the average bit cost of the cost model.
This function is used to sometimes discard a longer backward reference
when it is not much longer and the bit cost for encoding it is more
than the saved literals.
backward_reference_offset MUST be positive. */
inline double BackwardReferenceScore(size_t copy_length,
size_t backward_reference_offset) {
return 5.4 * static_cast<double>(copy_length) -
@ -511,13 +511,13 @@ class HashLongestMatch {
return match_found;
}
// Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
// length of max_length and stores the position cur_ix in the hash table.
//
// Sets *num_matches to the number of matches found, and stores the found
// matches in matches[0] to matches[*num_matches - 1]. The matches will be
// sorted by strictly increasing length and (non-strictly) increasing
// distance.
/* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
length of max_length and stores the position cur_ix in the hash table.
Sets *num_matches to the number of matches found, and stores the found
matches in matches[0] to matches[*num_matches - 1]. The matches will be
sorted by strictly increasing length and (non-strictly) increasing
distance. */
size_t FindAllMatches(const uint8_t* data,
const size_t ring_buffer_mask,
const size_t cur_ix,
@ -936,7 +936,7 @@ struct Hashers {
}
}
// Custom LZ77 window.
/* Custom LZ77 window. */
void PrependCustomDictionary(
int type, int lgwin, const size_t size, const uint8_t* dict) {
switch (type) {
@ -972,4 +972,4 @@ struct Hashers {
} // namespace brotli
#endif // BROTLI_ENC_HASH_H_
#endif /* BROTLI_ENC_HASH_H_ */

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Build per-context histograms of literals, commands and distance codes.
/* Build per-context histograms of literals, commands and distance codes. */
#include "./histogram.h"

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Models the histograms of literals, commands and distance codes.
/* Models the histograms of literals, commands and distance codes. */
#ifndef BROTLI_ENC_HISTOGRAM_H_
#define BROTLI_ENC_HISTOGRAM_H_
@ -92,4 +92,4 @@ void BuildHistograms(
} // namespace brotli
#endif // BROTLI_ENC_HISTOGRAM_H_
#endif /* BROTLI_ENC_HISTOGRAM_H_ */

View File

@ -4,7 +4,8 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Literal cost model to allow backward reference replacement to be efficient.
/* Literal cost model to allow backward reference replacement to be efficient.
*/
#include "./literal_cost.h"
@ -20,14 +21,14 @@ namespace brotli {
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
if (c < 128) {
return 0; // Next one is the 'Byte 1' again.
} else if (c >= 192) { // Next one is the 'Byte 2' of utf-8 encoding.
return 0; /* Next one is the 'Byte 1' again. */
} else if (c >= 192) { /* Next one is the 'Byte 2' of utf-8 encoding. */
return std::min<size_t>(1, clamp);
} else {
// Let's decide over the last byte if this ends the sequence.
/* Let's decide over the last byte if this ends the sequence. */
if (last < 0xe0) {
return 0; // Completed two or three byte coding.
} else { // Next one is the 'Byte 3' of utf-8 encoding.
return 0; /* Completed two or three byte coding. */
} else { /* Next one is the 'Byte 3' of utf-8 encoding. */
return std::min<size_t>(2, clamp);
}
}
@ -36,7 +37,7 @@ static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
const uint8_t *data) {
size_t counts[3] = { 0 };
size_t max_utf8 = 1; // should be 2, but 1 compresses better.
size_t max_utf8 = 1; /* should be 2, but 1 compresses better. */
size_t last_c = 0;
size_t utf8_pos = 0;
for (size_t i = 0; i < len; ++i) {
@ -56,16 +57,15 @@ static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
const uint8_t *data, float *cost) {
// max_utf8 is 0 (normal ascii single byte modeling),
// 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
/* max_utf8 is 0 (normal ascii single byte modeling),
1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling). */
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
size_t histogram[3][256] = { { 0 } };
size_t window_half = 495;
size_t in_window = std::min(window_half, len);
size_t in_window_utf8[3] = { 0 };
// Bootstrap histograms.
/* Bootstrap histograms. */
size_t last_c = 0;
size_t utf8_pos = 0;
for (size_t i = 0; i < in_window; ++i) {
@ -76,10 +76,10 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
last_c = c;
}
// Compute bit costs with sliding window.
/* Compute bit costs with sliding window. */
for (size_t i = 0; i < len; ++i) {
if (i >= window_half) {
// Remove a byte in the past.
/* Remove a byte in the past. */
size_t c = i < window_half + 1 ?
0 : data[(pos + i - window_half - 1) & mask];
size_t last_c = i < window_half + 2 ?
@ -89,7 +89,7 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
--in_window_utf8[utf8_pos2];
}
if (i + window_half < len) {
// Add a byte in the future.
/* Add a byte in the future. */
size_t c = data[(pos + i + window_half - 1) & mask];
size_t last_c = data[(pos + i + window_half - 2) & mask];
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
@ -110,10 +110,10 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
lit_cost *= 0.5;
lit_cost += 0.5;
}
// Make the first bytes more expensive -- seems to help, not sure why.
// Perhaps because the entropy source is changing its properties
// rapidly in the beginning of the file, perhaps because the beginning
// of the data is a statistical "anomaly".
/* Make the first bytes more expensive -- seems to help, not sure why.
Perhaps because the entropy source is changing its properties
rapidly in the beginning of the file, perhaps because the beginning
of the data is a statistical "anomaly". */
if (i < 2000) {
lit_cost += 0.7 - (static_cast<double>(2000 - i) / 2000.0 * 0.35);
}
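The structure of the loop above, reduced to its essentials: a byte histogram is maintained over a window of roughly window_half positions on each side of i, and each literal is charged -log2(frequency). A self-contained sketch without the UTF-8 context split and without the smoothing terms (EstimateCostsSketch is an illustrative name):

#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <algorithm>

static void EstimateCostsSketch(const uint8_t* data, size_t len, float* cost) {
  const size_t window_half = 495;
  size_t histogram[256] = { 0 };
  size_t in_window = std::min(window_half, len);
  for (size_t i = 0; i < in_window; ++i) ++histogram[data[i]];  /* bootstrap */
  for (size_t i = 0; i < len; ++i) {
    if (i >= window_half) {            /* drop the byte leaving the window */
      --histogram[data[i - window_half]];
      --in_window;
    }
    if (i + window_half < len) {       /* add the byte entering the window */
      ++histogram[data[i + window_half]];
      ++in_window;
    }
    /* data[i] itself is always inside the window, so the count is >= 1. */
    const size_t count = histogram[data[i]];
    cost[i] = (float)log2((double)in_window / (double)count);
  }
}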
@ -131,20 +131,20 @@ void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
size_t window_half = 2000;
size_t in_window = std::min(window_half, len);
// Bootstrap histogram.
/* Bootstrap histogram. */
for (size_t i = 0; i < in_window; ++i) {
++histogram[data[(pos + i) & mask]];
}
// Compute bit costs with sliding window.
/* Compute bit costs with sliding window. */
for (size_t i = 0; i < len; ++i) {
if (i >= window_half) {
// Remove a byte in the past.
/* Remove a byte in the past. */
--histogram[data[(pos + i - window_half) & mask]];
--in_window;
}
if (i + window_half < len) {
// Add a byte in the future.
/* Add a byte in the future. */
++histogram[data[(pos + i + window_half) & mask]];
++in_window;
}

View File

@ -4,7 +4,8 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Literal cost model to allow backward reference replacement to be efficient.
/* Literal cost model to allow backward reference replacement to be efficient.
*/
#ifndef BROTLI_ENC_LITERAL_COST_H_
#define BROTLI_ENC_LITERAL_COST_H_
@ -13,12 +14,12 @@
namespace brotli {
// Estimates how many bits the literals in the interval [pos, pos + len) in the
// ringbuffer (data, mask) will take entropy coded and writes these estimates
// to the cost[0..len) array.
/* Estimates how many bits the literals in the interval [pos, pos + len) in the
ringbuffer (data, mask) will take entropy coded and writes these estimates
to the cost[0..len) array. */
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
const uint8_t *data, float *cost);
} // namespace brotli
#endif // BROTLI_ENC_LITERAL_COST_H_
#endif /* BROTLI_ENC_LITERAL_COST_H_ */
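A usage sketch for the prototype above: the cost array needs one float per literal, and mask is the ringbuffer size minus one. The parameter rb_bits stands for the ringbuffer's window_bits and is illustrative.

#include <stddef.h>
#include <stdint.h>
#include <vector>
#include "./literal_cost.h"

void ExampleLiteralCosts(const uint8_t* ringbuffer, size_t rb_bits,
                         size_t pos, size_t len) {
  const size_t mask = ((size_t)1 << rb_bits) - 1;  /* ringbuffer size - 1 */
  std::vector<float> cost(len);                    /* one estimate per literal */
  brotli::EstimateBitCostsForLiterals(pos, len, mask, ringbuffer, &cost[0]);
  /* cost[i] now estimates the entropy-coded size, in bits, of the literal
     stored at ringbuffer position (pos + i) & mask. */
}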

View File

@ -4,11 +4,12 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Algorithms for distributing the literals and commands of a metablock between
// block types and contexts.
/* Algorithms for distributing the literals and commands of a metablock between
block types and contexts. */
#include "./metablock.h"
#include "../common/types.h"
#include "./block_splitter.h"
#include "./cluster.h"
#include "./context.h"
@ -55,7 +56,7 @@ void BuildMetaBlock(const uint8_t* ringbuffer,
&mb->command_histograms,
&distance_histograms);
// Histogram ids need to fit in one byte.
/* Histogram ids need to fit in one byte. */
static const size_t kMaxNumberOfHistograms = 256;
ClusterHistograms(literal_histograms,
@ -201,32 +202,32 @@ class BlockSplitter {
private:
static const uint16_t kMaxBlockTypes = 256;
// Alphabet size of particular block category.
/* Alphabet size of particular block category. */
const size_t alphabet_size_;
// We collect at least this many symbols for each block.
/* We collect at least this many symbols for each block. */
const size_t min_block_size_;
// We merge histograms A and B if
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
// where A is the current histogram and B is the histogram of the last or the
// second last block type.
/* We merge histograms A and B if
entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
where A is the current histogram and B is the histogram of the last or the
second last block type. */
const double split_threshold_;
size_t num_blocks_;
BlockSplit* split_; // not owned
std::vector<HistogramType>* histograms_; // not owned
BlockSplit* split_; /* not owned */
std::vector<HistogramType>* histograms_; /* not owned */
// The number of symbols that we want to collect before deciding on whether
// or not to merge the block with a previous one or emit a new block.
/* The number of symbols that we want to collect before deciding on whether
or not to merge the block with a previous one or emit a new block. */
size_t target_block_size_;
// The number of symbols in the current histogram.
/* The number of symbols in the current histogram. */
size_t block_size_;
// Offset of the current histogram.
/* Offset of the current histogram. */
size_t curr_histogram_ix_;
// Offset of the histograms of the previous two block types.
/* Offset of the histograms of the previous two block types. */
size_t last_histogram_ix_[2];
// Entropy of the previous two block types.
/* Entropy of the previous two block types. */
double last_entropy_[2];
// The number of times we merged the current block with the last one.
/* The number of times we merged the current block with the last one. */
size_t merge_last_count_;
};
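The merge rule quoted above, written out as a standalone sketch (HistogramEntropyBits and ShouldMergeBlocks are illustrative helpers, not members of this class):

#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <vector>

/* Entropy, in bits, of coding a block with this histogram. */
static double HistogramEntropyBits(const uint32_t* histo, size_t n) {
  double total = 0.0, bits = 0.0;
  for (size_t i = 0; i < n; ++i) total += histo[i];
  for (size_t i = 0; i < n; ++i) {
    if (histo[i]) bits -= (double)histo[i] * log2((double)histo[i] / total);
  }
  return bits;
}

static bool ShouldMergeBlocks(const uint32_t* a, const uint32_t* b, size_t n,
                              double split_threshold) {
  std::vector<uint32_t> ab(n);
  for (size_t i = 0; i < n; ++i) ab[i] = a[i] + b[i];
  /* Merge if entropy(A+B) < entropy(A) + entropy(B) + split_threshold. */
  return HistogramEntropyBits(&ab[0], n) <
         HistogramEntropyBits(a, n) + HistogramEntropyBits(b, n) +
         split_threshold;
}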
@ -314,10 +315,10 @@ class ContextBlockSplitter {
}
}
// Does one of three things:
// (1) emits the current block with a new block type;
// (2) emits the current block with the type of the second last block;
// (3) merges the current block with the last block.
/* Does one of three things:
(1) emits the current block with a new block type;
(2) emits the current block with the type of the second last block;
(3) merges the current block with the last block. */
void FinishBlock(bool is_final) {
if (block_size_ < min_block_size_) {
block_size_ = min_block_size_;
@ -336,10 +337,10 @@ class ContextBlockSplitter {
curr_histogram_ix_ += num_contexts_;
block_size_ = 0;
} else if (block_size_ > 0) {
// Try merging the set of histograms for the current block type with the
// respective set of histograms for the last and second last block types.
// Decide over the split based on the total reduction of entropy across
// all contexts.
/* Try merging the set of histograms for the current block type with the
respective set of histograms for the last and second last block types.
Decide over the split based on the total reduction of entropy across
all contexts. */
std::vector<double> entropy(num_contexts_);
std::vector<HistogramType> combined_histo(2 * num_contexts_);
std::vector<double> combined_entropy(2 * num_contexts_);

View File

@ -4,14 +4,15 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Algorithms for distributing the literals and commands of a metablock between
// block types and contexts.
/* Algorithms for distributing the literals and commands of a metablock between
block types and contexts. */
#ifndef BROTLI_ENC_METABLOCK_H_
#define BROTLI_ENC_METABLOCK_H_
#include <vector>
#include "../common/types.h"
#include "./command.h"
#include "./histogram.h"
@ -36,7 +37,7 @@ struct MetaBlockSplit {
std::vector<HistogramDistance> distance_histograms;
};
// Uses the slow shortest-path block splitter and does context clustering.
/* Uses the slow shortest-path block splitter and does context clustering. */
void BuildMetaBlock(const uint8_t* ringbuffer,
const size_t pos,
const size_t mask,
@ -47,8 +48,8 @@ void BuildMetaBlock(const uint8_t* ringbuffer,
ContextType literal_context_mode,
MetaBlockSplit* mb);
// Uses a fast greedy block splitter that tries to merge current block with the
// last or the second last block and does not do any context modeling.
/* Uses a fast greedy block splitter that tries to merge current block with the
last or the second last block and does not do any context modeling. */
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
size_t pos,
size_t mask,
@ -56,9 +57,9 @@ void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
size_t n_commands,
MetaBlockSplit* mb);
// Uses a fast greedy block splitter that tries to merge current block with the
// last or the second last block and uses a static context clustering which
// is the same for all block types.
/* Uses a fast greedy block splitter that tries to merge current block with the
last or the second last block and uses a static context clustering which
is the same for all block types. */
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
size_t pos,
size_t mask,
@ -77,4 +78,4 @@ void OptimizeHistograms(size_t num_direct_distance_codes,
} // namespace brotli
#endif // BROTLI_ENC_METABLOCK_H_
#endif /* BROTLI_ENC_METABLOCK_H_ */

View File

@ -4,14 +4,15 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Macros for endianness, branch prediction and unaligned loads and stores.
/* Macros for endianness, branch prediction and unaligned loads and stores. */
#ifndef BROTLI_ENC_PORT_H_
#define BROTLI_ENC_PORT_H_
#include <assert.h>
#include <string.h>
#include <string.h> /* memcpy */
#include "../common/port.h"
#include "../common/types.h"
#if defined OS_LINUX || defined OS_CYGWIN
@ -25,9 +26,9 @@
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#endif
// define the macro IS_LITTLE_ENDIAN
// using the above endian definitions from endian.h if
// endian.h was included
/* define the macro IS_LITTLE_ENDIAN
using the above endian definitions from endian.h if
endian.h was included */
#ifdef __BYTE_ORDER
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define IS_LITTLE_ENDIAN
@ -38,41 +39,28 @@
#if defined(__LITTLE_ENDIAN__)
#define IS_LITTLE_ENDIAN
#endif
#endif // __BYTE_ORDER
#endif /* __BYTE_ORDER */
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define IS_LITTLE_ENDIAN
#endif
// Enable little-endian optimization for x64 architecture on Windows.
/* Enable little-endian optimization for x64 architecture on Windows. */
#if (defined(_WIN32) || defined(_WIN64)) && defined(_M_X64)
#define IS_LITTLE_ENDIAN
#endif
/* Compatibility with non-clang compilers. */
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ > 95) || \
(defined(__llvm__) && __has_builtin(__builtin_expect))
#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define PREDICT_FALSE(x) (x)
#define PREDICT_TRUE(x) (x)
#endif
// Portable handling of unaligned loads, stores, and copies.
// On some platforms, like ARM, the copy functions can be more efficient
// than a load and a store.
/* Portable handling of unaligned loads, stores, and copies.
On some platforms, like ARM, the copy functions can be more efficient
than a load and a store. */
#if defined(ARCH_PIII) || \
defined(ARCH_ATHLON) || defined(ARCH_K8) || defined(_ARCH_PPC)
// x86 and x86-64 can perform unaligned loads/stores directly;
// modern PowerPC hardware can also do unaligned integer loads and stores;
// but note: the FPU still sends unaligned loads and stores to a trap handler!
/* x86 and x86-64 can perform unaligned loads/stores directly;
modern PowerPC hardware can also do unaligned integer loads and stores;
but note: the FPU still sends unaligned loads and stores to a trap handler!
*/
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))
@ -94,50 +82,50 @@
!defined(__ARM_ARCH_6ZK__) && \
!defined(__ARM_ARCH_6T2__)
// ARMv7 and newer support native unaligned accesses, but only of 16-bit
// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
// do an unaligned read and rotate the words around a bit, or do the reads very
// slowly (trip through kernel mode).
/* ARMv7 and newer support native unaligned accesses, but only of 16-bit
and 32-bit values (not 64-bit); older versions either raise a fatal signal,
do an unaligned read and rotate the words around a bit, or do the reads very
slowly (trip through kernel mode). */
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
(*reinterpret_cast<uint32_t *>(_p) = (_val))
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
static inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
uint64_t t;
memcpy(&t, p, sizeof t);
return t;
}
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
static inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
#else
// These functions are provided for architectures that don't support
// unaligned loads and stores.
/* These functions are provided for architectures that don't support */
/* unaligned loads and stores. */
inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
static inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
uint32_t t;
memcpy(&t, p, sizeof t);
return t;
}
inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
static inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
uint64_t t;
memcpy(&t, p, sizeof t);
return t;
}
inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
static inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
memcpy(p, &v, sizeof v);
}
inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
static inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
memcpy(p, &v, sizeof v);
}
#endif
#endif // BROTLI_ENC_PORT_H_
#endif /* BROTLI_ENC_PORT_H_ */

View File

@ -4,8 +4,8 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Functions for encoding of integers into prefix codes, the amount of extra
// bits, and the actual values of the extra bits.
/* Functions for encoding of integers into prefix codes, the amount of extra
bits, and the actual values of the extra bits. */
#ifndef BROTLI_ENC_PREFIX_H_
#define BROTLI_ENC_PREFIX_H_
@ -76,4 +76,4 @@ inline void PrefixEncodeCopyDistance(size_t distance_code,
} // namespace brotli
#endif // BROTLI_ENC_PREFIX_H_
#endif /* BROTLI_ENC_PREFIX_H_ */

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Sliding window over the input data.
/* Sliding window over the input data. */
#ifndef BROTLI_ENC_RINGBUFFER_H_
#define BROTLI_ENC_RINGBUFFER_H_
@ -16,15 +16,15 @@
namespace brotli {
// A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
// data in a circular manner: writing a byte writes it to:
// `position() % (1 << window_bits)'.
// For convenience, the RingBuffer array contains another copy of the
// first `1 << tail_bits' bytes:
// buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
// and another copy of the last two bytes:
// buffer_[-1] == buffer_[(1 << window_bits) - 1] and
// buffer_[-2] == buffer_[(1 << window_bits) - 2].
/* A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
data in a circular manner: writing a byte writes it to:
`position() % (1 << window_bits)'.
For convenience, the RingBuffer array contains another copy of the
first `1 << tail_bits' bytes:
buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
and another copy of the last two bytes:
buffer_[-1] == buffer_[(1 << window_bits) - 1] and
buffer_[-2] == buffer_[(1 << window_bits) - 2]. */
class RingBuffer {
public:
RingBuffer(int window_bits, int tail_bits)
@ -41,8 +41,8 @@ class RingBuffer {
free(data_);
}
// Allocates or re-allocates data_ to the given length plus some slack
// region before and after. Fills the slack regions with zeros.
/* Allocates or re-allocates data_ to the given length plus some slack
region before and after. Fills the slack regions with zeros. */
inline void InitBuffer(const uint32_t buflen) {
static const size_t kSlackForEightByteHashingEverywhere = 7;
cur_size_ = buflen;
@ -55,41 +55,41 @@ class RingBuffer {
}
}
// Push bytes into the ring buffer.
/* Push bytes into the ring buffer. */
void Write(const uint8_t *bytes, size_t n) {
if (pos_ == 0 && n < tail_size_) {
// Special case for the first write: to process the first block, we don't
// need to allocate the whole ringbuffer and we don't need the tail
// either. However, we do this memory usage optimization only if the
// first write is less than the tail size, which is also the input block
// size, otherwise it is likely that other blocks will follow and we
// will need to reallocate to the full size anyway.
/* Special case for the first write: to process the first block, we don't
need to allocate the whole ringbuffer and we don't need the tail
either. However, we do this memory usage optimization only if the
first write is less than the tail size, which is also the input block
size, otherwise it is likely that other blocks will follow and we
will need to reallocate to the full size anyway. */
pos_ = static_cast<uint32_t>(n);
InitBuffer(pos_);
memcpy(buffer_, bytes, n);
return;
}
if (cur_size_ < total_size_) {
// Lazily allocate the full buffer.
/* Lazily allocate the full buffer. */
InitBuffer(total_size_);
// Initialize the last two bytes to zero, so that we don't have to worry
// later when we copy the last two bytes to the first two positions.
/* Initialize the last two bytes to zero, so that we don't have to worry
later when we copy the last two bytes to the first two positions. */
buffer_[size_ - 2] = 0;
buffer_[size_ - 1] = 0;
}
const size_t masked_pos = pos_ & mask_;
// The length of the writes is limited so that we do not need to worry
// about a write
/* The length of the writes is limited so that we do not need to worry
about a write */
WriteTail(bytes, n);
if (PREDICT_TRUE(masked_pos + n <= size_)) {
// A single write fits.
/* A single write fits. */
memcpy(&buffer_[masked_pos], bytes, n);
} else {
// Split into two writes.
// Copy into the end of the buffer, including the tail buffer.
/* Split into two writes.
Copy into the end of the buffer, including the tail buffer. */
memcpy(&buffer_[masked_pos], bytes,
std::min(n, total_size_ - masked_pos));
// Copy into the beginning of the buffer
/* Copy into the beginning of the buffer */
memcpy(&buffer_[0], bytes + (size_ - masked_pos),
n - (size_ - masked_pos));
}
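The same write logic as a compact standalone sketch. MirroredRing and MirroredWrite are illustrative names, not the class's members; n is assumed to be at most size, and the tail mirror is refreshed wholesale instead of only for the touched range as the real code does.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct MirroredRing {
  uint8_t* data;   /* size + tail bytes of storage */
  size_t size;     /* 1 << window_bits, a power of two */
  size_t tail;     /* 1 << tail_bits */
  size_t pos;      /* total number of bytes written so far */
};

static void MirroredWrite(MirroredRing* rb, const uint8_t* bytes, size_t n) {
  const size_t mask = rb->size - 1;
  const size_t masked_pos = rb->pos & mask;
  if (masked_pos + n <= rb->size) {
    memcpy(rb->data + masked_pos, bytes, n);       /* a single write fits */
  } else {
    const size_t first = rb->size - masked_pos;    /* split into two writes */
    memcpy(rb->data + masked_pos, bytes, first);
    memcpy(rb->data, bytes + first, n - first);
  }
  /* Re-establish data[i] == data[i + size] for i < tail, so reads of up to
     tail bytes starting below size never have to wrap. */
  memcpy(rb->data + rb->size, rb->data, rb->tail);
  rb->pos += n;
}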
@ -142,4 +142,4 @@ class RingBuffer {
} // namespace brotli
#endif // BROTLI_ENC_RINGBUFFER_H_
#endif /* BROTLI_ENC_RINGBUFFER_H_ */

View File

@ -17,8 +17,8 @@ namespace brotli {
inline uint32_t Hash(const uint8_t *data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
// The higher bits contain more mixture from the multiplication,
// so we take our results from there.
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
return h >> (32 - kDictNumBits);
}
@ -42,18 +42,18 @@ inline bool IsMatch(DictWord w, const uint8_t* data, size_t max_length) {
const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;
const uint8_t* dict = &kBrotliDictionary[offset];
if (w.transform == 0) {
// Match against base dictionary word.
/* Match against base dictionary word. */
return FindMatchLengthWithLimit(dict, data, w.len) == w.len;
} else if (w.transform == 10) {
// Match against uppercase first transform.
// Note that there are only ASCII uppercase words in the lookup table.
/* Match against uppercase first transform.
Note that there are only ASCII uppercase words in the lookup table. */
return (dict[0] >= 'a' && dict[0] <= 'z' &&
(dict[0] ^ 32) == data[0] &&
FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
w.len - 1u);
} else {
// Match against uppercase all transform.
// Note that there are only ASCII uppercase words in the lookup table.
/* Match against uppercase all transform.
Note that there are only ASCII uppercase words in the lookup table. */
for (size_t i = 0; i < w.len; ++i) {
if (dict[i] >= 'a' && dict[i] <= 'z') {
if ((dict[i] ^ 32) != data[i]) return false;
@ -82,12 +82,12 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
const size_t id = w.idx;
if (w.transform == 0) {
const size_t matchlen = DictMatchLength(data, id, l, max_length);
// Transform "" + kIdentity + ""
/* Transform "" + kIdentity + "" */
if (matchlen == l) {
AddMatch(id, l, l, matches);
found_match = true;
}
// Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing "
/* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */
if (matchlen >= l - 1) {
AddMatch(id + 12 * n, l - 1, l, matches);
if (l + 2 < max_length &&
@ -97,7 +97,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
}
found_match = true;
}
// Transform "" + kOmitLastN + "" (N = 2 .. 9)
/* Transform "" + kOmitLastN + "" (N = 2 .. 9) */
size_t minlen = min_length;
if (l > 9) minlen = std::max(minlen, l - 9);
size_t maxlen = std::min(matchlen, l - 2);
@ -109,7 +109,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
continue;
}
const uint8_t* s = &data[l];
// Transforms "" + kIdentity + <suffix>
/* Transforms "" + kIdentity + <suffix> */
if (s[0] == ' ') {
AddMatch(id + n, l + 1, l, matches);
if (s[1] == 'a') {
@ -127,7 +127,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
} else if (s[1] == 'b') {
if (s[2] == 'y' && s[3] == ' ') {
AddMatch(id + 38 * n, l + 4, l, matches);
}
}
} else if (s[1] == 'i') {
if (s[2] == 'n') {
if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
@ -235,7 +235,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
} else if (s[0] == 'i') {
if (s[1] == 'v') {
if (s[2] == 'e' && s[3] == ' ') {
AddMatch(id + 92 * n, l + 4, l, matches);
AddMatch(id + 92 * n, l + 4, l, matches);
}
} else if (s[1] == 'z') {
if (s[2] == 'e' && s[3] == ' ') {
@ -256,19 +256,19 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
}
}
} else {
// Set t=false for kUppercaseFirst and
// t=true otherwise (kUppercaseAll) transform.
/* Set is_all_caps=0 for kUppercaseFirst and
is_all_caps=1 otherwise (kUppercaseAll) transform. */
const bool t = w.transform != kUppercaseFirst;
if (!IsMatch(w, data, max_length)) {
continue;
}
// Transform "" + kUppercase{First,All} + ""
/* Transform "" + kUppercase{First,All} + "" */
AddMatch(id + (t ? 44 : 9) * n, l, l, matches);
found_match = true;
if (l + 1 >= max_length) {
continue;
}
// Transforms "" + kUppercase{First,All} + <suffix>
/* Transforms "" + kUppercase{First,All} + <suffix> */
const uint8_t* s = &data[l];
if (s[0] == ' ') {
AddMatch(id + (t ? 68 : 4) * n, l + 1, l, matches);
@ -301,7 +301,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
}
}
}
// Transforms with prefixes " " and "."
/* Transforms with prefixes " " and "." */
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
bool is_space = (data[0] == ' ');
key = Hash(&data[1]);
@ -317,13 +317,14 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
if (!IsMatch(w, &data[1], max_length - 1)) {
continue;
}
// Transforms " " + kIdentity + "" and "." + kIdentity + ""
/* Transforms " " + kIdentity + "" and "." + kIdentity + "" */
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
found_match = true;
if (l + 2 >= max_length) {
continue;
}
// Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
/* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
*/
const uint8_t* s = &data[l + 1];
if (s[0] == ' ') {
AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
@ -349,19 +350,19 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
}
}
} else if (is_space) {
// Set t=false for kUppercaseFirst and
// t=true otherwise (kUppercaseAll) transform.
/* Set is_all_caps=0 for kUppercaseFirst and
is_all_caps=1 otherwise (kUppercaseAll) transform. */
const bool t = w.transform != kUppercaseFirst;
if (!IsMatch(w, &data[1], max_length - 1)) {
continue;
}
// Transforms " " + kUppercase{First,All} + ""
/* Transforms " " + kUppercase{First,All} + "" */
AddMatch(id + (t ? 85 : 30) * n, l + 1, l, matches);
found_match = true;
if (l + 2 >= max_length) {
continue;
}
// Transforms " " + kUppercase{First,All} + <suffix>
/* Transforms " " + kUppercase{First,All} + <suffix> */
const uint8_t* s = &data[l + 1];
if (s[0] == ' ') {
AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches);
@ -388,7 +389,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
}
}
if (max_length >= 6) {
// Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0"
/* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */
if ((data[1] == ' ' &&
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
(data[0] == 0xc2 && data[1] == 0xa0)) {
@ -415,7 +416,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
}
}
if (max_length >= 9) {
// Transforms with prefixes " the " and ".com/"
/* Transforms with prefixes " the " and ".com/" */
if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
data[3] == 'e' && data[4] == ' ') ||
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Class to model the static dictionary.
/* Class to model the static dictionary. */
#ifndef BROTLI_ENC_STATIC_DICT_H_
#define BROTLI_ENC_STATIC_DICT_H_
@ -16,12 +16,13 @@ namespace brotli {
static const size_t kMaxDictionaryMatchLen = 37;
static const uint32_t kInvalidMatch = 0xfffffff;
// Matches data against static dictionary words, and for each length l,
// for which a match is found, updates matches[l] to be the minimum possible
// (distance << 5) + len_code.
// Prerequisites:
// matches array is at least kMaxDictionaryMatchLen + 1 long
// all elements are initialized to kInvalidMatch
/* Matches data against static dictionary words, and for each length l,
for which a match is found, updates matches[l] to be the minimum possible
(distance << 5) + len_code.
Returns 1 if matches have been found, otherwise 0.
Prerequisites:
matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
all elements are initialized to kInvalidMatch */
bool FindAllStaticDictionaryMatches(const uint8_t* data,
size_t min_length,
size_t max_length,
@ -29,4 +30,4 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
} // namespace brotli
#endif // BROTLI_ENC_STATIC_DICT_H_
#endif /* BROTLI_ENC_STATIC_DICT_H_ */

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Lookup table for static dictionary and transforms.
/* Lookup table for static dictionary and transforms. */
#ifndef BROTLI_ENC_DICTIONARY_LUT_H_
#define BROTLI_ENC_DICTIONARY_LUT_H_
@ -13,8 +13,8 @@
namespace brotli {
static const int kDictNumBits = 15
;static const uint32_t kDictHashMul32 = 0x1e35a7bd;
static const int kDictNumBits = 15;
static const uint32_t kDictHashMul32 = 0x1e35a7bd;
struct DictWord {
uint8_t len;
@ -12052,4 +12052,4 @@ static const DictWord kStaticDictionaryWords[] = {
} // namespace brotli
#endif // BROTLI_ENC_DICTIONARY_LUT_H_
#endif /* BROTLI_ENC_STATIC_DICT_LUT_H_ */

View File

@ -4,14 +4,14 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Convenience routines to make Brotli I/O classes from some memory containers and
// files.
/* Convenience routines to make Brotli I/O classes from some memory containers and
files. */
#include "./streams.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <string.h> /* memcpy */
namespace brotli {
@ -26,7 +26,7 @@ void BrotliMemOut::Reset(void* buf, size_t len) {
pos_ = 0;
}
// Brotli output routine: copy n bytes to the output buffer.
/* Brotli output routine: copy n bytes to the output buffer. */
bool BrotliMemOut::Write(const void *buf, size_t n) {
if (n + pos_ > len_)
return false;
@ -47,7 +47,7 @@ void BrotliStringOut::Reset(std::string* buf, size_t max_size) {
max_size_ = max_size;
}
// Brotli output routine: add n bytes to a string.
/* Brotli output routine: add n bytes to a string. */
bool BrotliStringOut::Write(const void *buf, size_t n) {
if (buf_->size() + n > max_size_)
return false;
@ -66,7 +66,7 @@ void BrotliMemIn::Reset(const void* buf, size_t len) {
pos_ = 0;
}
// Brotli input routine: read the next chunk of memory.
/* Brotli input routine: read the next chunk of memory. */
const void* BrotliMemIn::Read(size_t n, size_t* output) {
if (pos_ == len_) {
return NULL;
@ -111,4 +111,4 @@ bool BrotliFileOut::Write(const void* buf, size_t n) {
return true;
}
} // namespace brotli
} /* namespace brotli */

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Input and output classes for streaming brotli compression.
/* Input and output classes for streaming brotli compression. */
#ifndef BROTLI_ENC_STREAMS_H_
#define BROTLI_ENC_STREAMS_H_
@ -17,71 +17,71 @@
namespace brotli {
// Input interface for the compression routines.
/* Input interface for the compression routines. */
class BrotliIn {
public:
virtual ~BrotliIn(void) {}
// Return a pointer to the next block of input of at most n bytes.
// Return the actual length in *nread.
// At end of data, return NULL. Don't return NULL if there is more data
// to read, even if called with n == 0.
// Read will only be called if some of its bytes are needed.
/* Return a pointer to the next block of input of at most n bytes.
Return the actual length in *nread.
At end of data, return NULL. Don't return NULL if there is more data
to read, even if called with n == 0.
Read will only be called if some of its bytes are needed. */
virtual const void* Read(size_t n, size_t* nread) = 0;
};
// Output interface for the compression routines.
/* Output interface for the compression routines. */
class BrotliOut {
public:
virtual ~BrotliOut(void) {}
// Write n bytes of data from buf.
// Return true if all written, false otherwise.
/* Write n bytes of data from buf.
Return true if all written, false otherwise. */
virtual bool Write(const void *buf, size_t n) = 0;
};
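Any sink can be plugged in by implementing this interface; for example, a hypothetical CountingOut that only measures how many bytes the compressor emits:

#include <stddef.h>
#include "./streams.h"

class CountingOut : public brotli::BrotliOut {
 public:
  CountingOut(void) : total_(0) {}
  bool Write(const void* buf, size_t n) {
    (void)buf;          /* the data itself is discarded */
    total_ += n;
    return true;        /* never reports failure */
  }
  size_t total(void) const { return total_; }
 private:
  size_t total_;
};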
// Adapter class to make BrotliIn objects from raw memory.
/* Adapter class to make BrotliIn objects from raw memory. */
class BrotliMemIn : public BrotliIn {
public:
BrotliMemIn(const void* buf, size_t len);
void Reset(const void* buf, size_t len);
// returns the amount of data consumed
/* returns the amount of data consumed */
size_t position(void) const { return pos_; }
const void* Read(size_t n, size_t* OUTPUT);
private:
const void* buf_; // start of input buffer
size_t len_; // length of input
size_t pos_; // current read position within input
const void* buf_; /* start of input buffer */
size_t len_; /* length of input */
size_t pos_; /* current read position within input */
};
// Adapter class to make BrotliOut objects from raw memory.
/* Adapter class to make BrotliOut objects from raw memory. */
class BrotliMemOut : public BrotliOut {
public:
BrotliMemOut(void* buf, size_t len);
void Reset(void* buf, size_t len);
// returns the amount of data written
/* returns the amount of data written */
size_t position(void) const { return pos_; }
bool Write(const void* buf, size_t n);
private:
void* buf_; // start of output buffer
size_t len_; // length of output
size_t pos_; // current write position within output
void* buf_; /* start of output buffer */
size_t len_; /* length of output */
size_t pos_; /* current write position within output */
};
// Adapter class to make BrotliOut objects from a string.
/* Adapter class to make BrotliOut objects from a string. */
class BrotliStringOut : public BrotliOut {
public:
// Create a writer that appends its data to buf.
// buf->size() will grow to at most max_size
// buf is expected to be empty when constructing BrotliStringOut.
/* Create a writer that appends its data to buf.
buf->size() will grow to at most max_size
buf is expected to be empty when constructing BrotliStringOut. */
BrotliStringOut(std::string* buf, size_t max_size);
void Reset(std::string* buf, size_t max_len);
@ -89,11 +89,11 @@ class BrotliStringOut : public BrotliOut {
bool Write(const void* buf, size_t n);
private:
std::string* buf_; // start of output buffer
size_t max_size_; // max length of output
std::string* buf_; /* start of output buffer */
size_t max_size_; /* max length of output */
};
// Adapter class to make BrotliIn object from a file.
/* Adapter class to make BrotliIn object from a file. */
class BrotliFileIn : public BrotliIn {
public:
BrotliFileIn(FILE* f, size_t max_read_size);
@ -107,7 +107,7 @@ class BrotliFileIn : public BrotliIn {
size_t buf_size_;
};
// Adapter class to make BrotliOut object from a file.
/* Adapter class to make BrotliOut object from a file. */
class BrotliFileOut : public BrotliOut {
public:
explicit BrotliFileOut(FILE* f);
@ -117,6 +117,6 @@ class BrotliFileOut : public BrotliOut {
FILE* f_;
};
} // namespace brotli
} /* namespace brotli */
#endif // BROTLI_ENC_STREAMS_H_
#endif /* BROTLI_ENC_STREAMS_H_ */

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Heuristics for deciding about the UTF8-ness of strings.
/* Heuristics for deciding about the UTF8-ness of strings. */
#include "./utf8_util.h"
@ -15,14 +15,14 @@ namespace brotli {
namespace {
size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
// ASCII
/* ASCII */
if ((input[0] & 0x80) == 0) {
*symbol = input[0];
if (*symbol > 0) {
return 1;
}
}
// 2-byte UTF8
/* 2-byte UTF8 */
if (size > 1u &&
(input[0] & 0xe0) == 0xc0 &&
(input[1] & 0xc0) == 0x80) {
@ -32,7 +32,7 @@ size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
return 2;
}
}
// 3-byte UTF8
/* 3-byte UTF8 */
if (size > 2u &&
(input[0] & 0xf0) == 0xe0 &&
(input[1] & 0xc0) == 0x80 &&
@ -44,7 +44,7 @@ size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
return 3;
}
}
// 4-byte UTF8
/* 4-byte UTF8 */
if (size > 3u &&
(input[0] & 0xf8) == 0xf0 &&
(input[1] & 0xc0) == 0x80 &&
@ -58,14 +58,14 @@ size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
return 4;
}
}
// Not UTF8, emit a special symbol above the UTF8-code space
/* Not UTF8, emit a special symbol above the UTF8-code space */
*symbol = 0x110000 | input[0];
return 1;
}
} // namespace
// Returns true if at least min_fraction of the data is UTF8-encoded.
/* Returns 1 if at least min_fraction of the data is UTF8-encoded. */
bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
const size_t length, const double min_fraction) {
size_t size_utf8 = 0;

View File

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Heuristics for deciding about the UTF8-ness of strings.
/* Heuristics for deciding about the UTF8-ness of strings. */
#ifndef BROTLI_ENC_UTF8_UTIL_H_
#define BROTLI_ENC_UTF8_UTIL_H_
@ -15,11 +15,12 @@ namespace brotli {
static const double kMinUTF8Ratio = 0.75;
// Returns true if at least min_fraction of the bytes between pos and
// pos + length in the (data, mask) ringbuffer is UTF8-encoded.
/* Returns 1 if at least min_fraction of the bytes between pos and
pos + length in the (data, mask) ringbuffer is UTF8-encoded, otherwise
returns 0. */
bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
const size_t length, const double min_fraction);
} // namespace brotli
#endif // BROTLI_ENC_UTF8_UTIL_H_
#endif /* BROTLI_ENC_UTF8_UTIL_H_ */

View File

@ -4,36 +4,36 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
// Write bits into a byte array.
/* Write bits into a byte array. */
#ifndef BROTLI_ENC_WRITE_BITS_H_
#define BROTLI_ENC_WRITE_BITS_H_
#include <assert.h>
#include <stdio.h>
#include <stdio.h> /* printf */
#include "../common/types.h"
#include "./port.h"
namespace brotli {
//#define BIT_WRITER_DEBUG
/*#define BIT_WRITER_DEBUG */
// This function writes bits into bytes in increasing addresses, and within
// a byte least-significant-bit first.
//
// The function can write up to 56 bits in one go with WriteBits
// Example: let's assume that 3 bits (Rs below) have been written already:
//
// BYTE-0 BYTE+1 BYTE+2
//
// 0000 0RRR 0000 0000 0000 0000
//
// Now, we could write 5 or fewer bits in MSB by just shifting by 3
// and OR'ing to BYTE-0.
//
// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
// and locate the rest in BYTE+1, BYTE+2, etc.
/* This function writes bits into bytes in increasing addresses, and within
a byte least-significant-bit first.
The function can write up to 56 bits in one go with WriteBits
Example: let's assume that 3 bits (Rs below) have been written already:
BYTE-0 BYTE+1 BYTE+2
0000 0RRR 0000 0000 0000 0000
Now, we could write 5 or fewer bits in MSB by just shifting by 3
and OR'ing to BYTE-0.
For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
and locate the rest in BYTE+1, BYTE+2, etc. */
inline void WriteBits(size_t n_bits,
uint64_t bits,
size_t * __restrict pos,
@ -44,18 +44,18 @@ inline void WriteBits(size_t n_bits,
assert((bits >> n_bits) == 0);
assert(n_bits <= 56);
#ifdef IS_LITTLE_ENDIAN
// This branch of the code can write up to 56 bits at a time,
// 7 bits are lost by being perhaps already in *p and at least
// 1 bit is needed to initialize the bit-stream ahead (i.e. if 7
// bits are in *p and we write 57 bits, then the next write will
// access a byte that was never initialized).
/* This branch of the code can write up to 56 bits at a time,
7 bits are lost by being perhaps already in *p and at least
1 bit is needed to initialize the bit-stream ahead (i.e. if 7
bits are in *p and we write 57 bits, then the next write will
access a byte that was never initialized). */
uint8_t *p = &array[*pos >> 3];
uint64_t v = *p;
v |= bits << (*pos & 7);
BROTLI_UNALIGNED_STORE64(p, v); // Set some bits.
BROTLI_UNALIGNED_STORE64(p, v); /* Set some bits. */
*pos += n_bits;
#else
// implicit & 0xff is assumed for uint8_t arithmetics
/* implicit & 0xff is assumed for uint8_t arithmetics */
uint8_t *array_pos = &array[*pos >> 3];
const size_t bits_reserved_in_first_byte = (*pos & 7);
bits <<= bits_reserved_in_first_byte;
@ -81,4 +81,4 @@ inline void WriteBitsPrepareStorage(size_t pos, uint8_t *array) {
} // namespace brotli
#endif // BROTLI_ENC_WRITE_BITS_H_
#endif /* BROTLI_ENC_WRITE_BITS_H_ */
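The bit layout described above, as a self-contained byte-by-byte sketch: bit k of the stream lives in array[k >> 3] at bit position (k & 7). PutBitsLSBFirst is an illustrative helper, not the header's WriteBits, and like it assumes the target bytes start out zero (cf. WriteBitsPrepareStorage).

#include <stddef.h>
#include <stdint.h>

static void PutBitsLSBFirst(size_t n_bits, uint64_t bits,
                            size_t* pos, uint8_t* array) {
  for (size_t i = 0; i < n_bits; ++i) {
    if ((bits >> i) & 1) {
      array[*pos >> 3] = (uint8_t)(array[*pos >> 3] | (1u << (*pos & 7)));
    }
    ++*pos;
  }
}
/* Example: writing 3 (2 bits) and then 5 (3 bits) into a zeroed array leaves
   array[0] == 0x17, i.e. binary 000 101 11 read from bit 7 down to bit 0. */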