Mirror of https://github.com/google/brotli.git, synced 2024-11-23 12:10:07 +00:00
Commit 6e356105b5
@@ -4,7 +4,7 @@
    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
-// Function to find backward reference copies.
+/* Function to find backward reference copies. */
 
 #include "./backward_references.h"
 
@@ -12,16 +12,17 @@
 #include <limits>
 #include <vector>
 
+#include "../common/types.h"
 #include "./command.h"
 #include "./fast_log.h"
 #include "./literal_cost.h"
 
 namespace brotli {
 
-// The maximum length for which the zopflification uses distinct distances.
+/* The maximum length for which the zopflification uses distinct distances. */
 static const uint16_t kMaxZopfliLen = 325;
 
-// Histogram based cost model for zopflification.
+/* Histogram based cost model for zopflification. */
 class ZopfliCostModel {
  public:
   ZopfliCostModel(void) : min_cost_cmd_(kInfinity) {}
@@ -178,9 +179,9 @@ inline size_t ComputeDistanceCode(size_t distance,
   return distance + 15;
 }
 
-// REQUIRES: len >= 2, start_pos <= pos
-// REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity
-// Maintains the "ZopfliNode array invariant".
+/* REQUIRES: len >= 2, start_pos <= pos */
+/* REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity */
+/* Maintains the "ZopfliNode array invariant". */
 inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
                              size_t len, size_t len_code, size_t dist,
                              size_t short_code, float cost) {
@@ -191,7 +192,7 @@ inline void UpdateZopfliNode(ZopfliNode* nodes, size_t pos, size_t start_pos,
   next.cost = cost;
 }
 
-// Maintains the smallest 2^k cost differences together with their positions.
+/* Maintains the smallest 8 cost differences together with their positions. */
 class StartPosQueue {
  public:
   struct PosData {
@@ -212,8 +213,8 @@ class StartPosQueue {
     ++idx_;
     size_t len = size();
     q_[offset] = posdata;
-    // Restore the sorted order. In the list of |len| items at most |len - 1|
-    // adjacent element comparisons / swaps are required.
+    /* Restore the sorted order. In the list of |len| items at most |len - 1|
+       adjacent element comparisons / swaps are required. */
     for (size_t i = 1; i < len; ++i) {
       if (q_[offset & mask_].costdiff > q_[(offset + 1) & mask_].costdiff) {
         std::swap(q_[offset & mask_], q_[(offset + 1) & mask_]);
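
Note: the push logic in this hunk keeps the most recent eight entries of a power-of-two ring buffer sorted by costdiff using adjacent swaps. Below is a minimal self-contained sketch of the same mechanism, with hypothetical names and plain floats standing in for PosData; the ++offset step inside the loop is an assumption, since the hunk cuts off right after the swap.

#include <algorithm>
#include <cstddef>

struct SmallestEightSketch {
  float q_[8];      /* ring buffer; mask is 7, as in the class above */
  size_t idx_ = 0;  /* number of pushes so far */
  void Push(float value) {
    size_t offset = ~idx_ & 7;  /* newest element enters at the front slot */
    ++idx_;
    size_t len = idx_ < 8 ? idx_ : 8;
    q_[offset] = value;
    /* Restore sorted order with at most len - 1 adjacent swaps. */
    for (size_t i = 1; i < len; ++i) {
      if (q_[offset & 7] > q_[(offset + 1) & 7]) {
        std::swap(q_[offset & 7], q_[(offset + 1) & 7]);
      }
      ++offset;  /* assumed continuation of the loop shown above */
    }
  }
};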
@@ -234,14 +235,14 @@ class StartPosQueue {
   size_t idx_;
 };
 
-// Returns the minimum possible copy length that can improve the cost of any
-// future position.
+/* Returns the minimum possible copy length that can improve the cost of any */
+/* future position. */
 static size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
                                        const ZopfliNode* nodes,
                                        const ZopfliCostModel& model,
                                        const size_t num_bytes,
                                        const size_t pos) {
-  // Compute the minimum possible cost of reaching any future position.
+  /* Compute the minimum possible cost of reaching any future position. */
   const size_t start0 = queue.GetStartPosData(0).pos;
   float min_cost = (nodes[start0].cost +
                     model.GetLiteralCosts(start0, pos) +
@@ -250,13 +251,13 @@ static size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
   size_t next_len_bucket = 4;
   size_t next_len_offset = 10;
   while (pos + len <= num_bytes && nodes[pos + len].cost <= min_cost) {
-    // We already reached (pos + len) with no more cost than the minimum
-    // possible cost of reaching anything from this pos, so there is no point in
-    // looking for lengths <= len.
+    /* We already reached (pos + len) with no more cost than the minimum
+       possible cost of reaching anything from this pos, so there is no point in
+       looking for lengths <= len. */
     ++len;
     if (len == next_len_offset) {
-      // We reached the next copy length code bucket, so we add one more
-      // extra bit to the minimum cost.
+      /* We reached the next copy length code bucket, so we add one more
+         extra bit to the minimum cost. */
       min_cost += static_cast<float>(1.0);
       next_len_offset += next_len_bucket;
       next_len_bucket *= 2;
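
Note: the bucket walk above adds one bit to min_cost each time len crosses a copy-length-code bucket boundary (10, 14, 22, 38, ...: the first bucket holds 4 lengths and each later bucket doubles). A small counting sketch of those increments, as a hypothetical helper:

#include <cstddef>

/* Counts the +1.0 additions the loop above would make while growing the
   copy length up to `len`; the boundaries are 10, 14, 22, 38, ... */
static size_t MinCostBitIncrements(size_t len) {
  size_t next_len_bucket = 4;
  size_t next_len_offset = 10;
  size_t increments = 0;
  while (next_len_offset <= len) {
    ++increments;
    next_len_offset += next_len_bucket;
    next_len_bucket *= 2;
  }
  return increments;
}
/* e.g. MinCostBitIncrements(21) == 2, for the crossings at 10 and 14. */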
@@ -265,13 +266,13 @@ static size_t ComputeMinimumCopyLength(const StartPosQueue& queue,
   return len;
 }
 
-// Fills in dist_cache[0..3] with the last four distances (as defined by
-// Section 4. of the Spec) that would be used at (block_start + pos) if we
-// used the shortest path of commands from block_start, computed from
-// nodes[0..pos]. The last four distances at block_start are in
-// starting_dist_cache[0..3].
-// REQUIRES: nodes[pos].cost < kInfinity
-// REQUIRES: nodes[0..pos] satisfies the "ZopfliNode array invariant".
+/* Fills in dist_cache[0..3] with the last four distances (as defined by
+   Section 4. of the Spec) that would be used at (block_start + pos) if we
+   used the shortest path of commands from block_start, computed from
+   nodes[0..pos]. The last four distances at block_start are in
+   starting_dist_cache[0..3].
+   REQUIRES: nodes[pos].cost < kInfinity
+   REQUIRES: nodes[0..pos] satisfies the "ZopfliNode array invariant". */
 static void ComputeDistanceCache(const size_t block_start,
                                  const size_t pos,
                                  const size_t max_backward,
@@ -280,21 +281,21 @@ static void ComputeDistanceCache(const size_t block_start,
                                  int* dist_cache) {
   int idx = 0;
   size_t p = pos;
-  // Because of prerequisite, does at most (pos + 1) / 2 iterations.
+  /* Because of prerequisite, does at most (pos + 1) / 2 iterations. */
   while (idx < 4 && p > 0) {
     const size_t clen = nodes[p].copy_length();
     const size_t ilen = nodes[p].insert_length;
     const size_t dist = nodes[p].copy_distance();
-    // Since block_start + p is the end position of the command, the copy part
-    // starts from block_start + p - clen. Distances that are greater than this
-    // or greater than max_backward are static dictionary references, and do
-    // not update the last distances. Also distance code 0 (last distance)
-    // does not update the last distances.
+    /* Since block_start + p is the end position of the command, the copy part
+       starts from block_start + p - clen. Distances that are greater than this
+       or greater than max_backward are static dictionary references, and do
+       not update the last distances. Also distance code 0 (last distance)
+       does not update the last distances. */
     if (dist + clen <= block_start + p && dist <= max_backward &&
         nodes[p].distance_code() > 0) {
       dist_cache[idx++] = static_cast<int>(dist);
     }
-    // Because of prerequisite, p >= clen + ilen >= 2.
+    /* Because of prerequisite, p >= clen + ilen >= 2. */
     p -= clen + ilen;
   }
   for (; idx < 4; ++idx) {
@@ -330,15 +331,15 @@ static void UpdateNodes(const size_t num_bytes,
   const size_t min_len = ComputeMinimumCopyLength(
       *queue, nodes, *model, num_bytes, pos);
 
-  // Go over the command starting positions in order of increasing cost
-  // difference.
+  /* Go over the command starting positions in order of increasing cost
+     difference. */
   for (size_t k = 0; k < 5 && k < queue->size(); ++k) {
     const StartPosQueue::PosData& posdata = queue->GetStartPosData(k);
     const size_t start = posdata.pos;
     const float start_costdiff = posdata.costdiff;
 
-    // Look for last distance matches using the distance cache from this
-    // starting position.
+    /* Look for last distance matches using the distance cache from this
+       starting position. */
     size_t best_len = min_len - 1;
     for (size_t j = 0; j < kNumDistanceShortCodes; ++j) {
       const size_t idx = kDistanceCacheIndex[j];
@@ -374,23 +375,23 @@ static void UpdateNodes(const size_t num_bytes,
       }
     }
 
-    // At higher iterations look only for new last distance matches, since
-    // looking only for new command start positions with the same distances
-    // does not help much.
+    /* At higher iterations look only for new last distance matches, since
+       looking only for new command start positions with the same distances
+       does not help much. */
     if (k >= 2) continue;
 
-    // Loop through all possible copy lengths at this position.
+    /* Loop through all possible copy lengths at this position. */
     size_t len = min_len;
     for (size_t j = 0; j < num_matches; ++j) {
       BackwardMatch match = matches[j];
       size_t dist = match.distance;
       bool is_dictionary_match = dist > max_distance;
-      // We already tried all possible last distance matches, so we can use
-      // normal distance code here.
+      /* We already tried all possible last distance matches, so we can use
+         normal distance code here. */
       size_t dist_code = dist + 15;
-      // Try all copy lengths up until the maximum copy length corresponding
-      // to this distance. If the distance refers to the static dictionary, or
-      // the maximum length is long enough, try only one maximum length.
+      /* Try all copy lengths up until the maximum copy length corresponding
+         to this distance. If the distance refers to the static dictionary, or
+         the maximum length is long enough, try only one maximum length. */
      size_t max_len = match.length();
      if (len < max_len && (is_dictionary_match || max_len > kMaxZopfliLen)) {
        len = max_len;
@@ -487,8 +488,8 @@ static void ZopfliIterate(size_t num_bytes,
                 max_backward_limit, dist_cache, num_matches[i],
                 &matches[cur_match_pos], &model, &queue, &nodes[0]);
     cur_match_pos += num_matches[i];
-    // The zopflification can be too slow in case of very long lengths, so in
-    // such cases skip it entirely; it does not cost much compression ratio.
+    /* The zopflification can be too slow in case of very long lengths, so in
+       such cases skip it entirely; it does not cost much compression ratio. */
    if (num_matches[i] == 1 &&
        matches[cur_match_pos - 1].length() > kMaxZopfliLen) {
      i += matches[cur_match_pos - 1].length() - 1;

@@ -4,7 +4,7 @@
    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
-// Function to find backward reference copies.
+/* Function to find backward reference copies. */
 
 #ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
 #define BROTLI_ENC_BACKWARD_REFERENCES_H_
@@ -17,10 +17,10 @@
 
 namespace brotli {
 
-// "commands" points to the next output command to write to, "*num_commands" is
-// initially the total amount of commands output by previous
-// CreateBackwardReferences calls, and must be incremented by the amount written
-// by this call.
+/* "commands" points to the next output command to write to, "*num_commands" is
+   initially the total amount of commands output by previous
+   CreateBackwardReferences calls, and must be incremented by the amount written
+   by this call. */
 void CreateBackwardReferences(size_t num_bytes,
                               size_t position,
                               bool is_last,
@@ -66,31 +66,32 @@ struct ZopfliNode {
     return copy_length() + insert_length;
   }
 
-  // best length to get up to this byte (not including this byte itself)
-  // highest 8 bit is used to reconstruct the length code
+  /* best length to get up to this byte (not including this byte itself)
+     highest 8 bit is used to reconstruct the length code */
   uint32_t length;
-  // distance associated with the length
-  // highest 7 bit contains distance short code + 1 (or zero if no short code)
+  /* distance associated with the length
+     highest 7 bit contains distance short code + 1 (or zero if no short code)
+  */
   uint32_t distance;
-  // number of literal inserts before this copy
+  /* number of literal inserts before this copy */
   uint32_t insert_length;
-  // smallest cost to get to this byte from the beginning, as found so far
+  /* Smallest cost to get to this byte from the beginning, as found so far. */
   float cost;
 };
 
-// Computes the shortest path of commands from position to at most
-// position + num_bytes.
-//
-// On return, path->size() is the number of commands found and path[i] is the
-// length of the ith command (copy length plus insert length).
-// Note that the sum of the lengths of all commands can be less than num_bytes.
-//
-// On return, the nodes[0..num_bytes] array will have the following
-// "ZopfliNode array invariant":
-// For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
-//   (1) nodes[i].copy_length() >= 2
-//   (2) nodes[i].command_length() <= i and
-//   (3) nodes[i - nodes[i].command_length()].cost < kInfinity
+/* Computes the shortest path of commands from position to at most
+   position + num_bytes.
+
+   On return, path->size() is the number of commands found and path[i] is the
+   length of the ith command (copy length plus insert length).
+   Note that the sum of the lengths of all commands can be less than num_bytes.
+
+   On return, the nodes[0..num_bytes] array will have the following
+   "ZopfliNode array invariant":
+   For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
+     (1) nodes[i].copy_length() >= 2
+     (2) nodes[i].command_length() <= i and
+     (3) nodes[i - nodes[i].command_length()].cost < kInfinity */
 void ZopfliComputeShortestPath(size_t num_bytes,
                                size_t position,
                                const uint8_t* ringbuffer,
@@ -113,4 +114,4 @@ void ZopfliCreateCommands(const size_t num_bytes,
 
 }  // namespace brotli
 
-#endif  // BROTLI_ENC_BACKWARD_REFERENCES_H_
+#endif  /* BROTLI_ENC_BACKWARD_REFERENCES_H_ */
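
Note: the "ZopfliNode array invariant" documented above is mechanical enough to check in a debug build. A hedged sketch of such a validator, a hypothetical helper that assumes the declarations from this header (ZopfliNode, kInfinity) are in scope:

/* Returns true iff nodes[0..num_bytes] satisfies conditions (1)-(3) above. */
static bool ZopfliNodeInvariantHolds(const ZopfliNode* nodes,
                                     size_t num_bytes) {
  for (size_t i = 1; i <= num_bytes; ++i) {
    if (!(nodes[i].cost < kInfinity)) continue;  /* unreachable: exempt */
    if (nodes[i].copy_length() < 2) return false;                    /* (1) */
    if (nodes[i].command_length() > i) return false;                 /* (2) */
    if (!(nodes[i - nodes[i].command_length()].cost < kInfinity)) {  /* (3) */
      return false;
    }
  }
  return true;
}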

@@ -4,7 +4,7 @@
    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
-// Functions to estimate the bit cost of Huffman trees.
+/* Functions to estimate the bit cost of Huffman trees. */
 
 #ifndef BROTLI_ENC_BIT_COST_H_
 #define BROTLI_ENC_BIT_COST_H_
@@ -42,7 +42,7 @@ static inline double BitsEntropy(const uint32_t *population, size_t size) {
   size_t sum;
   double retval = ShannonEntropy(population, size, &sum);
   if (retval < sum) {
-    // At least one bit per literal is needed.
+    /* At least one bit per literal is needed. */
    retval = static_cast<double>(sum);
  }
  return retval;
@@ -158,4 +158,4 @@ double PopulationCost(const Histogram<kSize>& histogram) {
 
 }  // namespace brotli
 
-#endif  // BROTLI_ENC_BIT_COST_H_
+#endif  /* BROTLI_ENC_BIT_COST_H_ */
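
Note: for readers without the rest of the file, ShannonEntropy returns the histogram's entropy in bits along with the population sum, and the clamp above enforces at least one bit per symbol occurrence. A self-contained sketch of the same computation (not the library's implementation):

#include <cmath>
#include <cstddef>
#include <cstdint>

static double BitsEntropySketch(const uint32_t* population, size_t size) {
  size_t sum = 0;
  double retval = 0.0;
  for (size_t i = 0; i < size; ++i) sum += population[i];
  for (size_t i = 0; i < size; ++i) {
    if (population[i] > 0) {
      /* Accumulate -p * log2(p / sum), expanded to avoid a division. */
      retval -= population[i] * std::log2(static_cast<double>(population[i]));
    }
  }
  if (sum > 0) retval += sum * std::log2(static_cast<double>(sum));
  if (retval < sum) {
    /* At least one bit per literal is needed. */
    retval = static_cast<double>(sum);
  }
  return retval;
}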

@@ -4,7 +4,7 @@
    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
-// Block split point selection utilities.
+/* Block split point selection utilities. */
 
 #include "./block_splitter.h"
 
@@ -42,7 +42,7 @@ void CopyLiteralsToByteArray(const Command* cmds,
                              const size_t offset,
                              const size_t mask,
                              std::vector<uint8_t>* literals) {
-  // Count how many we have.
+  /* Count how many we have. */
   size_t total_length = 0;
   for (size_t i = 0; i < num_commands; ++i) {
     total_length += cmds[i].insert_len_;
@@ -456,11 +456,11 @@ void SplitBlock(const Command* cmds,
                 BlockSplit* insert_and_copy_split,
                 BlockSplit* dist_split) {
   {
-    // Create a continuous array of literals.
+    /* Create a continuous array of literals. */
     std::vector<uint8_t> literals;
     CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, &literals);
-    // Create the block split on the array of literals.
-    // Literal histograms have alphabet size 256.
+    /* Create the block split on the array of literals.
+       Literal histograms have alphabet size 256. */
     SplitByteVector<256>(
         literals,
         kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
@@ -469,12 +469,12 @@ void SplitBlock(const Command* cmds,
   }
 
   {
-    // Compute prefix codes for commands.
+    /* Compute prefix codes for commands. */
     std::vector<uint16_t> insert_and_copy_codes(num_commands);
     for (size_t i = 0; i < num_commands; ++i) {
       insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
     }
-    // Create the block split on the array of command prefixes.
+    /* Create the block split on the array of command prefixes. */
     SplitByteVector<kNumCommandPrefixes>(
         insert_and_copy_codes,
         kSymbolsPerCommandHistogram, kMaxCommandHistograms,
@@ -483,7 +483,7 @@ void SplitBlock(const Command* cmds,
   }
 
   {
-    // Create a continuous array of distance prefixes.
+    /* Create a continuous array of distance prefixes. */
     std::vector<uint16_t> distance_prefixes(num_commands);
     size_t pos = 0;
     for (size_t i = 0; i < num_commands; ++i) {
@@ -493,7 +493,7 @@ void SplitBlock(const Command* cmds,
       }
     }
     distance_prefixes.resize(pos);
-    // Create the block split on the array of distance prefixes.
+    /* Create the block split on the array of distance prefixes. */
     SplitByteVector<kNumDistancePrefixes>(
         distance_prefixes,
         kSymbolsPerDistanceHistogram, kMaxCommandHistograms,

@@ -4,7 +4,7 @@
    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
-// Block split point selection utilities.
+/* Block split point selection utilities. */
 
 #ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
 #define BROTLI_ENC_BLOCK_SPLITTER_H_
@@ -58,4 +58,4 @@ void SplitBlock(const Command* cmds,
 
 }  // namespace brotli
 
-#endif  // BROTLI_ENC_BLOCK_SPLITTER_H_
+#endif  /* BROTLI_ENC_BLOCK_SPLITTER_H_ */

@@ -4,9 +4,9 @@
    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
-// Brotli bit stream functions to support the low level format. There are no
-// compression algorithms here, just the right ordering of bits to match the
-// specs.
+/* Brotli bit stream functions to support the low level format. There are no
+   compression algorithms here, just the right ordering of bits to match the
+   specs. */
 
 #include "./brotli_bit_stream.h"
 
@@ -16,6 +16,7 @@
 #include <limits>
 #include <vector>
 
+#include "../common/types.h"
 #include "./bit_cost.h"
 #include "./context.h"
 #include "./entropy_encode.h"
@@ -34,9 +35,9 @@ static const size_t kContextMapAlphabetSize = 256 + 16;
 // Block type alphabet has 256 block id symbols plus 2 special symbols.
 static const size_t kBlockTypeAlphabetSize = 256 + 2;
 
-// nibblesbits represents the 2 bits to encode MNIBBLES (0-3)
-// REQUIRES: length > 0
-// REQUIRES: length <= (1 << 24)
+/* nibblesbits represents the 2 bits to encode MNIBBLES (0-3)
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
 void EncodeMlen(size_t length, uint64_t* bits,
                 size_t* numbits, uint64_t* nibblesbits) {
   assert(length > 0);
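
Note: per the format, MLEN - 1 is written in MNIBBLES * 4 bits, with MNIBBLES in {4, 5, 6} encoded as the two-bit value MNIBBLES - 4. A hedged sketch of the function annotated here, simplified and not necessarily the exact library code:

#include <cassert>
#include <cstddef>
#include <cstdint>

static void EncodeMlenSketch(size_t length, uint64_t* bits,
                             size_t* numbits, uint64_t* nibblesbits) {
  assert(length > 0);
  assert(length <= (1u << 24));
  size_t nibbles = 4;
  while (length - 1 >= (1ull << (4 * nibbles))) ++nibbles;  /* 4, 5 or 6 */
  *nibblesbits = nibbles - 4;  /* the two MNIBBLES bits */
  *numbits = 4 * nibbles;      /* width of the MLEN - 1 field */
  *bits = length - 1;
}
/* e.g. length = 70000 needs 5 nibbles: nibblesbits = 1, numbits = 20. */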
@@ -76,13 +77,16 @@ void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) {
   }
 }
 
+/* Stores the compressed meta-block header.
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
 void StoreCompressedMetaBlockHeader(bool final_block,
                                     size_t length,
                                     size_t* storage_ix,
                                     uint8_t* storage) {
-  // Write ISLAST bit.
+  /* Write ISLAST bit. */
   WriteBits(1, final_block, storage_ix, storage);
-  // Write ISEMPTY bit.
+  /* Write ISEMPTY bit. */
   if (final_block) {
     WriteBits(1, 0, storage_ix, storage);
   }
@@ -95,15 +99,19 @@ void StoreCompressedMetaBlockHeader(bool final_block,
   WriteBits(nlenbits, lenbits, storage_ix, storage);
 
   if (!final_block) {
-    // Write ISUNCOMPRESSED bit.
+    /* Write ISUNCOMPRESSED bit. */
     WriteBits(1, 0, storage_ix, storage);
   }
 }
 
+/* Stores the uncompressed meta-block header.
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
 void StoreUncompressedMetaBlockHeader(size_t length,
                                       size_t* storage_ix,
                                       uint8_t* storage) {
-  // Write ISLAST bit. Uncompressed block cannot be the last one, so set to 0.
+  /* Write ISLAST bit.
+     Uncompressed block cannot be the last one, so set to 0. */
   WriteBits(1, 0, storage_ix, storage);
   uint64_t lenbits;
   size_t nlenbits;
@@ -111,7 +119,7 @@ void StoreUncompressedMetaBlockHeader(size_t length,
   EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits);
   WriteBits(2, nibblesbits, storage_ix, storage);
   WriteBits(nlenbits, lenbits, storage_ix, storage);
-  // Write ISUNCOMPRESSED bit.
+  /* Write ISUNCOMPRESSED bit. */
   WriteBits(1, 1, storage_ix, storage);
 }
 
@@ -123,16 +131,16 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
   static const uint8_t kStorageOrder[kCodeLengthCodes] = {
     1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15
   };
-  // The bit lengths of the Huffman code over the code length alphabet
-  // are compressed with the following static Huffman code:
-  //   Symbol   Code
-  //   ------   ----
-  //   0          00
-  //   1        1110
-  //   2         110
-  //   3          01
-  //   4          10
-  //   5        1111
+  /* The bit lengths of the Huffman code over the code length alphabet
+     are compressed with the following static Huffman code:
+       Symbol   Code
+       ------   ----
+       0          00
+       1        1110
+       2         110
+       3          01
+       4          10
+       5        1111 */
   static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = {
      0, 7, 3, 2, 1, 15
   };
@@ -140,7 +148,7 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
     2, 4, 3, 2, 2, 4
   };
 
-  // Throw away trailing zeros:
+  /* Throw away trailing zeros: */
   size_t codes_to_store = kCodeLengthCodes;
   if (num_codes > 1) {
     for (; codes_to_store > 0; --codes_to_store) {
@@ -149,12 +157,12 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask(
       }
     }
   }
-  size_t skip_some = 0;  // skips none.
+  size_t skip_some = 0;  /* skips none. */
   if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
       code_length_bitdepth[kStorageOrder[1]] == 0) {
-    skip_some = 2;  // skips two.
+    skip_some = 2;  /* skips two. */
    if (code_length_bitdepth[kStorageOrder[2]] == 0) {
-      skip_some = 3;  // skips three.
+      skip_some = 3;  /* skips three. */
    }
  }
  WriteBits(2, skip_some, storage_ix, storage);
@@ -177,7 +185,7 @@ static void StoreHuffmanTreeToBitMask(
     size_t ix = huffman_tree[i];
     WriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix],
               storage_ix, storage);
-    // Extra bits
+    /* Extra bits */
     switch (ix) {
       case 16:
         WriteBits(2, huffman_tree_extra_bits[i], storage_ix, storage);
@@ -194,11 +202,11 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths,
                                    size_t num_symbols,
                                    size_t max_bits,
                                    size_t *storage_ix, uint8_t *storage) {
-  // value of 1 indicates a simple Huffman code
+  /* value of 1 indicates a simple Huffman code */
   WriteBits(2, 1, storage_ix, storage);
-  WriteBits(2, num_symbols - 1, storage_ix, storage);  // NSYM - 1
+  WriteBits(2, num_symbols - 1, storage_ix, storage);  /* NSYM - 1 */
 
-  // Sort
+  /* Sort */
   for (size_t i = 0; i < num_symbols; i++) {
     for (size_t j = i + 1; j < num_symbols; j++) {
       if (depths[symbols[j]] < depths[symbols[i]]) {
@@ -219,19 +227,19 @@ static void StoreSimpleHuffmanTree(const uint8_t* depths,
       WriteBits(max_bits, symbols[1], storage_ix, storage);
       WriteBits(max_bits, symbols[2], storage_ix, storage);
       WriteBits(max_bits, symbols[3], storage_ix, storage);
-      // tree-select
+      /* tree-select */
       WriteBits(1, depths[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
     }
 }
 
-// num = alphabet size
-// depths = symbol depths
+/* num = alphabet size
+   depths = symbol depths */
 void StoreHuffmanTree(const uint8_t* depths, size_t num,
                       HuffmanTree* tree,
                       size_t *storage_ix, uint8_t *storage) {
-  // Write the Huffman tree into the brotli-representation.
-  // The command alphabet is the largest, so this allocation will fit all
-  // alphabets.
+  /* Write the Huffman tree into the brotli-representation.
+     The command alphabet is the largest, so this allocation will fit all
+     alphabets. */
   assert(num <= kNumCommandPrefixes);
   uint8_t huffman_tree[kNumCommandPrefixes];
   uint8_t huffman_tree_extra_bits[kNumCommandPrefixes];
@@ -239,7 +247,7 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
   WriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree,
                    huffman_tree_extra_bits);
 
-  // Calculate the statistics of the Huffman tree in brotli-representation.
+  /* Calculate the statistics of the Huffman tree in brotli-representation. */
   uint32_t huffman_tree_histogram[kCodeLengthCodes] = { 0 };
   for (size_t i = 0; i < huffman_tree_size; ++i) {
     ++huffman_tree_histogram[huffman_tree[i]];
@@ -259,8 +267,8 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
     }
   }
 
-  // Calculate another Huffman tree to use for compressing the earlier
-  // Huffman tree with.
+  /* Calculate another Huffman tree to use for compressing the earlier
+     Huffman tree with. */
   uint8_t code_length_bitdepth[kCodeLengthCodes] = { 0 };
   uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = { 0 };
   CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes,
@@ -268,7 +276,7 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
   ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes,
                             &code_length_bitdepth_symbols[0]);
 
-  // Now, we have all the data, let's start storing it
+  /* Now, we have all the data, let's start storing it */
   StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth,
                                          storage_ix, storage);
 
@@ -276,7 +284,7 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
     code_length_bitdepth[code] = 0;
   }
 
-  // Store the real huffman tree now.
+  /* Store the real huffman tree now. */
   StoreHuffmanTreeToBitMask(huffman_tree_size,
                             huffman_tree,
                             huffman_tree_extra_bits,
@@ -285,6 +293,8 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num,
                             storage_ix, storage);
 }
 
+/* Builds a Huffman tree from histogram[0:length] into depth[0:length] and
+   bits[0:length] and stores the encoded tree to the bit stream. */
 void BuildAndStoreHuffmanTree(const uint32_t *histogram,
                               const size_t length,
                               HuffmanTree* tree,
@@ -379,13 +389,13 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
     }
     const int n = static_cast<int>(node - tree);
     std::sort(tree, node, SortHuffmanTree);
-    // The nodes are:
-    // [0, n): the sorted leaf nodes that we start with.
-    // [n]: we add a sentinel here.
-    // [n + 1, 2n): new parent nodes are added here, starting from
-    //              (n+1). These are naturally in ascending order.
-    // [2n]: we add a sentinel at the end as well.
-    // There will be (2n+1) elements at the end.
+    /* The nodes are:
+       [0, n): the sorted leaf nodes that we start with.
+       [n]: we add a sentinel here.
+       [n + 1, 2n): new parent nodes are added here, starting from
+                    (n+1). These are naturally in ascending order.
+       [2n]: we add a sentinel at the end as well.
+       There will be (2n+1) elements at the end. */
     const HuffmanTree sentinel(std::numeric_limits<int>::max(), -1, -1);
     *node++ = sentinel;
     *node++ = sentinel;
@@ -408,18 +418,17 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
         right = j;
         ++j;
       }
-      // The sentinel node becomes the parent node.
+      /* The sentinel node becomes the parent node. */
       node[-1].total_count_ =
           tree[left].total_count_ + tree[right].total_count_;
       node[-1].index_left_ = static_cast<int16_t>(left);
       node[-1].index_right_or_value_ = static_cast<int16_t>(right);
-      // Add back the last sentinel node.
+      /* Add back the last sentinel node. */
       *node++ = sentinel;
     }
     SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
-    // We need to pack the Huffman tree in 14 bits.
-    // If this was not successful, add fake entities to the lowest values
-    // and retry.
+    /* We need to pack the Huffman tree in 14 bits. If this was not
+       successful, add fake entities to the lowest values and retry. */
     if (PREDICT_TRUE(*std::max_element(&depth[0], &depth[length]) <= 14)) {
       break;
     }
@@ -427,11 +436,11 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
   free(tree);
   ConvertBitDepthsToSymbols(depth, length, bits);
   if (count <= 4) {
-    // value of 1 indicates a simple Huffman code
+    /* value of 1 indicates a simple Huffman code */
     WriteBits(2, 1, storage_ix, storage);
-    WriteBits(2, count - 1, storage_ix, storage);  // NSYM - 1
+    WriteBits(2, count - 1, storage_ix, storage);  /* NSYM - 1 */
 
-    // Sort
+    /* Sort */
     for (size_t i = 0; i < count; i++) {
       for (size_t j = i + 1; j < count; j++) {
         if (depth[symbols[j]] < depth[symbols[i]]) {
@@ -452,14 +461,14 @@ void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
       WriteBits(max_bits, symbols[1], storage_ix, storage);
       WriteBits(max_bits, symbols[2], storage_ix, storage);
       WriteBits(max_bits, symbols[3], storage_ix, storage);
-      // tree-select
+      /* tree-select */
       WriteBits(1, depth[symbols[0]] == 1 ? 1 : 0, storage_ix, storage);
     }
   } else {
-    // Complex Huffman Tree
+    /* Complex Huffman Tree */
     StoreStaticCodeLengthCode(storage_ix, storage);
 
-    // Actual rle coding.
+    /* Actual rle coding. */
    uint8_t previous_value = 8;
    for (size_t i = 0; i < length;) {
      const uint8_t value = depth[i];
@@ -531,12 +540,12 @@ static void MoveToFrontTransform(const uint32_t* __restrict v_in,
   }
 }
 
-// Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of
-// the run length plus extra bits (lower 9 bits is the prefix code and the rest
-// are the extra bits). Non-zero values in v[] are shifted by
-// *max_length_prefix. Will not create prefix codes bigger than the initial
-// value of *max_run_length_prefix. The prefix code of run length L is simply
-// Log2Floor(L) and the number of extra bits is the same as the prefix code.
+/* Finds runs of zeros in v[0..in_size) and replaces them with a prefix code of
+   the run length plus extra bits (lower 9 bits is the prefix code and the rest
+   are the extra bits). Non-zero values in v[] are shifted by
+   *max_length_prefix. Will not create prefix codes bigger than the initial
+   value of *max_run_length_prefix. The prefix code of run length L is simply
+   Log2Floor(L) and the number of extra bits is the same as the prefix code. */
 static void RunLengthCodeZeros(const size_t in_size,
                                uint32_t* __restrict v,
                                size_t* __restrict out_size,
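
Note: the packing described in the comment above pins down as follows. A hypothetical helper, using the GCC/Clang __builtin_clz builtin for Log2Floor and ignoring the clamping against *max_run_length_prefix that the real function performs:

#include <cassert>
#include <cstdint>

/* A zero-run of length L is coded as prefix = Log2Floor(L) in the low 9
   bits, with extra = L - (1 << prefix) in the bits above; the decoder
   then reads `prefix` extra bits. */
static uint32_t PackZeroRun(uint32_t run_length) {
  assert(run_length > 0);
  uint32_t prefix = 31u - static_cast<uint32_t>(__builtin_clz(run_length));
  uint32_t extra = run_length - (1u << prefix);
  return prefix | (extra << 9);
}
/* e.g. run_length = 21: prefix = 4, extra = 5, so 4 extra bits follow. */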
@@ -630,6 +639,7 @@ void EncodeContextMap(const std::vector<uint32_t>& context_map,
   delete[] rle_symbols;
 }
 
+/* Stores the block switch command with index block_ix to the bit stream. */
 void StoreBlockSwitch(const BlockSplitCode& code,
                       const size_t block_ix,
                       size_t* storage_ix,
@@ -646,6 +656,8 @@ void StoreBlockSwitch(const BlockSplitCode& code,
             storage_ix, storage);
 }
 
+/* Builds a BlockSplitCode data structure from the block split given by the
+   vector of block types and block lengths and stores it to the bit stream. */
 static void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
                                         const std::vector<uint32_t>& lengths,
                                         const size_t num_types,
@@ -695,6 +707,7 @@ static void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
   }
 }
 
+/* Stores a context map where the histogram type is always the block type. */
 void StoreTrivialContextMap(size_t num_types,
                             size_t context_bits,
                             HuffmanTree* tree,
@@ -711,7 +724,7 @@ void StoreTrivialContextMap(size_t num_types,
     memset(histogram, 0, alphabet_size * sizeof(histogram[0]));
     memset(depths, 0, alphabet_size * sizeof(depths[0]));
     memset(bits, 0, alphabet_size * sizeof(bits[0]));
-    // Write RLEMAX.
+    /* Write RLEMAX. */
     WriteBits(1, 1, storage_ix, storage);
     WriteBits(4, repeat_code - 1, storage_ix, storage);
     histogram[repeat_code] = static_cast<uint32_t>(num_types);
@@ -728,12 +741,12 @@ void StoreTrivialContextMap(size_t num_types,
       WriteBits(depths[repeat_code], bits[repeat_code], storage_ix, storage);
       WriteBits(repeat_code, repeat_bits, storage_ix, storage);
     }
-    // Write IMTF (inverse-move-to-front) bit.
+    /* Write IMTF (inverse-move-to-front) bit. */
     WriteBits(1, 1, storage_ix, storage);
   }
 }
 
-// Manages the encoding of one block category (literal, command or distance).
+/* Manages the encoding of one block category (literal, command or distance). */
 class BlockEncoder {
  public:
   BlockEncoder(size_t alphabet_size,
@@ -748,8 +761,8 @@ class BlockEncoder {
         block_len_(block_lengths.empty() ? 0 : block_lengths[0]),
         entropy_ix_(0) {}
 
-  // Creates entropy codes of block lengths and block types and stores them
-  // to the bit stream.
+  /* Creates entropy codes of block lengths and block types and stores them
+     to the bit stream. */
   void BuildAndStoreBlockSwitchEntropyCodes(HuffmanTree* tree,
                                             size_t* storage_ix,
                                             uint8_t* storage) {
@@ -776,8 +789,8 @@ class BlockEncoder {
     }
   }
 
-  // Stores the next symbol with the entropy code of the current block type.
-  // Updates the block type and block length at block boundaries.
+  /* Stores the next symbol with the entropy code of the current block type.
+     Updates the block type and block length at block boundaries. */
   void StoreSymbol(size_t symbol, size_t* storage_ix, uint8_t* storage) {
     if (block_len_ == 0) {
       ++block_ix_;
@@ -790,9 +803,9 @@ class BlockEncoder {
     WriteBits(depths_[ix], bits_[ix], storage_ix, storage);
   }
 
-  // Stores the next symbol with the entropy code of the current block type and
-  // context value.
-  // Updates the block type and block length at block boundaries.
+  /* Stores the next symbol with the entropy code of the current block type and
+     context value.
+     Updates the block type and block length at block boundaries. */
   template<int kContextBits>
   void StoreSymbolWithContext(size_t symbol, size_t context,
                               const std::vector<uint32_t>& context_map,
@@ -1132,8 +1145,8 @@ void StoreMetaBlockFast(const uint8_t* input,
   }
 }
 
-// This is for storing uncompressed blocks (simple raw storage of
-// bytes-as-bytes).
+/* This is for storing uncompressed blocks (simple raw storage of
+   bytes-as-bytes). */
 void StoreUncompressedMetaBlock(bool final_block,
                                 const uint8_t * __restrict input,
                                 size_t position, size_t mask,
@@ -1154,15 +1167,15 @@ void StoreUncompressedMetaBlock(bool final_block,
   memcpy(&storage[*storage_ix >> 3], &input[masked_pos], len);
   *storage_ix += len << 3;
 
-  // We need to clear the next 4 bytes to continue to be
-  // compatible with WriteBits.
+  /* We need to clear the next 4 bytes to continue to be
+     compatible with BrotliWriteBits. */
   brotli::WriteBitsPrepareStorage(*storage_ix, storage);
 
-  // Since the uncompressed block itself may not be the final block, add an
-  // empty one after this.
+  /* Since the uncompressed block itself may not be the final block, add an
+     empty one after this. */
   if (final_block) {
-    brotli::WriteBits(1, 1, storage_ix, storage);  // islast
-    brotli::WriteBits(1, 1, storage_ix, storage);  // isempty
+    brotli::WriteBits(1, 1, storage_ix, storage);  /* islast */
+    brotli::WriteBits(1, 1, storage_ix, storage);  /* isempty */
     JumpToByteBoundary(storage_ix, storage);
   }
 }

@@ -4,14 +4,14 @@
    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
-// Functions to convert brotli-related data structures into the
-// brotli bit stream. The functions here operate under the
-// assumption that there is enough space in the storage, i.e., there are
-// no out-of-range checks anywhere.
-//
-// These functions do bit addressing into a byte array. The byte array
-// is called "storage" and the index to the bit is called storage_ix
-// in function arguments.
+/* Functions to convert brotli-related data structures into the
+   brotli bit stream. The functions here operate under the
+   assumption that there is enough space in the storage, i.e., there are
+   no out-of-range checks anywhere.
+
+   These functions do bit addressing into a byte array. The byte array
+   is called "storage" and the index to the bit is called storage_ix
+   in function arguments. */
 
 #ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
 #define BROTLI_ENC_BROTLI_BIT_STREAM_H_
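
Note: the bit addressing described in the header comment above is LSB-first, so bit storage_ix of the stream lives in bit (storage_ix & 7) of byte storage[storage_ix >> 3]. A slow but self-contained reference sketch of that convention; the real WriteBits works a word at a time and assumes pre-zeroed storage (see the StoreUncompressedMetaBlock comment in the .cc file):

#include <cstddef>
#include <cstdint>

static void WriteBitsSlow(size_t n_bits, uint64_t bits,
                          size_t* storage_ix, uint8_t* storage) {
  for (size_t i = 0; i < n_bits; ++i) {
    const size_t ix = *storage_ix + i;
    const uint8_t bit = static_cast<uint8_t>((bits >> i) & 1);
    /* Clear the target bit, then set it to the i-th bit of `bits`. */
    storage[ix >> 3] = static_cast<uint8_t>(
        (storage[ix >> 3] & ~(1u << (ix & 7))) | (bit << (ix & 7)));
  }
  *storage_ix += n_bits;
}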

@@ -24,8 +24,8 @@
 
 namespace brotli {
 
-// All Store functions here will use a storage_ix, which is always the bit
-// position for the current storage.
+/* All Store functions here will use a storage_ix, which is always the bit
+   position for the current storage. */
 
 // Stores a number between 0 and 255.
 void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage);
@@ -114,8 +114,8 @@ void StoreBlockSwitch(const BlockSplitCode& code,
                       size_t* storage_ix,
                       uint8_t* storage);
 
-// REQUIRES: length > 0
-// REQUIRES: length <= (1 << 24)
+/* REQUIRES: length > 0 */
+/* REQUIRES: length <= (1 << 24) */
 void StoreMetaBlock(const uint8_t* input,
                     size_t start_pos,
                     size_t length,
@@ -132,10 +132,10 @@ void StoreMetaBlock(const uint8_t* input,
                     size_t *storage_ix,
                     uint8_t *storage);
 
-// Stores the meta-block without doing any block splitting, just collects
-// one histogram per block category and uses that for entropy coding.
-// REQUIRES: length > 0
-// REQUIRES: length <= (1 << 24)
+/* Stores the meta-block without doing any block splitting, just collects
+   one histogram per block category and uses that for entropy coding.
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
 void StoreMetaBlockTrivial(const uint8_t* input,
                            size_t start_pos,
                            size_t length,
@@ -146,10 +146,10 @@ void StoreMetaBlockTrivial(const uint8_t* input,
                            size_t *storage_ix,
                            uint8_t *storage);
 
-// Same as above, but uses static prefix codes for histograms with only a few
-// symbols, and uses static code length prefix codes for all other histograms.
-// REQUIRES: length > 0
-// REQUIRES: length <= (1 << 24)
+/* Same as above, but uses static prefix codes for histograms with only a few
+   symbols, and uses static code length prefix codes for all other histograms.
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
 void StoreMetaBlockFast(const uint8_t* input,
                         size_t start_pos,
                         size_t length,
@@ -160,10 +160,10 @@ void StoreMetaBlockFast(const uint8_t* input,
                         size_t *storage_ix,
                         uint8_t *storage);
 
-// This is for storing uncompressed blocks (simple raw storage of
-// bytes-as-bytes).
-// REQUIRES: length > 0
-// REQUIRES: length <= (1 << 24)
+/* This is for storing uncompressed blocks (simple raw storage of
+   bytes-as-bytes).
+   REQUIRES: length > 0
+   REQUIRES: length <= (1 << 24) */
 void StoreUncompressedMetaBlock(bool final_block,
                                 const uint8_t* input,
                                 size_t position, size_t mask,
@@ -171,9 +171,9 @@ void StoreUncompressedMetaBlock(bool final_block,
                                 size_t* storage_ix,
                                 uint8_t* storage);
 
-// Stores an empty metadata meta-block and syncs to a byte boundary.
+/* Stores an empty metadata meta-block and syncs to a byte boundary. */
 void StoreSyncMetaBlock(size_t* storage_ix, uint8_t* storage);
 
 }  // namespace brotli
 
-#endif  // BROTLI_ENC_BROTLI_BIT_STREAM_H_
+#endif  /* BROTLI_ENC_BROTLI_BIT_STREAM_H_ */

@@ -4,7 +4,7 @@
    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
-// Functions for clustering similar histograms together.
+/* Functions for clustering similar histograms together. */
 
 #ifndef BROTLI_ENC_CLUSTER_H_
 #define BROTLI_ENC_CLUSTER_H_
@@ -328,4 +328,4 @@ void ClusterHistograms(const std::vector<HistogramType>& in,
 
 }  // namespace brotli
 
-#endif  // BROTLI_ENC_CLUSTER_H_
+#endif  /* BROTLI_ENC_CLUSTER_H_ */

@@ -4,7 +4,7 @@
    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
-// This class models a sequence of literals and a backward reference copy.
+/* This class models a sequence of literals and a backward reference copy. */
 
 #ifndef BROTLI_ENC_COMMAND_H_
 #define BROTLI_ENC_COMMAND_H_
@@ -63,8 +63,8 @@ static inline uint16_t CombineLengthCodes(
   if (use_last_distance && inscode < 8 && copycode < 16) {
     return (copycode < 8) ? bits64 : (bits64 | 64);
   } else {
-    // "To convert an insert-and-copy length code to an insert length code and
-    // a copy length code, the following table can be used"
+    /* "To convert an insert-and-copy length code to an insert length code and
+       a copy length code, the following table can be used" */
     static const uint16_t cells[9] = { 128u, 192u, 384u, 256u, 320u, 512u,
                                        448u, 576u, 640u };
     return cells[(copycode >> 3) + 3 * (inscode >> 3)] | bits64;
@@ -153,4 +153,4 @@ struct Command {
 
 }  // namespace brotli
 
-#endif  // BROTLI_ENC_COMMAND_H_
+#endif  /* BROTLI_ENC_COMMAND_H_ */

@@ -4,13 +4,13 @@
    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
 */
 
-// Function for fast encoding of an input fragment, independently from the input
-// history. This function uses one-pass processing: when we find a backward
-// match, we immediately emit the corresponding command and literal codes to
-// the bit stream.
-//
-// Adapted from the CompressFragment() function in
-// https://github.com/google/snappy/blob/master/snappy.cc
+/* Function for fast encoding of an input fragment, independently from the input
+   history. This function uses one-pass processing: when we find a backward
+   match, we immediately emit the corresponding command and literal codes to
+   the bit stream.
+
+   Adapted from the CompressFragment() function in
+   https://github.com/google/snappy/blob/master/snappy.cc */
 
 #include "./compress_fragment.h"
 
@@ -27,12 +27,12 @@
 
 namespace brotli {
 
-// kHashMul32 multiplier has these properties:
-// * The multiplier must be odd. Otherwise we may lose the highest bit.
-// * No long streaks of 1s or 0s.
-// * There is no effort to ensure that it is a prime, the oddity is enough
-//   for this use.
-// * The number has been tuned heuristically against compression benchmarks.
+/* kHashMul32 multiplier has these properties:
+   * The multiplier must be odd. Otherwise we may lose the highest bit.
+   * No long streaks of 1s or 0s.
+   * There is no effort to ensure that it is a prime, the oddity is enough
+     for this use.
+   * The number has been tuned heuristically against compression benchmarks. */
 static const uint32_t kHashMul32 = 0x1e35a7bd;
 
 static inline uint32_t Hash(const uint8_t* p, size_t shift) {
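
Note: the body of Hash is cut off by the hunk. Below is a hedged sketch of a 5-byte multiplicative hash with the listed properties, writing the unaligned load out with memcpy; this mirrors the usual snappy-style construction and is an assumption, not necessarily the library's exact code:

#include <cstdint>
#include <cstring>

static inline uint32_t Hash5Sketch(const uint8_t* p, size_t shift) {
  uint64_t v;
  std::memcpy(&v, p, sizeof(v));  /* unaligned load; little-endian host */
  /* Push the low 5 bytes to the top, mix with the odd constant, and let
     the high bits of the product select the table bucket. */
  const uint64_t h = (v << 24) * kHashMul32;
  return static_cast<uint32_t>(h >> shift);
}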
@@ -52,12 +52,12 @@ static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
           p1[4] == p2[4]);
 }
 
-// Builds a literal prefix code into "depths" and "bits" based on the statistics
-// of the "input" string and stores it into the bit stream.
-// Note that the prefix code here is built from the pre-LZ77 input, therefore
-// we can only approximate the statistics of the actual literal stream.
-// Moreover, for long inputs we build a histogram from a sample of the input
-// and thus have to assign a non-zero depth for each literal.
+/* Builds a literal prefix code into "depths" and "bits" based on the statistics
+   of the "input" string and stores it into the bit stream.
+   Note that the prefix code here is built from the pre-LZ77 input, therefore
+   we can only approximate the statistics of the actual literal stream.
+   Moreover, for long inputs we build a histogram from a sample of the input
+   and thus have to assign a non-zero depth for each literal. */
 static void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
                                            const size_t input_size,
                                            uint8_t depths[256],
@@ -72,8 +72,8 @@ static void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
     }
     histogram_total = input_size;
     for (size_t i = 0; i < 256; ++i) {
-      // We weigh the first 11 samples with weight 3 to account for the
-      // balancing effect of the LZ77 phase on the histogram.
+      /* We weigh the first 11 samples with weight 3 to account for the
+         balancing effect of the LZ77 phase on the histogram. */
      const uint32_t adjust = 2 * std::min(histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
@@ -85,11 +85,11 @@ static void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
     }
     histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
     for (size_t i = 0; i < 256; ++i) {
-      // We add 1 to each population count to avoid 0 bit depths (since this is
-      // only a sample and we don't know if the symbol appears or not), and we
-      // weigh the first 11 samples with weight 3 to account for the balancing
-      // effect of the LZ77 phase on the histogram (more frequent symbols are
-      // more likely to be in backward references instead of literals).
+      /* We add 1 to each population count to avoid 0 bit depths (since this is
+         only a sample and we don't know if the symbol appears or not), and we
+         weigh the first 11 samples with weight 3 to account for the balancing
+         effect of the LZ77 phase on the histogram (more frequent symbols are
+         more likely to be in backward references instead of literals). */
      const uint32_t adjust = 1 + 2 * std::min(histogram[i], 11u);
      histogram[i] += adjust;
      histogram_total += adjust;
@@ -100,23 +100,23 @@ static void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
                             depths, bits, storage_ix, storage);
 }
 
-// Builds a command and distance prefix code (each 64 symbols) into "depth" and
-// "bits" based on "histogram" and stores it into the bit stream.
+/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
+   "bits" based on "histogram" and stores it into the bit stream. */
 static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
                                            uint8_t depth[128],
                                            uint16_t bits[128],
                                            size_t* storage_ix,
                                            uint8_t* storage) {
-  // Tree size for building a tree over 64 symbols is 2 * 64 + 1.
+  /* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
   static const size_t kTreeSize = 129;
   HuffmanTree tree[kTreeSize];
   CreateHuffmanTree(histogram, 64, 15, tree, depth);
   CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
-  // We have to jump through a few hoops here in order to compute
-  // the command bits because the symbols are in a different order than in
-  // the full alphabet. This looks complicated, but having the symbols
-  // in this order in the command bits saves a few branches in the Emit*
-  // functions.
+  /* We have to jump through a few hoops here in order to compute
+     the command bits because the symbols are in a different order than in
+     the full alphabet. This looks complicated, but having the symbols
+     in this order in the command bits saves a few branches in the Emit*
+     functions. */
   uint8_t cmd_depth[64];
   uint16_t cmd_bits[64];
   memcpy(cmd_depth, depth, 24);
@@ -134,7 +134,7 @@ static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
   memcpy(bits + 56, cmd_bits + 56, 16);
   ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
   {
-    // Create the bit length array for the full command alphabet.
+    /* Create the bit length array for the full command alphabet. */
     uint8_t cmd_depth[704] = { 0 };
     memcpy(cmd_depth, depth, 8);
     memcpy(cmd_depth + 64, depth + 8, 8);
@@ -151,7 +151,7 @@ static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
   StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
 }
 
-// REQUIRES: insertlen < 6210
+/* REQUIRES: insertlen < 6210 */
 inline void EmitInsertLen(size_t insertlen,
                           const uint8_t depth[128],
                           const uint16_t bits[128],
@@ -299,21 +299,21 @@ inline void EmitLiterals(const uint8_t* input, const size_t len,
   }
 }
 
-// REQUIRES: len <= 1 << 20.
+/* REQUIRES: len <= 1 << 20. */
 static void StoreMetaBlockHeader(
     size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
-  // ISLAST
+  /* ISLAST */
   WriteBits(1, 0, storage_ix, storage);
   if (len <= (1U << 16)) {
-    // MNIBBLES is 4
+    /* MNIBBLES is 4 */
     WriteBits(2, 0, storage_ix, storage);
     WriteBits(16, len - 1, storage_ix, storage);
   } else {
-    // MNIBBLES is 5
+    /* MNIBBLES is 5 */
     WriteBits(2, 1, storage_ix, storage);
     WriteBits(20, len - 1, storage_ix, storage);
   }
-  // ISUNCOMPRESSED
+  /* ISUNCOMPRESSED */
   WriteBits(1, is_uncompressed, storage_ix, storage);
 }
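
Note: a worked count of the bits this header costs, as a hypothetical helper:

#include <cassert>
#include <cstddef>

/* ISLAST (1) + MNIBBLES (2) + MLEN - 1 field (16 or 20) + ISUNCOMPRESSED (1) */
static size_t MetaBlockHeaderBits(size_t len) {
  assert(len > 0 && len <= (1u << 20));
  return 1 + 2 + (len <= (1u << 16) ? 16 : 20) + 1;
}
/* e.g. a 65536-byte block takes 20 header bits; a 70000-byte one takes 24. */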
@@ -406,11 +406,12 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
     return;
   }
 
-  // "next_emit" is a pointer to the first byte that is not covered by a
-  // previous copy. Bytes between "next_emit" and the start of the next copy or
-  // the end of the input will be emitted as literal bytes.
+  /* "next_emit" is a pointer to the first byte that is not covered by a
+     previous copy. Bytes between "next_emit" and the start of the next copy or
+     the end of the input will be emitted as literal bytes. */
   const uint8_t* next_emit = input;
-  // Save the start of the first block for position and distance computations.
+  /* Save the start of the first block for position and distance computations.
+  */
   const uint8_t* base_ip = input;
 
   static const size_t kFirstBlockSize = 3 << 15;
@@ -419,8 +420,8 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
   const uint8_t* metablock_start = input;
   size_t block_size = std::min(input_size, kFirstBlockSize);
   size_t total_block_size = block_size;
-  // Save the bit position of the MLEN field of the meta-block header, so that
-  // we can update it later if we decide to extend this meta-block.
+  /* Save the bit position of the MLEN field of the meta-block header, so that
+     we can update it later if we decide to extend this meta-block. */
   size_t mlen_storage_ix = *storage_ix + 3;
   StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
   // No block splits, no contexts.
@@ -439,10 +440,10 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
                                  storage_ix, storage);
 
  emit_commands:
-  // Initialize the command and distance histograms. We will gather
-  // statistics of command and distance codes during the processing
-  // of this block and use it to update the command and distance
-  // prefix codes for the next block.
+  /* Initialize the command and distance histograms. We will gather
+     statistics of command and distance codes during the processing
+     of this block and use it to update the command and distance
+     prefix codes for the next block. */
   uint32_t cmd_histo[128] = {
     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
@ -466,31 +467,31 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
|
||||
const size_t kInputMarginBytes = 16;
|
||||
const size_t kMinMatchLen = 5;
|
||||
if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
|
||||
// For the last block, we need to keep a 16 bytes margin so that we can be
|
||||
// sure that all distances are at most window size - 16.
|
||||
// For all other blocks, we only need to keep a margin of 5 bytes so that
|
||||
// we don't go over the block size with a copy.
|
||||
/* For the last block, we need to keep a 16 bytes margin so that we can be
|
||||
sure that all distances are at most window size - 16.
|
||||
For all other blocks, we only need to keep a margin of 5 bytes so that
|
||||
we don't go over the block size with a copy. */
|
||||
const size_t len_limit = std::min(block_size - kMinMatchLen,
|
||||
input_size - kInputMarginBytes);
|
||||
const uint8_t* ip_limit = input + len_limit;
|
||||
|
||||
for (uint32_t next_hash = Hash(++ip, shift); ; ) {
|
||||
assert(next_emit < ip);
|
||||
// Step 1: Scan forward in the input looking for a 5-byte-long match.
|
||||
// If we get close to exhausting the input then goto emit_remainder.
|
||||
//
|
||||
// Heuristic match skipping: If 32 bytes are scanned with no matches
|
||||
// found, start looking only at every other byte. If 32 more bytes are
|
||||
// scanned, look at every third byte, etc.. When a match is found,
|
||||
// immediately go back to looking at every byte. This is a small loss
|
||||
// (~5% performance, ~0.1% density) for compressible data due to more
|
||||
// bookkeeping, but for non-compressible data (such as JPEG) it's a huge
|
||||
// win since the compressor quickly "realizes" the data is incompressible
|
||||
// and doesn't bother looking for matches everywhere.
|
||||
//
|
||||
// The "skip" variable keeps track of how many bytes there are since the
|
||||
// last match; dividing it by 32 (ie. right-shifting by five) gives the
|
||||
// number of bytes to move ahead for each iteration.
|
||||
/* Step 1: Scan forward in the input looking for a 5-byte-long match.
|
||||
If we get close to exhausting the input then goto emit_remainder.
|
||||
|
||||
Heuristic match skipping: If 32 bytes are scanned with no matches
|
||||
found, start looking only at every other byte. If 32 more bytes are
|
||||
scanned, look at every third byte, etc.. When a match is found,
|
||||
immediately go back to looking at every byte. This is a small loss
|
||||
(~5% performance, ~0.1% density) for compressible data due to more
|
||||
bookkeeping, but for non-compressible data (such as JPEG) it's a huge
|
||||
win since the compressor quickly "realizes" the data is incompressible
|
||||
and doesn't bother looking for matches everywhere.
|
||||
|
||||
The "skip" variable keeps track of how many bytes there are since the
|
||||
last match; dividing it by 32 (ie. right-shifting by five) gives the
|
||||
number of bytes to move ahead for each iteration. */
|
||||
uint32_t skip = 32;
|
||||
|
||||
const uint8_t* next_ip = ip;
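
For illustration, a minimal self-contained sketch of the skipping schedule the Step 1 comment describes (the loop and names here are illustrative, not this file's own code): the probe stride is skip >> 5, so it grows by one byte every 32 missed probes, and a real matcher resets skip to 32 on a hit.

#include <cstddef>
#include <cstdint>

/* Count probe positions under the skip schedule; a real matcher would
   hash 5 bytes at each probe and reset skip to 32 on a match. */
static size_t CountProbes(const uint8_t* ip, const uint8_t* ip_end) {
  uint32_t skip = 32;  /* 32 misses per one-byte stride increase */
  size_t probes = 0;
  while (ip < ip_end) {
    uint32_t bytes_between_hash_lookups = skip++ >> 5;
    ip += bytes_between_hash_lookups;  /* strides: 1,1,...,2,2,...,3,... */
    ++probes;
  }
  return probes;
}
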
@ -519,15 +520,15 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
table[hash] = static_cast<int>(ip - base_ip);
} while (PREDICT_TRUE(!IsMatch(ip, candidate)));

// Step 2: Emit the found match together with the literal bytes from
// "next_emit" to the bit stream, and then see if we can find a next match
// immediately afterwards. Repeat until we find no match for the input
// without emitting some literal bytes.
/* Step 2: Emit the found match together with the literal bytes from
"next_emit" to the bit stream, and then see if we can find a next match
immediately afterwards. Repeat until we find no match for the input
without emitting some literal bytes. */
uint64_t input_bytes;

{
// We have a 5-byte match at ip, and we need to emit bytes in
// [next_emit, ip).
/* We have a 5-byte match at ip, and we need to emit bytes in
[next_emit, ip). */
const uint8_t* base = ip;
size_t matched = 5 + FindMatchLengthWithLimit(
candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
@ -567,9 +568,9 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We could immediately start working at ip now, but to improve
// compression we first update "table" with the hashes of some positions
// within the last copy.
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some positions
within the last copy. */
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
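
A sketch of the "re-prime the table from the tail of the copy" idea the comment above describes. It assumes a little-endian 64-bit load and the 5-byte multiply-shift hash scheme used in this file; the helper names are hypothetical, not the encoder's own.

#include <cstddef>
#include <cstdint>
#include <cstring>

static const uint32_t kHashMul32Sketch = 0x1e35a7bd;  /* as in this file */

static inline uint64_t Load64(const uint8_t* p) {
  uint64_t v;
  std::memcpy(&v, p, sizeof(v));  /* alignment-safe unaligned load */
  return v;
}

/* Hash the 5 bytes starting "offset" bytes into the loaded word
   (little-endian assumption; "<< 24" keeps 5 bytes = 40 bits). */
static inline uint32_t Hash5AtOffset(uint64_t v, int offset, size_t shift) {
  const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32Sketch;
  return static_cast<uint32_t>(h >> shift);
}

/* Re-hash positions ip-3..ip-1 so future scans can match the copy's tail. */
static void ReprimeTable(int* table, size_t shift,
                         const uint8_t* ip, const uint8_t* base_ip) {
  const uint64_t w = Load64(ip - 3);  /* covers bytes ip-3 .. ip+4 */
  for (int off = 0; off < 3; ++off) {
    table[Hash5AtOffset(w, off, shift)] =
        static_cast<int>((ip - base_ip) - 3 + off);
  }
}
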
@ -584,8 +585,8 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
}

while (IsMatch(ip, candidate)) {
// We have a 5-byte match at ip, and no need to emit any literal bytes
// prior to ip.
/* We have a 5-byte match at ip, and no need to emit any literal bytes
prior to ip. */
const uint8_t* base = ip;
size_t matched = 5 + FindMatchLengthWithLimit(
candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
@ -601,9 +602,9 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We could immediately start working at ip now, but to improve
// compression we first update "table" with the hashes of some positions
// within the last copy.
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some positions
within the last copy. */
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
@ -627,22 +628,22 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
input_size -= block_size;
block_size = std::min(input_size, kMergeBlockSize);

// Decide if we want to continue this meta-block instead of emitting the
// last insert-only command.
/* Decide if we want to continue this meta-block instead of emitting the
last insert-only command. */
if (input_size > 0 &&
total_block_size + block_size <= (1 << 20) &&
ShouldMergeBlock(input, block_size, lit_depth)) {
assert(total_block_size > (1 << 16));
// Update the size of the current meta-block and continue emitting commands.
// We can do this because the current size and the new size both have 5
// nibbles.
/* Update the size of the current meta-block and continue emitting commands.
We can do this because the current size and the new size both have 5
nibbles. */
total_block_size += block_size;
UpdateBits(20, static_cast<uint32_t>(total_block_size - 1),
mlen_storage_ix, storage);
goto emit_commands;
}
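
The merge above relies on patching the already-written 20-bit MLEN field in place, at the bit position saved in mlen_storage_ix. A hedged sketch of what such an UpdateBits-style patch can look like; this is an illustration, not the library's implementation.

#include <cassert>
#include <cstddef>
#include <cstdint>

/* Overwrite n_bits at absolute bit position pos in storage with val,
   LSB first, matching the write order of a WriteBits-style emitter. */
static void UpdateBitsSketch(size_t n_bits, uint32_t val, size_t pos,
                             uint8_t* storage) {
  for (size_t i = 0; i < n_bits; ++i, ++pos, val >>= 1) {
    const uint8_t mask = static_cast<uint8_t>(1u << (pos & 7));
    if (val & 1) {
      storage[pos >> 3] |= mask;                         /* set bit */
    } else {
      storage[pos >> 3] &= static_cast<uint8_t>(~mask);  /* clear bit */
    }
  }
  assert(val == 0);  /* val must fit in n_bits */
}

The call above, UpdateBits(20, total_block_size - 1, mlen_storage_ix, storage), works precisely because every merged meta-block keeps a 5-nibble (20-bit) length field, so enlarging the length never changes the header layout.
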

// Emit the remaining bytes as literals.
/* Emit the remaining bytes as literals. */
if (next_emit < ip_end) {
const size_t insert = static_cast<size_t>(ip_end - next_emit);
if (PREDICT_TRUE(insert < 6210)) {
@ -663,17 +664,17 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
next_emit = ip_end;

next_block:
// If we have more data, write a new meta-block header and prefix codes and
// then continue emitting commands.
/* If we have more data, write a new meta-block header and prefix codes and
then continue emitting commands. */
if (input_size > 0) {
metablock_start = input;
block_size = std::min(input_size, kFirstBlockSize);
total_block_size = block_size;
// Save the bit position of the MLEN field of the meta-block header, so that
// we can update it later if we decide to extend this meta-block.
/* Save the bit position of the MLEN field of the meta-block header, so that
we can update it later if we decide to extend this meta-block. */
mlen_storage_ix = *storage_ix + 3;
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
// No block splits, no contexts.
/* No block splits, no contexts. */
WriteBits(13, 0, storage_ix, storage);
memset(lit_depth, 0, sizeof(lit_depth));
memset(lit_bits, 0, sizeof(lit_bits));
@ -685,12 +686,12 @@ next_block:
}

if (is_last) {
WriteBits(1, 1, storage_ix, storage); // islast
WriteBits(1, 1, storage_ix, storage); // isempty
WriteBits(1, 1, storage_ix, storage); /* islast */
WriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
} else {
// If this is not the last block, update the command and distance prefix
// codes for the next block and store the compressed forms.
/* If this is not the last block, update the command and distance prefix
codes for the next block and store the compressed forms. */
cmd_code[0] = 0;
*cmd_code_numbits = 0;
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,

enc/compress_fragment.h
@ -4,10 +4,10 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Function for fast encoding of an input fragment, independently from the input
// history. This function uses one-pass processing: when we find a backward
// match, we immediately emit the corresponding command and literal codes to
// the bit stream.
/* Function for fast encoding of an input fragment, independently from the input
history. This function uses one-pass processing: when we find a backward
match, we immediately emit the corresponding command and literal codes to
the bit stream. */

#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
#define BROTLI_ENC_COMPRESS_FRAGMENT_H_
@ -16,25 +16,25 @@

namespace brotli {

// Compresses "input" string to the "*storage" buffer as one or more complete
// meta-blocks, and updates the "*storage_ix" bit position.
//
// If "is_last" is true, emits an additional empty last meta-block.
//
// "cmd_depth" and "cmd_bits" contain the command and distance prefix codes
// (see comment in encode.h) used for the encoding of this input fragment.
// If "is_last" is false, they are updated to reflect the statistics
// of this input fragment, to be used for the encoding of the next fragment.
//
// "*cmd_code_numbits" is the number of bits of the compressed representation
// of the command and distance prefix codes, and "cmd_code" is an array of
// size at least "(*cmd_code_numbits + 7) >> 3" that contains the compressed
// command and distance prefix codes. If "is_last" is false, these are also
// updated to represent the updated "cmd_depth" and "cmd_bits".
//
// REQUIRES: "input_size" is greater than zero, or "is_last" is true.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two.
/* Compresses "input" string to the "*storage" buffer as one or more complete
meta-blocks, and updates the "*storage_ix" bit position.

If "is_last" is 1, emits an additional empty last meta-block.

"cmd_depth" and "cmd_bits" contain the command and distance prefix codes
(see comment in encode.h) used for the encoding of this input fragment.
If "is_last" is 0, they are updated to reflect the statistics
of this input fragment, to be used for the encoding of the next fragment.

"*cmd_code_numbits" is the number of bits of the compressed representation
of the command and distance prefix codes, and "cmd_code" is an array of
size at least "(*cmd_code_numbits + 7) >> 3" that contains the compressed
command and distance prefix codes. If "is_last" is 0, these are also
updated to represent the updated "cmd_depth" and "cmd_bits".

REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
REQUIRES: "table_size" is a power of two. */
void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
bool is_last,
int* table, size_t table_size,
@ -44,4 +44,4 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,

} // namespace brotli

#endif // BROTLI_ENC_COMPRESS_FRAGMENT_H_
#endif /* BROTLI_ENC_COMPRESS_FRAGMENT_H_ */
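
A sketch of a call site that honors the REQUIRES contract above. The parameter order after "table_size" is reconstructed from the names in the comment (cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage) and is an assumption, as are the buffer sizes.

#include <cstdint>
#include <vector>

void CompressOnceSketch(const uint8_t* input, size_t input_size) {
  std::vector<int> table(1 << 15, 0);      /* power of two, zero-initialized */
  uint8_t cmd_depth[128] = { 0 };
  uint16_t cmd_bits[128] = { 0 };
  size_t cmd_code_numbits = 0;
  uint8_t cmd_code[512] = { 0 };
  /* Illustrative worst-case output bound; not a documented guarantee. */
  std::vector<uint8_t> storage(2 * input_size + 512);
  size_t storage_ix = 0;
  brotli::BrotliCompressFragmentFast(
      input, input_size, /* is_last = */ true,
      table.data(), table.size(),
      cmd_depth, cmd_bits, &cmd_code_numbits, cmd_code,
      &storage_ix, storage.data());
  /* storage_ix >> 3 bytes of storage now hold complete meta-blocks. */
}
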

enc/compress_fragment_two_pass.cc
@ -4,11 +4,11 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Function for fast encoding of an input fragment, independently from the input
// history. This function uses two-pass processing: in the first pass we save
// the found backward matches and literal bytes into a buffer, and in the
// second pass we emit them into the bit stream using prefix codes built based
// on the actual command and literal byte histograms.
/* Function for fast encoding of an input fragment, independently from the input
history. This function uses two-pass processing: in the first pass we save
the found backward matches and literal bytes into a buffer, and in the
second pass we emit them into the bit stream using prefix codes built based
on the actual command and literal byte histograms. */

#include "./compress_fragment_two_pass.h"

@ -25,12 +25,12 @@

namespace brotli {

// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime, the oddity is enough
// for this use.
// * The number has been tuned heuristically against compression benchmarks.
/* kHashMul32 multiplier has these properties:
* The multiplier must be odd. Otherwise we may lose the highest bit.
* No long streaks of 1s or 0s.
* There is no effort to ensure that it is a prime, the oddity is enough
for this use.
* The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;
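
A sketch of the multiply-shift hash this multiplier feeds, assuming the 6-byte window implied by IsMatch below (which compares 6 bytes) and a little-endian 64-bit load; the helper name is illustrative.

#include <cstdint>
#include <cstring>

static inline uint32_t Hash6Sketch(const uint8_t* p, size_t shift) {
  uint64_t v;
  std::memcpy(&v, p, sizeof(v));        /* alignment-safe 8-byte load */
  /* (v << 16) keeps the low 6 bytes; the odd multiplier mixes them into
     the high bits, and the shift selects log2(table_size) of them. */
  const uint64_t h = (v << 16) * UINT64_C(0x1e35a7bd);
  return static_cast<uint32_t>(h >> shift);
}
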

static inline uint32_t Hash(const uint8_t* p, size_t shift) {
@ -51,22 +51,22 @@ static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
p1[5] == p2[5]);
}

// Builds a command and distance prefix code (each 64 symbols) into "depth" and
// "bits" based on "histogram" and stores it into the bit stream.
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
"bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(
const uint32_t histogram[128],
uint8_t depth[128], uint16_t bits[128],
size_t* storage_ix, uint8_t* storage) {
// Tree size for building a tree over 64 symbols is 2 * 64 + 1.
/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
static const size_t kTreeSize = 129;
HuffmanTree tree[kTreeSize];
CreateHuffmanTree(histogram, 64, 15, tree, depth);
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
// We have to jump through a few hoops here in order to compute
// the command bits because the symbols are in a different order than in
// the full alphabet. This looks complicated, but having the symbols
// in this order in the command bits saves a few branches in the Emit*
// functions.
/* We have to jump through a few hoops here in order to compute
the command bits because the symbols are in a different order than in
the full alphabet. This looks complicated, but having the symbols
in this order in the command bits saves a few branches in the Emit*
functions. */
uint8_t cmd_depth[64];
uint16_t cmd_bits[64];
memcpy(cmd_depth, depth + 24, 24);
@ -84,7 +84,7 @@ static void BuildAndStoreCommandPrefixCode(
memcpy(bits + 56, cmd_bits + 48, 16);
ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
{
// Create the bit length array for the full command alphabet.
/* Create the bit length array for the full command alphabet. */
uint8_t cmd_depth[704] = { 0 };
memcpy(cmd_depth, depth + 24, 8);
memcpy(cmd_depth + 64, depth + 32, 8);
@ -202,21 +202,21 @@ inline void EmitDistance(uint32_t distance, uint32_t** commands) {
++(*commands);
}

// REQUIRES: len <= 1 << 20.
/* REQUIRES: len <= 1 << 20. */
static void StoreMetaBlockHeader(
size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
// ISLAST
/* ISLAST */
WriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
// MNIBBLES is 4
/* MNIBBLES is 4 */
WriteBits(2, 0, storage_ix, storage);
WriteBits(16, len - 1, storage_ix, storage);
} else {
// MNIBBLES is 5
/* MNIBBLES is 5 */
WriteBits(2, 1, storage_ix, storage);
WriteBits(20, len - 1, storage_ix, storage);
}
// ISUNCOMPRESSED
/* ISUNCOMPRESSED */
WriteBits(1, is_uncompressed, storage_ix, storage);
}
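
A worked example for StoreMetaBlockHeader, with an illustrative length: for len = 70000 (greater than 1 << 16) the header is ISLAST (1 bit, value 0), MNIBBLES (2 bits, value 1, meaning a 5-nibble length), MLEN - 1 (20 bits, value 69999), and ISUNCOMPRESSED (1 bit), 24 bits in total. That fixed 20-bit MLEN field is exactly what the one-pass encoder patches via UpdateBits when it extends a meta-block.
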

@ -224,7 +224,7 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
size_t input_size, const uint8_t* base_ip,
int* table, size_t table_size,
uint8_t** literals, uint32_t** commands) {
// "ip" is the input pointer.
/* "ip" is the input pointer. */
const uint8_t* ip = input;
assert(table_size);
assert(table_size <= (1u << 31));
@ -233,40 +233,40 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
assert(table_size - 1 == static_cast<size_t>(
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
const uint8_t* ip_end = input + block_size;
// "next_emit" is a pointer to the first byte that is not covered by a
// previous copy. Bytes between "next_emit" and the start of the next copy or
// the end of the input will be emitted as literal bytes.
/* "next_emit" is a pointer to the first byte that is not covered by a
previous copy. Bytes between "next_emit" and the start of the next copy or
the end of the input will be emitted as literal bytes. */
const uint8_t* next_emit = input;

int last_distance = -1;
const size_t kInputMarginBytes = 16;
const size_t kMinMatchLen = 6;
if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
// For the last block, we need to keep a 16 bytes margin so that we can be
// sure that all distances are at most window size - 16.
// For all other blocks, we only need to keep a margin of 5 bytes so that
// we don't go over the block size with a copy.
/* For the last block, we need to keep a 16 bytes margin so that we can be
sure that all distances are at most window size - 16.
For all other blocks, we only need to keep a margin of 5 bytes so that
we don't go over the block size with a copy. */
const size_t len_limit = std::min(block_size - kMinMatchLen,
input_size - kInputMarginBytes);
const uint8_t* ip_limit = input + len_limit;

for (uint32_t next_hash = Hash(++ip, shift); ; ) {
assert(next_emit < ip);
// Step 1: Scan forward in the input looking for a 6-byte-long match.
// If we get close to exhausting the input then goto emit_remainder.
//
// Heuristic match skipping: If 32 bytes are scanned with no matches
// found, start looking only at every other byte. If 32 more bytes are
// scanned, look at every third byte, etc. When a match is found,
// immediately go back to looking at every byte. This is a small loss
// (~5% performance, ~0.1% density) for compressible data due to more
// bookkeeping, but for non-compressible data (such as JPEG) it's a huge
// win since the compressor quickly "realizes" the data is incompressible
// and doesn't bother looking for matches everywhere.
//
// The "skip" variable keeps track of how many bytes there are since the
// last match; dividing it by 32 (i.e. right-shifting by five) gives the
// number of bytes to move ahead for each iteration.
/* Step 1: Scan forward in the input looking for a 6-byte-long match.
If we get close to exhausting the input then goto emit_remainder.

Heuristic match skipping: If 32 bytes are scanned with no matches
found, start looking only at every other byte. If 32 more bytes are
scanned, look at every third byte, etc. When a match is found,
immediately go back to looking at every byte. This is a small loss
(~5% performance, ~0.1% density) for compressible data due to more
bookkeeping, but for non-compressible data (such as JPEG) it's a huge
win since the compressor quickly "realizes" the data is incompressible
and doesn't bother looking for matches everywhere.

The "skip" variable keeps track of how many bytes there are since the
last match; dividing it by 32 (i.e. right-shifting by five) gives the
number of bytes to move ahead for each iteration. */
uint32_t skip = 32;

const uint8_t* next_ip = ip;
@ -295,15 +295,15 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
table[hash] = static_cast<int>(ip - base_ip);
} while (PREDICT_TRUE(!IsMatch(ip, candidate)));

// Step 2: Emit the found match together with the literal bytes from
// "next_emit", and then see if we can find a next match immediately
// afterwards. Repeat until we find no match for the input
// without emitting some literal bytes.
/* Step 2: Emit the found match together with the literal bytes from
"next_emit", and then see if we can find a next match immediately
afterwards. Repeat until we find no match for the input
without emitting some literal bytes. */
uint64_t input_bytes;

{
// We have a 6-byte match at ip, and we need to emit bytes in
// [next_emit, ip).
/* We have a 6-byte match at ip, and we need to emit bytes in
[next_emit, ip). */
const uint8_t* base = ip;
size_t matched = 6 + FindMatchLengthWithLimit(
candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
@ -327,9 +327,9 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We could immediately start working at ip now, but to improve
// compression we first update "table" with the hashes of some positions
// within the last copy.
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some
positions within the last copy. */
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 5);
@ -349,8 +349,8 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
}

while (IsMatch(ip, candidate)) {
// We have a 6-byte match at ip, and no need to emit any
// literal bytes prior to ip.
/* We have a 6-byte match at ip, and no need to emit any
literal bytes prior to ip. */
const uint8_t* base = ip;
size_t matched = 6 + FindMatchLengthWithLimit(
candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
@ -364,9 +364,9 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
if (PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
// We could immediately start working at ip now, but to improve
// compression we first update "table" with the hashes of some positions
// within the last copy.
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some
positions within the last copy. */
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = static_cast<int>(ip - base_ip - 5);
@ -391,7 +391,7 @@ static void CreateCommands(const uint8_t* input, size_t block_size,

emit_remainder:
assert(next_emit <= ip_end);
// Emit the remaining bytes as literals.
/* Emit the remaining bytes as literals. */
if (next_emit < ip_end) {
const uint32_t insert = static_cast<uint32_t>(ip_end - next_emit);
EmitInsertLen(insert, commands);
@ -483,7 +483,8 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
uint32_t* command_buf, uint8_t* literal_buf,
int* table, size_t table_size,
size_t* storage_ix, uint8_t* storage) {
// Save the start of the first block for position and distance computations.
/* Save the start of the first block for position and distance computations.
*/
const uint8_t* base_ip = input;

while (input_size > 0) {
@ -496,14 +497,14 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
const size_t num_commands = static_cast<size_t>(commands - command_buf);
if (ShouldCompress(input, block_size, num_literals)) {
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
// No block splits, no contexts.
/* No block splits, no contexts. */
WriteBits(13, 0, storage_ix, storage);
StoreCommands(literal_buf, num_literals, command_buf, num_commands,
storage_ix, storage);
} else {
// Since we did not find many backward references and the entropy of
// the data is close to 8 bits, we can simply emit an uncompressed block.
// This makes compression speed of incompressible data about 3x faster.
/* Since we did not find many backward references and the entropy of
the data is close to 8 bits, we can simply emit an uncompressed block.
This makes compression speed of incompressible data about 3x faster. */
StoreMetaBlockHeader(block_size, 1, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
memcpy(&storage[*storage_ix >> 3], input, block_size);
@ -515,8 +516,8 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
}
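
A sketch of the kind of entropy check the ShouldCompress branch above rests on. The sampling stride and the near-8-bit threshold here are illustrative assumptions, not this file's actual constants.

#include <cmath>
#include <cstddef>
#include <cstdint>

static bool LooksCompressibleSketch(const uint8_t* data, size_t len) {
  if (len == 0) return false;
  uint32_t histo[256] = { 0 };
  size_t total = 0;
  for (size_t i = 0; i < len; i += 43, ++total) {
    ++histo[data[i]];                 /* sample every 43rd byte */
  }
  double entropy = 0.0;
  for (int s = 0; s < 256; ++s) {
    if (histo[s]) {
      const double p = static_cast<double>(histo[s]) / total;
      entropy -= p * std::log2(p);
    }
  }
  return entropy < 7.9;  /* near 8 bits/byte => emit an uncompressed block */
}
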

if (is_last) {
WriteBits(1, 1, storage_ix, storage); // islast
WriteBits(1, 1, storage_ix, storage); // isempty
WriteBits(1, 1, storage_ix, storage); /* islast */
WriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
}
}

enc/compress_fragment_two_pass.h
@ -4,11 +4,11 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Function for fast encoding of an input fragment, independently from the input
// history. This function uses two-pass processing: in the first pass we save
// the found backward matches and literal bytes into a buffer, and in the
// second pass we emit them into the bit stream using prefix codes built based
// on the actual command and literal byte histograms.
/* Function for fast encoding of an input fragment, independently from the input
history. This function uses two-pass processing: in the first pass we save
the found backward matches and literal bytes into a buffer, and in the
second pass we emit them into the bit stream using prefix codes built based
on the actual command and literal byte histograms. */

#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
@ -19,16 +19,16 @@ namespace brotli {

static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;

// Compresses "input" string to the "*storage" buffer as one or more complete
// meta-blocks, and updates the "*storage_ix" bit position.
//
// If "is_last" is true, emits an additional empty last meta-block.
//
// REQUIRES: "input_size" is greater than zero, or "is_last" is true.
// REQUIRES: "command_buf" and "literal_buf" point to at least
// kCompressFragmentTwoPassBlockSize long arrays.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two
/* Compresses "input" string to the "*storage" buffer as one or more complete
meta-blocks, and updates the "*storage_ix" bit position.

If "is_last" is 1, emits an additional empty last meta-block.

REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
REQUIRES: "command_buf" and "literal_buf" point to at least
kCompressFragmentTwoPassBlockSize long arrays.
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
REQUIRES: "table_size" is a power of two */
void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
bool is_last,
uint32_t* command_buf, uint8_t* literal_buf,
@ -37,4 +37,4 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,

} // namespace brotli

#endif // BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
#endif /* BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ */
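
A sketch of a call site honoring the REQUIRES contract above. The buffer sizes are the documented minimums; the trailing parameter order (table, table_size, storage_ix, storage) is taken from the definition in the .cc file shown earlier.

#include <cstdint>
#include <vector>

void CompressTwoPassSketch(const uint8_t* input, size_t input_size) {
  std::vector<uint32_t> command_buf(brotli::kCompressFragmentTwoPassBlockSize);
  std::vector<uint8_t> literal_buf(brotli::kCompressFragmentTwoPassBlockSize);
  std::vector<int> table(1 << 14, 0);      /* power of two, zero-initialized */
  std::vector<uint8_t> storage(2 * input_size + 512);  /* illustrative bound */
  size_t storage_ix = 0;
  brotli::BrotliCompressFragmentTwoPass(
      input, input_size, /* is_last = */ true,
      command_buf.data(), literal_buf.data(),
      table.data(), table.size(),
      &storage_ix, storage.data());
}
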

enc/context.h
@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Functions to map previous bytes into a context id.
/* Functions to map previous bytes into a context id. */

#ifndef BROTLI_ENC_CONTEXT_H_
#define BROTLI_ENC_CONTEXT_H_
@ -13,82 +13,83 @@

namespace brotli {

// Second-order context lookup table for UTF8 byte streams.
//
// If p1 and p2 are the previous two bytes, we calculate the context as
//
// context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].
//
// If the previous two bytes are ASCII characters (i.e. < 128), this will be
// equivalent to
//
// context = 4 * context1(p1) + context2(p2),
//
// where context1 is based on the previous byte in the following way:
//
// 0 : non-ASCII control
// 1 : \t, \n, \r
// 2 : space
// 3 : other punctuation
// 4 : " '
// 5 : %
// 6 : ( < [ {
// 7 : ) > ] }
// 8 : , ; :
// 9 : .
// 10 : =
// 11 : number
// 12 : upper-case vowel
// 13 : upper-case consonant
// 14 : lower-case vowel
// 15 : lower-case consonant
//
// and context2 is based on the second last byte:
//
// 0 : control, space
// 1 : punctuation
// 2 : upper-case letter, number
// 3 : lower-case letter
//
// If the last byte is ASCII, and the second last byte is not (in a valid UTF8
// stream it will be a continuation byte, value between 128 and 191), the
// context is the same as if the second last byte was an ASCII control or space.
//
// If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
// be a continuation byte and the context id is 2 or 3 depending on the LSB of
// the last byte and to a lesser extent on the second last byte if it is ASCII.
//
// If the last byte is a UTF8 continuation byte, the second last byte can be:
// - continuation byte: the next byte is probably ASCII or lead byte (assuming
//   4-byte UTF8 characters are rare) and the context id is 0 or 1.
// - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
// - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
//
// The possible value combinations of the previous two bytes, the range of
// context ids and the type of the next byte is summarized in the table below:
//
// |--------\-----------------------------------------------------------------|
// |         \                         Last byte                               |
// | Second   \---------------------------------------------------------------|
// | last byte  \      ASCII        |     cont. byte      |    lead byte     |
// |             \    (0-127)       |     (128-191)       |     (192-)       |
// |=============|===================|=====================|==================|
// | ASCII       | next: ASCII/lead  |     not valid       |   next: cont.    |
// | (0-127)     | context: 4 - 63   |                     |  context: 2 - 3  |
// |-------------|-------------------|---------------------|------------------|
// | cont. byte  | next: ASCII/lead  |  next: ASCII/lead   |   next: cont.    |
// | (128-191)   | context: 4 - 63   |  context: 0 - 1     |  context: 2 - 3  |
// |-------------|-------------------|---------------------|------------------|
// | lead byte   |    not valid      |  next: ASCII/lead   |    not valid     |
// | (192-207)   |                   |  context: 0 - 1     |                  |
// |-------------|-------------------|---------------------|------------------|
// | lead byte   |    not valid      |    next: cont.      |    not valid     |
// | (208-)      |                   |  context: 2 - 3     |                  |
// |-------------|-------------------|---------------------|------------------|
/* Second-order context lookup table for UTF8 byte streams.

If p1 and p2 are the previous two bytes, we calculate the context as

context = kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256].

If the previous two bytes are ASCII characters (i.e. < 128), this will be
equivalent to

context = 4 * context1(p1) + context2(p2),

where context1 is based on the previous byte in the following way:

0 : non-ASCII control
1 : \t, \n, \r
2 : space
3 : other punctuation
4 : " '
5 : %
6 : ( < [ {
7 : ) > ] }
8 : , ; :
9 : .
10 : =
11 : number
12 : upper-case vowel
13 : upper-case consonant
14 : lower-case vowel
15 : lower-case consonant

and context2 is based on the second last byte:

0 : control, space
1 : punctuation
2 : upper-case letter, number
3 : lower-case letter

If the last byte is ASCII, and the second last byte is not (in a valid UTF8
stream it will be a continuation byte, value between 128 and 191), the
context is the same as if the second last byte was an ASCII control or space.

If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
be a continuation byte and the context id is 2 or 3 depending on the LSB of
the last byte and to a lesser extent on the second last byte if it is ASCII.

If the last byte is a UTF8 continuation byte, the second last byte can be:
- continuation byte: the next byte is probably ASCII or lead byte (assuming
  4-byte UTF8 characters are rare) and the context id is 0 or 1.
- lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
- lead byte (208 - 255): next byte is continuation byte, context is 2 or 3

The possible value combinations of the previous two bytes, the range of
context ids and the type of the next byte is summarized in the table below:

|--------\-----------------------------------------------------------------|
|         \                         Last byte                               |
| Second   \---------------------------------------------------------------|
| last byte  \      ASCII        |     cont. byte      |    lead byte     |
|             \    (0-127)       |     (128-191)       |     (192-)       |
|=============|===================|=====================|==================|
| ASCII       | next: ASCII/lead  |     not valid       |   next: cont.    |
| (0-127)     | context: 4 - 63   |                     |  context: 2 - 3  |
|-------------|-------------------|---------------------|------------------|
| cont. byte  | next: ASCII/lead  |  next: ASCII/lead   |   next: cont.    |
| (128-191)   | context: 4 - 63   |  context: 0 - 1     |  context: 2 - 3  |
|-------------|-------------------|---------------------|------------------|
| lead byte   |    not valid      |  next: ASCII/lead   |    not valid     |
| (192-207)   |                   |  context: 0 - 1     |                  |
|-------------|-------------------|---------------------|------------------|
| lead byte   |    not valid      |    next: cont.      |    not valid     |
| (208-)      |                   |  context: 2 - 3     |                  |
|-------------|-------------------|---------------------|------------------|
*/
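
A sketch of how the lookup is consumed, mirroring the formula in the comment above (the helper is hypothetical; the file's own accessor is the Context() function further below). Example values are taken from the table rows shown in this diff: for p1 = 'e' (a lower-case vowel, entry 56) and p2 = ' ' (control/space, entry 0), the context is 56 | 0 = 56, one of the 4..63 "ASCII after ASCII" contexts.

#include <cstdint>

static inline uint8_t Utf8ContextSketch(uint8_t p1, uint8_t p2,
                                        const uint8_t lut[512]) {
  /* low bits from the last byte, high selector from the one before */
  return static_cast<uint8_t>(lut[p1] | lut[256 + p2]);
}
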
static const uint8_t kUTF8ContextLookup[512] = {
// Last byte.
//
// ASCII range.
/* Last byte. */
/*            */
/* ASCII range. */
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
@ -97,19 +98,19 @@ static const uint8_t kUTF8ContextLookup[512] = {
52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
// UTF8 continuation byte range.
/* UTF8 continuation byte range. */
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
// UTF8 lead byte range.
/* UTF8 lead byte range. */
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
// Second last byte.
//
// ASCII range.
/* Second last byte. */
/*                   */
/* ASCII range. */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@ -118,19 +119,19 @@ static const uint8_t kUTF8ContextLookup[512] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
// UTF8 continuation byte range.
/* UTF8 continuation byte range. */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// UTF8 lead byte range.
/* UTF8 lead byte range. */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};

// Context lookup table for small signed integers.
/* Context lookup table for small signed integers. */
static const uint8_t kSigned3BitContextLookup[] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@ -175,4 +176,4 @@ static inline uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {

} // namespace brotli

#endif // BROTLI_ENC_CONTEXT_H_
#endif /* BROTLI_ENC_CONTEXT_H_ */

enc/dictionary_hash.h
@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Hash table on the 4-byte prefixes of static dictionary words.
/* Hash table on the 4-byte prefixes of static dictionary words. */

#ifndef BROTLI_ENC_DICTIONARY_HASH_H_
#define BROTLI_ENC_DICTIONARY_HASH_H_
@ -4114,4 +4114,4 @@ static const uint16_t kStaticDictionaryHash[] = {

} // namespace brotli

#endif // BROTLI_ENC_DICTIONARY_HASH_H_
#endif /* BROTLI_ENC_DICTIONARY_HASH_H_ */

enc/encode.cc
@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Implementation of Brotli compressor.
/* Implementation of Brotli compressor. */

#include "./encode.h"

@ -36,8 +36,8 @@ namespace brotli {
static const int kMinQualityForBlockSplit = 4;
static const int kMinQualityForContextModeling = 5;
static const int kMinQualityForOptimizeHistograms = 4;
// For quality 2 there is no block splitting, so we buffer at most this many
// literals and commands.
/* For quality 2 there is no block splitting, so we buffer at most this many
literals and commands. */
static const size_t kMaxNumDelayedSymbols = 0x2fff;

#define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));
@ -95,10 +95,10 @@ static size_t HashTableSize(size_t max_table_size, size_t input_size) {
int* BrotliCompressor::GetHashTable(int quality,
size_t input_size,
size_t* table_size) {
// Use a smaller hash table when input.size() is smaller, since we
// fill the table, incurring O(hash table size) overhead for
// compression, and if the input is short, we won't need that
// many hash table entries anyway.
/* Use a smaller hash table when input.size() is smaller, since we
fill the table, incurring O(hash table size) overhead for
compression, and if the input is short, we won't need that
many hash table entries anyway. */
const size_t max_table_size = MaxHashTableSize(quality);
assert(max_table_size >= 256);
size_t htsize = HashTableSize(max_table_size, input_size);
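
A sketch of the sizing rule the comment describes: a plausible HashTableSize, under the assumption that it doubles from the 256-entry minimum until the table covers the input or hits the per-quality cap. The exact implementation is not shown in this diff.

#include <cstddef>

static size_t HashTableSizeSketch(size_t max_table_size, size_t input_size) {
  size_t htsize = 256;  /* minimum, matching the assert above */
  while (htsize < max_table_size && htsize < input_size) {
    htsize <<= 1;       /* stay a power of two */
  }
  return htsize;
}
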
@ -135,7 +135,7 @@ static void EncodeWindowBits(int lgwin, uint8_t* last_byte,
}
}

// Initializes the command and distance prefix codes for the first block.
/* Initializes the command and distance prefix codes for the first block. */
static void InitCommandPrefixCodes(uint8_t cmd_depths[128],
uint16_t cmd_bits[128],
uint8_t cmd_code[512],
@ -167,8 +167,8 @@ static void InitCommandPrefixCodes(uint8_t cmd_depths[128],
COPY_ARRAY(cmd_depths, kDefaultCommandDepths);
COPY_ARRAY(cmd_bits, kDefaultCommandBits);

// Initialize the pre-compressed form of the command and distance prefix
// codes.
/* Initialize the pre-compressed form of the command and distance prefix
codes. */
static const uint8_t kDefaultCommandCode[] = {
0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
@ -181,13 +181,13 @@ static void InitCommandPrefixCodes(uint8_t cmd_depths[128],
*cmd_code_numbits = kDefaultCommandCodeNumBits;
}

// Decide about the context map based on how well the UTF8 prefix of the
// previous byte predicts the next byte. The prediction ability is
// calculated as Shannon entropy. Here we need Shannon entropy instead of
// 'BitsEntropy' since the prefix will be encoded with the remaining 6 bits
// of the following byte, and BitsEntropy will assume that the symbol is
// stored alone using Huffman coding.
/* Decide about the context map based on how well the UTF8 prefix of the
previous byte predicts the next byte. The prediction ability is
calculated as Shannon entropy. Here we need Shannon entropy instead of
'BitsEntropy' since the prefix will be encoded with the remaining 6 bits
of the following byte, and BitsEntropy will assume that the symbol is
stored alone using Huffman coding. */
static void ChooseContextMap(int quality,
uint32_t* bigram_histo,
size_t* num_literal_contexts,
@ -232,11 +232,11 @@ static void ChooseContextMap(int quality,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
if (quality < 7) {
// 3 context models is a bit slower, don't use it at lower qualities.
/* 3 context models is a bit slower, don't use it at lower qualities. */
entropy3 = entropy1 * 10;
}
// If expected savings by symbol are less than 0.2 bits, skip the
// context modeling -- in exchange for faster decoding speed.
/* If expected savings by symbol are less than 0.2 bits, skip the
context modeling -- in exchange for faster decoding speed. */
if (entropy1 - entropy2 < 0.2 &&
entropy1 - entropy3 < 0.2) {
*num_literal_contexts = 1;
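
For reference, the Shannon entropy used in the comparison above is the standard -sum p log2 p over a histogram. A minimal self-contained sketch (the helper name is hypothetical):

#include <cmath>
#include <cstddef>
#include <cstdint>

static double ShannonEntropySketch(const uint32_t* histo, size_t n) {
  uint64_t total = 0;
  for (size_t i = 0; i < n; ++i) total += histo[i];
  if (total == 0) return 0.0;
  double h = 0.0;
  for (size_t i = 0; i < n; ++i) {
    if (histo[i]) {
      const double p = static_cast<double>(histo[i]) / total;
      h -= p * std::log2(p);  /* bits per symbol */
    }
  }
  return h;  /* per-symbol savings are compared against the 0.2-bit bar */
}
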
@ -261,9 +261,9 @@ static void DecideOverLiteralContextModeling(
if (quality < kMinQualityForContextModeling || length < 64) {
return;
}
// Gather bigram data of the UTF8 byte prefixes. To make the analysis of
// UTF8 data faster we only examine 64 byte long strides at every 4kB
// intervals.
/* Gather bigram data of the UTF8 byte prefixes. To make the analysis of
UTF8 data faster we only examine 64 byte long strides at every 4kB
intervals. */
const size_t end_pos = start_pos + length;
uint32_t bigram_prefix_histo[9] = { 0 };
for (; start_pos + 64 <= end_pos; start_pos += 4096) {
@ -325,7 +325,7 @@ static void WriteMetaBlockInternal(const uint8_t* data,
size_t* storage_ix,
uint8_t* storage) {
if (bytes == 0) {
// Write the ISLAST and ISEMPTY bits.
/* Write the ISLAST and ISEMPTY bits. */
WriteBits(2, 3, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
return;
@ -333,8 +333,8 @@ static void WriteMetaBlockInternal(const uint8_t* data,

if (!ShouldCompress(data, mask, last_flush_pos, bytes,
num_literals, num_commands)) {
// Restore the distance cache, as its last update by
// CreateBackwardReferences is now unused.
/* Restore the distance cache, as its last update by
CreateBackwardReferences is now unused. */
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
StoreUncompressedMetaBlock(is_last, data,
WrapPosition(last_flush_pos), mask, bytes,
@ -416,7 +416,7 @@ static void WriteMetaBlockInternal(const uint8_t* data,
storage_ix, storage);
}
if (bytes + 4 < (*storage_ix >> 3)) {
// Restore the distance cache and last byte.
/* Restore the distance cache and last byte. */
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
storage[0] = last_byte;
*storage_ix = last_byte_bits;
@ -444,7 +444,7 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
command_buf_(NULL),
literal_buf_(NULL),
is_last_block_emitted_(0) {
// Sanitize params.
/* Sanitize params. */
params_.quality = std::max(0, params_.quality);
if (params_.lgwin < kMinWindowBits) {
params_.lgwin = kMinWindowBits;
@ -465,18 +465,18 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
std::max(kMinInputBlockBits, params_.lgblock));
}

// Initialize input and literal cost ring buffers.
// We allocate at least lgwin + 1 bits for the ring buffer so that the newly
// added block fits there completely and we still get lgwin bits and at least
// read_block_size_bits + 1 bits because the copy tail length needs to be
// smaller than ringbuffer size.
/* Initialize input and literal cost ring buffers.
We allocate at least lgwin + 1 bits for the ring buffer so that the newly
added block fits there completely and we still get lgwin bits and at least
read_block_size_bits + 1 bits because the copy tail length needs to be
smaller than ringbuffer size. */
int ringbuffer_bits = std::max(params_.lgwin + 1, params_.lgblock + 1);
ringbuffer_ = new RingBuffer(ringbuffer_bits, params_.lgblock);

commands_ = 0;
cmd_alloc_size_ = 0;

// Initialize last byte with stream header.
/* Initialize last byte with stream header. */
EncodeWindowBits(params_.lgwin, &last_byte_, &last_byte_bits_);

// Initialize distance cache.
@ -496,7 +496,7 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
literal_buf_ = new uint8_t[kCompressFragmentTwoPassBlockSize];
}

// Initialize hashers.
/* Initialize hashers. */
hash_type_ = std::min(10, params_.quality);
hashers_->Init(hash_type_);
}
@ -516,48 +516,49 @@ void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
ringbuffer_->Write(input_buffer, input_size);
input_pos_ += input_size;

// TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
// hashing not depend on uninitialized data. This makes compression
// deterministic and it prevents uninitialized memory warnings in Valgrind.
// Even without erasing, the output would be valid (but nondeterministic).
//
// Background information: The compressor stores short (at most 8 bytes)
// substrings of the input already read in a hash table, and detects
// repetitions by looking up such substrings in the hash table. If it
// can find a substring, it checks whether the substring is really there
// in the ring buffer (or it's just a hash collision). Should the hash
// table become corrupt, this check makes sure that the output is
// still valid, albeit the compression ratio would be bad.
//
// The compressor populates the hash table from the ring buffer as it's
// reading new bytes from the input. However, at the last few indexes of
// the ring buffer, there are not enough bytes to build full-length
// substrings from. Since the hash table always contains full-length
// substrings, we erase with dummy 0s here to make sure that those
// substrings will contain 0s at the end instead of uninitialized
// data.
//
// Please note that erasing is not necessary (because the
// memory region is already initialized since the ring buffer
// has a `tail' that holds a copy of the beginning,) so we
// skip erasing if we have already gone around at least once in
// the ring buffer.
/* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
hashing not depend on uninitialized data. This makes compression
deterministic and it prevents uninitialized memory warnings in Valgrind.
Even without erasing, the output would be valid (but nondeterministic).

Background information: The compressor stores short (at most 8 bytes)
substrings of the input already read in a hash table, and detects
repetitions by looking up such substrings in the hash table. If it
can find a substring, it checks whether the substring is really there
in the ring buffer (or it's just a hash collision). Should the hash
table become corrupt, this check makes sure that the output is
still valid, albeit the compression ratio would be bad.

The compressor populates the hash table from the ring buffer as it's
reading new bytes from the input. However, at the last few indexes of
the ring buffer, there are not enough bytes to build full-length
substrings from. Since the hash table always contains full-length
substrings, we erase with dummy 0s here to make sure that those
substrings will contain 0s at the end instead of uninitialized
data.

Please note that erasing is not necessary (because the
memory region is already initialized since the ring buffer
has a `tail' that holds a copy of the beginning,) so we
skip erasing if we have already gone around at least once in
the ring buffer.

Only clear during the first round of ringbuffer writes. On
subsequent rounds data in the ringbuffer would be affected. */
size_t pos = ringbuffer_->position();
// Only clear during the first round of ringbuffer writes. On
// subsequent rounds data in the ringbuffer would be affected.
if (pos <= ringbuffer_->mask()) {
// This is the first time when the ring buffer is being written.
// We clear 7 bytes just after the bytes that have been copied from
// the input buffer.
//
// The ringbuffer has a "tail" that holds a copy of the beginning,
// but only once the ring buffer has been fully written once, i.e.,
// pos <= mask. For the first time, we need to write values
// in this tail (where index may be larger than mask), so that
// we have exactly defined behavior and don't read un-initialized
// memory. Due to performance reasons, hashing reads data using a
// LOAD64, which can go 7 bytes beyond the bytes written in the
// ringbuffer.
/* This is the first time when the ring buffer is being written.
We clear 7 bytes just after the bytes that have been copied from
the input buffer.

The ringbuffer has a "tail" that holds a copy of the beginning,
but only once the ring buffer has been fully written once, i.e.,
pos <= mask. For the first time, we need to write values
in this tail (where index may be larger than mask), so that
we have exactly defined behavior and don't read un-initialized
memory. Due to performance reasons, hashing reads data using a
LOAD64, which can go 7 bytes beyond the bytes written in the
ringbuffer. */
memset(ringbuffer_->start() + pos, 0, 7);
}
}
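
A compact sketch of why exactly 7 bytes are cleared, assuming hashing reads with an 8-byte load that may start at the last written position (so at most 7 bytes past it are touched); names are illustrative.

#include <cstddef>
#include <cstdint>
#include <cstring>

static void ZeroRingBufferTailSketch(uint8_t* rb_start, size_t pos,
                                     size_t mask) {
  if (pos <= mask) {
    /* First pass over the buffer: positions pos..pos+6 may be read by a
       64-bit load starting at the last real byte; make them defined. */
    std::memset(rb_start + pos, 0, 7);
  }
  /* After wrap-around those bytes already hold real data, so skip. */
}
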
@ -595,8 +596,8 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,

if (params_.quality <= 1) {
if (delta == 0 && !is_last) {
// We have no new input data and we don't have to finish the stream, so
// nothing to do.
/* We have no new input data and we don't have to finish the stream, so
nothing to do. */
*out_size = 0;
return true;
}
@ -630,11 +631,11 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
return true;
}

// Theoretical max number of commands is 1 per 2 bytes.
/* Theoretical max number of commands is 1 per 2 bytes. */
size_t newsize = num_commands_ + bytes / 2 + 1;
if (newsize > cmd_alloc_size_) {
// Reserve a bit more memory to allow merging with a next block
// without realloc: that would impact speed.
/* Reserve a bit more memory to allow merging with a next block
without realloc: that would impact speed. */
newsize += (bytes / 4) + 16;
cmd_alloc_size_ = newsize;
commands_ =
@ -662,13 +663,13 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
num_literals_ < max_literals &&
num_commands_ < max_commands &&
input_pos_ + input_block_size() <= last_flush_pos_ + max_length) {
// Merge with next input block. Everything will happen later.
/* Merge with next input block. Everything will happen later. */
last_processed_pos_ = input_pos_;
*out_size = 0;
return true;
}

// Create the last insert-only command.
/* Create the last insert-only command. */
if (last_insert_len_ > 0) {
brotli::Command cmd(last_insert_len_);
commands_[num_commands_++] = cmd;
@ -677,8 +678,8 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
}

if (!is_last && input_pos_ == last_flush_pos_) {
// We have no new input data and we don't have to finish the stream, so
// nothing to do.
/* We have no new input data and we don't have to finish the stream, so
nothing to do. */
*out_size = 0;
return true;
}
@ -708,8 +709,8 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
}
num_commands_ = 0;
num_literals_ = 0;
// Save the state of the distance cache in case we need to restore it for
// emitting an uncompressed block.
/* Save the state of the distance cache in case we need to restore it for
emitting an uncompressed block. */
memcpy(saved_dist_cache_, dist_cache_, sizeof(dist_cache_));
*output = &storage[0];
*out_size = storage_ix >> 3;
@ -829,14 +830,14 @@ static int BrotliCompressBufferQuality10(int lgwin,
ZopfliComputeShortestPath(block_size, block_start, input_buffer, mask,
max_backward_limit, dist_cache,
hasher, nodes, &path);
// We allocate a command buffer in the first iteration of this loop that
// will likely be big enough for the whole metablock, so that for most
// inputs we will not have to reallocate in later iterations. We do the
// allocation here and not before the loop, because if the input is small,
// this will be allocated after the zopfli cost model is freed, so this
// will not increase peak memory usage.
// TODO: If the first allocation is too small, increase command
// buffer size exponentially.
/* We allocate a command buffer in the first iteration of this loop that
will likely be big enough for the whole metablock, so that for most
inputs we will not have to reallocate in later iterations. We do the
allocation here and not before the loop, because if the input is small,
this will be allocated after the zopfli cost model is freed, so this
will not increase peak memory usage.
TODO: If the first allocation is too small, increase command
buffer size exponentially. */
size_t new_cmd_alloc_size = std::max(expected_num_commands,
num_commands + path.size() + 1);
if (cmd_alloc_size != new_cmd_alloc_size) {
|
||||
@ -868,15 +869,15 @@ static int BrotliCompressBufferQuality10(int lgwin,
|
||||
size_t storage_ix = last_byte_bits;
|
||||
|
||||
if (metablock_size == 0) {
|
||||
// Write the ISLAST and ISEMPTY bits.
|
||||
/* Write the ISLAST and ISEMPTY bits. */
|
||||
storage = new uint8_t[16];
|
||||
storage[0] = last_byte;
|
||||
WriteBits(2, 3, &storage_ix, storage);
|
||||
storage_ix = (storage_ix + 7u) & ~7u;
|
||||
} else if (!ShouldCompress(input_buffer, mask, metablock_start,
|
||||
metablock_size, num_literals, num_commands)) {
|
||||
// Restore the distance cache, as its last update by
|
||||
// CreateBackwardReferences is now unused.
|
||||
/* Restore the distance cache, as its last update by
|
||||
CreateBackwardReferences is now unused. */
|
||||
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
|
||||
storage = new uint8_t[metablock_size + 16];
|
||||
storage[0] = last_byte;
|
||||
@ -914,7 +915,7 @@ static int BrotliCompressBufferQuality10(int lgwin,
|
||||
mb,
|
||||
&storage_ix, storage);
|
||||
if (metablock_size + 4 < (storage_ix >> 3)) {
|
||||
// Restore the distance cache and last byte.
|
||||
/* Restore the distance cache and last byte. */
|
||||
memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
|
||||
storage[0] = last_byte;
|
||||
storage_ix = last_byte_bits;
|
||||
@ -928,8 +929,8 @@ static int BrotliCompressBufferQuality10(int lgwin,
|
||||
metablock_start += metablock_size;
|
||||
prev_byte = input_buffer[metablock_start - 1];
|
||||
prev_byte2 = input_buffer[metablock_start - 2];
|
||||
// Save the state of the distance cache in case we need to restore it for
|
||||
// emitting an uncompressed block.
|
||||
/* Save the state of the distance cache in case we need to restore it for
|
||||
emitting an uncompressed block. */
|
||||
memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
|
||||
|
||||
const size_t out_size = storage_ix >> 3;
|
||||
@ -955,17 +956,17 @@ int BrotliCompressBuffer(BrotliParams params,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer) {
|
||||
if (*encoded_size == 0) {
|
||||
// Output buffer needs at least one byte.
|
||||
/* Output buffer needs at least one byte. */
|
||||
return 0;
|
||||
}
|
||||
if (input_size == 0) {
|
||||
// Handle the special case of empty input.
|
||||
/* Handle the special case of empty input. */
|
||||
*encoded_size = 1;
|
||||
*encoded_buffer = 6;
|
||||
return 1;
|
||||
}
|
||||
if (params.quality == 10) {
|
||||
// TODO: Implement this direct path for all quality levels.
|
||||
/* TODO: Implement this direct path for all quality levels. */
|
||||
const int lgwin = std::min(24, std::max(16, params.lgwin));
|
||||
return BrotliCompressBufferQuality10(lgwin, input_size, input_buffer,
|
||||
encoded_size, encoded_buffer);
|
||||
|
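As context for the `*encoded_buffer = 6` special case above, a small sketch of why the empty stream is the single byte 6, assuming the RFC 7932 bit layout (LSB-first: a one-bit WBITS field of 0, then ISLAST = 1 and ISEMPTY = 1). This mirrors the WriteBits(2, 3, ...) call in the quality-10 path, not a second implementation of it:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t byte = 0;
  int bit_pos = 1;  /* one WBITS bit, value 0, already "written" */
  byte |= static_cast<uint8_t>(3 << bit_pos);  /* ISLAST and ISEMPTY bits */
  assert(byte == 6);  /* 0b110: the one-byte empty brotli stream */
  return 0;
}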
22  enc/encode.h
@ -4,7 +4,7 @@
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// API for Brotli compression
/* API for Brotli compression. */

#ifndef BROTLI_ENC_ENCODE_H_
#define BROTLI_ENC_ENCODE_H_
@ -38,23 +38,23 @@ struct BrotliParams {
      enable_context_modeling(true) {}

  enum Mode {
    // Default compression mode. The compressor does not know anything in
    // advance about the properties of the input.
    /* Default compression mode. The compressor does not know anything in
       advance about the properties of the input. */
    MODE_GENERIC = 0,
    // Compression mode for UTF-8 format text input.
    /* Compression mode for UTF-8 format text input. */
    MODE_TEXT = 1,
    // Compression mode used in WOFF 2.0.
    /* Compression mode used in WOFF 2.0. */
    MODE_FONT = 2
  };
  Mode mode;

  // Controls the compression-speed vs compression-density tradeoffs. The higher
  // the quality, the slower the compression. Range is 0 to 11.
  /* Controls the compression-speed vs compression-density tradeoffs. The higher
     the |quality|, the slower the compression. Range is 0 to 11. */
  int quality;
  // Base 2 logarithm of the sliding window size. Range is 10 to 24.
  /* Base 2 logarithm of the sliding window size. Range is 10 to 24. */
  int lgwin;
  // Base 2 logarithm of the maximum input block size. Range is 16 to 24.
  // If set to 0, the value will be set based on the quality.
  /* Base 2 logarithm of the maximum input block size. Range is 16 to 24.
     If set to 0, the value will be set based on the quality. */
  int lgblock;

  // These settings are deprecated and will be ignored.
@ -207,4 +207,4 @@ int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,

}  // namespace brotli

#endif  // BROTLI_ENC_ENCODE_H_
#endif  /* BROTLI_ENC_ENCODE_H_ */
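A usage sketch for the BrotliParams / BrotliCompressBuffer API shown above. Names are taken from these headers; note this legacy C++ API differs from the current brotli C API:

#include <algorithm>
#include "enc/encode.h"

bool CompressText(const uint8_t* in, size_t in_size,
                  uint8_t* out, size_t* out_size) {
  brotli::BrotliParams params;
  params.mode = brotli::BrotliParams::MODE_TEXT;  /* UTF-8 text input */
  params.quality = 10;  /* 0..11; 10 takes the direct path shown above */
  params.lgwin = 22;    /* 4 MiB sliding window (range 10..24) */
  /* Returns 1 on success, 0 if the output buffer was too small. */
  return brotli::BrotliCompressBuffer(params, in_size, in,
                                      out_size, out) == 1;
}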
@ -4,7 +4,7 @@
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Implementation of parallel Brotli compressor.
/* Implementation of parallel Brotli compressor. */

#include "./encode_parallel.h"

@ -63,33 +63,33 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
    return false;
  }

  // Copy prefix + next input block into a continuous area.
  /* Copy prefix + next input block into a continuous area. */
  uint32_t input_pos = prefix_size;
  // CreateBackwardReferences reads up to 3 bytes past the end of input if the
  // mask points past the end of input.
  // FindMatchLengthWithLimit could do another 8 bytes look-forward.
  /* CreateBackwardReferences reads up to 3 bytes past the end of input if the
     mask points past the end of input.
     FindMatchLengthWithLimit could do another 8 bytes look-forward. */
  std::vector<uint8_t> input(prefix_size + input_size + 4 + 8);
  memcpy(&input[0], prefix_buffer, prefix_size);
  memcpy(&input[input_pos], input_buffer, input_size);
  // Since we don't have a ringbuffer, masking is a no-op.
  // We use one less bit than the full range because some of the code uses
  // mask + 1 as the size of the ringbuffer.
  /* Since we don't have a ringbuffer, masking is a no-op.
     We use one less bit than the full range because some of the code uses
     mask + 1 as the size of the ringbuffer. */
  const uint32_t mask = std::numeric_limits<uint32_t>::max() >> 1;

  uint8_t prev_byte = input_pos > 0 ? input[(input_pos - 1) & mask] : 0;
  uint8_t prev_byte2 = input_pos > 1 ? input[(input_pos - 2) & mask] : 0;

  // Decide about UTF8 mode.
  /* Decide about UTF8 mode. */
  static const double kMinUTF8Ratio = 0.75;
  bool utf8_mode = IsMostlyUTF8(&input[0], input_pos, mask, input_size,
                                kMinUTF8Ratio);

  // Initialize hashers.
  /* Initialize hashers. */
  int hash_type = std::min(10, params.quality);
  Hashers* hashers = new Hashers();
  hashers->Init(hash_type);

  // Compute backward references.
  /* Compute backward references. */
  size_t last_insert_len = 0;
  size_t num_commands = 0;
  size_t num_literals = 0;
@ -119,7 +119,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
  }
  assert(num_commands != 0);

  // Build the meta-block.
  /* Build the meta-block. */
  MetaBlockSplit mb;
  uint32_t num_direct_distance_codes =
      params.mode == BrotliParams::MODE_FONT ? 12 : 0;
@ -141,7 +141,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
                   &mb);
  }

  // Set up the temporary output storage.
  /* Set up the temporary output storage. */
  const size_t max_out_size = 2 * input_size + 500;
  std::vector<uint8_t> storage(max_out_size);
  uint8_t first_byte = 0;
@ -161,7 +161,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
  storage[0] = static_cast<uint8_t>(first_byte);
  size_t storage_ix = first_byte_bits;

  // Store the meta-block to the temporary output.
  /* Store the meta-block to the temporary output. */
  StoreMetaBlock(&input[0], input_pos, input_size, mask,
                 prev_byte, prev_byte2,
                 is_last,
@ -173,14 +173,14 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
                 &storage_ix, &storage[0]);
  free(commands);

  // If this is not the last meta-block, store an empty metadata
  // meta-block so that the meta-block will end at a byte boundary.
  /* If this is not the last meta-block, store an empty metadata
     meta-block so that the meta-block will end at a byte boundary. */
  if (!is_last) {
    StoreSyncMetaBlock(&storage_ix, &storage[0]);
  }

  // If the compressed data is too large, fall back to an uncompressed
  // meta-block.
  /* If the compressed data is too large, fall back to an uncompressed
     meta-block. */
  size_t output_size = storage_ix >> 3;
  if (input_size + 4 < output_size) {
    storage[0] = static_cast<uint8_t>(first_byte);
@ -191,7 +191,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
    output_size = storage_ix >> 3;
  }

  // Copy the temporary output with size-check to the output.
  /* Copy the temporary output with size-check to the output. */
  if (output_size > *encoded_size) {
    return false;
  }
@ -200,7 +200,7 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
  return true;
}

}  // namespace
}  /* namespace */

int BrotliCompressBufferParallel(BrotliParams params,
                                 size_t input_size,
@ -208,15 +208,15 @@ int BrotliCompressBufferParallel(BrotliParams params,
                                 size_t* encoded_size,
                                 uint8_t* encoded_buffer) {
  if (*encoded_size == 0) {
    // Output buffer needs at least one byte.
    /* Output buffer needs at least one byte. */
    return 0;
  } else if (input_size == 0) {
    encoded_buffer[0] = 6;
    *encoded_size = 1;
    return 1;
  }

  // Sanitize params.
  /* Sanitize params. */
  if (params.lgwin < kMinWindowBits) {
    params.lgwin = kMinWindowBits;
  } else if (params.lgwin > kMaxWindowBits) {
@ -237,7 +237,7 @@ int BrotliCompressBufferParallel(BrotliParams params,

  std::vector<std::vector<uint8_t> > compressed_pieces;

  // Compress block-by-block independently.
  /* Compress block-by-block independently. */
  for (size_t pos = 0; pos < input_size; ) {
    uint32_t input_block_size =
        static_cast<uint32_t>(std::min(max_input_block_size, input_size - pos));
@ -261,7 +261,7 @@ int BrotliCompressBufferParallel(BrotliParams params,
    pos += input_block_size;
  }

  // Piece together the output.
  /* Piece together the output. */
  size_t out_pos = 0;
  for (size_t i = 0; i < compressed_pieces.size(); ++i) {
    const std::vector<uint8_t>& out = compressed_pieces[i];
@ -276,4 +276,4 @@ int BrotliCompressBufferParallel(BrotliParams params,
  return true;
}

}  // namespace brotli
}  /* namespace brotli */

@ -4,9 +4,9 @@
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// API for parallel Brotli compression
// Note that this is only a proof of concept currently and not part of the
// final API yet.
/* API for parallel Brotli compression
   Note that this is only a proof of concept currently and not part of the
   final API yet. */

#ifndef BROTLI_ENC_ENCODE_PARALLEL_H_
#define BROTLI_ENC_ENCODE_PARALLEL_H_
@ -22,6 +22,6 @@ int BrotliCompressBufferParallel(BrotliParams params,
                                 size_t* encoded_size,
                                 uint8_t* encoded_buffer);

}  // namespace brotli
}  /* namespace brotli */

#endif  // BROTLI_ENC_ENCODE_PARALLEL_H_
#endif  /* BROTLI_ENC_ENCODE_PARALLEL_H_ */
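A matching usage sketch for the proof-of-concept parallel entry point above; the contract mirrors BrotliCompressBuffer (returns 0 when the output buffer is too small):

#include <vector>
#include "enc/encode_parallel.h"

std::vector<uint8_t> CompressParallel(const uint8_t* in, size_t in_size) {
  brotli::BrotliParams params;
  std::vector<uint8_t> out(2 * in_size + 500);  /* generous upper bound */
  size_t out_size = out.size();
  if (!brotli::BrotliCompressBufferParallel(params, in_size, in,
                                            &out_size, &out[0])) {
    return std::vector<uint8_t>();  /* buffer too small or other failure */
  }
  out.resize(out_size);
  return out;
}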
@ -4,7 +4,7 @@
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Entropy encoding (Huffman) utilities.
/* Entropy encoding (Huffman) utilities. */

#include "./entropy_encode.h"

@ -31,7 +31,7 @@ void SetDepth(const HuffmanTree &p,
  }
}

// Sort the root nodes, least popular first.
/* Sort the root nodes, least popular first. */
static inline bool SortHuffmanTree(const HuffmanTree& v0,
                                   const HuffmanTree& v1) {
  if (v0.total_count_ != v1.total_count_) {
@ -40,30 +40,30 @@ static inline bool SortHuffmanTree(const HuffmanTree& v0,
  return v0.index_right_or_value_ > v1.index_right_or_value_;
}

// This function will create a Huffman tree.
//
// The catch here is that the tree cannot be arbitrarily deep.
// Brotli specifies a maximum depth of 15 bits for "code trees"
// and 7 bits for "code length code trees."
//
// count_limit is the value that is to be faked as the minimum value
// and this minimum value is raised until the tree matches the
// maximum length requirement.
//
// This algorithm is not of excellent performance for very long data blocks,
// especially when population counts are longer than 2**tree_limit, but
// we are not planning to use this with extremely long blocks.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
/* This function will create a Huffman tree.

   The catch here is that the tree cannot be arbitrarily deep.
   Brotli specifies a maximum depth of 15 bits for "code trees"
   and 7 bits for "code length code trees."

   count_limit is the value that is to be faked as the minimum value
   and this minimum value is raised until the tree matches the
   maximum length requirement.

   This algorithm is not of excellent performance for very long data blocks,
   especially when population counts are longer than 2**tree_limit, but
   we are not planning to use this with extremely long blocks.

   See http://en.wikipedia.org/wiki/Huffman_coding */
void CreateHuffmanTree(const uint32_t *data,
                       const size_t length,
                       const int tree_limit,
                       HuffmanTree* tree,
                       uint8_t *depth) {
  // For block sizes below 64 kB, we never need to do a second iteration
  // of this loop. Probably all of our block sizes will be smaller than
  // that, so this loop is mostly of academic interest. If we actually
  // would need this, we would be better off with the Katajainen algorithm.
  /* For block sizes below 64 kB, we never need to do a second iteration
     of this loop. Probably all of our block sizes will be smaller than
     that, so this loop is mostly of academic interest. If we actually
     would need this, we would be better off with the Katajainen algorithm. */
  for (uint32_t count_limit = 1; ; count_limit *= 2) {
    size_t n = 0;
    for (size_t i = length; i != 0;) {
@ -81,19 +81,19 @@ void CreateHuffmanTree(const uint32_t *data,

    std::sort(tree, tree + n, SortHuffmanTree);

    // The nodes are:
    // [0, n): the sorted leaf nodes that we start with.
    // [n]: we add a sentinel here.
    // [n + 1, 2n): new parent nodes are added here, starting from
    //             (n+1). These are naturally in ascending order.
    // [2n]: we add a sentinel at the end as well.
    // There will be (2n+1) elements at the end.
    /* The nodes are:
       [0, n): the sorted leaf nodes that we start with.
       [n]: we add a sentinel here.
       [n + 1, 2n): new parent nodes are added here, starting from
                   (n+1). These are naturally in ascending order.
       [2n]: we add a sentinel at the end as well.
       There will be (2n+1) elements at the end. */
    const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
    tree[n] = sentinel;
    tree[n + 1] = sentinel;

    size_t i = 0;      // Points to the next leaf node.
    size_t j = n + 1;  // Points to the next non-leaf node.
    size_t i = 0;      /* Points to the next leaf node. */
    size_t j = n + 1;  /* Points to the next non-leaf node. */
    for (size_t k = n - 1; k != 0; --k) {
      size_t left, right;
      if (tree[i].total_count_ <= tree[j].total_count_) {
@ -111,21 +111,20 @@ void CreateHuffmanTree(const uint32_t *data,
        ++j;
      }

      // The sentinel node becomes the parent node.
      /* The sentinel node becomes the parent node. */
      size_t j_end = 2 * n - k;
      tree[j_end].total_count_ =
          tree[left].total_count_ + tree[right].total_count_;
      tree[j_end].index_left_ = static_cast<int16_t>(left);
      tree[j_end].index_right_or_value_ = static_cast<int16_t>(right);

      // Add back the last sentinel node.
      /* Add back the last sentinel node. */
      tree[j_end + 1] = sentinel;
    }
    SetDepth(tree[2 * n - 1], &tree[0], depth, 0);

    // We need to pack the Huffman tree in tree_limit bits.
    // If this was not successful, add fake entities to the lowest values
    // and retry.
    /* We need to pack the Huffman tree in tree_limit bits. If this was not
       successful, add fake entities to the lowest values and retry. */
    if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
      break;
    }
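The count_limit retry described in the comments above can be summarized as follows; BuildTreeDepths is a hypothetical stand-in for the sentinel-based construction inside CreateHuffmanTree:

#include <algorithm>
#include <cstdint>
#include <vector>

/* Hypothetical helper: builds a plain Huffman tree from the counts and
   fills depth (resized to counts.size()) with each symbol's code length. */
void BuildTreeDepths(const std::vector<uint32_t>& counts,
                     std::vector<uint8_t>* depth);

void CreateLimitedTree(const std::vector<uint32_t>& counts, int tree_limit,
                       std::vector<uint8_t>* depth) {
  for (uint32_t count_limit = 1;; count_limit *= 2) {
    std::vector<uint32_t> clamped(counts);
    for (size_t i = 0; i < clamped.size(); ++i) {
      if (clamped[i] != 0) {
        clamped[i] = std::max(clamped[i], count_limit);  /* fake a minimum */
      }
    }
    BuildTreeDepths(clamped, depth);
    uint8_t max_depth = *std::max_element(depth->begin(), depth->end());
    if (max_depth <= tree_limit) break;  /* all codes fit; done */
    /* Otherwise double count_limit: flattening the counts flattens the tree. */
  }
}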
@ -229,7 +228,7 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
  size_t limit;
  size_t sum;
  const size_t streak_limit = 1240;
  // Let's make the Huffman code more compatible with rle encoding.
  /* Let's make the Huffman code more compatible with rle encoding. */
  size_t i;
  for (i = 0; i < length; i++) {
    if (counts[i]) {
@ -243,9 +242,9 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
    --length;
  }
  if (length == 0) {
    return;  // All zeros.
    return;  /* All zeros. */
  }
  // Now counts[0..length - 1] does not have trailing zeros.
  /* Now counts[0..length - 1] does not have trailing zeros. */
  {
    size_t nonzeros = 0;
    uint32_t smallest_nonzero = 1 << 30;
@ -258,7 +257,7 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
      }
    }
    if (nonzeros < 5) {
      // Small histogram will model it well.
      /* Small histogram will model it well. */
      return;
    }
    size_t zeros = length - nonzeros;
@ -275,13 +274,13 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
      return;
    }
  }
  // 2) Let's mark all population counts that already can be encoded
  // with an rle code.
  /* 2) Let's mark all population counts that already can be encoded
     with an rle code. */
  memset(good_for_rle, 0, length);
  {
    // Let's not spoil any of the existing good rle codes.
    // Mark any seq of 0's that is longer as 5 as a good_for_rle.
    // Mark any seq of non-0's that is longer as 7 as a good_for_rle.
    /* Let's not spoil any of the existing good rle codes.
       Mark any seq of 0's that is longer as 5 as a good_for_rle.
       Mark any seq of non-0's that is longer as 7 as a good_for_rle. */
    uint32_t symbol = counts[0];
    size_t step = 0;
    for (i = 0; i <= length; ++i) {
@ -302,8 +301,8 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
      }
    }
  }
  // 3) Let's replace those population counts that lead to more rle codes.
  // Math here is in 24.8 fixed point representation.
  /* 3) Let's replace those population counts that lead to more rle codes.
     Math here is in 24.8 fixed point representation. */
  stride = 0;
  limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
  sum = 0;
@ -313,26 +312,26 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
        (256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) {
      if (stride >= 4 || (stride >= 3 && sum == 0)) {
        size_t k;
        // The stride must end, collapse what we have, if we have enough (4).
        /* The stride must end, collapse what we have, if we have enough (4). */
        size_t count = (sum + stride / 2) / stride;
        if (count == 0) {
          count = 1;
        }
        if (sum == 0) {
          // Don't make an all zeros stride to be upgraded to ones.
          /* Don't make an all zeros stride to be upgraded to ones. */
          count = 0;
        }
        for (k = 0; k < stride; ++k) {
          // We don't want to change value at counts[i],
          // that is already belonging to the next stride. Thus - 1.
          /* We don't want to change value at counts[i],
             that is already belonging to the next stride. Thus - 1. */
          counts[i - k - 1] = static_cast<uint32_t>(count);
        }
      }
      stride = 0;
      sum = 0;
      if (i < length - 2) {
        // All interesting strides have a count of at least 4,
        // at least when non-zeros.
        /* All interesting strides have a count of at least 4, */
        /* at least when non-zeros. */
        limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420;
      } else if (i < length) {
        limit = 256 * counts[i];
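A worked example of the 24.8 fixed-point streak test used above: a count stays in the current stride while it is within streak_limit/256, roughly 4.84, of the local average. The guard below is the encoder's condition rearranged to avoid unsigned underflow:

#include <cstdio>

int main() {
  const unsigned streak_limit = 1240;  /* 4.84 in 24.8 fixed point */
  unsigned counts[3] = {10, 12, 11};
  unsigned limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
  unsigned count = 13;  /* candidate value */
  /* In-streak iff |256*count - limit| < streak_limit. */
  bool in_streak = 256 * count + streak_limit > limit &&
                   256 * count < limit + streak_limit;
  std::printf("limit=%u (avg approx %.2f), in_streak=%d\n",
              limit, limit / 256.0, static_cast<int>(in_streak));
  return 0;  /* prints limit=3236 (avg approx 12.64), in_streak=1 */
}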
@ -387,7 +386,7 @@ void WriteHuffmanTree(const uint8_t* depth,
                      uint8_t* extra_bits_data) {
  uint8_t previous_value = 8;

  // Throw away trailing zeros.
  /* Throw away trailing zeros. */
  size_t new_length = length;
  for (size_t i = 0; i < length; ++i) {
    if (depth[length - i - 1] == 0) {
@ -397,17 +396,17 @@ void WriteHuffmanTree(const uint8_t* depth,
    }
  }

  // First gather statistics on if it is a good idea to do rle.
  /* First gather statistics on if it is a good idea to do rle. */
  bool use_rle_for_non_zero = false;
  bool use_rle_for_zero = false;
  if (length > 50) {
    // Find rle coding for longer codes.
    // Shorter codes seem not to benefit from rle.
    /* Find rle coding for longer codes.
       Shorter codes seem not to benefit from rle. */
    DecideOverRleUse(depth, new_length,
                     &use_rle_for_non_zero, &use_rle_for_zero);
  }

  // Actual rle coding.
  /* Actual rle coding. */
  for (size_t i = 0; i < new_length;) {
    const uint8_t value = depth[i];
    size_t reps = 1;
@ -432,7 +431,7 @@ void WriteHuffmanTree(const uint8_t* depth,
namespace {

uint16_t ReverseBits(int num_bits, uint16_t bits) {
  static const size_t kLut[16] = {  // Pre-reversed 4-bit values.
  static const size_t kLut[16] = {  /* Pre-reversed 4-bit values. */
    0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
    0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
  };
@ -451,8 +450,8 @@ uint16_t ReverseBits(int num_bits, uint16_t bits) {
void ConvertBitDepthsToSymbols(const uint8_t *depth,
                               size_t len,
                               uint16_t *bits) {
  // In Brotli, all bit depths are [1..15]
  // 0 bit depth means that the symbol does not exist.
  /* In Brotli, all bit depths are [1..15]
     0 bit depth means that the symbol does not exist. */
  const int kMaxBits = 16;  // 0..15 are values for bits
  uint16_t bl_count[kMaxBits] = { 0 };
  {
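ConvertBitDepthsToSymbols, whose beginning is shown above, assigns canonical codes from the depths. A self-contained sketch of that scheme, with a simple loop-based bit reversal in place of the kLut table (Brotli writes codes LSB-first, hence the reversal):

#include <cstdint>
#include <cstddef>

static uint16_t ReverseBitsSimple(int num_bits, uint16_t code) {
  uint16_t rev = 0;
  for (int i = 0; i < num_bits; ++i) {
    rev = static_cast<uint16_t>((rev << 1) | ((code >> i) & 1));
  }
  return rev;
}

void DepthsToSymbols(const uint8_t* depth, size_t len, uint16_t* bits) {
  const int kMaxBits = 16;                 /* depths are 0..15 */
  uint16_t bl_count[kMaxBits] = { 0 };
  for (size_t i = 0; i < len; ++i) ++bl_count[depth[i]];
  bl_count[0] = 0;                         /* depth 0: symbol unused */
  /* First code of each depth, RFC 1951 style. */
  uint16_t next_code[kMaxBits] = { 0 };
  uint16_t code = 0;
  for (int b = 1; b < kMaxBits; ++b) {
    code = static_cast<uint16_t>((code + bl_count[b - 1]) << 1);
    next_code[b] = code;
  }
  /* Hand out consecutive codes per depth, bit-reversed for LSB-first IO. */
  for (size_t i = 0; i < len; ++i) {
    if (depth[i]) bits[i] = ReverseBitsSimple(depth[i], next_code[depth[i]]++);
  }
}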
@ -4,7 +4,7 @@
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Entropy encoding (Huffman) utilities.
/* Entropy encoding (Huffman) utilities. */

#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
#define BROTLI_ENC_ENTROPY_ENCODE_H_
@ -17,7 +17,7 @@

namespace brotli {

// A node of a Huffman tree.
/* A node of a Huffman tree. */
struct HuffmanTree {
  HuffmanTree() {}
  HuffmanTree(uint32_t count, int16_t left, int16_t right)
@ -33,44 +33,44 @@ struct HuffmanTree {
void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
              uint8_t *depth, uint8_t level);

// This function will create a Huffman tree.
//
// The (data,length) contains the population counts.
// The tree_limit is the maximum bit depth of the Huffman codes.
//
// The depth contains the tree, i.e., how many bits are used for
// the symbol.
//
// The actual Huffman tree is constructed in the tree[] array, which has to
// be at least 2 * length + 1 long.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
/* This function will create a Huffman tree.

   The (data,length) contains the population counts.
   The tree_limit is the maximum bit depth of the Huffman codes.

   The depth contains the tree, i.e., how many bits are used for
   the symbol.

   The actual Huffman tree is constructed in the tree[] array, which has to
   be at least 2 * length + 1 long.

   See http://en.wikipedia.org/wiki/Huffman_coding */
void CreateHuffmanTree(const uint32_t *data,
                       const size_t length,
                       const int tree_limit,
                       HuffmanTree* tree,
                       uint8_t *depth);

// Change the population counts in a way that the consequent
// Huffman tree compression, especially its rle-part will be more
// likely to compress this data more efficiently.
//
// length contains the size of the histogram.
// counts contains the population counts.
// good_for_rle is a buffer of at least length size
/* Change the population counts in a way that the consequent
   Huffman tree compression, especially its rle-part will be more
   likely to compress this data more efficiently.

   length contains the size of the histogram.
   counts contains the population counts.
   good_for_rle is a buffer of at least length size */
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
                                 uint8_t* good_for_rle);

// Write a Huffman tree from bit depths into the bitstream representation
// of a Huffman tree. The generated Huffman tree is to be compressed once
// more using a Huffman tree
/* Write a Huffman tree from bit depths into the bitstream representation
   of a Huffman tree. The generated Huffman tree is to be compressed once
   more using a Huffman tree */
void WriteHuffmanTree(const uint8_t* depth,
                      size_t num,
                      size_t* tree_size,
                      uint8_t* tree,
                      uint8_t* extra_bits_data);

// Get the actual bit values for a tree of bit depths.
/* Get the actual bit values for a tree of bit depths. */
void ConvertBitDepthsToSymbols(const uint8_t *depth,
                               size_t len,
                               uint16_t *bits);
@ -102,4 +102,4 @@ typedef EntropyCode<258> EntropyCodeBlockType;

}  // namespace brotli

#endif  // BROTLI_ENC_ENTROPY_ENCODE_H_
#endif  /* BROTLI_ENC_ENTROPY_ENCODE_H_ */

@ -4,7 +4,7 @@
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Static entropy codes used for faster meta-block encoding.
/* Static entropy codes used for faster meta-block encoding. */

#ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
@ -569,4 +569,4 @@ inline void StoreStaticDistanceHuffmanTree(size_t* storage_ix,

}  // namespace brotli

#endif  // BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
#endif  /* BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ */

@ -4,7 +4,7 @@
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Utilities for fast computation of logarithms.
/* Utilities for fast computation of logarithms. */

#ifndef BROTLI_ENC_FAST_LOG_H_
#define BROTLI_ENC_FAST_LOG_H_
@ -26,10 +26,10 @@ static inline uint32_t Log2FloorNonZero(size_t n) {
#endif
}

// A lookup table for small values of log2(int) to be used in entropy
// computation.
//
// ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
/* A lookup table for small values of log2(int) to be used in entropy
   computation.

   ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]]) */
static const float kLog2Table[] = {
  0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
  1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
@ -119,14 +119,15 @@ static const float kLog2Table[] = {
  7.9943534368588578f
};

// Faster logarithm for small integers, with the property of log2(0) == 0.
/* Faster logarithm for small integers, with the property of log2(0) == 0. */
static inline double FastLog2(size_t v) {
  if (v < sizeof(kLog2Table) / sizeof(kLog2Table[0])) {
    return kLog2Table[v];
  }
#if defined(_MSC_VER) && _MSC_VER <= 1700
  // Visual Studio 2012 does not have the log2() function defined, so we use
  // log() and a multiplication instead.
#if (defined(_MSC_VER) && _MSC_VER <= 1600) || \
    (defined(__ANDROID_API__) && __ANDROID_API__ < 18)
  /* Visual Studio 2010 and Android API levels < 18 do not have the log2()
   * function defined, so we use log() and a multiplication instead. */
  static const double kLog2Inv = 1.4426950408889634f;
  return log(static_cast<double>(v)) * kLog2Inv;
#else
@ -136,4 +137,4 @@ static inline double FastLog2(size_t v) {

}  // namespace brotli

#endif  // BROTLI_ENC_FAST_LOG_H_
#endif  /* BROTLI_ENC_FAST_LOG_H_ */
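A sketch of how FastLog2 is typically consumed in this encoder: a Shannon-entropy sum over a histogram, close to the encoder's own entropy helpers, where log2(0) == 0 lets empty bins drop out of the sum without a branch:

#include <cstddef>
#include <cstdint>
#include "enc/fast_log.h"  /* include path is an assumption */

/* Total bits needed to entropy-code the histogram:
   sum_i p_i * (log2(total) - log2(p_i)). */
double BitsEntropy(const uint32_t* population, size_t size) {
  size_t sum = 0;
  double retval = 0;
  for (size_t i = 0; i < size; ++i) {
    sum += population[i];
    retval -= static_cast<double>(population[i]) *
              brotli::FastLog2(population[i]);  /* FastLog2(0) == 0 */
  }
  if (sum) retval += static_cast<double>(sum) * brotli::FastLog2(sum);
  return retval;
}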
@ -4,7 +4,7 @@
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Function to find maximal matching prefixes of strings.
/* Function to find maximal matching prefixes of strings. */

#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
@ -14,14 +14,14 @@

namespace brotli {

// Separate implementation for little-endian 64-bit targets, for speed.
/* Separate implementation for little-endian 64-bit targets, for speed. */
#if defined(__GNUC__) && defined(_LP64) && defined(IS_LITTLE_ENDIAN)

static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
                                              const uint8_t* s2,
                                              size_t limit) {
  size_t matched = 0;
  size_t limit2 = (limit >> 3) + 1;  // + 1 is for pre-decrement in while
  size_t limit2 = (limit >> 3) + 1;  /* + 1 is for pre-decrement in while */
  while (PREDICT_TRUE(--limit2)) {
    if (PREDICT_FALSE(BROTLI_UNALIGNED_LOAD64(s2) ==
                      BROTLI_UNALIGNED_LOAD64(s1 + matched))) {
@ -35,7 +35,7 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
      return matched;
    }
  }
  limit = (limit & 7) + 1;  // + 1 is for pre-decrement in while
  limit = (limit & 7) + 1;  /* + 1 is for pre-decrement in while */
  while (--limit) {
    if (PREDICT_TRUE(s1[matched] == *s2)) {
      ++s2;
@ -48,15 +48,15 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
}
#else
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
                                              const uint8_t* s2,
                                              size_t limit) {
  size_t matched = 0;
  const uint8_t* s2_limit = s2 + limit;
  const uint8_t* s2_ptr = s2;
  // Find out how long the match is. We loop over the data 32 bits at a
  // time until we find a 32-bit block that doesn't match; then we find
  // the first non-matching bit and use that to calculate the total
  // length of the match.
  /* Find out how long the match is. We loop over the data 32 bits at a
     time until we find a 32-bit block that doesn't match; then we find
     the first non-matching bit and use that to calculate the total
     length of the match. */
  while (s2_ptr <= s2_limit - 4 &&
         BROTLI_UNALIGNED_LOAD32(s2_ptr) ==
         BROTLI_UNALIGNED_LOAD32(s1 + matched)) {
@ -73,4 +73,4 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,

}  // namespace brotli

#endif  // BROTLI_ENC_FIND_MATCH_LENGTH_H_
#endif  /* BROTLI_ENC_FIND_MATCH_LENGTH_H_ */
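The "find the first non-matching bit" step referenced above reduces, on the GCC/Clang little-endian path, to an XOR plus count-trailing-zeros:

#include <cstdint>
#include <cstddef>

/* How many leading bytes of two little-endian words match. */
size_t MatchedBytesInWord(uint64_t w1, uint64_t w2) {
  uint64_t x = w1 ^ w2;       /* non-zero iff the words differ somewhere */
  if (x == 0) return 8;       /* whole word matches */
  /* Index of the lowest differing bit, divided by 8, is the count of
     matching low-order (i.e. leading, in memory order) bytes. */
  return static_cast<size_t>(__builtin_ctzll(x)) >> 3;  /* GCC/Clang builtin */
}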
70  enc/hash.h
@ -4,8 +4,8 @@
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// A (forgetful) hash table to the data seen by the compressor, to
// help create backward references to previous data.
/* A (forgetful) hash table to the data seen by the compressor, to
   help create backward references to previous data. */

#ifndef BROTLI_ENC_HASH_H_
#define BROTLI_ENC_HASH_H_
@ -42,38 +42,38 @@ static const uint8_t kCutoffTransforms[] = {
  0, 12, 27, 23, 42, 63, 56, 48, 59, 64
};

// kHashMul32 multiplier has these properties:
// * The multiplier must be odd. Otherwise we may lose the highest bit.
// * No long streaks of 1s or 0s.
// * There is no effort to ensure that it is a prime, the oddity is enough
//   for this use.
// * The number has been tuned heuristically against compression benchmarks.
/* kHashMul32 multiplier has these properties:
   * The multiplier must be odd. Otherwise we may lose the highest bit.
   * No long streaks of 1s or 0s.
   * There is no effort to ensure that it is a prime, the oddity is enough
     for this use.
   * The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;

template<int kShiftBits>
inline uint32_t Hash(const uint8_t *data) {
  uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
  // The higher bits contain more mixture from the multiplication,
  // so we take our results from there.
  /* The higher bits contain more mixture from the multiplication,
     so we take our results from there. */
  return h >> (32 - kShiftBits);
}

// Usually, we always choose the longest backward reference. This function
// allows for the exception of that rule.
//
// If we choose a backward reference that is further away, it will
// usually be coded with more bits. We approximate this by assuming
// log2(distance). If the distance can be expressed in terms of the
// last four distances, we use some heuristic constants to estimate
// the bits cost. For the first up to four literals we use the bit
// cost of the literals from the literal cost model, after that we
// use the average bit cost of the cost model.
//
// This function is used to sometimes discard a longer backward reference
// when it is not much longer and the bit cost for encoding it is more
// than the saved literals.
//
// backward_reference_offset MUST be positive.
/* Usually, we always choose the longest backward reference. This function
   allows for the exception of that rule.

   If we choose a backward reference that is further away, it will
   usually be coded with more bits. We approximate this by assuming
   log2(distance). If the distance can be expressed in terms of the
   last four distances, we use some heuristic constants to estimate
   the bits cost. For the first up to four literals we use the bit
   cost of the literals from the literal cost model, after that we
   use the average bit cost of the cost model.

   This function is used to sometimes discard a longer backward reference
   when it is not much longer and the bit cost for encoding it is more
   than the saved literals.

   backward_reference_offset MUST be positive. */
inline double BackwardReferenceScore(size_t copy_length,
                                     size_t backward_reference_offset) {
  return 5.4 * static_cast<double>(copy_length) -
@ -511,13 +511,13 @@ class HashLongestMatch {
    return match_found;
  }

  // Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
  // length of max_length and stores the position cur_ix in the hash table.
  //
  // Sets *num_matches to the number of matches found, and stores the found
  // matches in matches[0] to matches[*num_matches - 1]. The matches will be
  // sorted by strictly increasing length and (non-strictly) increasing
  // distance.
  /* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the
     length of max_length and stores the position cur_ix in the hash table.

     Sets *num_matches to the number of matches found, and stores the found
     matches in matches[0] to matches[*num_matches - 1]. The matches will be
     sorted by strictly increasing length and (non-strictly) increasing
     distance. */
  size_t FindAllMatches(const uint8_t* data,
                        const size_t ring_buffer_mask,
                        const size_t cur_ix,
@ -936,7 +936,7 @@ struct Hashers {
    }
  }

  // Custom LZ77 window.
  /* Custom LZ77 window. */
  void PrependCustomDictionary(
      int type, int lgwin, const size_t size, const uint8_t* dict) {
    switch (type) {
@ -972,4 +972,4 @@ struct Hashers {

}  // namespace brotli

#endif  // BROTLI_ENC_HASH_H_
#endif  /* BROTLI_ENC_HASH_H_ */
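A sketch of the score trade-off described above: longer copies raise the score, larger distances lower it roughly like log2(distance), so a slightly shorter but much closer match can win. The 5.4 factor appears in the code shown; kDistanceWeight is a placeholder, since the diff truncates the real distance constant:

#include <cstddef>
#include "enc/fast_log.h"  /* include path is an assumption */

const double kDistanceWeight = 1.2;  /* hypothetical, for illustration only */

double ScoreCandidate(size_t copy_length, size_t backward_offset) {
  return 5.4 * static_cast<double>(copy_length) -
         kDistanceWeight * brotli::FastLog2(backward_offset);
}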
@ -4,7 +4,7 @@
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Build per-context histograms of literals, commands and distance codes.
/* Build per-context histograms of literals, commands and distance codes. */

#include "./histogram.h"

@ -4,7 +4,7 @@
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Models the histograms of literals, commands and distance codes.
/* Models the histograms of literals, commands and distance codes. */

#ifndef BROTLI_ENC_HISTOGRAM_H_
#define BROTLI_ENC_HISTOGRAM_H_
@ -92,4 +92,4 @@ void BuildHistograms(

}  // namespace brotli

#endif  // BROTLI_ENC_HISTOGRAM_H_
#endif  /* BROTLI_ENC_HISTOGRAM_H_ */

@ -4,7 +4,8 @@
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Literal cost model to allow backward reference replacement to be efficient.
/* Literal cost model to allow backward reference replacement to be efficient.
 */

#include "./literal_cost.h"

@ -20,14 +21,14 @@ namespace brotli {

static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
  if (c < 128) {
    return 0;  // Next one is the 'Byte 1' again.
  } else if (c >= 192) {  // Next one is the 'Byte 2' of utf-8 encoding.
    return 0;  /* Next one is the 'Byte 1' again. */
  } else if (c >= 192) {  /* Next one is the 'Byte 2' of utf-8 encoding. */
    return std::min<size_t>(1, clamp);
  } else {
    // Let's decide over the last byte if this ends the sequence.
    /* Let's decide over the last byte if this ends the sequence. */
    if (last < 0xe0) {
      return 0;  // Completed two or three byte coding.
    } else {  // Next one is the 'Byte 3' of utf-8 encoding.
      return 0;  /* Completed two or three byte coding. */
    } else {  /* Next one is the 'Byte 3' of utf-8 encoding. */
      return std::min<size_t>(2, clamp);
    }
  }
@ -36,7 +37,7 @@ static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
                                        const uint8_t *data) {
  size_t counts[3] = { 0 };
  size_t max_utf8 = 1;  // should be 2, but 1 compresses better.
  size_t max_utf8 = 1;  /* should be 2, but 1 compresses better. */
  size_t last_c = 0;
  size_t utf8_pos = 0;
  for (size_t i = 0; i < len; ++i) {
@ -56,16 +57,15 @@ static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,

static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
                                            const uint8_t *data, float *cost) {

  // max_utf8 is 0 (normal ascii single byte modeling),
  // 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
  /* max_utf8 is 0 (normal ascii single byte modeling),
     1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling). */
  const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
  size_t histogram[3][256] = { { 0 } };
  size_t window_half = 495;
  size_t in_window = std::min(window_half, len);
  size_t in_window_utf8[3] = { 0 };

  // Bootstrap histograms.
  /* Bootstrap histograms. */
  size_t last_c = 0;
  size_t utf8_pos = 0;
  for (size_t i = 0; i < in_window; ++i) {
@ -76,10 +76,10 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
    last_c = c;
  }

  // Compute bit costs with sliding window.
  /* Compute bit costs with sliding window. */
  for (size_t i = 0; i < len; ++i) {
    if (i >= window_half) {
      // Remove a byte in the past.
      /* Remove a byte in the past. */
      size_t c = i < window_half + 1 ?
          0 : data[(pos + i - window_half - 1) & mask];
      size_t last_c = i < window_half + 2 ?
@ -89,7 +89,7 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
      --in_window_utf8[utf8_pos2];
    }
    if (i + window_half < len) {
      // Add a byte in the future.
      /* Add a byte in the future. */
      size_t c = data[(pos + i + window_half - 1) & mask];
      size_t last_c = data[(pos + i + window_half - 2) & mask];
      size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
@ -110,10 +110,10 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
      lit_cost *= 0.5;
      lit_cost += 0.5;
    }
    // Make the first bytes more expensive -- seems to help, not sure why.
    // Perhaps because the entropy source is changing its properties
    // rapidly in the beginning of the file, perhaps because the beginning
    // of the data is a statistical "anomaly".
    /* Make the first bytes more expensive -- seems to help, not sure why.
       Perhaps because the entropy source is changing its properties
       rapidly in the beginning of the file, perhaps because the beginning
       of the data is a statistical "anomaly". */
    if (i < 2000) {
      lit_cost += 0.7 - (static_cast<double>(2000 - i) / 2000.0 * 0.35);
    }
@ -131,20 +131,20 @@ void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
  size_t window_half = 2000;
  size_t in_window = std::min(window_half, len);

  // Bootstrap histogram.
  /* Bootstrap histogram. */
  for (size_t i = 0; i < in_window; ++i) {
    ++histogram[data[(pos + i) & mask]];
  }

  // Compute bit costs with sliding window.
  /* Compute bit costs with sliding window. */
  for (size_t i = 0; i < len; ++i) {
    if (i >= window_half) {
      // Remove a byte in the past.
      /* Remove a byte in the past. */
      --histogram[data[(pos + i - window_half) & mask]];
      --in_window;
    }
    if (i + window_half < len) {
      // Add a byte in the future.
      /* Add a byte in the future. */
      ++histogram[data[(pos + i + window_half) & mask]];
      ++in_window;
    }
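The sliding-window estimate above boils down to the negative log-probability of each byte within its surrounding window. A minimal sketch, omitting the smoothing and begin-of-file penalty in the real code:

#include <cstddef>
#include <cstdint>
#include "enc/fast_log.h"  /* include path is an assumption */

/* cost(c) = -log2(histogram[c] / in_window) = log2(in_window) - log2(count).
   histogram[c] >= 1 is assumed here; the encoder adds similar guards. */
float LiteralCost(const size_t histogram[256], size_t in_window, uint8_t c) {
  return static_cast<float>(brotli::FastLog2(in_window) -
                            brotli::FastLog2(histogram[c]));
}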
@ -4,7 +4,8 @@
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Literal cost model to allow backward reference replacement to be efficient.
/* Literal cost model to allow backward reference replacement to be efficient.
 */

#ifndef BROTLI_ENC_LITERAL_COST_H_
#define BROTLI_ENC_LITERAL_COST_H_
@ -13,12 +14,12 @@

namespace brotli {

// Estimates how many bits the literals in the interval [pos, pos + len) in the
// ringbuffer (data, mask) will take entropy coded and writes these estimates
// to the cost[0..len) array.
/* Estimates how many bits the literals in the interval [pos, pos + len) in the
   ringbuffer (data, mask) will take entropy coded and writes these estimates
   to the cost[0..len) array. */
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
                                 const uint8_t *data, float *cost);

}  // namespace brotli

#endif  // BROTLI_ENC_LITERAL_COST_H_
#endif  /* BROTLI_ENC_LITERAL_COST_H_ */

@ -4,11 +4,12 @@
  See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Algorithms for distributing the literals and commands of a metablock between
// block types and contexts.
/* Algorithms for distributing the literals and commands of a metablock between
   block types and contexts. */

#include "./metablock.h"

#include "../common/types.h"
#include "./block_splitter.h"
#include "./cluster.h"
#include "./context.h"
@ -55,7 +56,7 @@ void BuildMetaBlock(const uint8_t* ringbuffer,
                  &mb->command_histograms,
                  &distance_histograms);

  // Histogram ids need to fit in one byte.
  /* Histogram ids need to fit in one byte. */
  static const size_t kMaxNumberOfHistograms = 256;

  ClusterHistograms(literal_histograms,
@ -201,32 +202,32 @@ class BlockSplitter {
 private:
  static const uint16_t kMaxBlockTypes = 256;

  // Alphabet size of particular block category.
  /* Alphabet size of particular block category. */
  const size_t alphabet_size_;
  // We collect at least this many symbols for each block.
  /* We collect at least this many symbols for each block. */
  const size_t min_block_size_;
  // We merge histograms A and B if
  //   entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
  // where A is the current histogram and B is the histogram of the last or the
  // second last block type.
  /* We merge histograms A and B if
       entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
     where A is the current histogram and B is the histogram of the last or the
     second last block type. */
  const double split_threshold_;

  size_t num_blocks_;
  BlockSplit* split_;  // not owned
  std::vector<HistogramType>* histograms_;  // not owned
  BlockSplit* split_;  /* not owned */
  std::vector<HistogramType>* histograms_;  /* not owned */

  // The number of symbols that we want to collect before deciding on whether
  // or not to merge the block with a previous one or emit a new block.
  /* The number of symbols that we want to collect before deciding on whether
     or not to merge the block with a previous one or emit a new block. */
  size_t target_block_size_;
  // The number of symbols in the current histogram.
  /* The number of symbols in the current histogram. */
  size_t block_size_;
  // Offset of the current histogram.
  /* Offset of the current histogram. */
  size_t curr_histogram_ix_;
  // Offset of the histograms of the previous two block types.
  /* Offset of the histograms of the previous two block types. */
  size_t last_histogram_ix_[2];
  // Entropy of the previous two block types.
  /* Entropy of the previous two block types. */
  double last_entropy_[2];
  // The number of times we merged the current block with the last one.
  /* The number of times we merged the current block with the last one. */
  size_t merge_last_count_;
};

@ -314,10 +315,10 @@ class ContextBlockSplitter {
    }
  }

  // Does either of three things:
  // (1) emits the current block with a new block type;
  // (2) emits the current block with the type of the second last block;
  // (3) merges the current block with the last block.
  /* Does either of three things:
     (1) emits the current block with a new block type;
     (2) emits the current block with the type of the second last block;
     (3) merges the current block with the last block. */
  void FinishBlock(bool is_final) {
    if (block_size_ < min_block_size_) {
      block_size_ = min_block_size_;
@ -336,10 +337,10 @@ class ContextBlockSplitter {
      curr_histogram_ix_ += num_contexts_;
      block_size_ = 0;
    } else if (block_size_ > 0) {
      // Try merging the set of histograms for the current block type with the
      // respective set of histograms for the last and second last block types.
      // Decide over the split based on the total reduction of entropy across
      // all contexts.
      /* Try merging the set of histograms for the current block type with the
         respective set of histograms for the last and second last block types.
         Decide over the split based on the total reduction of entropy across
         all contexts. */
      std::vector<double> entropy(num_contexts_);
      std::vector<HistogramType> combined_histo(2 * num_contexts_);
      std::vector<double> combined_entropy(2 * num_contexts_);
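The merge rule quoted in the BlockSplitter comments above, in isolation; BitsEntropy is the histogram-entropy helper sketched after the fast_log.h section:

#include <cstdint>
#include <vector>

double BitsEntropy(const uint32_t* population, size_t size);  /* see earlier sketch */

/* Merge two symbol histograms when coding their union costs at most
   split_threshold more bits than coding them as separate blocks. */
bool ShouldMerge(const std::vector<uint32_t>& a,
                 const std::vector<uint32_t>& b, double split_threshold) {
  std::vector<uint32_t> combined(a.size());
  for (size_t i = 0; i < a.size(); ++i) combined[i] = a[i] + b[i];
  return BitsEntropy(&combined[0], combined.size()) <
         BitsEntropy(&a[0], a.size()) + BitsEntropy(&b[0], b.size()) +
         split_threshold;
}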
@ -4,14 +4,15 @@
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Algorithms for distributing the literals and commands of a metablock between
|
||||
// block types and contexts.
|
||||
/* Algorithms for distributing the literals and commands of a metablock between
|
||||
block types and contexts. */
|
||||
|
||||
#ifndef BROTLI_ENC_METABLOCK_H_
|
||||
#define BROTLI_ENC_METABLOCK_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./command.h"
|
||||
#include "./histogram.h"
|
||||
|
||||
@ -36,7 +37,7 @@ struct MetaBlockSplit {
|
||||
std::vector<HistogramDistance> distance_histograms;
|
||||
};
|
||||
|
||||
// Uses the slow shortest-path block splitter and does context clustering.
|
||||
/* Uses the slow shortest-path block splitter and does context clustering. */
|
||||
void BuildMetaBlock(const uint8_t* ringbuffer,
|
||||
const size_t pos,
|
||||
const size_t mask,
|
||||
@ -47,8 +48,8 @@ void BuildMetaBlock(const uint8_t* ringbuffer,
|
||||
ContextType literal_context_mode,
|
||||
MetaBlockSplit* mb);
|
||||
|
||||
// Uses a fast greedy block splitter that tries to merge current block with the
|
||||
// last or the second last block and does not do any context modeling.
|
||||
/* Uses a fast greedy block splitter that tries to merge current block with the
|
||||
last or the second last block and does not do any context modeling. */
|
||||
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
@ -56,9 +57,9 @@ void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
|
||||
size_t n_commands,
|
||||
MetaBlockSplit* mb);
|
||||
|
||||
// Uses a fast greedy block splitter that tries to merge current block with the
|
||||
// last or the second last block and uses a static context clustering which
|
||||
// is the same for all block types.
|
||||
/* Uses a fast greedy block splitter that tries to merge current block with the
|
||||
last or the second last block and uses a static context clustering which
|
||||
is the same for all block types. */
|
||||
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
@ -77,4 +78,4 @@ void OptimizeHistograms(size_t num_direct_distance_codes,
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_METABLOCK_H_
|
||||
#endif /* BROTLI_ENC_METABLOCK_H_ */
|
||||
|
68
enc/port.h
68
enc/port.h
@ -4,14 +4,15 @@
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Macros for endianness, branch prediction and unaligned loads and stores.
|
||||
/* Macros for endianness, branch prediction and unaligned loads and stores. */
|
||||
|
||||
#ifndef BROTLI_ENC_PORT_H_
|
||||
#define BROTLI_ENC_PORT_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <string.h> /* memcpy */
|
||||
|
||||
#include "../common/port.h"
|
||||
#include "../common/types.h"
|
||||
|
||||
#if defined OS_LINUX || defined OS_CYGWIN
|
||||
@ -25,9 +26,9 @@
|
||||
#define __LITTLE_ENDIAN LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
// define the macro IS_LITTLE_ENDIAN
|
||||
// using the above endian definitions from endian.h if
|
||||
// endian.h was included
|
||||
/* define the macro IS_LITTLE_ENDIAN
|
||||
using the above endian definitions from endian.h if
|
||||
endian.h was included */
|
||||
#ifdef __BYTE_ORDER
|
||||
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
#define IS_LITTLE_ENDIAN
|
||||
@ -38,41 +39,28 @@
|
||||
#if defined(__LITTLE_ENDIAN__)
|
||||
#define IS_LITTLE_ENDIAN
|
||||
#endif
|
||||
#endif // __BYTE_ORDER
|
||||
#endif /* __BYTE_ORDER */
|
||||
|
||||
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||
#define IS_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
// Enable little-endian optimization for x64 architecture on Windows.
|
||||
/* Enable little-endian optimization for x64 architecture on Windows. */
|
||||
#if (defined(_WIN32) || defined(_WIN64)) && defined(_M_X64)
|
||||
#define IS_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
/* Compatibility with non-clang compilers. */
|
||||
#ifndef __has_builtin
|
||||
#define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
#if (__GNUC__ > 2) || (__GNUC__ == 2 && __GNUC_MINOR__ > 95) || \
|
||||
(defined(__llvm__) && __has_builtin(__builtin_expect))
|
||||
#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
|
||||
#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
|
||||
#else
|
||||
#define PREDICT_FALSE(x) (x)
|
||||
#define PREDICT_TRUE(x) (x)
|
||||
#endif

// Portable handling of unaligned loads, stores, and copies.
// On some platforms, like ARM, the copy functions can be more efficient
// than a load and a store.
/* Portable handling of unaligned loads, stores, and copies.
   On some platforms, like ARM, the copy functions can be more efficient
   than a load and a store. */

#if defined(ARCH_PIII) || \
    defined(ARCH_ATHLON) || defined(ARCH_K8) || defined(_ARCH_PPC)

// x86 and x86-64 can perform unaligned loads/stores directly;
// modern PowerPC hardware can also do unaligned integer loads and stores;
// but note: the FPU still sends unaligned loads and stores to a trap handler!
/* x86 and x86-64 can perform unaligned loads/stores directly;
   modern PowerPC hardware can also do unaligned integer loads and stores;
   but note: the FPU still sends unaligned loads and stores to a trap handler!
*/

#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))
@ -94,50 +82,50 @@
    !defined(__ARM_ARCH_6ZK__) && \
    !defined(__ARM_ARCH_6T2__)

// ARMv7 and newer support native unaligned accesses, but only of 16-bit
// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
// do an unaligned read and rotate the words around a bit, or do the reads very
// slowly (trip through kernel mode).
/* ARMv7 and newer support native unaligned accesses, but only of 16-bit
   and 32-bit values (not 64-bit); older versions either raise a fatal signal,
   do an unaligned read and rotate the words around a bit, or do the reads very
   slowly (trip through kernel mode). */

#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
  (*reinterpret_cast<uint32_t *>(_p) = (_val))

inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
static inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
  uint64_t t;
  memcpy(&t, p, sizeof t);
  return t;
}

inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
static inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
  memcpy(p, &v, sizeof v);
}

#else

// These functions are provided for architectures that don't support
// unaligned loads and stores.
/* These functions are provided for architectures that don't support */
/* unaligned loads and stores. */

inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
static inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
  uint32_t t;
  memcpy(&t, p, sizeof t);
  return t;
}

inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
static inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
  uint64_t t;
  memcpy(&t, p, sizeof t);
  return t;
}

inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
static inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
  memcpy(p, &v, sizeof v);
}

inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
static inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
  memcpy(p, &v, sizeof v);
}

#endif
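
The memcpy form is the portable idiom for unaligned access: compilers that know the target tolerates unaligned loads collapse the call into a single instruction, while others emit byte-wise code. A minimal sketch, assuming only the macros defined above (LoadAtOddOffset is an illustrative name, not part of the header):

static uint64_t LoadAtOddOffset(const uint8_t* buf /* at least 9 bytes */) {
  /* &buf[1] is almost certainly not 8-byte aligned; the memcpy-based
     BROTLI_UNALIGNED_LOAD64 is still well-defined on every architecture. */
  return BROTLI_UNALIGNED_LOAD64(&buf[1]);
}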

#endif // BROTLI_ENC_PORT_H_
#endif /* BROTLI_ENC_PORT_H_ */

@ -4,8 +4,8 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Functions for encoding integers into prefix codes, the number of extra
// bits, and the actual values of the extra bits.
/* Functions for encoding integers into prefix codes, the number of extra
   bits, and the actual values of the extra bits. */

#ifndef BROTLI_ENC_PREFIX_H_
#define BROTLI_ENC_PREFIX_H_
@ -76,4 +76,4 @@ inline void PrefixEncodeCopyDistance(size_t distance_code,

} // namespace brotli

#endif // BROTLI_ENC_PREFIX_H_
#endif /* BROTLI_ENC_PREFIX_H_ */

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Sliding window over the input data.
/* Sliding window over the input data. */

#ifndef BROTLI_ENC_RINGBUFFER_H_
#define BROTLI_ENC_RINGBUFFER_H_
@ -16,15 +16,15 @@

namespace brotli {

// A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
// data in a circular manner: writing a byte writes it to:
// `position() % (1 << window_bits)'.
// For convenience, the RingBuffer array contains another copy of the
// first `1 << tail_bits' bytes:
// buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
// and another copy of the last two bytes:
// buffer_[-1] == buffer_[(1 << window_bits) - 1] and
// buffer_[-2] == buffer_[(1 << window_bits) - 2].
/* A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
   data in a circular manner: writing a byte writes it to:
   `position() % (1 << window_bits)'.
   For convenience, the RingBuffer array contains another copy of the
   first `1 << tail_bits' bytes:
   buffer_[i] == buffer_[i + (1 << window_bits)], if i < (1 << tail_bits),
   and another copy of the last two bytes:
   buffer_[-1] == buffer_[(1 << window_bits) - 1] and
   buffer_[-2] == buffer_[(1 << window_bits) - 2]. */
class RingBuffer {
 public:
  RingBuffer(int window_bits, int tail_bits)
@ -41,8 +41,8 @@ class RingBuffer {
    free(data_);
  }

  // Allocates or re-allocates data_ to the given length plus some slack
  // region before and after. Fills the slack regions with zeros.
  /* Allocates or re-allocates data_ to the given length plus some slack
     region before and after. Fills the slack regions with zeros. */
  inline void InitBuffer(const uint32_t buflen) {
    static const size_t kSlackForEightByteHashingEverywhere = 7;
    cur_size_ = buflen;
@ -55,41 +55,41 @@ class RingBuffer {
    }
  }

  // Push bytes into the ring buffer.
  /* Push bytes into the ring buffer. */
  void Write(const uint8_t *bytes, size_t n) {
    if (pos_ == 0 && n < tail_size_) {
      // Special case for the first write: to process the first block, we don't
      // need to allocate the whole ringbuffer and we don't need the tail
      // either. However, we do this memory usage optimization only if the
      // first write is less than the tail size, which is also the input block
      // size, otherwise it is likely that other blocks will follow and we
      // will need to reallocate to the full size anyway.
      /* Special case for the first write: to process the first block, we don't
         need to allocate the whole ringbuffer and we don't need the tail
         either. However, we do this memory usage optimization only if the
         first write is less than the tail size, which is also the input block
         size, otherwise it is likely that other blocks will follow and we
         will need to reallocate to the full size anyway. */
      pos_ = static_cast<uint32_t>(n);
      InitBuffer(pos_);
      memcpy(buffer_, bytes, n);
      return;
    }
    if (cur_size_ < total_size_) {
      // Lazily allocate the full buffer.
      /* Lazily allocate the full buffer. */
      InitBuffer(total_size_);
      // Initialize the last two bytes to zero, so that we don't have to worry
      // later when we copy the last two bytes to the first two positions.
      /* Initialize the last two bytes to zero, so that we don't have to worry
         later when we copy the last two bytes to the first two positions. */
      buffer_[size_ - 2] = 0;
      buffer_[size_ - 1] = 0;
    }
    const size_t masked_pos = pos_ & mask_;
    // The length of the writes is limited so that we do not need to worry
    // about a write
    /* The length of the writes is limited so that we do not need to worry
       about a write */
    WriteTail(bytes, n);
    if (PREDICT_TRUE(masked_pos + n <= size_)) {
      // A single write fits.
      /* A single write fits. */
      memcpy(&buffer_[masked_pos], bytes, n);
    } else {
      // Split into two writes.
      // Copy into the end of the buffer, including the tail buffer.
      /* Split into two writes.
         Copy into the end of the buffer, including the tail buffer. */
      memcpy(&buffer_[masked_pos], bytes,
             std::min(n, total_size_ - masked_pos));
      // Copy into the beginning of the buffer
      /* Copy into the beginning of the buffer */
      memcpy(&buffer_[0], bytes + (size_ - masked_pos),
             n - (size_ - masked_pos));
    }
@ -142,4 +142,4 @@ class RingBuffer {

} // namespace brotli

#endif // BROTLI_ENC_RINGBUFFER_H_
#endif /* BROTLI_ENC_RINGBUFFER_H_ */
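
The mirrored tail is what lets match-finding code read a few bytes past the logical window end without wrap-around checks. An illustrative stand-in using a plain array; window_bits = 4 and tail_bits = 2 are hypothetical values chosen for the sketch, not defaults from this header:

static void MirrorTail(uint8_t* buffer /* (1 << 4) + (1 << 2) bytes */) {
  const size_t window = 1 << 4;  /* 16-byte logical window */
  const size_t tail = 1 << 2;    /* first 4 bytes mirrored after the window */
  for (size_t i = 0; i < tail; ++i) {
    /* Maintains buffer_[i] == buffer_[i + (1 << window_bits)]. */
    buffer[window + i] = buffer[i];
  }
}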

@ -17,8 +17,8 @@ namespace brotli {

inline uint32_t Hash(const uint8_t *data) {
  uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
  // The higher bits contain more mixture from the multiplication,
  // so we take our results from there.
  /* The higher bits contain more mixture from the multiplication,
     so we take our results from there. */
  return h >> (32 - kDictNumBits);
}
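
Keeping the top bits is the standard multiplicative-hashing trick: every input bit influences the high bits of the product, while the low bits are poorly mixed. A standalone sketch of the same recipe (TopBitsHash is an illustrative name; the constants mirror kDictHashMul32 and kDictNumBits above):

static uint32_t TopBitsHash(uint32_t key) {
  const uint32_t kMul = 0x1e35a7bd;     /* same value as kDictHashMul32 */
  const int kBits = 15;                 /* same value as kDictNumBits */
  return (key * kMul) >> (32 - kBits);  /* result in [0, 1 << 15) */
}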

@ -42,18 +42,18 @@ inline bool IsMatch(DictWord w, const uint8_t* data, size_t max_length) {
  const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;
  const uint8_t* dict = &kBrotliDictionary[offset];
  if (w.transform == 0) {
    // Match against base dictionary word.
    /* Match against base dictionary word. */
    return FindMatchLengthWithLimit(dict, data, w.len) == w.len;
  } else if (w.transform == 10) {
    // Match against uppercase first transform.
    // Note that there are only ASCII uppercase words in the lookup table.
    /* Match against uppercase first transform.
       Note that there are only ASCII uppercase words in the lookup table. */
    return (dict[0] >= 'a' && dict[0] <= 'z' &&
            (dict[0] ^ 32) == data[0] &&
            FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
            w.len - 1u);
  } else {
    // Match against uppercase all transform.
    // Note that there are only ASCII uppercase words in the lookup table.
    /* Match against uppercase all transform.
       Note that there are only ASCII uppercase words in the lookup table. */
    for (size_t i = 0; i < w.len; ++i) {
      if (dict[i] >= 'a' && dict[i] <= 'z') {
        if ((dict[i] ^ 32) != data[i]) return false;
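
The `^ 32` in both branches exploits the fact that ASCII upper- and lowercase letters differ only in bit 5. A one-line sketch of the comparison (the helper name is hypothetical):

static bool MatchesUppercased(uint8_t dict_c, uint8_t data_c) {
  /* 'a' ^ 32 == 'A', 'z' ^ 32 == 'Z'; valid only for ASCII letters. */
  return (dict_c ^ 32) == data_c;
}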

@ -82,12 +82,12 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
    const size_t id = w.idx;
    if (w.transform == 0) {
      const size_t matchlen = DictMatchLength(data, id, l, max_length);
      // Transform "" + kIdentity + ""
      /* Transform "" + kIdentity + "" */
      if (matchlen == l) {
        AddMatch(id, l, l, matches);
        found_match = true;
      }
      // Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing "
      /* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */
      if (matchlen >= l - 1) {
        AddMatch(id + 12 * n, l - 1, l, matches);
        if (l + 2 < max_length &&
@ -97,7 +97,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
        }
        found_match = true;
      }
      // Transform "" + kOmitLastN + "" (N = 2 .. 9)
      /* Transform "" + kOmitLastN + "" (N = 2 .. 9) */
      size_t minlen = min_length;
      if (l > 9) minlen = std::max(minlen, l - 9);
      size_t maxlen = std::min(matchlen, l - 2);
@ -109,7 +109,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
        continue;
      }
      const uint8_t* s = &data[l];
      // Transforms "" + kIdentity + <suffix>
      /* Transforms "" + kIdentity + <suffix> */
      if (s[0] == ' ') {
        AddMatch(id + n, l + 1, l, matches);
        if (s[1] == 'a') {
@ -127,7 +127,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
          } else if (s[1] == 'b') {
            if (s[2] == 'y' && s[3] == ' ') {
              AddMatch(id + 38 * n, l + 4, l, matches);
            }
          }
        } else if (s[1] == 'i') {
          if (s[2] == 'n') {
            if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
@ -235,7 +235,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
      } else if (s[0] == 'i') {
        if (s[1] == 'v') {
          if (s[2] == 'e' && s[3] == ' ') {
            AddMatch(id + 92 * n, l + 4, l, matches);
            AddMatch(id + 92 * n, l + 4, l, matches);
          }
        } else if (s[1] == 'z') {
          if (s[2] == 'e' && s[3] == ' ') {
@ -256,19 +256,19 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
        }
      }
    } else {
      // Set t=false for kUppercaseFirst and
      // t=true otherwise (kUppercaseAll) transform.
      /* Set is_all_caps=0 for kUppercaseFirst and
         is_all_caps=1 otherwise (kUppercaseAll) transform. */
      const bool t = w.transform != kUppercaseFirst;
      if (!IsMatch(w, data, max_length)) {
        continue;
      }
      // Transform "" + kUppercase{First,All} + ""
      /* Transform "" + kUppercase{First,All} + "" */
      AddMatch(id + (t ? 44 : 9) * n, l, l, matches);
      found_match = true;
      if (l + 1 >= max_length) {
        continue;
      }
      // Transforms "" + kUppercase{First,All} + <suffix>
      /* Transforms "" + kUppercase{First,All} + <suffix> */
      const uint8_t* s = &data[l];
      if (s[0] == ' ') {
        AddMatch(id + (t ? 68 : 4) * n, l + 1, l, matches);
@ -301,7 +301,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
      }
    }
  }
  // Transforms with prefixes " " and "."
  /* Transforms with prefixes " " and "." */
  if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
    bool is_space = (data[0] == ' ');
    key = Hash(&data[1]);
@ -317,13 +317,14 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
      if (!IsMatch(w, &data[1], max_length - 1)) {
        continue;
      }
      // Transforms " " + kIdentity + "" and "." + kIdentity + ""
      /* Transforms " " + kIdentity + "" and "." + kIdentity + "" */
      AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
      found_match = true;
      if (l + 2 >= max_length) {
        continue;
      }
      // Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
      /* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
       */
      const uint8_t* s = &data[l + 1];
      if (s[0] == ' ') {
        AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
@ -349,19 +350,19 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
        }
      }
    } else if (is_space) {
      // Set t=false for kUppercaseFirst and
      // t=true otherwise (kUppercaseAll) transform.
      /* Set is_all_caps=0 for kUppercaseFirst and
         is_all_caps=1 otherwise (kUppercaseAll) transform. */
      const bool t = w.transform != kUppercaseFirst;
      if (!IsMatch(w, &data[1], max_length - 1)) {
        continue;
      }
      // Transforms " " + kUppercase{First,All} + ""
      /* Transforms " " + kUppercase{First,All} + "" */
      AddMatch(id + (t ? 85 : 30) * n, l + 1, l, matches);
      found_match = true;
      if (l + 2 >= max_length) {
        continue;
      }
      // Transforms " " + kUppercase{First,All} + <suffix>
      /* Transforms " " + kUppercase{First,All} + <suffix> */
      const uint8_t* s = &data[l + 1];
      if (s[0] == ' ') {
        AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches);
@ -388,7 +389,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
      }
    }
  if (max_length >= 6) {
    // Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0"
    /* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */
    if ((data[1] == ' ' &&
         (data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
        (data[0] == 0xc2 && data[1] == 0xa0)) {
@ -415,7 +416,7 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
    }
  }
  if (max_length >= 9) {
    // Transforms with prefixes " the " and ".com/"
    /* Transforms with prefixes " the " and ".com/" */
    if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
         data[3] == 'e' && data[4] == ' ') ||
        (data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Class to model the static dictionary.
/* Class to model the static dictionary. */

#ifndef BROTLI_ENC_STATIC_DICT_H_
#define BROTLI_ENC_STATIC_DICT_H_
@ -16,12 +16,13 @@ namespace brotli {

static const size_t kMaxDictionaryMatchLen = 37;
static const uint32_t kInvalidMatch = 0xfffffff;

// Matches data against static dictionary words, and for each length l,
// for which a match is found, updates matches[l] to be the minimum possible
// (distance << 5) + len_code.
// Prerequisites:
//   matches array is at least kMaxDictionaryMatchLen + 1 long
//   all elements are initialized to kInvalidMatch
/* Matches data against static dictionary words, and for each length l,
   for which a match is found, updates matches[l] to be the minimum possible
   (distance << 5) + len_code.
   Returns 1 if matches have been found, otherwise 0.
   Prerequisites:
     matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
     all elements are initialized to kInvalidMatch */
bool FindAllStaticDictionaryMatches(const uint8_t* data,
                                    size_t min_length,
                                    size_t max_length,
@ -29,4 +30,4 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,

} // namespace brotli

#endif // BROTLI_ENC_STATIC_DICT_H_
#endif /* BROTLI_ENC_STATIC_DICT_H_ */
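
A hedged usage sketch of the declaration above. The final parameter is elided by the diff hunk and is assumed here to be uint32_t* matches; the caller must honor the stated prerequisites:

static void LookupSketch(const uint8_t* data, size_t available) {
  uint32_t matches[kMaxDictionaryMatchLen + 1];
  for (size_t i = 0; i <= kMaxDictionaryMatchLen; ++i) {
    matches[i] = kInvalidMatch;  /* required initial value */
  }
  if (FindAllStaticDictionaryMatches(data, 4 /* min_length */, available,
                                     matches)) {
    /* matches[l] now holds the minimum (distance << 5) + len_code for each
       matched length l. */
  }
}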

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Lookup table for static dictionary and transforms.
/* Lookup table for static dictionary and transforms. */

#ifndef BROTLI_ENC_DICTIONARY_LUT_H_
#define BROTLI_ENC_DICTIONARY_LUT_H_
@ -13,8 +13,8 @@

namespace brotli {

static const int kDictNumBits = 15
;static const uint32_t kDictHashMul32 = 0x1e35a7bd;
static const int kDictNumBits = 15;
static const uint32_t kDictHashMul32 = 0x1e35a7bd;

struct DictWord {
  uint8_t len;
@ -12052,4 +12052,4 @@ static const DictWord kStaticDictionaryWords[] = {

} // namespace brotli

#endif // BROTLI_ENC_DICTIONARY_LUT_H_
#endif /* BROTLI_ENC_STATIC_DICT_LUT_H_ */
@ -4,14 +4,14 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Convenience routines to make Brotli I/O classes from some memory containers
// and files.
/* Convenience routines to make Brotli I/O classes from some memory containers
   and files. */

#include "./streams.h"

#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <string.h> /* memcpy */

namespace brotli {

@ -26,7 +26,7 @@ void BrotliMemOut::Reset(void* buf, size_t len) {
  pos_ = 0;
}

// Brotli output routine: copy n bytes to the output buffer.
/* Brotli output routine: copy n bytes to the output buffer. */
bool BrotliMemOut::Write(const void *buf, size_t n) {
  if (n + pos_ > len_)
    return false;
@ -47,7 +47,7 @@ void BrotliStringOut::Reset(std::string* buf, size_t max_size) {
  max_size_ = max_size;
}

// Brotli output routine: add n bytes to a string.
/* Brotli output routine: add n bytes to a string. */
bool BrotliStringOut::Write(const void *buf, size_t n) {
  if (buf_->size() + n > max_size_)
    return false;
@ -66,7 +66,7 @@ void BrotliMemIn::Reset(const void* buf, size_t len) {
  pos_ = 0;
}

// Brotli input routine: read the next chunk of memory.
/* Brotli input routine: read the next chunk of memory. */
const void* BrotliMemIn::Read(size_t n, size_t* output) {
  if (pos_ == len_) {
    return NULL;
@ -111,4 +111,4 @@ bool BrotliFileOut::Write(const void* buf, size_t n) {
  return true;
}

} // namespace brotli
} /* namespace brotli */
@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Input and output classes for streaming brotli compression.
/* Input and output classes for streaming brotli compression. */

#ifndef BROTLI_ENC_STREAMS_H_
#define BROTLI_ENC_STREAMS_H_
@ -17,71 +17,71 @@

namespace brotli {

// Input interface for the compression routines.
/* Input interface for the compression routines. */
class BrotliIn {
 public:
  virtual ~BrotliIn(void) {}

  // Return a pointer to the next block of input of at most n bytes.
  // Return the actual length in *nread.
  // At end of data, return NULL. Don't return NULL if there is more data
  // to read, even if called with n == 0.
  // Read will only be called if some of its bytes are needed.
  /* Return a pointer to the next block of input of at most n bytes.
     Return the actual length in *nread.
     At end of data, return NULL. Don't return NULL if there is more data
     to read, even if called with n == 0.
     Read will only be called if some of its bytes are needed. */
  virtual const void* Read(size_t n, size_t* nread) = 0;
};

// Output interface for the compression routines.
/* Output interface for the compression routines. */
class BrotliOut {
 public:
  virtual ~BrotliOut(void) {}

  // Write n bytes of data from buf.
  // Return true if all written, false otherwise.
  /* Write n bytes of data from buf.
     Return true if all written, false otherwise. */
  virtual bool Write(const void *buf, size_t n) = 0;
};

// Adapter class to make BrotliIn objects from raw memory.
/* Adapter class to make BrotliIn objects from raw memory. */
class BrotliMemIn : public BrotliIn {
 public:
  BrotliMemIn(const void* buf, size_t len);

  void Reset(const void* buf, size_t len);

  // returns the amount of data consumed
  /* returns the amount of data consumed */
  size_t position(void) const { return pos_; }

  const void* Read(size_t n, size_t* OUTPUT);

 private:
  const void* buf_;  // start of input buffer
  size_t len_;  // length of input
  size_t pos_;  // current read position within input
  const void* buf_;  /* start of input buffer */
  size_t len_;  /* length of input */
  size_t pos_;  /* current read position within input */
};

// Adapter class to make BrotliOut objects from raw memory.
/* Adapter class to make BrotliOut objects from raw memory. */
class BrotliMemOut : public BrotliOut {
 public:
  BrotliMemOut(void* buf, size_t len);

  void Reset(void* buf, size_t len);

  // returns the amount of data written
  /* returns the amount of data written */
  size_t position(void) const { return pos_; }

  bool Write(const void* buf, size_t n);

 private:
  void* buf_;  // start of output buffer
  size_t len_;  // length of output
  size_t pos_;  // current write position within output
  void* buf_;  /* start of output buffer */
  size_t len_;  /* length of output */
  size_t pos_;  /* current write position within output */
};

// Adapter class to make BrotliOut objects from a string.
/* Adapter class to make BrotliOut objects from a string. */
class BrotliStringOut : public BrotliOut {
 public:
  // Create a writer that appends its data to buf.
  // buf->size() will grow to at most max_size
  // buf is expected to be empty when constructing BrotliStringOut.
  /* Create a writer that appends its data to buf.
     buf->size() will grow to at most max_size
     buf is expected to be empty when constructing BrotliStringOut. */
  BrotliStringOut(std::string* buf, size_t max_size);

  void Reset(std::string* buf, size_t max_len);
@ -89,11 +89,11 @@ class BrotliStringOut : public BrotliOut {
  bool Write(const void* buf, size_t n);

 private:
  std::string* buf_;  // start of output buffer
  size_t max_size_;  // max length of output
  std::string* buf_;  /* start of output buffer */
  size_t max_size_;  /* max length of output */
};

// Adapter class to make BrotliIn object from a file.
/* Adapter class to make BrotliIn object from a file. */
class BrotliFileIn : public BrotliIn {
 public:
  BrotliFileIn(FILE* f, size_t max_read_size);
@ -107,7 +107,7 @@ class BrotliFileIn : public BrotliIn {
  size_t buf_size_;
};

// Adapter class to make BrotliOut object from a file.
/* Adapter class to make BrotliOut object from a file. */
class BrotliFileOut : public BrotliOut {
 public:
  explicit BrotliFileOut(FILE* f);
@ -117,6 +117,6 @@ class BrotliFileOut : public BrotliOut {
  FILE* f_;
};

} // namespace brotli
} /* namespace brotli */

#endif // BROTLI_ENC_STREAMS_H_
#endif /* BROTLI_ENC_STREAMS_H_ */
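
A short usage sketch of the memory adapters declared above; the buffer contents and sizes are illustrative only:

static void AdapterSketch(void) {
  const char src[] = "example input";
  char dst[64];
  BrotliMemIn in(src, sizeof(src));
  BrotliMemOut out(dst, sizeof(dst));
  size_t nread = 0;
  const void* chunk = in.Read(8, &nread);  /* at most 8 bytes; NULL at EOF */
  if (chunk != NULL) {
    out.Write(chunk, nread);               /* false once dst would overflow */
  }
}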

@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Heuristics for deciding about the UTF8-ness of strings.
/* Heuristics for deciding about the UTF8-ness of strings. */

#include "./utf8_util.h"

@ -15,14 +15,14 @@ namespace brotli {
namespace {

size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
  // ASCII
  /* ASCII */
  if ((input[0] & 0x80) == 0) {
    *symbol = input[0];
    if (*symbol > 0) {
      return 1;
    }
  }
  // 2-byte UTF8
  /* 2-byte UTF8 */
  if (size > 1u &&
      (input[0] & 0xe0) == 0xc0 &&
      (input[1] & 0xc0) == 0x80) {
@ -32,7 +32,7 @@ size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
      return 2;
    }
  }
  // 3-byte UTF8
  /* 3-byte UTF8 */
  if (size > 2u &&
      (input[0] & 0xf0) == 0xe0 &&
      (input[1] & 0xc0) == 0x80 &&
@ -44,7 +44,7 @@ size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
      return 3;
    }
  }
  // 4-byte UTF8
  /* 4-byte UTF8 */
  if (size > 3u &&
      (input[0] & 0xf8) == 0xf0 &&
      (input[1] & 0xc0) == 0x80 &&
@ -58,14 +58,14 @@ size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
      return 4;
    }
  }
  // Not UTF8, emit a special symbol above the UTF8-code space
  /* Not UTF8, emit a special symbol above the UTF8-code space */
  *symbol = 0x110000 | input[0];
  return 1;
}

} // namespace

// Returns true if at least min_fraction of the data is UTF8-encoded.
/* Returns 1 if at least min_fraction of the data is UTF8-encoded. */
bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
                  const size_t length, const double min_fraction) {
  size_t size_utf8 = 0;
@ -4,7 +4,7 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Heuristics for deciding about the UTF8-ness of strings.
/* Heuristics for deciding about the UTF8-ness of strings. */

#ifndef BROTLI_ENC_UTF8_UTIL_H_
#define BROTLI_ENC_UTF8_UTIL_H_
@ -15,11 +15,12 @@ namespace brotli {

static const double kMinUTF8Ratio = 0.75;

// Returns true if at least min_fraction of the bytes between pos and
// pos + length in the (data, mask) ringbuffer is UTF8-encoded.
/* Returns 1 if at least min_fraction of the bytes between pos and
   pos + length in the (data, mask) ringbuffer is UTF8-encoded, otherwise
   returns 0. */
bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
                  const size_t length, const double min_fraction);

} // namespace brotli

#endif // BROTLI_ENC_UTF8_UTIL_H_
#endif /* BROTLI_ENC_UTF8_UTIL_H_ */
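
For a flat, non-wrapping buffer the ringbuffer mask can simply be all ones, which makes (pos & mask) the identity. A usage sketch with the default threshold (LooksLikeText is a hypothetical wrapper, not part of the header):

static bool LooksLikeText(const uint8_t* data, size_t length) {
  const size_t mask = static_cast<size_t>(-1);  /* no wrap-around */
  return IsMostlyUTF8(data, 0, mask, length, kMinUTF8Ratio);
}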

@ -4,36 +4,36 @@
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

// Write bits into a byte array.
/* Write bits into a byte array. */

#ifndef BROTLI_ENC_WRITE_BITS_H_
#define BROTLI_ENC_WRITE_BITS_H_

#include <assert.h>
#include <stdio.h>
#include <stdio.h> /* printf */

#include "../common/types.h"
#include "./port.h"

namespace brotli {

//#define BIT_WRITER_DEBUG
/*#define BIT_WRITER_DEBUG */

// This function writes bits into bytes in increasing addresses, and within
// a byte least-significant-bit first.
//
// The function can write up to 56 bits in one go with WriteBits
// Example: let's assume that 3 bits (Rs below) have been written already:
//
// BYTE-0     BYTE+1    BYTE+2
//
// 0000 0RRR  0000 0000 0000 0000
//
// Now, we could write 5 or less bits in MSB by just shifting by 3
// and OR'ing to BYTE-0.
//
// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
// and locate the rest in BYTE+1, BYTE+2, etc.
/* This function writes bits into bytes in increasing addresses, and within
   a byte least-significant-bit first.

   The function can write up to 56 bits in one go with WriteBits
   Example: let's assume that 3 bits (Rs below) have been written already:

   BYTE-0     BYTE+1    BYTE+2

   0000 0RRR  0000 0000 0000 0000

   Now, we could write 5 or less bits in MSB by just shifting by 3
   and OR'ing to BYTE-0.

   For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
   and locate the rest in BYTE+1, BYTE+2, etc. */
inline void WriteBits(size_t n_bits,
                      uint64_t bits,
                      size_t * __restrict pos,
@ -44,18 +44,18 @@ inline void WriteBits(size_t n_bits,
  assert((bits >> n_bits) == 0);
  assert(n_bits <= 56);
#ifdef IS_LITTLE_ENDIAN
  // This branch of the code can write up to 56 bits at a time,
  // 7 bits are lost by being perhaps already in *p and at least
  // 1 bit is needed to initialize the bit-stream ahead (i.e. if 7
  // bits are in *p and we write 57 bits, then the next write will
  // access a byte that was never initialized).
  /* This branch of the code can write up to 56 bits at a time,
     7 bits are lost by being perhaps already in *p and at least
     1 bit is needed to initialize the bit-stream ahead (i.e. if 7
     bits are in *p and we write 57 bits, then the next write will
     access a byte that was never initialized). */
  uint8_t *p = &array[*pos >> 3];
  uint64_t v = *p;
  v |= bits << (*pos & 7);
  BROTLI_UNALIGNED_STORE64(p, v);  // Set some bits.
  BROTLI_UNALIGNED_STORE64(p, v);  /* Set some bits. */
  *pos += n_bits;
#else
  // implicit & 0xff is assumed for uint8_t arithmetics
  /* implicit & 0xff is assumed for uint8_t arithmetics */
  uint8_t *array_pos = &array[*pos >> 3];
  const size_t bits_reserved_in_first_byte = (*pos & 7);
  bits <<= bits_reserved_in_first_byte;
@ -81,4 +81,4 @@ inline void WriteBitsPrepareStorage(size_t pos, uint8_t *array) {

} // namespace brotli

#endif // BROTLI_ENC_WRITE_BITS_H_
#endif /* BROTLI_ENC_WRITE_BITS_H_ */
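
A worked example of the scheme described in the header comment: a 3-bit value followed by a 5-bit value fills the first byte LSB-first. The storage must be zeroed ahead of the write cursor, since WriteBits ORs into existing bytes; the 16-byte size just leaves slack for the 64-bit store on the little-endian path (WriteBitsSketch is an illustrative function, not part of the header):

static void WriteBitsSketch(void) {
  uint8_t storage[16] = { 0 };
  size_t pos = 0;
  WriteBits(3, 0x5, &pos, storage);   /* storage[0] == 0000 0101 (0x05) */
  WriteBits(5, 0x1f, &pos, storage);  /* storage[0] == 1111 1101 (0xfd) */
  /* pos == 8: the next write starts on a fresh byte. */
}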