mirror of
https://github.com/google/brotli.git
synced 2024-11-21 19:20:09 +00:00
Convert encoder to plain C.
This commit is contained in:
parent
63111b21e8
commit
b972c67780
@ -81,7 +81,7 @@
|
||||
#endif
|
||||
|
||||
#if defined(BROTLI_DEBUG) || defined(BROTLI_ENABLE_LOG)
|
||||
static inline void BrotliDump(const char* f, int l, const char* fn) {
|
||||
static BROTLI_INLINE void BrotliDump(const char* f, int l, const char* fn) {
|
||||
fprintf(stderr, "%s:%d (%s)\n", f, l, fn);
|
||||
fflush(stderr);
|
||||
}
|
||||
|
@ -2,10 +2,10 @@
|
||||
|
||||
include ../shared.mk
|
||||
|
||||
OBJS = backward_references.o block_splitter.o brotli_bit_stream.o \
|
||||
compress_fragment.o compress_fragment_two_pass.o encode.o \
|
||||
encode_parallel.o entropy_encode.o histogram.o literal_cost.o \
|
||||
metablock.o static_dict.o streams.o utf8_util.o
|
||||
OBJS = backward_references.o bit_cost.o block_splitter.o brotli_bit_stream.o \
|
||||
cluster.o compress_fragment.o compress_fragment_two_pass.o compressor.o \
|
||||
encode.o encode_parallel.o entropy_encode.o histogram.o literal_cost.o \
|
||||
memory.o metablock.o static_dict.o streams.o utf8_util.o
|
||||
all : $(OBJS)
|
||||
|
||||
clean :
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -9,63 +9,37 @@
|
||||
#ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
|
||||
#define BROTLI_ENC_BACKWARD_REFERENCES_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./command.h"
|
||||
#include "./hash.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* "commands" points to the next output command to write to, "*num_commands" is
|
||||
initially the total amount of commands output by previous
|
||||
CreateBackwardReferences calls, and must be incremented by the amount written
|
||||
by this call. */
|
||||
void CreateBackwardReferences(size_t num_bytes,
|
||||
size_t position,
|
||||
bool is_last,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const int quality,
|
||||
const int lgwin,
|
||||
Hashers* hashers,
|
||||
int hash_type,
|
||||
int* dist_cache,
|
||||
size_t* last_insert_len,
|
||||
Command* commands,
|
||||
size_t* num_commands,
|
||||
size_t* num_literals);
|
||||
|
||||
static const float kInfinity = std::numeric_limits<float>::infinity();
|
||||
|
||||
struct ZopfliNode {
|
||||
ZopfliNode(void) : length(1),
|
||||
distance(0),
|
||||
insert_length(0),
|
||||
cost(kInfinity) {}
|
||||
|
||||
inline uint32_t copy_length() const {
|
||||
return length & 0xffffff;
|
||||
}
|
||||
|
||||
inline uint32_t length_code() const {
|
||||
const uint32_t modifier = length >> 24;
|
||||
return copy_length() + 9u - modifier;
|
||||
}
|
||||
|
||||
inline uint32_t copy_distance() const {
|
||||
return distance & 0x1ffffff;
|
||||
}
|
||||
|
||||
inline uint32_t distance_code() const {
|
||||
const uint32_t short_code = distance >> 25;
|
||||
return short_code == 0 ? copy_distance() + 15 : short_code - 1;
|
||||
}
|
||||
|
||||
inline uint32_t command_length() const {
|
||||
return copy_length() + insert_length;
|
||||
}
|
||||
BROTLI_INTERNAL void BrotliCreateBackwardReferences(MemoryManager* m,
|
||||
size_t num_bytes,
|
||||
size_t position,
|
||||
int is_last,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const int quality,
|
||||
const int lgwin,
|
||||
Hashers* hashers,
|
||||
int hash_type,
|
||||
int* dist_cache,
|
||||
size_t* last_insert_len,
|
||||
Command* commands,
|
||||
size_t* num_commands,
|
||||
size_t* num_literals);
|
||||
|
||||
typedef struct ZopfliNode {
|
||||
/* best length to get up to this byte (not including this byte itself)
|
||||
highest 8 bit is used to reconstruct the length code */
|
||||
uint32_t length;
|
||||
@ -75,9 +49,21 @@ struct ZopfliNode {
|
||||
uint32_t distance;
|
||||
/* number of literal inserts before this copy */
|
||||
uint32_t insert_length;
|
||||
|
||||
/* This union holds information used by dynamic-programming. During forward
|
||||
pass |cost| is used to store the goal function. On path backtracing pass
|
||||
|next| is assigned the offset to next node on the path. As |cost| is not
|
||||
used after the forward pass, it shares the memory with |next|. */
|
||||
union {
|
||||
/* Smallest cost to get to this byte from the beginning, as found so far. */
|
||||
float cost;
|
||||
};
|
||||
float cost;
|
||||
/* Offset to the next node on the path. Equal to command_length() of the
|
||||
next node on the path. For the last node it equals BROTLI_UINT32_MAX. */
|
||||
uint32_t next;
|
||||
} u;
|
||||
} ZopfliNode;
|
||||
|
||||
BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
|
||||
|
||||
/* Computes the shortest path of commands from position to at most
|
||||
position + num_bytes.
|
||||
@ -92,26 +78,28 @@ struct ZopfliNode {
|
||||
(1) nodes[i].copy_length() >= 2
|
||||
(2) nodes[i].command_length() <= i and
|
||||
(3) nodes[i - nodes[i].command_length()].cost < kInfinity */
|
||||
void ZopfliComputeShortestPath(size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const size_t max_backward_limit,
|
||||
const int* dist_cache,
|
||||
Hashers::H10* hasher,
|
||||
ZopfliNode* nodes,
|
||||
std::vector<uint32_t>* path);
|
||||
BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(
|
||||
MemoryManager* m, size_t num_bytes, size_t position,
|
||||
const uint8_t* ringbuffer, size_t ringbuffer_mask, const int quality,
|
||||
const size_t max_backward_limit, const int* dist_cache, H10* hasher,
|
||||
ZopfliNode* nodes);
|
||||
|
||||
void ZopfliCreateCommands(const size_t num_bytes,
|
||||
const size_t block_start,
|
||||
const size_t max_backward_limit,
|
||||
const std::vector<uint32_t>& path,
|
||||
const ZopfliNode* nodes,
|
||||
int* dist_cache,
|
||||
size_t* last_insert_len,
|
||||
Command* commands,
|
||||
size_t* num_literals);
|
||||
BROTLI_INTERNAL void BrotliZopfliCreateCommands(const size_t num_bytes,
|
||||
const size_t block_start,
|
||||
const size_t max_backward_limit,
|
||||
const ZopfliNode* nodes,
|
||||
int* dist_cache,
|
||||
size_t* last_insert_len,
|
||||
Command* commands,
|
||||
size_t* num_literals);
|
||||
|
||||
} // namespace brotli
|
||||
/* Maximum distance, see section 9.1. of the spec. */
|
||||
static BROTLI_INLINE size_t MaxBackwardLimit(int lgwin) {
|
||||
return (1u << lgwin) - 16;
|
||||
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_BACKWARD_REFERENCES_H_ */
|
||||
|
156
enc/backward_references_inc.h
Normal file
156
enc/backward_references_inc.h
Normal file
@ -0,0 +1,156 @@
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: FN */
|
||||
|
||||
#define Hasher HASHER()
|
||||
|
||||
static void FN(CreateBackwardReferences)(MemoryManager* m,
|
||||
size_t num_bytes,
|
||||
size_t position,
|
||||
int is_last,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const int quality,
|
||||
const int lgwin,
|
||||
Hasher* hasher,
|
||||
int* dist_cache,
|
||||
size_t* last_insert_len,
|
||||
Command* commands,
|
||||
size_t* num_commands,
|
||||
size_t* num_literals) {
|
||||
/* Set maximum distance, see section 9.1. of the spec. */
|
||||
const size_t max_backward_limit = MaxBackwardLimit(lgwin);
|
||||
|
||||
const Command * const orig_commands = commands;
|
||||
size_t insert_length = *last_insert_len;
|
||||
const size_t pos_end = position + num_bytes;
|
||||
const size_t store_end = num_bytes >= FN(StoreLookahead)() ?
|
||||
position + num_bytes - FN(StoreLookahead)() + 1 : position;
|
||||
|
||||
/* For speed up heuristics for random data. */
|
||||
const size_t random_heuristics_window_size = quality < 9 ? 64 : 512;
|
||||
size_t apply_random_heuristics = position + random_heuristics_window_size;
|
||||
|
||||
/* Minimum score to accept a backward reference. */
|
||||
const double kMinScore = 4.0;
|
||||
|
||||
FN(Init)(m, hasher, ringbuffer, lgwin, position, num_bytes, is_last);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
FN(StitchToPreviousBlock)(hasher, num_bytes, position,
|
||||
ringbuffer, ringbuffer_mask);
|
||||
|
||||
while (position + FN(HashTypeLength)() < pos_end) {
|
||||
size_t max_length = pos_end - position;
|
||||
size_t max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
|
||||
size_t best_len = 0;
|
||||
size_t best_len_code = 0;
|
||||
size_t best_dist = 0;
|
||||
double best_score = kMinScore;
|
||||
int is_match_found = FN(FindLongestMatch)(hasher, ringbuffer,
|
||||
ringbuffer_mask, dist_cache, position, max_length, max_distance,
|
||||
&best_len, &best_len_code, &best_dist, &best_score);
|
||||
if (is_match_found) {
|
||||
/* Found a match. Let's look for something even better ahead. */
|
||||
int delayed_backward_references_in_row = 0;
|
||||
--max_length;
|
||||
for (;; --max_length) {
|
||||
size_t best_len_2 =
|
||||
quality < 5 ? BROTLI_MIN(size_t, best_len - 1, max_length) : 0;
|
||||
size_t best_len_code_2 = 0;
|
||||
size_t best_dist_2 = 0;
|
||||
double best_score_2 = kMinScore;
|
||||
const double cost_diff_lazy = 7.0;
|
||||
max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
|
||||
is_match_found = FN(FindLongestMatch)(hasher, ringbuffer,
|
||||
ringbuffer_mask, dist_cache, position + 1, max_length, max_distance,
|
||||
&best_len_2, &best_len_code_2, &best_dist_2, &best_score_2);
|
||||
if (is_match_found && best_score_2 >= best_score + cost_diff_lazy) {
|
||||
/* Ok, let's just write one byte for now and start a match from the
|
||||
next byte. */
|
||||
++position;
|
||||
++insert_length;
|
||||
best_len = best_len_2;
|
||||
best_len_code = best_len_code_2;
|
||||
best_dist = best_dist_2;
|
||||
best_score = best_score_2;
|
||||
if (++delayed_backward_references_in_row < 4 &&
|
||||
position + FN(HashTypeLength)() < pos_end) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
apply_random_heuristics =
|
||||
position + 2 * best_len + random_heuristics_window_size;
|
||||
max_distance = BROTLI_MIN(size_t, position, max_backward_limit);
|
||||
{
|
||||
/* The first 16 codes are special shortcodes,
|
||||
and the minimum offset is 1. */
|
||||
size_t distance_code =
|
||||
ComputeDistanceCode(best_dist, max_distance, quality, dist_cache);
|
||||
if (best_dist <= max_distance && distance_code > 0) {
|
||||
dist_cache[3] = dist_cache[2];
|
||||
dist_cache[2] = dist_cache[1];
|
||||
dist_cache[1] = dist_cache[0];
|
||||
dist_cache[0] = (int)best_dist;
|
||||
}
|
||||
InitCommand(
|
||||
commands++, insert_length, best_len, best_len_code, distance_code);
|
||||
}
|
||||
*num_literals += insert_length;
|
||||
insert_length = 0;
|
||||
/* Put the hash keys into the table, if there are enough bytes left.
|
||||
Depending on the hasher implementation, it can push all positions
|
||||
in the given range or only a subset of them. */
|
||||
FN(StoreRange)(hasher, ringbuffer, ringbuffer_mask, position + 2,
|
||||
BROTLI_MIN(size_t, position + best_len, store_end));
|
||||
position += best_len;
|
||||
} else {
|
||||
++insert_length;
|
||||
++position;
|
||||
/* If we have not seen matches for a long time, we can skip some
|
||||
match lookups. Unsuccessful match lookups are very very expensive
|
||||
and this kind of a heuristic speeds up compression quite
|
||||
a lot. */
|
||||
if (position > apply_random_heuristics) {
|
||||
/* Going through uncompressible data, jump. */
|
||||
if (position >
|
||||
apply_random_heuristics + 4 * random_heuristics_window_size) {
|
||||
/* It is quite a long time since we saw a copy, so we assume
|
||||
that this data is not compressible, and store hashes less
|
||||
often. Hashes of non compressible data are less likely to
|
||||
turn out to be useful in the future, too, so we store less of
|
||||
them to not to flood out the hash table of good compressible
|
||||
data. */
|
||||
const size_t kMargin =
|
||||
BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 4);
|
||||
size_t pos_jump =
|
||||
BROTLI_MIN(size_t, position + 16, pos_end - kMargin);
|
||||
for (; position < pos_jump; position += 4) {
|
||||
FN(Store)(hasher, ringbuffer, ringbuffer_mask, position);
|
||||
insert_length += 4;
|
||||
}
|
||||
} else {
|
||||
const size_t kMargin =
|
||||
BROTLI_MAX(size_t, FN(StoreLookahead)() - 1, 2);
|
||||
size_t pos_jump =
|
||||
BROTLI_MIN(size_t, position + 8, pos_end - kMargin);
|
||||
for (; position < pos_jump; position += 2) {
|
||||
FN(Store)(hasher, ringbuffer, ringbuffer_mask, position);
|
||||
insert_length += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
insert_length += pos_end - position;
|
||||
*last_insert_len = insert_length;
|
||||
*num_commands += (size_t)(commands - orig_commands);
|
||||
}
|
||||
|
||||
#undef Hasher
|
35
enc/bit_cost.c
Normal file
35
enc/bit_cost.c
Normal file
@ -0,0 +1,35 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Functions to estimate the bit cost of Huffman trees. */
|
||||
|
||||
#include "./bit_cost.h"
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include "../common/types.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./histogram.h"
|
||||
#include "./port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define FN(X) X ## Literal
|
||||
#include "./bit_cost_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Command
|
||||
#include "./bit_cost_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Distance
|
||||
#include "./bit_cost_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
136
enc/bit_cost.h
136
enc/bit_cost.h
@ -10,13 +10,16 @@
|
||||
#define BROTLI_ENC_BIT_COST_H_
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./entropy_encode.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./histogram.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static inline double ShannonEntropy(const uint32_t *population, size_t size,
|
||||
size_t *total) {
|
||||
static BROTLI_INLINE double ShannonEntropy(const uint32_t *population,
|
||||
size_t size, size_t *total) {
|
||||
size_t sum = 0;
|
||||
double retval = 0;
|
||||
const uint32_t *population_end = population + size;
|
||||
@ -27,135 +30,34 @@ static inline double ShannonEntropy(const uint32_t *population, size_t size,
|
||||
while (population < population_end) {
|
||||
p = *population++;
|
||||
sum += p;
|
||||
retval -= static_cast<double>(p) * FastLog2(p);
|
||||
retval -= (double)p * FastLog2(p);
|
||||
odd_number_of_elements_left:
|
||||
p = *population++;
|
||||
sum += p;
|
||||
retval -= static_cast<double>(p) * FastLog2(p);
|
||||
retval -= (double)p * FastLog2(p);
|
||||
}
|
||||
if (sum) retval += static_cast<double>(sum) * FastLog2(sum);
|
||||
if (sum) retval += (double)sum * FastLog2(sum);
|
||||
*total = sum;
|
||||
return retval;
|
||||
}
|
||||
|
||||
static inline double BitsEntropy(const uint32_t *population, size_t size) {
|
||||
static BROTLI_INLINE double BitsEntropy(
|
||||
const uint32_t *population, size_t size) {
|
||||
size_t sum;
|
||||
double retval = ShannonEntropy(population, size, &sum);
|
||||
if (retval < sum) {
|
||||
/* At least one bit per literal is needed. */
|
||||
retval = static_cast<double>(sum);
|
||||
retval = (double)sum;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
template<int kSize>
|
||||
double PopulationCost(const Histogram<kSize>& histogram) {
|
||||
static const double kOneSymbolHistogramCost = 12;
|
||||
static const double kTwoSymbolHistogramCost = 20;
|
||||
static const double kThreeSymbolHistogramCost = 28;
|
||||
static const double kFourSymbolHistogramCost = 37;
|
||||
if (histogram.total_count_ == 0) {
|
||||
return kOneSymbolHistogramCost;
|
||||
}
|
||||
int count = 0;
|
||||
int s[5];
|
||||
for (int i = 0; i < kSize; ++i) {
|
||||
if (histogram.data_[i] > 0) {
|
||||
s[count] = i;
|
||||
++count;
|
||||
if (count > 4) break;
|
||||
}
|
||||
}
|
||||
if (count == 1) {
|
||||
return kOneSymbolHistogramCost;
|
||||
}
|
||||
if (count == 2) {
|
||||
return (kTwoSymbolHistogramCost +
|
||||
static_cast<double>(histogram.total_count_));
|
||||
}
|
||||
if (count == 3) {
|
||||
const uint32_t histo0 = histogram.data_[s[0]];
|
||||
const uint32_t histo1 = histogram.data_[s[1]];
|
||||
const uint32_t histo2 = histogram.data_[s[2]];
|
||||
const uint32_t histomax = std::max(histo0, std::max(histo1, histo2));
|
||||
return (kThreeSymbolHistogramCost +
|
||||
2 * (histo0 + histo1 + histo2) - histomax);
|
||||
}
|
||||
if (count == 4) {
|
||||
uint32_t histo[4];
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
histo[i] = histogram.data_[s[i]];
|
||||
}
|
||||
// Sort
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
for (int j = i + 1; j < 4; ++j) {
|
||||
if (histo[j] > histo[i]) {
|
||||
std::swap(histo[j], histo[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
const uint32_t h23 = histo[2] + histo[3];
|
||||
const uint32_t histomax = std::max(h23, histo[0]);
|
||||
return (kFourSymbolHistogramCost +
|
||||
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
|
||||
}
|
||||
BROTLI_INTERNAL double BrotliPopulationCostLiteral(const HistogramLiteral*);
|
||||
BROTLI_INTERNAL double BrotliPopulationCostCommand(const HistogramCommand*);
|
||||
BROTLI_INTERNAL double BrotliPopulationCostDistance(const HistogramDistance*);
|
||||
|
||||
// In this loop we compute the entropy of the histogram and simultaneously
|
||||
// build a simplified histogram of the code length codes where we use the
|
||||
// zero repeat code 17, but we don't use the non-zero repeat code 16.
|
||||
double bits = 0;
|
||||
size_t max_depth = 1;
|
||||
uint32_t depth_histo[kCodeLengthCodes] = { 0 };
|
||||
const double log2total = FastLog2(histogram.total_count_);
|
||||
for (size_t i = 0; i < kSize;) {
|
||||
if (histogram.data_[i] > 0) {
|
||||
// Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
|
||||
// = log2(total_count) - log2(count(symbol))
|
||||
double log2p = log2total - FastLog2(histogram.data_[i]);
|
||||
// Approximate the bit depth by round(-log2(P(symbol)))
|
||||
size_t depth = static_cast<size_t>(log2p + 0.5);
|
||||
bits += histogram.data_[i] * log2p;
|
||||
if (depth > 15) {
|
||||
depth = 15;
|
||||
}
|
||||
if (depth > max_depth) {
|
||||
max_depth = depth;
|
||||
}
|
||||
++depth_histo[depth];
|
||||
++i;
|
||||
} else {
|
||||
// Compute the run length of zeros and add the appropriate number of 0 and
|
||||
// 17 code length codes to the code length code histogram.
|
||||
uint32_t reps = 1;
|
||||
for (size_t k = i + 1; k < kSize && histogram.data_[k] == 0; ++k) {
|
||||
++reps;
|
||||
}
|
||||
i += reps;
|
||||
if (i == kSize) {
|
||||
// Don't add any cost for the last zero run, since these are encoded
|
||||
// only implicitly.
|
||||
break;
|
||||
}
|
||||
if (reps < 3) {
|
||||
depth_histo[0] += reps;
|
||||
} else {
|
||||
reps -= 2;
|
||||
while (reps > 0) {
|
||||
++depth_histo[17];
|
||||
// Add the 3 extra bits for the 17 code length code.
|
||||
bits += 3;
|
||||
reps >>= 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Add the estimated encoding cost of the code length code histogram.
|
||||
bits += static_cast<double>(18 + 2 * max_depth);
|
||||
// Add the entropy of the code length code histogram.
|
||||
bits += BitsEntropy(depth_histo, kCodeLengthCodes);
|
||||
return bits;
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_BIT_COST_H_ */
|
||||
|
127
enc/bit_cost_inc.h
Normal file
127
enc/bit_cost_inc.h
Normal file
@ -0,0 +1,127 @@
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: FN */
|
||||
|
||||
#define HistogramType FN(Histogram)
|
||||
|
||||
double FN(BrotliPopulationCost)(const HistogramType* histogram) {
|
||||
static const double kOneSymbolHistogramCost = 12;
|
||||
static const double kTwoSymbolHistogramCost = 20;
|
||||
static const double kThreeSymbolHistogramCost = 28;
|
||||
static const double kFourSymbolHistogramCost = 37;
|
||||
const size_t data_size = FN(HistogramDataSize)();
|
||||
int count = 0;
|
||||
size_t s[5];
|
||||
double bits = 0.0;
|
||||
size_t i;
|
||||
if (histogram->total_count_ == 0) {
|
||||
return kOneSymbolHistogramCost;
|
||||
}
|
||||
for (i = 0; i < data_size; ++i) {
|
||||
if (histogram->data_[i] > 0) {
|
||||
s[count] = i;
|
||||
++count;
|
||||
if (count > 4) break;
|
||||
}
|
||||
}
|
||||
if (count == 1) {
|
||||
return kOneSymbolHistogramCost;
|
||||
}
|
||||
if (count == 2) {
|
||||
return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
|
||||
}
|
||||
if (count == 3) {
|
||||
const uint32_t histo0 = histogram->data_[s[0]];
|
||||
const uint32_t histo1 = histogram->data_[s[1]];
|
||||
const uint32_t histo2 = histogram->data_[s[2]];
|
||||
const uint32_t histomax =
|
||||
BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2));
|
||||
return (kThreeSymbolHistogramCost +
|
||||
2 * (histo0 + histo1 + histo2) - histomax);
|
||||
}
|
||||
if (count == 4) {
|
||||
uint32_t histo[4];
|
||||
uint32_t h23;
|
||||
uint32_t histomax;
|
||||
for (i = 0; i < 4; ++i) {
|
||||
histo[i] = histogram->data_[s[i]];
|
||||
}
|
||||
/* Sort */
|
||||
for (i = 0; i < 4; ++i) {
|
||||
size_t j;
|
||||
for (j = i + 1; j < 4; ++j) {
|
||||
if (histo[j] > histo[i]) {
|
||||
BROTLI_SWAP(uint32_t, histo, j, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
h23 = histo[2] + histo[3];
|
||||
histomax = BROTLI_MAX(uint32_t, h23, histo[0]);
|
||||
return (kFourSymbolHistogramCost +
|
||||
3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
|
||||
}
|
||||
|
||||
{
|
||||
/* In this loop we compute the entropy of the histogram and simultaneously
|
||||
build a simplified histogram of the code length codes where we use the
|
||||
zero repeat code 17, but we don't use the non-zero repeat code 16. */
|
||||
size_t max_depth = 1;
|
||||
uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
|
||||
const double log2total = FastLog2(histogram->total_count_);
|
||||
for (i = 0; i < data_size;) {
|
||||
if (histogram->data_[i] > 0) {
|
||||
/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
|
||||
= log2(total_count) - log2(count(symbol)) */
|
||||
double log2p = log2total - FastLog2(histogram->data_[i]);
|
||||
/* Approximate the bit depth by round(-log2(P(symbol))) */
|
||||
size_t depth = (size_t)(log2p + 0.5);
|
||||
bits += histogram->data_[i] * log2p;
|
||||
if (depth > 15) {
|
||||
depth = 15;
|
||||
}
|
||||
if (depth > max_depth) {
|
||||
max_depth = depth;
|
||||
}
|
||||
++depth_histo[depth];
|
||||
++i;
|
||||
} else {
|
||||
/* Compute the run length of zeros and add the appropriate number of 0
|
||||
and 17 code length codes to the code length code histogram. */
|
||||
uint32_t reps = 1;
|
||||
size_t k;
|
||||
for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
|
||||
++reps;
|
||||
}
|
||||
i += reps;
|
||||
if (i == data_size) {
|
||||
/* Don't add any cost for the last zero run, since these are encoded
|
||||
only implicitly. */
|
||||
break;
|
||||
}
|
||||
if (reps < 3) {
|
||||
depth_histo[0] += reps;
|
||||
} else {
|
||||
reps -= 2;
|
||||
while (reps > 0) {
|
||||
++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
|
||||
/* Add the 3 extra bits for the 17 code length code. */
|
||||
bits += 3;
|
||||
reps >>= 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Add the estimated encoding cost of the code length code histogram. */
|
||||
bits += (double)(18 + 2 * max_depth);
|
||||
/* Add the entropy of the code length code histogram. */
|
||||
bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
|
||||
}
|
||||
return bits;
|
||||
}
|
||||
|
||||
#undef HistogramType
|
33
enc/block_encoder_inc.h
Normal file
33
enc/block_encoder_inc.h
Normal file
@ -0,0 +1,33 @@
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2014 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: FN */
|
||||
|
||||
#define HistogramType FN(Histogram)
|
||||
|
||||
/* Creates entropy codes for all block types and stores them to the bit
|
||||
stream. */
|
||||
static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self,
|
||||
const HistogramType* histograms, const size_t histograms_size,
|
||||
HuffmanTree* tree, size_t* storage_ix, uint8_t* storage) {
|
||||
const size_t alphabet_size = self->alphabet_size_;
|
||||
const size_t table_size = histograms_size * alphabet_size;
|
||||
self->depths_ = BROTLI_ALLOC(m, uint8_t, table_size);
|
||||
self->bits_ = BROTLI_ALLOC(m, uint16_t, table_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; i < histograms_size; ++i) {
|
||||
size_t ix = i * alphabet_size;
|
||||
BuildAndStoreHuffmanTree(&histograms[i].data_[0], alphabet_size, tree,
|
||||
&self->depths_[ix], &self->bits_[ix], storage_ix, storage);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef HistogramType
|
@ -9,18 +9,19 @@
|
||||
#include "./block_splitter.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
#include <string.h> /* memcpy, memset */
|
||||
|
||||
#include "./bit_cost.h"
|
||||
#include "./cluster.h"
|
||||
#include "./command.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./histogram.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static const size_t kMaxLiteralHistograms = 100;
|
||||
static const size_t kMaxCommandHistograms = 50;
|
||||
@ -36,45 +37,43 @@ static const size_t kMinLengthForBlockSplitting = 128;
|
||||
static const size_t kIterMulForRefining = 2;
|
||||
static const size_t kMinItersForRefining = 100;
|
||||
|
||||
void CopyLiteralsToByteArray(const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const uint8_t* data,
|
||||
const size_t offset,
|
||||
const size_t mask,
|
||||
std::vector<uint8_t>* literals) {
|
||||
static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
|
||||
/* Count how many we have. */
|
||||
size_t total_length = 0;
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
size_t i;
|
||||
for (i = 0; i < num_commands; ++i) {
|
||||
total_length += cmds[i].insert_len_;
|
||||
}
|
||||
if (total_length == 0) {
|
||||
return;
|
||||
}
|
||||
return total_length;
|
||||
}
|
||||
|
||||
// Allocate.
|
||||
literals->resize(total_length);
|
||||
|
||||
// Loop again, and copy this time.
|
||||
static void CopyLiteralsToByteArray(const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const uint8_t* data,
|
||||
const size_t offset,
|
||||
const size_t mask,
|
||||
uint8_t* literals) {
|
||||
size_t pos = 0;
|
||||
size_t from_pos = offset & mask;
|
||||
for (size_t i = 0; i < num_commands && pos < total_length; ++i) {
|
||||
size_t i;
|
||||
for (i = 0; i < num_commands; ++i) {
|
||||
size_t insert_len = cmds[i].insert_len_;
|
||||
if (from_pos + insert_len > mask) {
|
||||
size_t head_size = mask + 1 - from_pos;
|
||||
memcpy(&(*literals)[pos], data + from_pos, head_size);
|
||||
memcpy(literals + pos, data + from_pos, head_size);
|
||||
from_pos = 0;
|
||||
pos += head_size;
|
||||
insert_len -= head_size;
|
||||
}
|
||||
if (insert_len > 0) {
|
||||
memcpy(&(*literals)[pos], data + from_pos, insert_len);
|
||||
memcpy(literals + pos, data + from_pos, insert_len);
|
||||
pos += insert_len;
|
||||
}
|
||||
from_pos = (from_pos + insert_len + cmds[i].copy_len()) & mask;
|
||||
from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
|
||||
}
|
||||
}
|
||||
|
||||
inline static unsigned int MyRand(unsigned int* seed) {
|
||||
static BROTLI_INLINE unsigned int MyRand(unsigned int* seed) {
|
||||
*seed *= 16807U;
|
||||
if (*seed == 0) {
|
||||
*seed = 1;
|
||||
@ -82,424 +81,116 @@ inline static unsigned int MyRand(unsigned int* seed) {
|
||||
return *seed;
|
||||
}
|
||||
|
||||
template<typename HistogramType, typename DataType>
|
||||
void InitialEntropyCodes(const DataType* data, size_t length,
|
||||
size_t stride,
|
||||
size_t num_histograms,
|
||||
HistogramType* histograms) {
|
||||
for (size_t i = 0; i < num_histograms; ++i) {
|
||||
histograms[i].Clear();
|
||||
}
|
||||
unsigned int seed = 7;
|
||||
size_t block_length = length / num_histograms;
|
||||
for (size_t i = 0; i < num_histograms; ++i) {
|
||||
size_t pos = length * i / num_histograms;
|
||||
if (i != 0) {
|
||||
pos += MyRand(&seed) % block_length;
|
||||
}
|
||||
if (pos + stride >= length) {
|
||||
pos = length - stride - 1;
|
||||
}
|
||||
histograms[i].Add(data + pos, stride);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename HistogramType, typename DataType>
|
||||
void RandomSample(unsigned int* seed,
|
||||
const DataType* data,
|
||||
size_t length,
|
||||
size_t stride,
|
||||
HistogramType* sample) {
|
||||
size_t pos = 0;
|
||||
if (stride >= length) {
|
||||
pos = 0;
|
||||
stride = length;
|
||||
} else {
|
||||
pos = MyRand(seed) % (length - stride + 1);
|
||||
}
|
||||
sample->Add(data + pos, stride);
|
||||
}
|
||||
|
||||
template<typename HistogramType, typename DataType>
|
||||
void RefineEntropyCodes(const DataType* data, size_t length,
|
||||
size_t stride,
|
||||
size_t num_histograms,
|
||||
HistogramType* histograms) {
|
||||
size_t iters =
|
||||
kIterMulForRefining * length / stride + kMinItersForRefining;
|
||||
unsigned int seed = 7;
|
||||
iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
|
||||
for (size_t iter = 0; iter < iters; ++iter) {
|
||||
HistogramType sample;
|
||||
RandomSample(&seed, data, length, stride, &sample);
|
||||
size_t ix = iter % num_histograms;
|
||||
histograms[ix].AddHistogram(sample);
|
||||
}
|
||||
}
|
||||
|
||||
inline static double BitCost(size_t count) {
|
||||
static BROTLI_INLINE double BitCost(size_t count) {
|
||||
return count == 0 ? -2.0 : FastLog2(count);
|
||||
}
|
||||
|
||||
// Assigns a block id from the range [0, vec.size()) to each data element
|
||||
// in data[0..length) and fills in block_id[0..length) with the assigned values.
|
||||
// Returns the number of blocks, i.e. one plus the number of block switches.
|
||||
template<typename DataType, int kSize>
|
||||
size_t FindBlocks(const DataType* data, const size_t length,
|
||||
const double block_switch_bitcost,
|
||||
const size_t num_histograms,
|
||||
const Histogram<kSize>* histograms,
|
||||
double* insert_cost,
|
||||
double* cost,
|
||||
uint8_t* switch_signal,
|
||||
uint8_t *block_id) {
|
||||
if (num_histograms <= 1) {
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
block_id[i] = 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
const size_t bitmaplen = (num_histograms + 7) >> 3;
|
||||
assert(num_histograms <= 256);
|
||||
memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * num_histograms);
|
||||
for (size_t j = 0; j < num_histograms; ++j) {
|
||||
insert_cost[j] = FastLog2(static_cast<uint32_t>(
|
||||
histograms[j].total_count_));
|
||||
}
|
||||
for (size_t i = kSize; i != 0;) {
|
||||
--i;
|
||||
for (size_t j = 0; j < num_histograms; ++j) {
|
||||
insert_cost[i * num_histograms + j] =
|
||||
insert_cost[j] - BitCost(histograms[j].data_[i]);
|
||||
}
|
||||
}
|
||||
memset(cost, 0, sizeof(cost[0]) * num_histograms);
|
||||
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
|
||||
// After each iteration of this loop, cost[k] will contain the difference
|
||||
// between the minimum cost of arriving at the current byte position using
|
||||
// entropy code k, and the minimum cost of arriving at the current byte
|
||||
// position. This difference is capped at the block switch cost, and if it
|
||||
// reaches block switch cost, it means that when we trace back from the last
|
||||
// position, we need to switch here.
|
||||
for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) {
|
||||
size_t ix = byte_ix * bitmaplen;
|
||||
size_t insert_cost_ix = data[byte_ix] * num_histograms;
|
||||
double min_cost = 1e99;
|
||||
for (size_t k = 0; k < num_histograms; ++k) {
|
||||
// We are coding the symbol in data[byte_ix] with entropy code k.
|
||||
cost[k] += insert_cost[insert_cost_ix + k];
|
||||
if (cost[k] < min_cost) {
|
||||
min_cost = cost[k];
|
||||
block_id[byte_ix] = static_cast<uint8_t>(k);
|
||||
}
|
||||
}
|
||||
double block_switch_cost = block_switch_bitcost;
|
||||
// More blocks for the beginning.
|
||||
if (byte_ix < 2000) {
|
||||
block_switch_cost *= 0.77 + 0.07 * static_cast<double>(byte_ix) / 2000;
|
||||
}
|
||||
for (size_t k = 0; k < num_histograms; ++k) {
|
||||
cost[k] -= min_cost;
|
||||
if (cost[k] >= block_switch_cost) {
|
||||
cost[k] = block_switch_cost;
|
||||
const uint8_t mask = static_cast<uint8_t>(1u << (k & 7));
|
||||
assert((k >> 3) < bitmaplen);
|
||||
switch_signal[ix + (k >> 3)] |= mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Now trace back from the last position and switch at the marked places.
|
||||
size_t byte_ix = length - 1;
|
||||
size_t ix = byte_ix * bitmaplen;
|
||||
uint8_t cur_id = block_id[byte_ix];
|
||||
size_t num_blocks = 1;
|
||||
while (byte_ix > 0) {
|
||||
--byte_ix;
|
||||
ix -= bitmaplen;
|
||||
const uint8_t mask = static_cast<uint8_t>(1u << (cur_id & 7));
|
||||
assert((static_cast<size_t>(cur_id) >> 3) < bitmaplen);
|
||||
if (switch_signal[ix + (cur_id >> 3)] & mask) {
|
||||
if (cur_id != block_id[byte_ix]) {
|
||||
cur_id = block_id[byte_ix];
|
||||
++num_blocks;
|
||||
}
|
||||
}
|
||||
block_id[byte_ix] = cur_id;
|
||||
}
|
||||
return num_blocks;
|
||||
#define HISTOGRAMS_PER_BATCH 64
|
||||
#define CLUSTERS_PER_BATCH 16
|
||||
|
||||
#define FN(X) X ## Literal
|
||||
#define DataType uint8_t
|
||||
/* NOLINTNEXTLINE(build/include) */
|
||||
#include "./block_splitter_inc.h"
|
||||
#undef DataType
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Command
|
||||
#define DataType uint16_t
|
||||
/* NOLINTNEXTLINE(build/include) */
|
||||
#include "./block_splitter_inc.h"
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Distance
|
||||
/* NOLINTNEXTLINE(build/include) */
|
||||
#include "./block_splitter_inc.h"
|
||||
#undef DataType
|
||||
#undef FN
|
||||
|
||||
/* Puts |self| into the empty state: no block types, no blocks, and no
   |types| / |lengths| storage. */
void BrotliInitBlockSplit(BlockSplit* self) {
  /* Both arrays start out null with a recorded capacity of zero. */
  self->types = 0;
  self->types_alloc_size = 0;
  self->lengths = 0;
  self->lengths_alloc_size = 0;
  /* Nothing has been split yet. */
  self->num_blocks = 0;
  self->num_types = 0;
}
|
||||
|
||||
static size_t RemapBlockIds(uint8_t* block_ids, const size_t length,
|
||||
uint16_t* new_id, const size_t num_histograms) {
|
||||
static const uint16_t kInvalidId = 256;
|
||||
for (size_t i = 0; i < num_histograms; ++i) {
|
||||
new_id[i] = kInvalidId;
|
||||
}
|
||||
uint16_t next_id = 0;
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
assert(block_ids[i] < num_histograms);
|
||||
if (new_id[block_ids[i]] == kInvalidId) {
|
||||
new_id[block_ids[i]] = next_id++;
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
block_ids[i] = static_cast<uint8_t>(new_id[block_ids[i]]);
|
||||
assert(block_ids[i] < num_histograms);
|
||||
}
|
||||
assert(next_id <= num_histograms);
|
||||
return next_id;
|
||||
void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
|
||||
BROTLI_FREE(m, self->types);
|
||||
BROTLI_FREE(m, self->lengths);
|
||||
}
|
||||
|
||||
// Rebuilds `histograms[0..num_histograms)` from scratch: every histogram is
// cleared, then each element data[i] is added to the histogram selected by
// block_ids[i].
template<typename HistogramType, typename DataType>
void BuildBlockHistograms(const DataType* data, const size_t length,
                          const uint8_t* block_ids,
                          const size_t num_histograms,
                          HistogramType* histograms) {
  HistogramType* const histograms_end = histograms + num_histograms;
  for (HistogramType* histo = histograms; histo != histograms_end; ++histo) {
    histo->Clear();
  }
  const DataType* symbol = data;
  const uint8_t* id = block_ids;
  for (size_t remaining = length; remaining != 0; --remaining) {
    histograms[*id].Add(*symbol);
    ++id;
    ++symbol;
  }
}
|
||||
|
||||
template<typename HistogramType, typename DataType>
|
||||
void ClusterBlocks(const DataType* data, const size_t length,
|
||||
const size_t num_blocks,
|
||||
uint8_t* block_ids,
|
||||
BlockSplit* split) {
|
||||
static const size_t kMaxNumberOfBlockTypes = 256;
|
||||
static const size_t kHistogramsPerBatch = 64;
|
||||
static const size_t kClustersPerBatch = 16;
|
||||
std::vector<uint32_t> histogram_symbols(num_blocks);
|
||||
std::vector<uint32_t> block_lengths(num_blocks);
|
||||
|
||||
size_t block_idx = 0;
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
assert(block_idx < num_blocks);
|
||||
++block_lengths[block_idx];
|
||||
if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
|
||||
++block_idx;
|
||||
}
|
||||
}
|
||||
assert(block_idx == num_blocks);
|
||||
|
||||
const size_t expected_num_clusters =
|
||||
kClustersPerBatch *
|
||||
(num_blocks + kHistogramsPerBatch - 1) / kHistogramsPerBatch;
|
||||
std::vector<HistogramType> all_histograms;
|
||||
std::vector<uint32_t> cluster_size;
|
||||
all_histograms.reserve(expected_num_clusters);
|
||||
cluster_size.reserve(expected_num_clusters);
|
||||
size_t num_clusters = 0;
|
||||
std::vector<HistogramType> histograms(
|
||||
std::min(num_blocks, kHistogramsPerBatch));
|
||||
size_t max_num_pairs = kHistogramsPerBatch * kHistogramsPerBatch / 2;
|
||||
std::vector<HistogramPair> pairs(max_num_pairs + 1);
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < num_blocks; i += kHistogramsPerBatch) {
|
||||
const size_t num_to_combine = std::min(num_blocks - i, kHistogramsPerBatch);
|
||||
uint32_t sizes[kHistogramsPerBatch];
|
||||
uint32_t clusters[kHistogramsPerBatch];
|
||||
uint32_t symbols[kHistogramsPerBatch];
|
||||
uint32_t remap[kHistogramsPerBatch];
|
||||
for (size_t j = 0; j < num_to_combine; ++j) {
|
||||
histograms[j].Clear();
|
||||
for (size_t k = 0; k < block_lengths[i + j]; ++k) {
|
||||
histograms[j].Add(data[pos++]);
|
||||
}
|
||||
histograms[j].bit_cost_ = PopulationCost(histograms[j]);
|
||||
symbols[j] = clusters[j] = static_cast<uint32_t>(j);
|
||||
sizes[j] = 1;
|
||||
}
|
||||
size_t num_new_clusters = HistogramCombine(
|
||||
&histograms[0], sizes, symbols, clusters, &pairs[0], num_to_combine,
|
||||
num_to_combine, kHistogramsPerBatch, max_num_pairs);
|
||||
for (size_t j = 0; j < num_new_clusters; ++j) {
|
||||
all_histograms.push_back(histograms[clusters[j]]);
|
||||
cluster_size.push_back(sizes[clusters[j]]);
|
||||
remap[clusters[j]] = static_cast<uint32_t>(j);
|
||||
}
|
||||
for (size_t j = 0; j < num_to_combine; ++j) {
|
||||
histogram_symbols[i + j] =
|
||||
static_cast<uint32_t>(num_clusters) + remap[symbols[j]];
|
||||
}
|
||||
num_clusters += num_new_clusters;
|
||||
assert(num_clusters == cluster_size.size());
|
||||
assert(num_clusters == all_histograms.size());
|
||||
}
|
||||
|
||||
max_num_pairs =
|
||||
std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
|
||||
pairs.resize(max_num_pairs + 1);
|
||||
|
||||
std::vector<uint32_t> clusters(num_clusters);
|
||||
for (size_t i = 0; i < num_clusters; ++i) {
|
||||
clusters[i] = static_cast<uint32_t>(i);
|
||||
}
|
||||
size_t num_final_clusters =
|
||||
HistogramCombine(&all_histograms[0], &cluster_size[0],
|
||||
&histogram_symbols[0],
|
||||
&clusters[0], &pairs[0], num_clusters,
|
||||
num_blocks, kMaxNumberOfBlockTypes, max_num_pairs);
|
||||
|
||||
static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
|
||||
std::vector<uint32_t> new_index(num_clusters, kInvalidIndex);
|
||||
uint32_t next_index = 0;
|
||||
pos = 0;
|
||||
for (size_t i = 0; i < num_blocks; ++i) {
|
||||
HistogramType histo;
|
||||
for (size_t j = 0; j < block_lengths[i]; ++j) {
|
||||
histo.Add(data[pos++]);
|
||||
}
|
||||
uint32_t best_out =
|
||||
i == 0 ? histogram_symbols[0] : histogram_symbols[i - 1];
|
||||
double best_bits = HistogramBitCostDistance(
|
||||
histo, all_histograms[best_out]);
|
||||
for (size_t j = 0; j < num_final_clusters; ++j) {
|
||||
const double cur_bits = HistogramBitCostDistance(
|
||||
histo, all_histograms[clusters[j]]);
|
||||
if (cur_bits < best_bits) {
|
||||
best_bits = cur_bits;
|
||||
best_out = clusters[j];
|
||||
}
|
||||
}
|
||||
histogram_symbols[i] = best_out;
|
||||
if (new_index[best_out] == kInvalidIndex) {
|
||||
new_index[best_out] = next_index++;
|
||||
}
|
||||
}
|
||||
uint8_t max_type = 0;
|
||||
uint32_t cur_length = 0;
|
||||
block_idx = 0;
|
||||
split->types.resize(num_blocks);
|
||||
split->lengths.resize(num_blocks);
|
||||
for (size_t i = 0; i < num_blocks; ++i) {
|
||||
cur_length += block_lengths[i];
|
||||
if (i + 1 == num_blocks ||
|
||||
histogram_symbols[i] != histogram_symbols[i + 1]) {
|
||||
const uint8_t id = static_cast<uint8_t>(new_index[histogram_symbols[i]]);
|
||||
split->types[block_idx] = id;
|
||||
split->lengths[block_idx] = cur_length;
|
||||
max_type = std::max(max_type, id);
|
||||
cur_length = 0;
|
||||
++block_idx;
|
||||
}
|
||||
}
|
||||
split->types.resize(block_idx);
|
||||
split->lengths.resize(block_idx);
|
||||
split->num_types = static_cast<size_t>(max_type) + 1;
|
||||
}
|
||||
|
||||
template<int kSize, typename DataType>
|
||||
void SplitByteVector(const std::vector<DataType>& data,
|
||||
const size_t literals_per_histogram,
|
||||
const size_t max_histograms,
|
||||
const size_t sampling_stride_length,
|
||||
const double block_switch_cost,
|
||||
BlockSplit* split) {
|
||||
if (data.empty()) {
|
||||
split->num_types = 1;
|
||||
return;
|
||||
} else if (data.size() < kMinLengthForBlockSplitting) {
|
||||
split->num_types = 1;
|
||||
split->types.push_back(0);
|
||||
split->lengths.push_back(static_cast<uint32_t>(data.size()));
|
||||
return;
|
||||
}
|
||||
size_t num_histograms = data.size() / literals_per_histogram + 1;
|
||||
if (num_histograms > max_histograms) {
|
||||
num_histograms = max_histograms;
|
||||
}
|
||||
Histogram<kSize>* histograms = new Histogram<kSize>[num_histograms];
|
||||
// Find good entropy codes.
|
||||
InitialEntropyCodes(&data[0], data.size(),
|
||||
sampling_stride_length,
|
||||
num_histograms, histograms);
|
||||
RefineEntropyCodes(&data[0], data.size(),
|
||||
sampling_stride_length,
|
||||
num_histograms, histograms);
|
||||
// Find a good path through literals with the good entropy codes.
|
||||
std::vector<uint8_t> block_ids(data.size());
|
||||
size_t num_blocks;
|
||||
const size_t bitmaplen = (num_histograms + 7) >> 3;
|
||||
double* insert_cost = new double[kSize * num_histograms];
|
||||
double *cost = new double[num_histograms];
|
||||
uint8_t* switch_signal = new uint8_t[data.size() * bitmaplen];
|
||||
uint16_t* new_id = new uint16_t[num_histograms];
|
||||
for (size_t i = 0; i < 10; ++i) {
|
||||
num_blocks = FindBlocks(&data[0], data.size(),
|
||||
block_switch_cost,
|
||||
num_histograms, histograms,
|
||||
insert_cost, cost, switch_signal,
|
||||
&block_ids[0]);
|
||||
num_histograms = RemapBlockIds(&block_ids[0], data.size(),
|
||||
new_id, num_histograms);
|
||||
BuildBlockHistograms(&data[0], data.size(), &block_ids[0],
|
||||
num_histograms, histograms);
|
||||
}
|
||||
delete[] insert_cost;
|
||||
delete[] cost;
|
||||
delete[] switch_signal;
|
||||
delete[] new_id;
|
||||
delete[] histograms;
|
||||
ClusterBlocks<Histogram<kSize> >(&data[0], data.size(), num_blocks,
|
||||
&block_ids[0], split);
|
||||
}
|
||||
|
||||
void SplitBlock(const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const uint8_t* data,
|
||||
const size_t pos,
|
||||
const size_t mask,
|
||||
BlockSplit* literal_split,
|
||||
BlockSplit* insert_and_copy_split,
|
||||
BlockSplit* dist_split) {
|
||||
void BrotliSplitBlock(MemoryManager* m,
|
||||
const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const uint8_t* data,
|
||||
const size_t pos,
|
||||
const size_t mask,
|
||||
const int quality,
|
||||
BlockSplit* literal_split,
|
||||
BlockSplit* insert_and_copy_split,
|
||||
BlockSplit* dist_split) {
|
||||
{
|
||||
size_t literals_count = CountLiterals(cmds, num_commands);
|
||||
uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
/* Create a continuous array of literals. */
|
||||
std::vector<uint8_t> literals;
|
||||
CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, &literals);
|
||||
CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
|
||||
/* Create the block split on the array of literals.
|
||||
Literal histograms have alphabet size 256. */
|
||||
SplitByteVector<256>(
|
||||
literals,
|
||||
SplitByteVectorLiteral(
|
||||
m, literals, literals_count,
|
||||
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
|
||||
kLiteralStrideLength, kLiteralBlockSwitchCost,
|
||||
kLiteralStrideLength, kLiteralBlockSwitchCost, quality,
|
||||
literal_split);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BROTLI_FREE(m, literals);
|
||||
}
|
||||
|
||||
{
|
||||
/* Compute prefix codes for commands. */
|
||||
std::vector<uint16_t> insert_and_copy_codes(num_commands);
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
|
||||
size_t i;
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
for (i = 0; i < num_commands; ++i) {
|
||||
insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
|
||||
}
|
||||
/* Create the block split on the array of command prefixes. */
|
||||
SplitByteVector<kNumCommandPrefixes>(
|
||||
insert_and_copy_codes,
|
||||
SplitByteVectorCommand(
|
||||
m, insert_and_copy_codes, num_commands,
|
||||
kSymbolsPerCommandHistogram, kMaxCommandHistograms,
|
||||
kCommandStrideLength, kCommandBlockSwitchCost,
|
||||
kCommandStrideLength, kCommandBlockSwitchCost, quality,
|
||||
insert_and_copy_split);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
/* TODO: reuse for distances? */
|
||||
BROTLI_FREE(m, insert_and_copy_codes);
|
||||
}
|
||||
|
||||
{
|
||||
/* Create a continuous array of distance prefixes. */
|
||||
std::vector<uint16_t> distance_prefixes(num_commands);
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
const Command& cmd = cmds[i];
|
||||
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
|
||||
distance_prefixes[pos++] = cmd.dist_prefix_;
|
||||
uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
|
||||
size_t j = 0;
|
||||
size_t i;
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
for (i = 0; i < num_commands; ++i) {
|
||||
const Command* cmd = &cmds[i];
|
||||
if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
|
||||
distance_prefixes[j++] = cmd->dist_prefix_;
|
||||
}
|
||||
}
|
||||
distance_prefixes.resize(pos);
|
||||
/* Create the block split on the array of distance prefixes. */
|
||||
SplitByteVector<kNumDistancePrefixes>(
|
||||
distance_prefixes,
|
||||
SplitByteVectorDistance(
|
||||
m, distance_prefixes, j,
|
||||
kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
|
||||
kCommandStrideLength, kDistanceBlockSwitchCost,
|
||||
kCommandStrideLength, kDistanceBlockSwitchCost, quality,
|
||||
dist_split);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BROTLI_FREE(m, distance_prefixes);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
@ -9,53 +9,42 @@
|
||||
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
|
||||
#define BROTLI_ENC_BLOCK_SPLITTER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./command.h"
|
||||
#include "./metablock.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct BlockSplitIterator {
|
||||
explicit BlockSplitIterator(const BlockSplit& split)
|
||||
: split_(split), idx_(0), type_(0), length_(0) {
|
||||
if (!split.lengths.empty()) {
|
||||
length_ = split.lengths[0];
|
||||
}
|
||||
}
|
||||
typedef struct BlockSplit {
|
||||
size_t num_types; /* Amount of distinct types */
|
||||
size_t num_blocks; /* Amount of values in types and length */
|
||||
uint8_t* types;
|
||||
uint32_t* lengths;
|
||||
|
||||
void Next(void) {
|
||||
if (length_ == 0) {
|
||||
++idx_;
|
||||
type_ = split_.types[idx_];
|
||||
length_ = split_.lengths[idx_];
|
||||
}
|
||||
--length_;
|
||||
}
|
||||
size_t types_alloc_size;
|
||||
size_t lengths_alloc_size;
|
||||
} BlockSplit;
|
||||
|
||||
const BlockSplit& split_;
|
||||
size_t idx_;
|
||||
size_t type_;
|
||||
size_t length_;
|
||||
};
|
||||
BROTLI_INTERNAL void BrotliInitBlockSplit(BlockSplit* self);
|
||||
BROTLI_INTERNAL void BrotliDestroyBlockSplit(MemoryManager* m,
|
||||
BlockSplit* self);
|
||||
|
||||
void CopyLiteralsToByteArray(const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const uint8_t* data,
|
||||
const size_t offset,
|
||||
const size_t mask,
|
||||
std::vector<uint8_t>* literals);
|
||||
BROTLI_INTERNAL void BrotliSplitBlock(MemoryManager* m,
|
||||
const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const uint8_t* data,
|
||||
const size_t offset,
|
||||
const size_t mask,
|
||||
const int quality,
|
||||
BlockSplit* literal_split,
|
||||
BlockSplit* insert_and_copy_split,
|
||||
BlockSplit* dist_split);
|
||||
|
||||
void SplitBlock(const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const uint8_t* data,
|
||||
const size_t offset,
|
||||
const size_t mask,
|
||||
BlockSplit* literal_split,
|
||||
BlockSplit* insert_and_copy_split,
|
||||
BlockSplit* dist_split);
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_BLOCK_SPLITTER_H_ */
|
||||
|
431
enc/block_splitter_inc.h
Normal file
431
enc/block_splitter_inc.h
Normal file
@ -0,0 +1,431 @@
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: FN, DataType */
|
||||
|
||||
#define HistogramType FN(Histogram)
|
||||
|
||||
static void FN(InitialEntropyCodes)(const DataType* data, size_t length,
|
||||
size_t stride,
|
||||
size_t num_histograms,
|
||||
HistogramType* histograms) {
|
||||
unsigned int seed = 7;
|
||||
size_t block_length = length / num_histograms;
|
||||
size_t i;
|
||||
FN(ClearHistograms)(histograms, num_histograms);
|
||||
for (i = 0; i < num_histograms; ++i) {
|
||||
size_t pos = length * i / num_histograms;
|
||||
if (i != 0) {
|
||||
pos += MyRand(&seed) % block_length;
|
||||
}
|
||||
if (pos + stride >= length) {
|
||||
pos = length - stride - 1;
|
||||
}
|
||||
FN(HistogramAddVector)(&histograms[i], data + pos, stride);
|
||||
}
|
||||
}
|
||||
|
||||
static void FN(RandomSample)(unsigned int* seed,
|
||||
const DataType* data,
|
||||
size_t length,
|
||||
size_t stride,
|
||||
HistogramType* sample) {
|
||||
size_t pos = 0;
|
||||
if (stride >= length) {
|
||||
pos = 0;
|
||||
stride = length;
|
||||
} else {
|
||||
pos = MyRand(seed) % (length - stride + 1);
|
||||
}
|
||||
FN(HistogramAddVector)(sample, data + pos, stride);
|
||||
}
|
||||
|
||||
static void FN(RefineEntropyCodes)(const DataType* data, size_t length,
|
||||
size_t stride,
|
||||
size_t num_histograms,
|
||||
HistogramType* histograms) {
|
||||
size_t iters =
|
||||
kIterMulForRefining * length / stride + kMinItersForRefining;
|
||||
unsigned int seed = 7;
|
||||
size_t iter;
|
||||
iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms;
|
||||
for (iter = 0; iter < iters; ++iter) {
|
||||
HistogramType sample;
|
||||
FN(HistogramClear)(&sample);
|
||||
FN(RandomSample)(&seed, data, length, stride, &sample);
|
||||
FN(HistogramAddHistogram)(&histograms[iter % num_histograms], &sample);
|
||||
}
|
||||
}
|
||||
|
||||
/* Assigns a block id from the range [0, vec.size()) to each data element
|
||||
in data[0..length) and fills in block_id[0..length) with the assigned values.
|
||||
Returns the number of blocks, i.e. one plus the number of block switches. */
|
||||
static size_t FN(FindBlocks)(const DataType* data, const size_t length,
|
||||
const double block_switch_bitcost,
|
||||
const size_t num_histograms,
|
||||
const HistogramType* histograms,
|
||||
double* insert_cost,
|
||||
double* cost,
|
||||
uint8_t* switch_signal,
|
||||
uint8_t *block_id) {
|
||||
const size_t data_size = FN(HistogramDataSize)();
|
||||
const size_t bitmaplen = (num_histograms + 7) >> 3;
|
||||
size_t num_blocks = 1;
|
||||
size_t i;
|
||||
size_t j;
|
||||
assert(num_histograms <= 256);
|
||||
if (num_histograms <= 1) {
|
||||
for (i = 0; i < length; ++i) {
|
||||
block_id[i] = 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
memset(insert_cost, 0, sizeof(insert_cost[0]) * data_size * num_histograms);
|
||||
for (i = 0; i < num_histograms; ++i) {
|
||||
insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_);
|
||||
}
|
||||
for (i = data_size; i != 0;) {
|
||||
--i;
|
||||
for (j = 0; j < num_histograms; ++j) {
|
||||
insert_cost[i * num_histograms + j] =
|
||||
insert_cost[j] - BitCost(histograms[j].data_[i]);
|
||||
}
|
||||
}
|
||||
memset(cost, 0, sizeof(cost[0]) * num_histograms);
|
||||
memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen);
|
||||
/* After each iteration of this loop, cost[k] will contain the difference
|
||||
between the minimum cost of arriving at the current byte position using
|
||||
entropy code k, and the minimum cost of arriving at the current byte
|
||||
position. This difference is capped at the block switch cost, and if it
|
||||
reaches block switch cost, it means that when we trace back from the last
|
||||
position, we need to switch here. */
|
||||
for (i = 0; i < length; ++i) {
|
||||
const size_t byte_ix = i;
|
||||
size_t ix = byte_ix * bitmaplen;
|
||||
size_t insert_cost_ix = data[byte_ix] * num_histograms;
|
||||
double min_cost = 1e99;
|
||||
double block_switch_cost = block_switch_bitcost;
|
||||
size_t k;
|
||||
for (k = 0; k < num_histograms; ++k) {
|
||||
/* We are coding the symbol in data[byte_ix] with entropy code k. */
|
||||
cost[k] += insert_cost[insert_cost_ix + k];
|
||||
if (cost[k] < min_cost) {
|
||||
min_cost = cost[k];
|
||||
block_id[byte_ix] = (uint8_t)k;
|
||||
}
|
||||
}
|
||||
/* More blocks for the beginning. */
|
||||
if (byte_ix < 2000) {
|
||||
block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000;
|
||||
}
|
||||
for (k = 0; k < num_histograms; ++k) {
|
||||
cost[k] -= min_cost;
|
||||
if (cost[k] >= block_switch_cost) {
|
||||
const uint8_t mask = (uint8_t)(1u << (k & 7));
|
||||
cost[k] = block_switch_cost;
|
||||
assert((k >> 3) < bitmaplen);
|
||||
switch_signal[ix + (k >> 3)] |= mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
{ /* Trace back from the last position and switch at the marked places. */
|
||||
size_t byte_ix = length - 1;
|
||||
size_t ix = byte_ix * bitmaplen;
|
||||
uint8_t cur_id = block_id[byte_ix];
|
||||
while (byte_ix > 0) {
|
||||
const uint8_t mask = (uint8_t)(1u << (cur_id & 7));
|
||||
assert(((size_t)cur_id >> 3) < bitmaplen);
|
||||
--byte_ix;
|
||||
ix -= bitmaplen;
|
||||
if (switch_signal[ix + (cur_id >> 3)] & mask) {
|
||||
if (cur_id != block_id[byte_ix]) {
|
||||
cur_id = block_id[byte_ix];
|
||||
++num_blocks;
|
||||
}
|
||||
}
|
||||
block_id[byte_ix] = cur_id;
|
||||
}
|
||||
}
|
||||
return num_blocks;
|
||||
}
|
||||
|
||||
static size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length,
|
||||
uint16_t* new_id, const size_t num_histograms) {
|
||||
static const uint16_t kInvalidId = 256;
|
||||
uint16_t next_id = 0;
|
||||
size_t i;
|
||||
for (i = 0; i < num_histograms; ++i) {
|
||||
new_id[i] = kInvalidId;
|
||||
}
|
||||
for (i = 0; i < length; ++i) {
|
||||
assert(block_ids[i] < num_histograms);
|
||||
if (new_id[block_ids[i]] == kInvalidId) {
|
||||
new_id[block_ids[i]] = next_id++;
|
||||
}
|
||||
}
|
||||
for (i = 0; i < length; ++i) {
|
||||
block_ids[i] = (uint8_t)new_id[block_ids[i]];
|
||||
assert(block_ids[i] < num_histograms);
|
||||
}
|
||||
assert(next_id <= num_histograms);
|
||||
return next_id;
|
||||
}
|
||||
|
||||
static void FN(BuildBlockHistograms)(const DataType* data, const size_t length,
|
||||
const uint8_t* block_ids,
|
||||
const size_t num_histograms,
|
||||
HistogramType* histograms) {
|
||||
size_t i;
|
||||
FN(ClearHistograms)(histograms, num_histograms);
|
||||
for (i = 0; i < length; ++i) {
|
||||
FN(HistogramAdd)(&histograms[block_ids[i]], data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void FN(ClusterBlocks)(MemoryManager* m,
|
||||
const DataType* data, const size_t length,
|
||||
const size_t num_blocks,
|
||||
uint8_t* block_ids,
|
||||
BlockSplit* split) {
|
||||
uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks);
|
||||
uint32_t* block_lengths = BROTLI_ALLOC(m, uint32_t, num_blocks);
|
||||
const size_t expected_num_clusters = CLUSTERS_PER_BATCH *
|
||||
(num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH;
|
||||
size_t all_histograms_size = 0;
|
||||
size_t all_histograms_capacity = expected_num_clusters;
|
||||
HistogramType* all_histograms =
|
||||
BROTLI_ALLOC(m, HistogramType, all_histograms_capacity);
|
||||
size_t cluster_size_size = 0;
|
||||
size_t cluster_size_capacity = expected_num_clusters;
|
||||
uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity);
|
||||
size_t num_clusters = 0;
|
||||
HistogramType* histograms = BROTLI_ALLOC(m, HistogramType,
|
||||
BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH));
|
||||
size_t max_num_pairs =
|
||||
HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2;
|
||||
size_t pairs_capacity = max_num_pairs + 1;
|
||||
HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity);
|
||||
size_t pos = 0;
|
||||
uint32_t* clusters;
|
||||
size_t num_final_clusters;
|
||||
static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
|
||||
uint32_t* new_index;
|
||||
uint8_t max_type = 0;
|
||||
size_t i;
|
||||
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
memset(block_lengths, 0, num_blocks * sizeof(uint32_t));
|
||||
|
||||
{
|
||||
size_t block_idx = 0;
|
||||
for (i = 0; i < length; ++i) {
|
||||
assert(block_idx < num_blocks);
|
||||
++block_lengths[block_idx];
|
||||
if (i + 1 == length || block_ids[i] != block_ids[i + 1]) {
|
||||
++block_idx;
|
||||
}
|
||||
}
|
||||
assert(block_idx == num_blocks);
|
||||
}
|
||||
|
||||
for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) {
|
||||
const size_t num_to_combine =
|
||||
BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH);
|
||||
uint32_t sizes[HISTOGRAMS_PER_BATCH];
|
||||
uint32_t new_clusters[HISTOGRAMS_PER_BATCH];
|
||||
uint32_t symbols[HISTOGRAMS_PER_BATCH];
|
||||
uint32_t remap[HISTOGRAMS_PER_BATCH];
|
||||
size_t num_new_clusters;
|
||||
size_t j;
|
||||
for (j = 0; j < num_to_combine; ++j) {
|
||||
size_t k;
|
||||
FN(HistogramClear)(&histograms[j]);
|
||||
for (k = 0; k < block_lengths[i + j]; ++k) {
|
||||
FN(HistogramAdd)(&histograms[j], data[pos++]);
|
||||
}
|
||||
histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]);
|
||||
symbols[j] = new_clusters[j] = (uint32_t)j;
|
||||
sizes[j] = 1;
|
||||
}
|
||||
num_new_clusters = FN(BrotliHistogramCombine)(
|
||||
histograms, sizes, symbols, new_clusters, pairs, num_to_combine,
|
||||
num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs);
|
||||
BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms,
|
||||
all_histograms_capacity, all_histograms_size + num_new_clusters);
|
||||
BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size,
|
||||
cluster_size_capacity, cluster_size_size + num_new_clusters);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
for (j = 0; j < num_new_clusters; ++j) {
|
||||
all_histograms[all_histograms_size++] = histograms[new_clusters[j]];
|
||||
cluster_size[cluster_size_size++] = sizes[new_clusters[j]];
|
||||
remap[new_clusters[j]] = (uint32_t)j;
|
||||
}
|
||||
for (j = 0; j < num_to_combine; ++j) {
|
||||
histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]];
|
||||
}
|
||||
num_clusters += num_new_clusters;
|
||||
assert(num_clusters == cluster_size_size);
|
||||
assert(num_clusters == all_histograms_size);
|
||||
}
|
||||
BROTLI_FREE(m, histograms);
|
||||
|
||||
max_num_pairs =
|
||||
BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters);
|
||||
if (pairs_capacity < max_num_pairs + 1) {
|
||||
BROTLI_FREE(m, pairs);
|
||||
pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
}
|
||||
|
||||
clusters = BROTLI_ALLOC(m, uint32_t, num_clusters);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
for (i = 0; i < num_clusters; ++i) {
|
||||
clusters[i] = (uint32_t)i;
|
||||
}
|
||||
num_final_clusters = FN(BrotliHistogramCombine)(
|
||||
all_histograms, cluster_size, histogram_symbols, clusters, pairs,
|
||||
num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES,
|
||||
max_num_pairs);
|
||||
BROTLI_FREE(m, pairs);
|
||||
BROTLI_FREE(m, cluster_size);
|
||||
|
||||
new_index = BROTLI_ALLOC(m, uint32_t, num_clusters);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex;
|
||||
pos = 0;
|
||||
{
|
||||
uint32_t next_index = 0;
|
||||
for (i = 0; i < num_blocks; ++i) {
|
||||
HistogramType histo;
|
||||
size_t j;
|
||||
uint32_t best_out;
|
||||
double best_bits;
|
||||
FN(HistogramClear)(&histo);
|
||||
for (j = 0; j < block_lengths[i]; ++j) {
|
||||
FN(HistogramAdd)(&histo, data[pos++]);
|
||||
}
|
||||
best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1];
|
||||
best_bits =
|
||||
FN(BrotliHistogramBitCostDistance)(&histo, &all_histograms[best_out]);
|
||||
for (j = 0; j < num_final_clusters; ++j) {
|
||||
const double cur_bits = FN(BrotliHistogramBitCostDistance)(
|
||||
&histo, &all_histograms[clusters[j]]);
|
||||
if (cur_bits < best_bits) {
|
||||
best_bits = cur_bits;
|
||||
best_out = clusters[j];
|
||||
}
|
||||
}
|
||||
histogram_symbols[i] = best_out;
|
||||
if (new_index[best_out] == kInvalidIndex) {
|
||||
new_index[best_out] = next_index++;
|
||||
}
|
||||
}
|
||||
}
|
||||
BROTLI_FREE(m, clusters);
|
||||
BROTLI_FREE(m, all_histograms);
|
||||
BROTLI_ENSURE_CAPACITY(
|
||||
m, uint8_t, split->types, split->types_alloc_size, num_blocks);
|
||||
BROTLI_ENSURE_CAPACITY(
|
||||
m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
{
|
||||
uint32_t cur_length = 0;
|
||||
size_t block_idx = 0;
|
||||
for (i = 0; i < num_blocks; ++i) {
|
||||
cur_length += block_lengths[i];
|
||||
if (i + 1 == num_blocks ||
|
||||
histogram_symbols[i] != histogram_symbols[i + 1]) {
|
||||
const uint8_t id = (uint8_t)new_index[histogram_symbols[i]];
|
||||
split->types[block_idx] = id;
|
||||
split->lengths[block_idx] = cur_length;
|
||||
max_type = BROTLI_MAX(uint8_t, max_type, id);
|
||||
cur_length = 0;
|
||||
++block_idx;
|
||||
}
|
||||
}
|
||||
split->num_blocks = block_idx;
|
||||
split->num_types = (size_t)max_type + 1;
|
||||
}
|
||||
BROTLI_FREE(m, new_index);
|
||||
BROTLI_FREE(m, block_lengths);
|
||||
BROTLI_FREE(m, histogram_symbols);
|
||||
}
|
||||
|
||||
static void FN(SplitByteVector)(MemoryManager* m,
|
||||
const DataType* data, const size_t length,
|
||||
const size_t literals_per_histogram,
|
||||
const size_t max_histograms,
|
||||
const size_t sampling_stride_length,
|
||||
const double block_switch_cost,
|
||||
const int quality,
|
||||
BlockSplit* split) {
|
||||
const size_t data_size = FN(HistogramDataSize)();
|
||||
size_t num_histograms = length / literals_per_histogram + 1;
|
||||
HistogramType* histograms;
|
||||
if (num_histograms > max_histograms) {
|
||||
num_histograms = max_histograms;
|
||||
}
|
||||
if (length == 0) {
|
||||
split->num_types = 1;
|
||||
return;
|
||||
} else if (length < kMinLengthForBlockSplitting) {
|
||||
BROTLI_ENSURE_CAPACITY(m, uint8_t,
|
||||
split->types, split->types_alloc_size, split->num_blocks + 1);
|
||||
BROTLI_ENSURE_CAPACITY(m, uint32_t,
|
||||
split->lengths, split->lengths_alloc_size, split->num_blocks + 1);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
split->num_types = 1;
|
||||
split->types[split->num_blocks] = 0;
|
||||
split->lengths[split->num_blocks] = (uint32_t)length;
|
||||
split->num_blocks++;
|
||||
return;
|
||||
}
|
||||
histograms = BROTLI_ALLOC(m, HistogramType, num_histograms);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
/* Find good entropy codes. */
|
||||
FN(InitialEntropyCodes)(data, length,
|
||||
sampling_stride_length,
|
||||
num_histograms, histograms);
|
||||
FN(RefineEntropyCodes)(data, length,
|
||||
sampling_stride_length,
|
||||
num_histograms, histograms);
|
||||
{
|
||||
/* Find a good path through literals with the good entropy codes. */
|
||||
uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length);
|
||||
size_t num_blocks;
|
||||
const size_t bitmaplen = (num_histograms + 7) >> 3;
|
||||
double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms);
|
||||
double* cost = BROTLI_ALLOC(m, double, num_histograms);
|
||||
uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen);
|
||||
uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms);
|
||||
const size_t iters = quality <= 10 ? 3 : 10;
|
||||
size_t i;
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
for (i = 0; i < iters; ++i) {
|
||||
num_blocks = FN(FindBlocks)(data, length,
|
||||
block_switch_cost,
|
||||
num_histograms, histograms,
|
||||
insert_cost, cost, switch_signal,
|
||||
block_ids);
|
||||
num_histograms = FN(RemapBlockIds)(block_ids, length,
|
||||
new_id, num_histograms);
|
||||
FN(BuildBlockHistograms)(data, length, block_ids,
|
||||
num_histograms, histograms);
|
||||
}
|
||||
BROTLI_FREE(m, insert_cost);
|
||||
BROTLI_FREE(m, cost);
|
||||
BROTLI_FREE(m, switch_signal);
|
||||
BROTLI_FREE(m, new_id);
|
||||
BROTLI_FREE(m, histograms);
|
||||
FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BROTLI_FREE(m, block_ids);
|
||||
}
|
||||
}
|
||||
|
||||
#undef HistogramType
|
File diff suppressed because it is too large
Load Diff
@ -16,164 +16,92 @@
|
||||
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
|
||||
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./command.h"
|
||||
#include "./context.h"
|
||||
#include "./entropy_encode.h"
|
||||
#include "./memory.h"
|
||||
#include "./metablock.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* All Store functions here will use a storage_ix, which is always the bit
|
||||
position for the current storage. */
|
||||
|
||||
// Stores a number between 0 and 255.
|
||||
void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage);
|
||||
BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
|
||||
HuffmanTree* tree, size_t *storage_ix, uint8_t *storage);
|
||||
|
||||
// Stores the compressed meta-block header.
|
||||
// REQUIRES: length > 0
|
||||
// REQUIRES: length <= (1 << 24)
|
||||
void StoreCompressedMetaBlockHeader(bool final_block,
|
||||
size_t length,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
// Stores the uncompressed meta-block header.
|
||||
// REQUIRES: length > 0
|
||||
// REQUIRES: length <= (1 << 24)
|
||||
void StoreUncompressedMetaBlockHeader(size_t length,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
// Stores a context map where the histogram type is always the block type.
|
||||
void StoreTrivialContextMap(size_t num_types,
|
||||
size_t context_bits,
|
||||
HuffmanTree* tree,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
void StoreHuffmanTreeOfHuffmanTreeToBitMask(
|
||||
const int num_codes,
|
||||
const uint8_t *code_length_bitdepth,
|
||||
size_t *storage_ix,
|
||||
uint8_t *storage);
|
||||
|
||||
void StoreHuffmanTree(const uint8_t* depths, size_t num, HuffmanTree* tree,
|
||||
size_t *storage_ix, uint8_t *storage);
|
||||
|
||||
// Builds a Huffman tree from histogram[0:length] into depth[0:length] and
|
||||
// bits[0:length] and stores the encoded tree to the bit stream.
|
||||
void BuildAndStoreHuffmanTree(const uint32_t *histogram,
|
||||
const size_t length,
|
||||
HuffmanTree* tree,
|
||||
uint8_t* depth,
|
||||
uint16_t* bits,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
void BuildAndStoreHuffmanTreeFast(const uint32_t *histogram,
|
||||
const size_t histogram_total,
|
||||
const size_t max_bits,
|
||||
uint8_t* depth,
|
||||
uint16_t* bits,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
// Encodes the given context map to the bit stream. The number of different
|
||||
// histogram ids is given by num_clusters.
|
||||
void EncodeContextMap(const std::vector<uint32_t>& context_map,
|
||||
size_t num_clusters,
|
||||
HuffmanTree* tree,
|
||||
size_t* storage_ix, uint8_t* storage);
|
||||
|
||||
// Data structure that stores everything that is needed to encode each block
|
||||
// switch command.
|
||||
struct BlockSplitCode {
|
||||
std::vector<uint32_t> type_code;
|
||||
std::vector<uint32_t> length_prefix;
|
||||
std::vector<uint32_t> length_nextra;
|
||||
std::vector<uint32_t> length_extra;
|
||||
std::vector<uint8_t> type_depths;
|
||||
std::vector<uint16_t> type_bits;
|
||||
uint8_t length_depths[kNumBlockLenPrefixes];
|
||||
uint16_t length_bits[kNumBlockLenPrefixes];
|
||||
};
|
||||
|
||||
// Builds a BlockSplitCode data structure from the block split given by the
|
||||
// vector of block types and block lengths and stores it to the bit stream.
|
||||
void BuildAndStoreBlockSplitCode(const std::vector<uint8_t>& types,
|
||||
const std::vector<uint32_t>& lengths,
|
||||
const size_t num_types,
|
||||
BlockSplitCode* code,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
// Stores the block switch command with index block_ix to the bit stream.
|
||||
void StoreBlockSwitch(const BlockSplitCode& code,
|
||||
const size_t block_ix,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
|
||||
MemoryManager* m, const uint32_t* histogram, const size_t histogram_total,
|
||||
const size_t max_bits, uint8_t* depth, uint16_t* bits, size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
/* REQUIRES: length > 0 */
|
||||
/* REQUIRES: length <= (1 << 24) */
|
||||
void StoreMetaBlock(const uint8_t* input,
|
||||
size_t start_pos,
|
||||
size_t length,
|
||||
size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
bool final_block,
|
||||
uint32_t num_direct_distance_codes,
|
||||
uint32_t distance_postfix_bits,
|
||||
ContextType literal_context_mode,
|
||||
const brotli::Command *commands,
|
||||
size_t n_commands,
|
||||
const MetaBlockSplit& mb,
|
||||
size_t *storage_ix,
|
||||
uint8_t *storage);
|
||||
BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m,
|
||||
const uint8_t* input,
|
||||
size_t start_pos,
|
||||
size_t length,
|
||||
size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
int is_final_block,
|
||||
uint32_t num_direct_distance_codes,
|
||||
uint32_t distance_postfix_bits,
|
||||
ContextType literal_context_mode,
|
||||
const Command* commands,
|
||||
size_t n_commands,
|
||||
const MetaBlockSplit* mb,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
/* Stores the meta-block without doing any block splitting, just collects
|
||||
one histogram per block category and uses that for entropy coding.
|
||||
REQUIRES: length > 0
|
||||
REQUIRES: length <= (1 << 24) */
|
||||
void StoreMetaBlockTrivial(const uint8_t* input,
|
||||
size_t start_pos,
|
||||
size_t length,
|
||||
size_t mask,
|
||||
bool is_last,
|
||||
const brotli::Command *commands,
|
||||
size_t n_commands,
|
||||
size_t *storage_ix,
|
||||
uint8_t *storage);
|
||||
BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m,
|
||||
const uint8_t* input,
|
||||
size_t start_pos,
|
||||
size_t length,
|
||||
size_t mask,
|
||||
int is_last,
|
||||
const Command *commands,
|
||||
size_t n_commands,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
/* Same as above, but uses static prefix codes for histograms with only a few
|
||||
symbols, and uses static code length prefix codes for all other histograms.
|
||||
REQUIRES: length > 0
|
||||
REQUIRES: length <= (1 << 24) */
|
||||
void StoreMetaBlockFast(const uint8_t* input,
|
||||
size_t start_pos,
|
||||
size_t length,
|
||||
size_t mask,
|
||||
bool is_last,
|
||||
const brotli::Command *commands,
|
||||
size_t n_commands,
|
||||
size_t *storage_ix,
|
||||
uint8_t *storage);
|
||||
BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m,
|
||||
const uint8_t* input,
|
||||
size_t start_pos,
|
||||
size_t length,
|
||||
size_t mask,
|
||||
int is_last,
|
||||
const Command *commands,
|
||||
size_t n_commands,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
/* This is for storing uncompressed blocks (simple raw storage of
|
||||
bytes-as-bytes).
|
||||
REQUIRES: length > 0
|
||||
REQUIRES: length <= (1 << 24) */
|
||||
void StoreUncompressedMetaBlock(bool final_block,
|
||||
const uint8_t* input,
|
||||
size_t position, size_t mask,
|
||||
size_t len,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
BROTLI_INTERNAL void BrotliStoreUncompressedMetaBlock(
|
||||
int is_final_block, const uint8_t* input, size_t position, size_t mask,
|
||||
size_t len, size_t* storage_ix, uint8_t* storage);
|
||||
|
||||
/* Stores an empty metadata meta-block and syncs to a byte boundary. */
|
||||
void StoreSyncMetaBlock(size_t* storage_ix, uint8_t* storage);
|
||||
BROTLI_INTERNAL void BrotliStoreSyncMetaBlock(size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_BROTLI_BIT_STREAM_H_ */
|
||||
|
56
enc/cluster.c
Normal file
56
enc/cluster.c
Normal file
@ -0,0 +1,56 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Functions for clustering similar histograms together. */
|
||||
|
||||
#include "./cluster.h"
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./bit_cost.h" /* BrotliPopulationCost */
|
||||
#include "./fast_log.h"
|
||||
#include "./histogram.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static BROTLI_INLINE int HistogramPairIsLess(
|
||||
const HistogramPair* p1, const HistogramPair* p2) {
|
||||
if (p1->cost_diff != p2->cost_diff) {
|
||||
return (p1->cost_diff > p2->cost_diff) ? 1 : 0;
|
||||
}
|
||||
return ((p1->idx2 - p1->idx1) > (p2->idx2 - p2->idx1)) ? 1 : 0;
|
||||
}
|
||||
|
||||
/* Returns entropy reduction of the context map when we combine two clusters. */
|
||||
static BROTLI_INLINE double ClusterCostDiff(size_t size_a, size_t size_b) {
|
||||
size_t size_c = size_a + size_b;
|
||||
return (double)size_a * FastLog2(size_a) +
|
||||
(double)size_b * FastLog2(size_b) -
|
||||
(double)size_c * FastLog2(size_c);
|
||||
}
|
||||
|
||||
#define CODE(X) X
|
||||
|
||||
#define FN(X) X ## Literal
|
||||
#include "./cluster_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Command
|
||||
#include "./cluster_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Distance
|
||||
#include "./cluster_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#undef CODE
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
323
enc/cluster.h
323
enc/cluster.h
@ -9,323 +9,40 @@
|
||||
#ifndef BROTLI_ENC_CLUSTER_H_
|
||||
#define BROTLI_ENC_CLUSTER_H_
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./bit_cost.h"
|
||||
#include "./entropy_encode.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./histogram.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct HistogramPair {
|
||||
typedef struct HistogramPair {
|
||||
uint32_t idx1;
|
||||
uint32_t idx2;
|
||||
double cost_combo;
|
||||
double cost_diff;
|
||||
};
|
||||
} HistogramPair;
|
||||
|
||||
inline bool operator<(const HistogramPair& p1, const HistogramPair& p2) {
|
||||
if (p1.cost_diff != p2.cost_diff) {
|
||||
return p1.cost_diff > p2.cost_diff;
|
||||
}
|
||||
return (p1.idx2 - p1.idx1) > (p2.idx2 - p2.idx1);
|
||||
}
|
||||
#define CODE(X) /* Declaration */;
|
||||
|
||||
// Returns entropy reduction of the context map when we combine two clusters.
|
||||
inline double ClusterCostDiff(size_t size_a, size_t size_b) {
|
||||
size_t size_c = size_a + size_b;
|
||||
return static_cast<double>(size_a) * FastLog2(size_a) +
|
||||
static_cast<double>(size_b) * FastLog2(size_b) -
|
||||
static_cast<double>(size_c) * FastLog2(size_c);
|
||||
}
|
||||
#define FN(X) X ## Literal
|
||||
#include "./cluster_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
// Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
|
||||
// it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue.
|
||||
template<typename HistogramType>
|
||||
void CompareAndPushToQueue(const HistogramType* out,
|
||||
const uint32_t* cluster_size,
|
||||
uint32_t idx1, uint32_t idx2,
|
||||
size_t max_num_pairs,
|
||||
HistogramPair* pairs,
|
||||
size_t* num_pairs) {
|
||||
if (idx1 == idx2) {
|
||||
return;
|
||||
}
|
||||
if (idx2 < idx1) {
|
||||
uint32_t t = idx2;
|
||||
idx2 = idx1;
|
||||
idx1 = t;
|
||||
}
|
||||
bool store_pair = false;
|
||||
HistogramPair p;
|
||||
p.idx1 = idx1;
|
||||
p.idx2 = idx2;
|
||||
p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
|
||||
p.cost_diff -= out[idx1].bit_cost_;
|
||||
p.cost_diff -= out[idx2].bit_cost_;
|
||||
#define FN(X) X ## Command
|
||||
#include "./cluster_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
if (out[idx1].total_count_ == 0) {
|
||||
p.cost_combo = out[idx2].bit_cost_;
|
||||
store_pair = true;
|
||||
} else if (out[idx2].total_count_ == 0) {
|
||||
p.cost_combo = out[idx1].bit_cost_;
|
||||
store_pair = true;
|
||||
} else {
|
||||
double threshold = *num_pairs == 0 ? 1e99 :
|
||||
std::max(0.0, pairs[0].cost_diff);
|
||||
HistogramType combo = out[idx1];
|
||||
combo.AddHistogram(out[idx2]);
|
||||
double cost_combo = PopulationCost(combo);
|
||||
if (cost_combo < threshold - p.cost_diff) {
|
||||
p.cost_combo = cost_combo;
|
||||
store_pair = true;
|
||||
}
|
||||
}
|
||||
if (store_pair) {
|
||||
p.cost_diff += p.cost_combo;
|
||||
if (*num_pairs > 0 && pairs[0] < p) {
|
||||
// Replace the top of the queue if needed.
|
||||
if (*num_pairs < max_num_pairs) {
|
||||
pairs[*num_pairs] = pairs[0];
|
||||
++(*num_pairs);
|
||||
}
|
||||
pairs[0] = p;
|
||||
} else if (*num_pairs < max_num_pairs) {
|
||||
pairs[*num_pairs] = p;
|
||||
++(*num_pairs);
|
||||
}
|
||||
}
|
||||
}
|
||||
#define FN(X) X ## Distance
|
||||
#include "./cluster_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
template<typename HistogramType>
|
||||
size_t HistogramCombine(HistogramType* out,
|
||||
uint32_t* cluster_size,
|
||||
uint32_t* symbols,
|
||||
uint32_t* clusters,
|
||||
HistogramPair* pairs,
|
||||
size_t num_clusters,
|
||||
size_t symbols_size,
|
||||
size_t max_clusters,
|
||||
size_t max_num_pairs) {
|
||||
double cost_diff_threshold = 0.0;
|
||||
size_t min_cluster_size = 1;
|
||||
#undef CODE
|
||||
|
||||
// We maintain a vector of histogram pairs, with the property that the pair
|
||||
// with the maximum bit cost reduction is the first.
|
||||
size_t num_pairs = 0;
|
||||
for (size_t idx1 = 0; idx1 < num_clusters; ++idx1) {
|
||||
for (size_t idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
|
||||
CompareAndPushToQueue(out, cluster_size, clusters[idx1], clusters[idx2],
|
||||
max_num_pairs, &pairs[0], &num_pairs);
|
||||
}
|
||||
}
|
||||
|
||||
while (num_clusters > min_cluster_size) {
|
||||
if (pairs[0].cost_diff >= cost_diff_threshold) {
|
||||
cost_diff_threshold = 1e99;
|
||||
min_cluster_size = max_clusters;
|
||||
continue;
|
||||
}
|
||||
// Take the best pair from the top of heap.
|
||||
uint32_t best_idx1 = pairs[0].idx1;
|
||||
uint32_t best_idx2 = pairs[0].idx2;
|
||||
out[best_idx1].AddHistogram(out[best_idx2]);
|
||||
out[best_idx1].bit_cost_ = pairs[0].cost_combo;
|
||||
cluster_size[best_idx1] += cluster_size[best_idx2];
|
||||
for (size_t i = 0; i < symbols_size; ++i) {
|
||||
if (symbols[i] == best_idx2) {
|
||||
symbols[i] = best_idx1;
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < num_clusters; ++i) {
|
||||
if (clusters[i] == best_idx2) {
|
||||
memmove(&clusters[i], &clusters[i + 1],
|
||||
(num_clusters - i - 1) * sizeof(clusters[0]));
|
||||
break;
|
||||
}
|
||||
}
|
||||
--num_clusters;
|
||||
// Remove pairs intersecting the just combined best pair.
|
||||
size_t copy_to_idx = 0;
|
||||
for (size_t i = 0; i < num_pairs; ++i) {
|
||||
HistogramPair& p = pairs[i];
|
||||
if (p.idx1 == best_idx1 || p.idx2 == best_idx1 ||
|
||||
p.idx1 == best_idx2 || p.idx2 == best_idx2) {
|
||||
// Remove invalid pair from the queue.
|
||||
continue;
|
||||
}
|
||||
if (pairs[0] < p) {
|
||||
// Replace the top of the queue if needed.
|
||||
HistogramPair front = pairs[0];
|
||||
pairs[0] = p;
|
||||
pairs[copy_to_idx] = front;
|
||||
} else {
|
||||
pairs[copy_to_idx] = p;
|
||||
}
|
||||
++copy_to_idx;
|
||||
}
|
||||
num_pairs = copy_to_idx;
|
||||
|
||||
// Push new pairs formed with the combined histogram to the heap.
|
||||
for (size_t i = 0; i < num_clusters; ++i) {
|
||||
CompareAndPushToQueue(out, cluster_size, best_idx1, clusters[i],
|
||||
max_num_pairs, &pairs[0], &num_pairs);
|
||||
}
|
||||
}
|
||||
return num_clusters;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Histogram refinement
|
||||
|
||||
// What is the bit cost of moving histogram from cur_symbol to candidate.
|
||||
template<typename HistogramType>
|
||||
double HistogramBitCostDistance(const HistogramType& histogram,
|
||||
const HistogramType& candidate) {
|
||||
if (histogram.total_count_ == 0) {
|
||||
return 0.0;
|
||||
}
|
||||
HistogramType tmp = histogram;
|
||||
tmp.AddHistogram(candidate);
|
||||
return PopulationCost(tmp) - candidate.bit_cost_;
|
||||
}
|
||||
|
||||
// Find the best 'out' histogram for each of the 'in' histograms.
|
||||
// When called, clusters[0..num_clusters) contains the unique values from
|
||||
// symbols[0..in_size), but this property is not preserved in this function.
|
||||
// Note: we assume that out[]->bit_cost_ is already up-to-date.
|
||||
template<typename HistogramType>
|
||||
void HistogramRemap(const HistogramType* in, size_t in_size,
|
||||
const uint32_t* clusters, size_t num_clusters,
|
||||
HistogramType* out, uint32_t* symbols) {
|
||||
for (size_t i = 0; i < in_size; ++i) {
|
||||
uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
|
||||
double best_bits = HistogramBitCostDistance(in[i], out[best_out]);
|
||||
for (size_t j = 0; j < num_clusters; ++j) {
|
||||
const double cur_bits = HistogramBitCostDistance(in[i], out[clusters[j]]);
|
||||
if (cur_bits < best_bits) {
|
||||
best_bits = cur_bits;
|
||||
best_out = clusters[j];
|
||||
}
|
||||
}
|
||||
symbols[i] = best_out;
|
||||
}
|
||||
|
||||
// Recompute each out based on raw and symbols.
|
||||
for (size_t j = 0; j < num_clusters; ++j) {
|
||||
out[clusters[j]].Clear();
|
||||
}
|
||||
for (size_t i = 0; i < in_size; ++i) {
|
||||
out[symbols[i]].AddHistogram(in[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Reorders elements of the out[0..length) array and changes values in
|
||||
// symbols[0..length) array in the following way:
|
||||
// * when called, symbols[] contains indexes into out[], and has N unique
|
||||
// values (possibly N < length)
|
||||
// * on return, symbols'[i] = f(symbols[i]) and
|
||||
// out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
|
||||
// where f is a bijection between the range of symbols[] and [0..N), and
|
||||
// the first occurrences of values in symbols'[i] come in consecutive
|
||||
// increasing order.
|
||||
// Returns N, the number of unique values in symbols[].
|
||||
template<typename HistogramType>
|
||||
size_t HistogramReindex(HistogramType* out, uint32_t* symbols, size_t length) {
|
||||
static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
|
||||
std::vector<uint32_t> new_index(length, kInvalidIndex);
|
||||
uint32_t next_index = 0;
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
if (new_index[symbols[i]] == kInvalidIndex) {
|
||||
new_index[symbols[i]] = next_index;
|
||||
++next_index;
|
||||
}
|
||||
}
|
||||
std::vector<HistogramType> tmp(next_index);
|
||||
next_index = 0;
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
if (new_index[symbols[i]] == next_index) {
|
||||
tmp[next_index] = out[symbols[i]];
|
||||
++next_index;
|
||||
}
|
||||
symbols[i] = new_index[symbols[i]];
|
||||
}
|
||||
for (size_t i = 0; i < next_index; ++i) {
|
||||
out[i] = tmp[i];
|
||||
}
|
||||
return next_index;
|
||||
}
|
||||
|
||||
// Clusters similar histograms in 'in' together, the selected histograms are
|
||||
// placed in 'out', and for each index in 'in', *histogram_symbols will
|
||||
// indicate which of the 'out' histograms is the best approximation.
|
||||
template<typename HistogramType>
|
||||
void ClusterHistograms(const std::vector<HistogramType>& in,
|
||||
size_t num_contexts, size_t num_blocks,
|
||||
size_t max_histograms,
|
||||
std::vector<HistogramType>* out,
|
||||
std::vector<uint32_t>* histogram_symbols) {
|
||||
const size_t in_size = num_contexts * num_blocks;
|
||||
assert(in_size == in.size());
|
||||
std::vector<uint32_t> cluster_size(in_size, 1);
|
||||
std::vector<uint32_t> clusters(in_size);
|
||||
size_t num_clusters = 0;
|
||||
out->resize(in_size);
|
||||
histogram_symbols->resize(in_size);
|
||||
for (size_t i = 0; i < in_size; ++i) {
|
||||
(*out)[i] = in[i];
|
||||
(*out)[i].bit_cost_ = PopulationCost(in[i]);
|
||||
(*histogram_symbols)[i] = static_cast<uint32_t>(i);
|
||||
}
|
||||
|
||||
const size_t max_input_histograms = 64;
|
||||
// For the first pass of clustering, we allow all pairs.
|
||||
size_t max_num_pairs = max_input_histograms * max_input_histograms / 2;
|
||||
std::vector<HistogramPair> pairs(max_num_pairs + 1);
|
||||
|
||||
for (size_t i = 0; i < in_size; i += max_input_histograms) {
|
||||
size_t num_to_combine = std::min(in_size - i, max_input_histograms);
|
||||
for (size_t j = 0; j < num_to_combine; ++j) {
|
||||
clusters[num_clusters + j] = static_cast<uint32_t>(i + j);
|
||||
}
|
||||
size_t num_new_clusters =
|
||||
HistogramCombine(&(*out)[0], &cluster_size[0],
|
||||
&(*histogram_symbols)[i],
|
||||
&clusters[num_clusters], &pairs[0],
|
||||
num_to_combine, num_to_combine,
|
||||
max_histograms, max_num_pairs);
|
||||
num_clusters += num_new_clusters;
|
||||
}
|
||||
|
||||
// For the second pass, we limit the total number of histogram pairs.
|
||||
// After this limit is reached, we only keep searching for the best pair.
|
||||
max_num_pairs =
|
||||
std::min(64 * num_clusters, (num_clusters / 2) * num_clusters);
|
||||
pairs.resize(max_num_pairs + 1);
|
||||
|
||||
// Collapse similar histograms.
|
||||
num_clusters = HistogramCombine(&(*out)[0], &cluster_size[0],
|
||||
&(*histogram_symbols)[0], &clusters[0],
|
||||
&pairs[0], num_clusters, in_size,
|
||||
max_histograms, max_num_pairs);
|
||||
|
||||
// Find the optimal map from original histograms to the final ones.
|
||||
HistogramRemap(&in[0], in_size, &clusters[0], num_clusters,
|
||||
&(*out)[0], &(*histogram_symbols)[0]);
|
||||
|
||||
// Convert the context map to a canonical form.
|
||||
size_t num_histograms =
|
||||
HistogramReindex(&(*out)[0], &(*histogram_symbols)[0], in_size);
|
||||
out->resize(num_histograms);
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_CLUSTER_H_ */
|
||||
|
315
enc/cluster_inc.h
Normal file
315
enc/cluster_inc.h
Normal file
@ -0,0 +1,315 @@
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: FN, CODE */
|
||||
|
||||
#define HistogramType FN(Histogram)
|
||||
|
||||
/* Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
|
||||
it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. */
|
||||
BROTLI_INTERNAL void FN(BrotliCompareAndPushToQueue)(
|
||||
const HistogramType* out, const uint32_t* cluster_size, uint32_t idx1,
|
||||
uint32_t idx2, size_t max_num_pairs, HistogramPair* pairs,
|
||||
size_t* num_pairs) CODE({
|
||||
int is_good_pair = 0;
|
||||
HistogramPair p;
|
||||
if (idx1 == idx2) {
|
||||
return;
|
||||
}
|
||||
if (idx2 < idx1) {
|
||||
uint32_t t = idx2;
|
||||
idx2 = idx1;
|
||||
idx1 = t;
|
||||
}
|
||||
p.idx1 = idx1;
|
||||
p.idx2 = idx2;
|
||||
p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]);
|
||||
p.cost_diff -= out[idx1].bit_cost_;
|
||||
p.cost_diff -= out[idx2].bit_cost_;
|
||||
|
||||
if (out[idx1].total_count_ == 0) {
|
||||
p.cost_combo = out[idx2].bit_cost_;
|
||||
is_good_pair = 1;
|
||||
} else if (out[idx2].total_count_ == 0) {
|
||||
p.cost_combo = out[idx1].bit_cost_;
|
||||
is_good_pair = 1;
|
||||
} else {
|
||||
double threshold = *num_pairs == 0 ? 1e99 :
|
||||
BROTLI_MAX(double, 0.0, pairs[0].cost_diff);
|
||||
HistogramType combo = out[idx1];
|
||||
double cost_combo;
|
||||
FN(HistogramAddHistogram)(&combo, &out[idx2]);
|
||||
cost_combo = FN(BrotliPopulationCost)(&combo);
|
||||
if (cost_combo < threshold - p.cost_diff) {
|
||||
p.cost_combo = cost_combo;
|
||||
is_good_pair = 1;
|
||||
}
|
||||
}
|
||||
if (is_good_pair) {
|
||||
p.cost_diff += p.cost_combo;
|
||||
if (*num_pairs > 0 && HistogramPairIsLess(&pairs[0], &p)) {
|
||||
/* Replace the top of the queue if needed. */
|
||||
if (*num_pairs < max_num_pairs) {
|
||||
pairs[*num_pairs] = pairs[0];
|
||||
++(*num_pairs);
|
||||
}
|
||||
pairs[0] = p;
|
||||
} else if (*num_pairs < max_num_pairs) {
|
||||
pairs[*num_pairs] = p;
|
||||
++(*num_pairs);
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
/* Greedily merges the histograms listed in clusters[0..num_clusters).

   Each round takes the pair at the front of the queue, adds out[idx2] into
   out[idx1], rewrites every entry of symbols[0..symbols_size) that referred
   to idx2, and removes idx2 from clusters[]. Merging proceeds while the front
   pair has a negative cost_diff; after that it continues unconditionally only
   while more than max_clusters clusters remain.

   pairs[] is scratch space for the queue, limited to max_num_pairs entries.
   Returns the number of clusters left in clusters[].

   NOTE: the body is an argument of the CODE() macro, so it must not contain
   commas outside of parentheses. */
BROTLI_INTERNAL size_t FN(BrotliHistogramCombine)(HistogramType* out,
                                                  uint32_t* cluster_size,
                                                  uint32_t* symbols,
                                                  uint32_t* clusters,
                                                  HistogramPair* pairs,
                                                  size_t num_clusters,
                                                  size_t symbols_size,
                                                  size_t max_clusters,
                                                  size_t max_num_pairs) CODE({
  double cost_diff_threshold = 0.0;
  size_t min_cluster_size = 1;
  size_t num_pairs = 0;
  size_t first;
  size_t second;

  /* We maintain a vector of histogram pairs, with the property that the pair
     with the maximum bit cost reduction is the first. Seed it with every
     unordered pair of the initial clusters. */
  for (first = 0; first < num_clusters; ++first) {
    for (second = first + 1; second < num_clusters; ++second) {
      FN(BrotliCompareAndPushToQueue)(out, cluster_size, clusters[first],
          clusters[second], max_num_pairs, &pairs[0], &num_pairs);
    }
  }

  while (num_clusters > min_cluster_size) {
    uint32_t best_idx1;
    uint32_t best_idx2;
    size_t kept;
    size_t k;

    if (pairs[0].cost_diff >= cost_diff_threshold) {
      /* No remaining merge is below the threshold: from now on accept any
         merge, but only until max_clusters clusters are left. */
      cost_diff_threshold = 1e99;
      min_cluster_size = max_clusters;
      continue;
    }

    /* Take the best pair from the top of heap. */
    best_idx1 = pairs[0].idx1;
    best_idx2 = pairs[0].idx2;
    FN(HistogramAddHistogram)(&out[best_idx1], &out[best_idx2]);
    out[best_idx1].bit_cost_ = pairs[0].cost_combo;
    cluster_size[best_idx1] += cluster_size[best_idx2];

    /* Redirect every symbol of the absorbed cluster to the surviving one. */
    for (k = 0; k < symbols_size; ++k) {
      if (symbols[k] == best_idx2) {
        symbols[k] = best_idx1;
      }
    }

    /* Close the gap left by the absorbed cluster in clusters[]. */
    k = 0;
    while (k < num_clusters && clusters[k] != best_idx2) {
      ++k;
    }
    if (k < num_clusters) {
      memmove(&clusters[k], &clusters[k + 1],
              (num_clusters - k - 1) * sizeof(clusters[0]));
    }
    --num_clusters;

    /* Remove pairs intersecting the just combined best pair, compacting the
       queue in place and keeping the best surviving pair in slot 0. */
    kept = 0;
    for (k = 0; k < num_pairs; ++k) {
      HistogramPair* entry = &pairs[k];
      const int is_stale =
          (entry->idx1 == best_idx1 || entry->idx2 == best_idx1 ||
           entry->idx1 == best_idx2 || entry->idx2 == best_idx2) ? 1 : 0;
      if (is_stale) {
        /* Remove invalid pair from the queue. */
        continue;
      }
      if (HistogramPairIsLess(&pairs[0], entry)) {
        /* Replace the top of the queue if needed. */
        HistogramPair front = pairs[0];
        pairs[0] = *entry;
        pairs[kept] = front;
      } else {
        pairs[kept] = *entry;
      }
      ++kept;
    }
    num_pairs = kept;

    /* Push new pairs formed with the combined histogram to the heap. */
    for (k = 0; k < num_clusters; ++k) {
      FN(BrotliCompareAndPushToQueue)(out, cluster_size, best_idx1,
          clusters[k], max_num_pairs, &pairs[0], &num_pairs);
    }
  }
  return num_clusters;
})
|
||||
|
||||
/* What is the bit cost of moving histogram from cur_symbol to candidate. */
|
||||
BROTLI_INTERNAL double FN(BrotliHistogramBitCostDistance)(
|
||||
const HistogramType* histogram, const HistogramType* candidate) CODE({
|
||||
if (histogram->total_count_ == 0) {
|
||||
return 0.0;
|
||||
} else {
|
||||
HistogramType tmp = *histogram;
|
||||
FN(HistogramAddHistogram)(&tmp, candidate);
|
||||
return FN(BrotliPopulationCost)(&tmp) - candidate->bit_cost_;
|
||||
}
|
||||
})
|
||||
|
||||
/* Find the best 'out' histogram for each of the 'in' histograms.
|
||||
When called, clusters[0..num_clusters) contains the unique values from
|
||||
symbols[0..in_size), but this property is not preserved in this function.
|
||||
Note: we assume that out[]->bit_cost_ is already up-to-date. */
|
||||
BROTLI_INTERNAL void FN(BrotliHistogramRemap)(const HistogramType* in,
|
||||
size_t in_size, const uint32_t* clusters, size_t num_clusters,
|
||||
HistogramType* out, uint32_t* symbols) CODE({
|
||||
size_t i;
|
||||
for (i = 0; i < in_size; ++i) {
|
||||
uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1];
|
||||
double best_bits =
|
||||
FN(BrotliHistogramBitCostDistance)(&in[i], &out[best_out]);
|
||||
size_t j;
|
||||
for (j = 0; j < num_clusters; ++j) {
|
||||
const double cur_bits =
|
||||
FN(BrotliHistogramBitCostDistance)(&in[i], &out[clusters[j]]);
|
||||
if (cur_bits < best_bits) {
|
||||
best_bits = cur_bits;
|
||||
best_out = clusters[j];
|
||||
}
|
||||
}
|
||||
symbols[i] = best_out;
|
||||
}
|
||||
|
||||
/* Recompute each out based on raw and symbols. */
|
||||
for (i = 0; i < num_clusters; ++i) {
|
||||
FN(HistogramClear)(&out[clusters[i]]);
|
||||
}
|
||||
for (i = 0; i < in_size; ++i) {
|
||||
FN(HistogramAddHistogram)(&out[symbols[i]], &in[i]);
|
||||
}
|
||||
})
|
||||
|
||||
/* Reorders elements of the out[0..length) array and changes values in
|
||||
symbols[0..length) array in the following way:
|
||||
* when called, symbols[] contains indexes into out[], and has N unique
|
||||
values (possibly N < length)
|
||||
* on return, symbols'[i] = f(symbols[i]) and
|
||||
out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length,
|
||||
where f is a bijection between the range of symbols[] and [0..N), and
|
||||
the first occurrences of values in symbols'[i] come in consecutive
|
||||
increasing order.
|
||||
Returns N, the number of unique values in symbols[]. */
|
||||
BROTLI_INTERNAL size_t FN(BrotliHistogramReindex)(MemoryManager* m,
|
||||
HistogramType* out, uint32_t* symbols, size_t length) CODE({
|
||||
static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX;
|
||||
uint32_t* new_index = BROTLI_ALLOC(m, uint32_t, length);
|
||||
uint32_t next_index;
|
||||
HistogramType* tmp;
|
||||
size_t i;
|
||||
if (BROTLI_IS_OOM(m)) return 0;
|
||||
for (i = 0; i < length; ++i) {
|
||||
new_index[i] = kInvalidIndex;
|
||||
}
|
||||
next_index = 0;
|
||||
for (i = 0; i < length; ++i) {
|
||||
if (new_index[symbols[i]] == kInvalidIndex) {
|
||||
new_index[symbols[i]] = next_index;
|
||||
++next_index;
|
||||
}
|
||||
}
|
||||
/* TODO: by using idea of "cycle-sort" we can avoid allocation of
|
||||
tmp and reduce the number of copying by the factor of 2. */
|
||||
tmp = BROTLI_ALLOC(m, HistogramType, next_index);
|
||||
if (BROTLI_IS_OOM(m)) return 0;
|
||||
next_index = 0;
|
||||
for (i = 0; i < length; ++i) {
|
||||
if (new_index[symbols[i]] == next_index) {
|
||||
tmp[next_index] = out[symbols[i]];
|
||||
++next_index;
|
||||
}
|
||||
symbols[i] = new_index[symbols[i]];
|
||||
}
|
||||
BROTLI_FREE(m, new_index);
|
||||
for (i = 0; i < next_index; ++i) {
|
||||
out[i] = tmp[i];
|
||||
}
|
||||
BROTLI_FREE(m, tmp);
|
||||
return next_index;
|
||||
})
|
||||
|
||||
BROTLI_INTERNAL void FN(BrotliClusterHistograms)(
|
||||
MemoryManager* m, const HistogramType* in, const size_t in_size,
|
||||
size_t max_histograms, HistogramType* out, size_t* out_size,
|
||||
uint32_t* histogram_symbols) CODE({
|
||||
uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, in_size);
|
||||
uint32_t* clusters = BROTLI_ALLOC(m, uint32_t, in_size);
|
||||
size_t num_clusters = 0;
|
||||
const size_t max_input_histograms = 64;
|
||||
size_t pairs_capacity = max_input_histograms * max_input_histograms / 2;
|
||||
/* For the first pass of clustering, we allow all pairs. */
|
||||
HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity + 1);
|
||||
size_t i;
|
||||
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
for (i = 0; i < in_size; ++i) {
|
||||
cluster_size[i] = 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < in_size; ++i) {
|
||||
out[i] = in[i];
|
||||
out[i].bit_cost_ = FN(BrotliPopulationCost)(&in[i]);
|
||||
histogram_symbols[i] = (uint32_t)i;
|
||||
}
|
||||
|
||||
for (i = 0; i < in_size; i += max_input_histograms) {
|
||||
size_t num_to_combine =
|
||||
BROTLI_MIN(size_t, in_size - i, max_input_histograms);
|
||||
size_t num_new_clusters;
|
||||
size_t j;
|
||||
for (j = 0; j < num_to_combine; ++j) {
|
||||
clusters[num_clusters + j] = (uint32_t)(i + j);
|
||||
}
|
||||
num_new_clusters =
|
||||
FN(BrotliHistogramCombine)(out, cluster_size,
|
||||
&histogram_symbols[i],
|
||||
&clusters[num_clusters], pairs,
|
||||
num_to_combine, num_to_combine,
|
||||
max_histograms, pairs_capacity);
|
||||
num_clusters += num_new_clusters;
|
||||
}
|
||||
|
||||
{
|
||||
/* For the second pass, we limit the total number of histogram pairs.
|
||||
After this limit is reached, we only keep searching for the best pair. */
|
||||
size_t max_num_pairs = BROTLI_MIN(size_t,
|
||||
64 * num_clusters, (num_clusters / 2) * num_clusters);
|
||||
BROTLI_ENSURE_CAPACITY(
|
||||
m, HistogramPair, pairs, pairs_capacity, max_num_pairs + 1);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
/* Collapse similar histograms. */
|
||||
num_clusters = FN(BrotliHistogramCombine)(out, cluster_size,
|
||||
histogram_symbols, clusters,
|
||||
pairs, num_clusters, in_size,
|
||||
max_histograms, max_num_pairs);
|
||||
}
|
||||
BROTLI_FREE(m, pairs);
|
||||
BROTLI_FREE(m, cluster_size);
|
||||
/* Find the optimal map from original histograms to the final ones. */
|
||||
FN(BrotliHistogramRemap)(in, in_size, clusters, num_clusters,
|
||||
out, histogram_symbols);
|
||||
BROTLI_FREE(m, clusters);
|
||||
/* Convert the context map to a canonical form. */
|
||||
*out_size = FN(BrotliHistogramReindex)(m, out, histogram_symbols, in_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
})
|
||||
|
||||
#undef HistogramType
|
152
enc/command.h
152
enc/command.h
@ -10,10 +10,13 @@
|
||||
#define BROTLI_ENC_COMMAND_H_
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "../common/port.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./prefix.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static uint32_t kInsBase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50,
|
||||
66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
|
||||
@ -24,15 +27,14 @@ static uint32_t kCopyBase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30,
|
||||
static uint32_t kCopyExtra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3,
|
||||
4, 4, 5, 5, 6, 7, 8, 9, 10, 24 };
|
||||
|
||||
static inline uint16_t GetInsertLengthCode(size_t insertlen) {
|
||||
static BROTLI_INLINE uint16_t GetInsertLengthCode(size_t insertlen) {
|
||||
if (insertlen < 6) {
|
||||
return static_cast<uint16_t>(insertlen);
|
||||
return (uint16_t)insertlen;
|
||||
} else if (insertlen < 130) {
|
||||
insertlen -= 2;
|
||||
uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
|
||||
return static_cast<uint16_t>((nbits << 1) + (insertlen >> nbits) + 2);
|
||||
uint32_t nbits = Log2FloorNonZero(insertlen - 2) - 1u;
|
||||
return (uint16_t)((nbits << 1) + ((insertlen - 2) >> nbits) + 2);
|
||||
} else if (insertlen < 2114) {
|
||||
return static_cast<uint16_t>(Log2FloorNonZero(insertlen - 66) + 10);
|
||||
return (uint16_t)(Log2FloorNonZero(insertlen - 66) + 10);
|
||||
} else if (insertlen < 6210) {
|
||||
return 21u;
|
||||
} else if (insertlen < 22594) {
|
||||
@ -42,24 +44,23 @@ static inline uint16_t GetInsertLengthCode(size_t insertlen) {
|
||||
}
|
||||
}
|
||||
|
||||
static inline uint16_t GetCopyLengthCode(size_t copylen) {
|
||||
static BROTLI_INLINE uint16_t GetCopyLengthCode(size_t copylen) {
|
||||
if (copylen < 10) {
|
||||
return static_cast<uint16_t>(copylen - 2);
|
||||
return (uint16_t)(copylen - 2);
|
||||
} else if (copylen < 134) {
|
||||
copylen -= 6;
|
||||
uint32_t nbits = Log2FloorNonZero(copylen) - 1u;
|
||||
return static_cast<uint16_t>((nbits << 1) + (copylen >> nbits) + 4);
|
||||
uint32_t nbits = Log2FloorNonZero(copylen - 6) - 1u;
|
||||
return (uint16_t)((nbits << 1) + ((copylen - 6) >> nbits) + 4);
|
||||
} else if (copylen < 2118) {
|
||||
return static_cast<uint16_t>(Log2FloorNonZero(copylen - 70) + 12);
|
||||
return (uint16_t)(Log2FloorNonZero(copylen - 70) + 12);
|
||||
} else {
|
||||
return 23u;
|
||||
}
|
||||
}
|
||||
|
||||
static inline uint16_t CombineLengthCodes(
|
||||
uint16_t inscode, uint16_t copycode, bool use_last_distance) {
|
||||
static BROTLI_INLINE uint16_t CombineLengthCodes(
|
||||
uint16_t inscode, uint16_t copycode, int use_last_distance) {
|
||||
uint16_t bits64 =
|
||||
static_cast<uint16_t>((copycode & 0x7u) | ((inscode & 0x7u) << 3));
|
||||
(uint16_t)((copycode & 0x7u) | ((inscode & 0x7u) << 3));
|
||||
if (use_last_distance && inscode < 8 && copycode < 16) {
|
||||
return (copycode < 8) ? bits64 : (bits64 | 64);
|
||||
} else {
|
||||
@ -71,86 +72,91 @@ static inline uint16_t CombineLengthCodes(
|
||||
}
|
||||
}
|
||||
|
||||
static inline void GetLengthCode(size_t insertlen, size_t copylen,
|
||||
bool use_last_distance,
|
||||
uint16_t* code) {
|
||||
static BROTLI_INLINE void GetLengthCode(size_t insertlen, size_t copylen,
|
||||
int use_last_distance,
|
||||
uint16_t* code) {
|
||||
uint16_t inscode = GetInsertLengthCode(insertlen);
|
||||
uint16_t copycode = GetCopyLengthCode(copylen);
|
||||
*code = CombineLengthCodes(inscode, copycode, use_last_distance);
|
||||
}
|
||||
|
||||
static inline uint32_t GetInsertBase(uint16_t inscode) {
|
||||
static BROTLI_INLINE uint32_t GetInsertBase(uint16_t inscode) {
|
||||
return kInsBase[inscode];
|
||||
}
|
||||
|
||||
static inline uint32_t GetInsertExtra(uint16_t inscode) {
|
||||
static BROTLI_INLINE uint32_t GetInsertExtra(uint16_t inscode) {
|
||||
return kInsExtra[inscode];
|
||||
}
|
||||
|
||||
static inline uint32_t GetCopyBase(uint16_t copycode) {
|
||||
static BROTLI_INLINE uint32_t GetCopyBase(uint16_t copycode) {
|
||||
return kCopyBase[copycode];
|
||||
}
|
||||
|
||||
static inline uint32_t GetCopyExtra(uint16_t copycode) {
|
||||
static BROTLI_INLINE uint32_t GetCopyExtra(uint16_t copycode) {
|
||||
return kCopyExtra[copycode];
|
||||
}
|
||||
|
||||
struct Command {
|
||||
// distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1.
|
||||
Command(size_t insertlen, size_t copylen, size_t copylen_code,
|
||||
size_t distance_code)
|
||||
: insert_len_(static_cast<uint32_t>(insertlen)) {
|
||||
copy_len_ = static_cast<uint32_t>(
|
||||
copylen | ((copylen_code ^ copylen) << 24));
|
||||
// The distance prefix and extra bits are stored in this Command as if
|
||||
// npostfix and ndirect were 0, they are only recomputed later after the
|
||||
// clustering if needed.
|
||||
PrefixEncodeCopyDistance(distance_code, 0, 0, &dist_prefix_, &dist_extra_);
|
||||
GetLengthCode(insertlen, copylen_code, dist_prefix_ == 0,
|
||||
&cmd_prefix_);
|
||||
}
|
||||
|
||||
explicit Command(size_t insertlen)
|
||||
: insert_len_(static_cast<uint32_t>(insertlen))
|
||||
, copy_len_(4 << 24), dist_extra_(0), dist_prefix_(16) {
|
||||
GetLengthCode(insertlen, 4, dist_prefix_ == 0, &cmd_prefix_);
|
||||
}
|
||||
|
||||
uint32_t DistanceCode(void) const {
|
||||
if (dist_prefix_ < 16) {
|
||||
return dist_prefix_;
|
||||
}
|
||||
uint32_t nbits = dist_extra_ >> 24;
|
||||
uint32_t extra = dist_extra_ & 0xffffff;
|
||||
uint32_t prefix = dist_prefix_ - 12 - 2 * nbits;
|
||||
return (prefix << nbits) + extra + 12;
|
||||
}
|
||||
|
||||
uint32_t DistanceContext(void) const {
|
||||
uint32_t r = cmd_prefix_ >> 6;
|
||||
uint32_t c = cmd_prefix_ & 7;
|
||||
if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) {
|
||||
return c;
|
||||
}
|
||||
return 3;
|
||||
}
|
||||
|
||||
inline uint32_t copy_len(void) const {
|
||||
return copy_len_ & 0xFFFFFF;
|
||||
}
|
||||
|
||||
inline uint32_t copy_len_code(void) const {
|
||||
return (copy_len_ & 0xFFFFFF) ^ (copy_len_ >> 24);
|
||||
}
|
||||
|
||||
typedef struct Command {
|
||||
uint32_t insert_len_;
|
||||
/* Stores copy_len in low 24 bits and copy_len XOR copy_code in high 8 bits. */
|
||||
uint32_t copy_len_;
|
||||
uint32_t dist_extra_;
|
||||
uint16_t cmd_prefix_;
|
||||
uint16_t dist_prefix_;
|
||||
};
|
||||
} Command;
|
||||
|
||||
} // namespace brotli
|
||||
/* distance_code is e.g. 0 for same-as-last short code, or 16 for offset 1. */
|
||||
static BROTLI_INLINE void InitCommand(Command* self, size_t insertlen,
|
||||
size_t copylen, size_t copylen_code, size_t distance_code) {
|
||||
self->insert_len_ = (uint32_t)insertlen;
|
||||
self->copy_len_ = (uint32_t)(copylen | ((copylen_code ^ copylen) << 24));
|
||||
/* The distance prefix and extra bits are stored in this Command as if
|
||||
npostfix and ndirect were 0, they are only recomputed later after the
|
||||
clustering if needed. */
|
||||
PrefixEncodeCopyDistance(
|
||||
distance_code, 0, 0, &self->dist_prefix_, &self->dist_extra_);
|
||||
GetLengthCode(
|
||||
insertlen, copylen_code, self->dist_prefix_ == 0, &self->cmd_prefix_);
|
||||
}
|
||||
|
||||
/* Fills in *self for a command described only by its insert length: the copy
   fields get fixed placeholder values (copy_len_ = 4 << 24, dist_prefix_ = 16,
   dist_extra_ = 0) and the length code is computed for copy length 4 without
   the last-distance flag. */
static BROTLI_INLINE void InitInsertCommand(Command* self, size_t insertlen) {
  self->dist_prefix_ = 16;
  self->dist_extra_ = 0;
  self->copy_len_ = 4 << 24;
  self->insert_len_ = (uint32_t)insertlen;
  GetLengthCode(insertlen, 4, 0, &self->cmd_prefix_);
}
|
||||
|
||||
/* Recovers the distance code from dist_prefix_ and dist_extra_.
   Prefixes below 16 are returned unchanged; otherwise the code is rebuilt
   from the bit count kept in the top byte of dist_extra_ and the value kept
   in its low 24 bits. */
static BROTLI_INLINE uint32_t CommandDistanceCode(const Command* self) {
  uint32_t num_extra_bits;
  uint32_t extra_value;
  uint32_t base;
  if (self->dist_prefix_ < 16) {
    return self->dist_prefix_;
  }
  num_extra_bits = self->dist_extra_ >> 24;
  extra_value = self->dist_extra_ & 0xffffff;
  base = self->dist_prefix_ - 12u - 2u * num_extra_bits;
  return (base << num_extra_bits) + extra_value + 12;
}
|
||||
|
||||
/* Derives the distance context (0..3) from cmd_prefix_: when the low three
   bits are at most 2 and cmd_prefix_ >> 6 is one of 0, 2, 4 or 7, the context
   is those low bits; every other command prefix yields context 3. */
static BROTLI_INLINE uint32_t CommandDistanceContext(const Command* self) {
  const uint32_t low_bits = self->cmd_prefix_ & 7;
  if (low_bits > 2) {
    return 3;
  }
  switch (self->cmd_prefix_ >> 6) {
    case 0:
    case 2:
    case 4:
    case 7:
      return low_bits;
    default:
      return 3;
  }
}
|
||||
|
||||
/* Returns the copy length, i.e. the low 24 bits of copy_len_. */
static BROTLI_INLINE uint32_t CommandCopyLen(const Command* self) {
  const uint32_t low24_mask = 0xFFFFFF;
  return self->copy_len_ & low24_mask;
}
|
||||
|
||||
/* Returns the copy length code: the low 24 bits of copy_len_ XOR-ed with the
   delta kept in its high 8 bits. */
static BROTLI_INLINE uint32_t CommandCopyLenCode(const Command* self) {
  const uint32_t length = self->copy_len_ & 0xFFFFFF;
  const uint32_t delta = self->copy_len_ >> 24;
  return length ^ delta;
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_COMMAND_H_ */
|
||||
|
@ -14,18 +14,21 @@
|
||||
|
||||
#include "./compress_fragment.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <string.h> /* memcmp, memcpy, memset */
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./brotli_bit_stream.h"
|
||||
#include "./entropy_encode.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./find_match_length.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
#include "./write_bits.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* kHashMul32 multiplier has these properties:
|
||||
* The multiplier must be odd. Otherwise we may lose the highest bit.
|
||||
@ -35,19 +38,22 @@ namespace brotli {
|
||||
* The number has been tuned heuristically against compression benchmarks. */
|
||||
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
||||
|
||||
static inline uint32_t Hash(const uint8_t* p, size_t shift) {
|
||||
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
|
||||
const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 24) * kHashMul32;
|
||||
return static_cast<uint32_t>(h >> shift);
|
||||
return (uint32_t)(h >> shift);
|
||||
}
|
||||
|
||||
static inline uint32_t HashBytesAtOffset(uint64_t v, int offset, size_t shift) {
|
||||
static BROTLI_INLINE uint32_t HashBytesAtOffset(
|
||||
uint64_t v, int offset, size_t shift) {
|
||||
assert(offset >= 0);
|
||||
assert(offset <= 3);
|
||||
const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
|
||||
return static_cast<uint32_t>(h >> shift);
|
||||
{
|
||||
const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
|
||||
return (uint32_t)(h >> shift);
|
||||
}
|
||||
}
|
||||
|
||||
static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
|
||||
static BROTLI_INLINE int IsMatch(const uint8_t* p1, const uint8_t* p2) {
|
||||
return (BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
|
||||
p1[4] == p2[4]);
|
||||
}
|
||||
@ -57,281 +63,295 @@ static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
|
||||
Note that the prefix code here is built from the pre-LZ77 input, therefore
|
||||
we can only approximate the statistics of the actual literal stream.
|
||||
Moreover, for long inputs we build a histogram from a sample of the input
|
||||
and thus have to assign a non-zero depth for each literal. */
|
||||
static void BuildAndStoreLiteralPrefixCode(const uint8_t* input,
|
||||
const size_t input_size,
|
||||
uint8_t depths[256],
|
||||
uint16_t bits[256],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
and thus have to assign a non-zero depth for each literal.
|
||||
Returns estimated compression ratio millibytes/char for encoding given input
|
||||
with generated code. */
|
||||
static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
|
||||
const uint8_t* input,
|
||||
const size_t input_size,
|
||||
uint8_t depths[256],
|
||||
uint16_t bits[256],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
uint32_t histogram[256] = { 0 };
|
||||
size_t histogram_total;
|
||||
size_t i;
|
||||
if (input_size < (1 << 15)) {
|
||||
for (size_t i = 0; i < input_size; ++i) {
|
||||
for (i = 0; i < input_size; ++i) {
|
||||
++histogram[input[i]];
|
||||
}
|
||||
histogram_total = input_size;
|
||||
for (size_t i = 0; i < 256; ++i) {
|
||||
for (i = 0; i < 256; ++i) {
|
||||
/* We weigh the first 11 samples with weight 3 to account for the
|
||||
balancing effect of the LZ77 phase on the histogram. */
|
||||
const uint32_t adjust = 2 * std::min(histogram[i], 11u);
|
||||
const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
|
||||
histogram[i] += adjust;
|
||||
histogram_total += adjust;
|
||||
}
|
||||
} else {
|
||||
static const size_t kSampleRate = 29;
|
||||
for (size_t i = 0; i < input_size; i += kSampleRate) {
|
||||
for (i = 0; i < input_size; i += kSampleRate) {
|
||||
++histogram[input[i]];
|
||||
}
|
||||
histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
|
||||
for (size_t i = 0; i < 256; ++i) {
|
||||
for (i = 0; i < 256; ++i) {
|
||||
/* We add 1 to each population count to avoid 0 bit depths (since this is
|
||||
only a sample and we don't know if the symbol appears or not), and we
|
||||
weigh the first 11 samples with weight 3 to account for the balancing
|
||||
effect of the LZ77 phase on the histogram (more frequent symbols are
|
||||
more likely to be in backward references instead as literals). */
|
||||
const uint32_t adjust = 1 + 2 * std::min(histogram[i], 11u);
|
||||
const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
|
||||
histogram[i] += adjust;
|
||||
histogram_total += adjust;
|
||||
}
|
||||
}
|
||||
BuildAndStoreHuffmanTreeFast(histogram, histogram_total,
|
||||
/* max_bits = */ 8,
|
||||
depths, bits, storage_ix, storage);
|
||||
BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
|
||||
/* max_bits = */ 8,
|
||||
depths, bits, storage_ix, storage);
|
||||
if (BROTLI_IS_OOM(m)) return 0;
|
||||
{
|
||||
size_t literal_ratio = 0;
|
||||
for (i = 0; i < 256; ++i) {
|
||||
if (histogram[i]) literal_ratio += histogram[i] * depths[i];
|
||||
}
|
||||
/* Estimated encoding ratio, millibytes per symbol. */
|
||||
return (literal_ratio * 125) / histogram_total;
|
||||
}
|
||||
}
|
||||
|
||||
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
|
||||
"bits" based on "histogram" and stores it into the bit stream. */
|
||||
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
|
||||
uint8_t depth[128],
|
||||
uint16_t bits[128],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
|
||||
static const size_t kTreeSize = 129;
|
||||
HuffmanTree tree[kTreeSize];
|
||||
CreateHuffmanTree(histogram, 64, 15, tree, depth);
|
||||
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
|
||||
HuffmanTree tree[129];
|
||||
uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
|
||||
uint16_t cmd_bits[64];
|
||||
|
||||
BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
|
||||
BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
|
||||
/* We have to jump through a few hoops here in order to compute
|
||||
the command bits because the symbols are in a different order than in
|
||||
the full alphabet. This looks complicated, but having the symbols
|
||||
in this order in the command bits saves a few branches in the Emit*
|
||||
functions. */
|
||||
uint8_t cmd_depth[64];
|
||||
uint16_t cmd_bits[64];
|
||||
memcpy(cmd_depth, depth, 24);
|
||||
memcpy(cmd_depth + 24, depth + 40, 8);
|
||||
memcpy(cmd_depth + 32, depth + 24, 8);
|
||||
memcpy(cmd_depth + 40, depth + 48, 8);
|
||||
memcpy(cmd_depth + 48, depth + 32, 8);
|
||||
memcpy(cmd_depth + 56, depth + 56, 8);
|
||||
ConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
|
||||
BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
|
||||
memcpy(bits, cmd_bits, 48);
|
||||
memcpy(bits + 24, cmd_bits + 32, 16);
|
||||
memcpy(bits + 32, cmd_bits + 48, 16);
|
||||
memcpy(bits + 40, cmd_bits + 24, 16);
|
||||
memcpy(bits + 48, cmd_bits + 40, 16);
|
||||
memcpy(bits + 56, cmd_bits + 56, 16);
|
||||
ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
|
||||
BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
|
||||
{
|
||||
/* Create the bit length array for the full command alphabet. */
|
||||
uint8_t cmd_depth[704] = { 0 };
|
||||
size_t i;
|
||||
memset(cmd_depth, 0, 64); /* only 64 first values were used */
|
||||
memcpy(cmd_depth, depth, 8);
|
||||
memcpy(cmd_depth + 64, depth + 8, 8);
|
||||
memcpy(cmd_depth + 128, depth + 16, 8);
|
||||
memcpy(cmd_depth + 192, depth + 24, 8);
|
||||
memcpy(cmd_depth + 384, depth + 32, 8);
|
||||
for (size_t i = 0; i < 8; ++i) {
|
||||
for (i = 0; i < 8; ++i) {
|
||||
cmd_depth[128 + 8 * i] = depth[40 + i];
|
||||
cmd_depth[256 + 8 * i] = depth[48 + i];
|
||||
cmd_depth[448 + 8 * i] = depth[56 + i];
|
||||
}
|
||||
StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
|
||||
BrotliStoreHuffmanTree(
|
||||
cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
|
||||
}
|
||||
StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
|
||||
BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
|
||||
}
|
||||
|
||||
/* REQUIRES: insertlen < 6210 */
|
||||
inline void EmitInsertLen(size_t insertlen,
|
||||
const uint8_t depth[128],
|
||||
const uint16_t bits[128],
|
||||
uint32_t histo[128],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
|
||||
const uint8_t depth[128],
|
||||
const uint16_t bits[128],
|
||||
uint32_t histo[128],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
if (insertlen < 6) {
|
||||
const size_t code = insertlen + 40;
|
||||
WriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
++histo[code];
|
||||
} else if (insertlen < 130) {
|
||||
insertlen -= 2;
|
||||
const uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
|
||||
const size_t prefix = insertlen >> nbits;
|
||||
const size_t tail = insertlen - 2;
|
||||
const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
|
||||
const size_t prefix = tail >> nbits;
|
||||
const size_t inscode = (nbits << 1) + prefix + 42;
|
||||
WriteBits(depth[inscode], bits[inscode], storage_ix, storage);
|
||||
WriteBits(nbits, insertlen - (prefix << nbits), storage_ix, storage);
|
||||
BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
|
||||
BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
|
||||
++histo[inscode];
|
||||
} else if (insertlen < 2114) {
|
||||
insertlen -= 66;
|
||||
const uint32_t nbits = Log2FloorNonZero(insertlen);
|
||||
const size_t tail = insertlen - 66;
|
||||
const uint32_t nbits = Log2FloorNonZero(tail);
|
||||
const size_t code = nbits + 50;
|
||||
WriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
WriteBits(nbits, insertlen - (1 << nbits), storage_ix, storage);
|
||||
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
BrotliWriteBits(nbits, tail - (1u << nbits), storage_ix, storage);
|
||||
++histo[code];
|
||||
} else {
|
||||
WriteBits(depth[61], bits[61], storage_ix, storage);
|
||||
WriteBits(12, insertlen - 2114, storage_ix, storage);
|
||||
BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
|
||||
BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
|
||||
++histo[21];
|
||||
}
|
||||
}
|
||||
|
||||
inline void EmitLongInsertLen(size_t insertlen,
|
||||
const uint8_t depth[128],
|
||||
const uint16_t bits[128],
|
||||
uint32_t histo[128],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
|
||||
const uint8_t depth[128],
|
||||
const uint16_t bits[128],
|
||||
uint32_t histo[128],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
if (insertlen < 22594) {
|
||||
WriteBits(depth[62], bits[62], storage_ix, storage);
|
||||
WriteBits(14, insertlen - 6210, storage_ix, storage);
|
||||
BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
|
||||
BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
|
||||
++histo[22];
|
||||
} else {
|
||||
WriteBits(depth[63], bits[63], storage_ix, storage);
|
||||
WriteBits(24, insertlen - 22594, storage_ix, storage);
|
||||
BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
|
||||
BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
|
||||
++histo[23];
|
||||
}
|
||||
}
|
||||
|
||||
inline void EmitCopyLen(size_t copylen,
|
||||
const uint8_t depth[128],
|
||||
const uint16_t bits[128],
|
||||
uint32_t histo[128],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
static BROTLI_INLINE void EmitCopyLen(size_t copylen,
|
||||
const uint8_t depth[128],
|
||||
const uint16_t bits[128],
|
||||
uint32_t histo[128],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
if (copylen < 10) {
|
||||
WriteBits(depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
|
||||
BrotliWriteBits(
|
||||
depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
|
||||
++histo[copylen + 14];
|
||||
} else if (copylen < 134) {
|
||||
copylen -= 6;
|
||||
const uint32_t nbits = Log2FloorNonZero(copylen) - 1u;
|
||||
const size_t prefix = copylen >> nbits;
|
||||
const size_t tail = copylen - 6;
|
||||
const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
|
||||
const size_t prefix = tail >> nbits;
|
||||
const size_t code = (nbits << 1) + prefix + 20;
|
||||
WriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
WriteBits(nbits, copylen - (prefix << nbits), storage_ix, storage);
|
||||
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
|
||||
++histo[code];
|
||||
} else if (copylen < 2118) {
|
||||
copylen -= 70;
|
||||
const uint32_t nbits = Log2FloorNonZero(copylen);
|
||||
const size_t tail = copylen - 70;
|
||||
const uint32_t nbits = Log2FloorNonZero(tail);
|
||||
const size_t code = nbits + 28;
|
||||
WriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
WriteBits(nbits, copylen - (1 << nbits), storage_ix, storage);
|
||||
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
BrotliWriteBits(nbits, tail - (1u << nbits), storage_ix, storage);
|
||||
++histo[code];
|
||||
} else {
|
||||
WriteBits(depth[39], bits[39], storage_ix, storage);
|
||||
WriteBits(24, copylen - 2118, storage_ix, storage);
|
||||
BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
|
||||
BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
|
||||
++histo[47];
|
||||
}
|
||||
}
|
||||
|
||||
inline void EmitCopyLenLastDistance(size_t copylen,
|
||||
const uint8_t depth[128],
|
||||
const uint16_t bits[128],
|
||||
uint32_t histo[128],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
|
||||
const uint8_t depth[128],
|
||||
const uint16_t bits[128],
|
||||
uint32_t histo[128],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
if (copylen < 12) {
|
||||
WriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
|
||||
BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
|
||||
++histo[copylen - 4];
|
||||
} else if (copylen < 72) {
|
||||
copylen -= 8;
|
||||
const uint32_t nbits = Log2FloorNonZero(copylen) - 1;
|
||||
const size_t prefix = copylen >> nbits;
|
||||
const size_t tail = copylen - 8;
|
||||
const uint32_t nbits = Log2FloorNonZero(tail) - 1;
|
||||
const size_t prefix = tail >> nbits;
|
||||
const size_t code = (nbits << 1) + prefix + 4;
|
||||
WriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
WriteBits(nbits, copylen - (prefix << nbits), storage_ix, storage);
|
||||
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
|
||||
++histo[code];
|
||||
} else if (copylen < 136) {
|
||||
copylen -= 8;
|
||||
const size_t code = (copylen >> 5) + 30;
|
||||
WriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
WriteBits(5, copylen & 31, storage_ix, storage);
|
||||
WriteBits(depth[64], bits[64], storage_ix, storage);
|
||||
const size_t tail = copylen - 8;
|
||||
const size_t code = (tail >> 5) + 30;
|
||||
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
BrotliWriteBits(5, tail & 31, storage_ix, storage);
|
||||
BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
|
||||
++histo[code];
|
||||
++histo[64];
|
||||
} else if (copylen < 2120) {
|
||||
copylen -= 72;
|
||||
const uint32_t nbits = Log2FloorNonZero(copylen);
|
||||
const size_t tail = copylen - 72;
|
||||
const uint32_t nbits = Log2FloorNonZero(tail);
|
||||
const size_t code = nbits + 28;
|
||||
WriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
WriteBits(nbits, copylen - (1 << nbits), storage_ix, storage);
|
||||
WriteBits(depth[64], bits[64], storage_ix, storage);
|
||||
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
BrotliWriteBits(nbits, tail - (1u << nbits), storage_ix, storage);
|
||||
BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
|
||||
++histo[code];
|
||||
++histo[64];
|
||||
} else {
|
||||
WriteBits(depth[39], bits[39], storage_ix, storage);
|
||||
WriteBits(24, copylen - 2120, storage_ix, storage);
|
||||
WriteBits(depth[64], bits[64], storage_ix, storage);
|
||||
BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
|
||||
BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
|
||||
BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
|
||||
++histo[47];
|
||||
++histo[64];
|
||||
}
|
||||
}
|
||||
|
||||
inline void EmitDistance(size_t distance,
|
||||
const uint8_t depth[128],
|
||||
const uint16_t bits[128],
|
||||
uint32_t histo[128],
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
distance += 3;
|
||||
const uint32_t nbits = Log2FloorNonZero(distance) - 1u;
|
||||
const size_t prefix = (distance >> nbits) & 1;
|
||||
static BROTLI_INLINE void EmitDistance(size_t distance,
|
||||
const uint8_t depth[128],
|
||||
const uint16_t bits[128],
|
||||
uint32_t histo[128],
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
const size_t d = distance + 3;
|
||||
const uint32_t nbits = Log2FloorNonZero(d) - 1u;
|
||||
const size_t prefix = (d >> nbits) & 1;
|
||||
const size_t offset = (2 + prefix) << nbits;
|
||||
const size_t distcode = 2 * (nbits - 1) + prefix + 80;
|
||||
WriteBits(depth[distcode], bits[distcode], storage_ix, storage);
|
||||
WriteBits(nbits, distance - offset, storage_ix, storage);
|
||||
BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
|
||||
BrotliWriteBits(nbits, d - offset, storage_ix, storage);
|
||||
++histo[distcode];
|
||||
}
|
||||
|
||||
inline void EmitLiterals(const uint8_t* input, const size_t len,
|
||||
const uint8_t depth[256], const uint16_t bits[256],
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
for (size_t j = 0; j < len; j++) {
|
||||
static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
|
||||
const uint8_t depth[256],
|
||||
const uint16_t bits[256],
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
size_t j;
|
||||
for (j = 0; j < len; j++) {
|
||||
const uint8_t lit = input[j];
|
||||
WriteBits(depth[lit], bits[lit], storage_ix, storage);
|
||||
BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
|
||||
}
|
||||
}
|
||||
|
||||
/* REQUIRES: len <= 1 << 20. */
|
||||
static void StoreMetaBlockHeader(
|
||||
size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
|
||||
static void BrotliStoreMetaBlockHeader(
|
||||
size_t len, int is_uncompressed, size_t* storage_ix, uint8_t* storage) {
|
||||
/* ISLAST */
|
||||
WriteBits(1, 0, storage_ix, storage);
|
||||
BrotliWriteBits(1, 0, storage_ix, storage);
|
||||
if (len <= (1U << 16)) {
|
||||
/* MNIBBLES is 4 */
|
||||
WriteBits(2, 0, storage_ix, storage);
|
||||
WriteBits(16, len - 1, storage_ix, storage);
|
||||
BrotliWriteBits(2, 0, storage_ix, storage);
|
||||
BrotliWriteBits(16, len - 1, storage_ix, storage);
|
||||
} else {
|
||||
/* MNIBBLES is 5 */
|
||||
WriteBits(2, 1, storage_ix, storage);
|
||||
WriteBits(20, len - 1, storage_ix, storage);
|
||||
BrotliWriteBits(2, 1, storage_ix, storage);
|
||||
BrotliWriteBits(20, len - 1, storage_ix, storage);
|
||||
}
|
||||
/* ISUNCOMPRESSED */
|
||||
WriteBits(1, is_uncompressed, storage_ix, storage);
|
||||
BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
|
||||
}
|
||||
|
||||
static void UpdateBits(size_t n_bits,
|
||||
uint32_t bits,
|
||||
size_t pos,
|
||||
uint8_t *array) {
|
||||
static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
|
||||
uint8_t *array) {
|
||||
while (n_bits > 0) {
|
||||
size_t byte_pos = pos >> 3;
|
||||
size_t n_unchanged_bits = pos & 7;
|
||||
size_t n_changed_bits = std::min(n_bits, 8 - n_unchanged_bits);
|
||||
size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
|
||||
size_t total_bits = n_unchanged_bits + n_changed_bits;
|
||||
uint32_t mask = (~((1 << total_bits) - 1)) | ((1 << n_unchanged_bits) - 1);
|
||||
uint32_t mask =
|
||||
(~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
|
||||
uint32_t unchanged_bits = array[byte_pos] & mask;
|
||||
uint32_t changed_bits = bits & ((1 << n_changed_bits) - 1);
|
||||
uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
|
||||
array[byte_pos] =
|
||||
static_cast<uint8_t>((changed_bits << n_unchanged_bits) |
|
||||
unchanged_bits);
|
||||
(uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
|
||||
n_bits -= n_changed_bits;
|
||||
bits >>= n_changed_bits;
|
||||
pos += n_changed_bits;
|
||||
@ -342,69 +362,72 @@ static void RewindBitPosition(const size_t new_storage_ix,
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
const size_t bitpos = new_storage_ix & 7;
|
||||
const size_t mask = (1u << bitpos) - 1;
|
||||
storage[new_storage_ix >> 3] &= static_cast<uint8_t>(mask);
|
||||
storage[new_storage_ix >> 3] &= (uint8_t)mask;
|
||||
*storage_ix = new_storage_ix;
|
||||
}
|
||||
|
||||
static bool ShouldMergeBlock(const uint8_t* data, size_t len,
|
||||
const uint8_t* depths) {
|
||||
static int ShouldMergeBlock(const uint8_t* data, size_t len,
|
||||
const uint8_t* depths) {
|
||||
size_t histo[256] = { 0 };
|
||||
static const size_t kSampleRate = 43;
|
||||
for (size_t i = 0; i < len; i += kSampleRate) {
|
||||
size_t i;
|
||||
for (i = 0; i < len; i += kSampleRate) {
|
||||
++histo[data[i]];
|
||||
}
|
||||
const size_t total = (len + kSampleRate - 1) / kSampleRate;
|
||||
double r = (FastLog2(total) + 0.5) * static_cast<double>(total) + 200;
|
||||
for (size_t i = 0; i < 256; ++i) {
|
||||
r -= static_cast<double>(histo[i]) * (depths[i] + FastLog2(histo[i]));
|
||||
{
|
||||
const size_t total = (len + kSampleRate - 1) / kSampleRate;
|
||||
double r = (FastLog2(total) + 0.5) * (double)total + 200;
|
||||
for (i = 0; i < 256; ++i) {
|
||||
r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
|
||||
}
|
||||
return (r >= 0.0) ? 1 : 0;
|
||||
}
|
||||
return r >= 0.0;
|
||||
}
|
||||
|
||||
inline bool ShouldUseUncompressedMode(const uint8_t* metablock_start,
|
||||
const uint8_t* next_emit,
|
||||
const size_t insertlen,
|
||||
const uint8_t literal_depths[256]) {
|
||||
const size_t compressed = static_cast<size_t>(next_emit - metablock_start);
|
||||
/* Acceptable loss for uncompressible speedup is 2% */
|
||||
#define MIN_RATIO 980
|
||||
|
||||
static BROTLI_INLINE int ShouldUseUncompressedMode(
|
||||
const uint8_t* metablock_start, const uint8_t* next_emit,
|
||||
const size_t insertlen, const size_t literal_ratio) {
|
||||
const size_t compressed = (size_t)(next_emit - metablock_start);
|
||||
if (compressed * 50 > insertlen) {
|
||||
return false;
|
||||
return 0;
|
||||
} else {
|
||||
return (literal_ratio > MIN_RATIO) ? 1 : 0;
|
||||
}
|
||||
static const double kAcceptableLossForUncompressibleSpeedup = 0.02;
|
||||
static const double kMinEntropy =
|
||||
8 * (1.0 - kAcceptableLossForUncompressibleSpeedup);
|
||||
uint32_t sum = 0;
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
const uint32_t n = literal_depths[i];
|
||||
sum += n << (15 - n);
|
||||
}
|
||||
return sum > static_cast<uint32_t>((1 << 15) * kMinEntropy);
|
||||
}
|
||||
|
||||
static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
|
||||
const size_t storage_ix_start,
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
const size_t len = static_cast<size_t>(end - begin);
|
||||
const size_t len = (size_t)(end - begin);
|
||||
RewindBitPosition(storage_ix_start, storage_ix, storage);
|
||||
StoreMetaBlockHeader(len, 1, storage_ix, storage);
|
||||
BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
memcpy(&storage[*storage_ix >> 3], begin, len);
|
||||
*storage_ix += len << 3;
|
||||
storage[*storage_ix >> 3] = 0;
|
||||
}
|
||||
|
||||
void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
|
||||
bool is_last,
|
||||
static uint32_t kCmdHistoSeed[128] = {
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
void BrotliCompressFragmentFast(MemoryManager* m,
|
||||
const uint8_t* input, size_t input_size,
|
||||
int is_last,
|
||||
int* table, size_t table_size,
|
||||
uint8_t cmd_depth[128], uint16_t cmd_bits[128],
|
||||
size_t* cmd_code_numbits, uint8_t* cmd_code,
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
if (input_size == 0) {
|
||||
assert(is_last);
|
||||
WriteBits(1, 1, storage_ix, storage); // islast
|
||||
WriteBits(1, 1, storage_ix, storage); // isempty
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
return;
|
||||
}
|
||||
uint32_t cmd_histo[128];
|
||||
const uint8_t* ip_end;
|
||||
|
||||
/* "next_emit" is a pointer to the first byte that is not covered by a
|
||||
previous copy. Bytes between "next_emit" and the start of the next copy or
|
||||
@ -417,66 +440,81 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
|
||||
static const size_t kFirstBlockSize = 3 << 15;
|
||||
static const size_t kMergeBlockSize = 1 << 16;
|
||||
|
||||
const size_t kInputMarginBytes = 16;
|
||||
const size_t kMinMatchLen = 5;
|
||||
|
||||
const uint8_t* metablock_start = input;
|
||||
size_t block_size = std::min(input_size, kFirstBlockSize);
|
||||
size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
|
||||
size_t total_block_size = block_size;
|
||||
/* Save the bit position of the MLEN field of the meta-block header, so that
|
||||
we can update it later if we decide to extend this meta-block. */
|
||||
size_t mlen_storage_ix = *storage_ix + 3;
|
||||
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
|
||||
// No block splits, no contexts.
|
||||
WriteBits(13, 0, storage_ix, storage);
|
||||
|
||||
uint8_t lit_depth[256] = { 0 };
|
||||
uint16_t lit_bits[256] = { 0 };
|
||||
BuildAndStoreLiteralPrefixCode(input, block_size, lit_depth, lit_bits,
|
||||
storage_ix, storage);
|
||||
uint8_t lit_depth[256];
|
||||
uint16_t lit_bits[256];
|
||||
|
||||
// Store the pre-compressed command and distance prefix codes.
|
||||
for (size_t i = 0; i + 7 < *cmd_code_numbits; i += 8) {
|
||||
WriteBits(8, cmd_code[i >> 3], storage_ix, storage);
|
||||
size_t literal_ratio;
|
||||
|
||||
const uint8_t* ip;
|
||||
int last_distance;
|
||||
|
||||
const size_t shift = 64u - Log2FloorNonZero(table_size);
|
||||
assert(table_size);
|
||||
assert(table_size <= (1u << 31));
|
||||
/* table must be power of two */
|
||||
assert((table_size & (table_size - 1)) == 0);
|
||||
assert(table_size - 1 ==
|
||||
(size_t)(MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
|
||||
|
||||
if (input_size == 0) {
|
||||
assert(is_last);
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
return;
|
||||
}
|
||||
WriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
|
||||
storage_ix, storage);
|
||||
|
||||
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
|
||||
/* No block splits, no contexts. */
|
||||
BrotliWriteBits(13, 0, storage_ix, storage);
|
||||
|
||||
literal_ratio = BuildAndStoreLiteralPrefixCode(
|
||||
m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
{
|
||||
/* Store the pre-compressed command and distance prefix codes. */
|
||||
size_t i;
|
||||
for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
|
||||
BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
|
||||
}
|
||||
}
|
||||
BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
|
||||
storage_ix, storage);
|
||||
|
||||
emit_commands:
|
||||
/* Initialize the command and distance histograms. We will gather
|
||||
statistics of command and distance codes during the processing
|
||||
of this block and use it to update the command and distance
|
||||
prefix codes for the next block. */
|
||||
uint32_t cmd_histo[128] = {
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 0, 0, 0, 0,
|
||||
};
|
||||
memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
|
||||
|
||||
// "ip" is the input pointer.
|
||||
const uint8_t* ip = input;
|
||||
assert(table_size);
|
||||
assert(table_size <= (1u << 31));
|
||||
assert((table_size & (table_size - 1)) == 0); // table must be power of two
|
||||
const size_t shift = 64u - Log2FloorNonZero(table_size);
|
||||
assert(table_size - 1 == static_cast<size_t>(
|
||||
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
|
||||
const uint8_t* ip_end = input + block_size;
|
||||
/* "ip" is the input pointer. */
|
||||
ip = input;
|
||||
last_distance = -1;
|
||||
ip_end = input + block_size;
|
||||
|
||||
int last_distance = -1;
|
||||
const size_t kInputMarginBytes = 16;
|
||||
const size_t kMinMatchLen = 5;
|
||||
if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
|
||||
/* For the last block, we need to keep a 16 bytes margin so that we can be
|
||||
sure that all distances are at most window size - 16.
|
||||
For all other blocks, we only need to keep a margin of 5 bytes so that
|
||||
we don't go over the block size with a copy. */
|
||||
const size_t len_limit = std::min(block_size - kMinMatchLen,
|
||||
input_size - kInputMarginBytes);
|
||||
const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
|
||||
input_size - kInputMarginBytes);
|
||||
const uint8_t* ip_limit = input + len_limit;
|
||||
|
||||
for (uint32_t next_hash = Hash(++ip, shift); ; ) {
|
||||
assert(next_emit < ip);
|
||||
uint32_t next_hash;
|
||||
for (next_hash = Hash(++ip, shift); ; ) {
|
||||
/* Step 1: Scan forward in the input looking for a 5-byte-long match.
|
||||
If we get close to exhausting the input then goto emit_remainder.
|
||||
|
||||
@ -496,11 +534,13 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
|
||||
|
||||
const uint8_t* next_ip = ip;
|
||||
const uint8_t* candidate;
|
||||
assert(next_emit < ip);
|
||||
|
||||
do {
|
||||
ip = next_ip;
|
||||
uint32_t hash = next_hash;
|
||||
assert(hash == Hash(ip, shift));
|
||||
uint32_t bytes_between_hash_lookups = skip++ >> 5;
|
||||
assert(hash == Hash(next_ip, shift));
|
||||
ip = next_ip;
|
||||
next_ip = ip + bytes_between_hash_lookups;
|
||||
if (PREDICT_FALSE(next_ip > ip_limit)) {
|
||||
goto emit_remainder;
|
||||
@ -509,7 +549,7 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
|
||||
candidate = ip - last_distance;
|
||||
if (IsMatch(ip, candidate)) {
|
||||
if (PREDICT_TRUE(candidate < ip)) {
|
||||
table[hash] = static_cast<int>(ip - base_ip);
|
||||
table[hash] = (int)(ip - base_ip);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -517,33 +557,32 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
|
||||
assert(candidate >= base_ip);
|
||||
assert(candidate < ip);
|
||||
|
||||
table[hash] = static_cast<int>(ip - base_ip);
|
||||
table[hash] = (int)(ip - base_ip);
|
||||
} while (PREDICT_TRUE(!IsMatch(ip, candidate)));
|
||||
|
||||
/* Step 2: Emit the found match together with the literal bytes from
|
||||
"next_emit" to the bit stream, and then see if we can find a next match
|
||||
immediately afterwards. Repeat until we find no match for the input
|
||||
without emitting some literal bytes. */
|
||||
uint64_t input_bytes;
|
||||
|
||||
{
|
||||
/* We have a 5-byte match at ip, and we need to emit bytes in
|
||||
[next_emit, ip). */
|
||||
const uint8_t* base = ip;
|
||||
size_t matched = 5 + FindMatchLengthWithLimit(
|
||||
candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
|
||||
candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
|
||||
int distance = (int)(base - candidate); /* > 0 */
|
||||
size_t insert = (size_t)(base - next_emit);
|
||||
ip += matched;
|
||||
int distance = static_cast<int>(base - candidate); /* > 0 */
|
||||
size_t insert = static_cast<size_t>(base - next_emit);
|
||||
assert(0 == memcmp(base, candidate, matched));
|
||||
if (PREDICT_TRUE(insert < 6210)) {
|
||||
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
|
||||
storage_ix, storage);
|
||||
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
|
||||
lit_depth)) {
|
||||
literal_ratio)) {
|
||||
EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
|
||||
storage_ix, storage);
|
||||
input_size -= static_cast<size_t>(base - input);
|
||||
input_size -= (size_t)(base - input);
|
||||
input = base;
|
||||
next_emit = input;
|
||||
goto next_block;
|
||||
@ -554,10 +593,10 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
|
||||
EmitLiterals(next_emit, insert, lit_depth, lit_bits,
|
||||
storage_ix, storage);
|
||||
if (distance == last_distance) {
|
||||
WriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
|
||||
BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
|
||||
++cmd_histo[64];
|
||||
} else {
|
||||
EmitDistance(static_cast<size_t>(distance), cmd_depth, cmd_bits,
|
||||
EmitDistance((size_t)distance, cmd_depth, cmd_bits,
|
||||
cmd_histo, storage_ix, storage);
|
||||
last_distance = distance;
|
||||
}
|
||||
@ -571,17 +610,19 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
|
||||
/* We could immediately start working at ip now, but to improve
|
||||
compression we first update "table" with the hashes of some positions
|
||||
within the last copy. */
|
||||
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
|
||||
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
|
||||
{
|
||||
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
|
||||
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 3);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 1);
|
||||
|
||||
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
|
||||
candidate = base_ip + table[cur_hash];
|
||||
table[cur_hash] = static_cast<int>(ip - base_ip);
|
||||
candidate = base_ip + table[cur_hash];
|
||||
table[cur_hash] = (int)(ip - base_ip);
|
||||
}
|
||||
}
|
||||
|
||||
while (IsMatch(ip, candidate)) {
|
||||
@ -589,13 +630,13 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
|
||||
prior to ip. */
|
||||
const uint8_t* base = ip;
|
||||
size_t matched = 5 + FindMatchLengthWithLimit(
|
||||
candidate + 5, ip + 5, static_cast<size_t>(ip_end - ip) - 5);
|
||||
candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
|
||||
ip += matched;
|
||||
last_distance = static_cast<int>(base - candidate); /* > 0 */
|
||||
last_distance = (int)(base - candidate); /* > 0 */
|
||||
assert(0 == memcmp(base, candidate, matched));
|
||||
EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
|
||||
storage_ix, storage);
|
||||
EmitDistance(static_cast<size_t>(last_distance), cmd_depth, cmd_bits,
|
||||
EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
|
||||
cmd_histo, storage_ix, storage);
|
||||
|
||||
next_emit = ip;
|
||||
@ -605,17 +646,19 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
|
||||
/* We could immediately start working at ip now, but to improve
|
||||
compression we first update "table" with the hashes of some positions
|
||||
within the last copy. */
|
||||
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
|
||||
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
|
||||
{
|
||||
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
|
||||
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 3);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 1);
|
||||
|
||||
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
|
||||
candidate = base_ip + table[cur_hash];
|
||||
table[cur_hash] = static_cast<int>(ip - base_ip);
|
||||
candidate = base_ip + table[cur_hash];
|
||||
table[cur_hash] = (int)(ip - base_ip);
|
||||
}
|
||||
}
|
||||
|
||||
next_hash = Hash(++ip, shift);
|
||||
@ -626,7 +669,7 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
|
||||
assert(next_emit <= ip_end);
|
||||
input += block_size;
|
||||
input_size -= block_size;
|
||||
block_size = std::min(input_size, kMergeBlockSize);
|
||||
block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);
|
||||
|
||||
/* Decide if we want to continue this meta-block instead of emitting the
|
||||
last insert-only command. */
|
||||
@ -638,20 +681,19 @@ void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
|
||||
We can do this because the current size and the new size both have 5
|
||||
nibbles. */
|
||||
total_block_size += block_size;
|
||||
UpdateBits(20, static_cast<uint32_t>(total_block_size - 1),
|
||||
mlen_storage_ix, storage);
|
||||
UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
|
||||
goto emit_commands;
|
||||
}
|
||||
|
||||
/* Emit the remaining bytes as literals. */
|
||||
if (next_emit < ip_end) {
|
||||
const size_t insert = static_cast<size_t>(ip_end - next_emit);
|
||||
const size_t insert = (size_t)(ip_end - next_emit);
|
||||
if (PREDICT_TRUE(insert < 6210)) {
|
||||
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
|
||||
storage_ix, storage);
|
||||
EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
|
||||
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
|
||||
lit_depth)) {
|
||||
literal_ratio)) {
|
||||
EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
|
||||
storage_ix, storage);
|
||||
} else {
|
||||
@ -668,26 +710,25 @@ next_block:
|
||||
then continue emitting commands. */
|
||||
if (input_size > 0) {
|
||||
metablock_start = input;
|
||||
block_size = std::min(input_size, kFirstBlockSize);
|
||||
block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
|
||||
total_block_size = block_size;
|
||||
/* Save the bit position of the MLEN field of the meta-block header, so that
|
||||
we can update it later if we decide to extend this meta-block. */
|
||||
mlen_storage_ix = *storage_ix + 3;
|
||||
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
|
||||
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
|
||||
/* No block splits, no contexts. */
|
||||
WriteBits(13, 0, storage_ix, storage);
|
||||
memset(lit_depth, 0, sizeof(lit_depth));
|
||||
memset(lit_bits, 0, sizeof(lit_bits));
|
||||
BuildAndStoreLiteralPrefixCode(input, block_size, lit_depth, lit_bits,
|
||||
storage_ix, storage);
|
||||
BrotliWriteBits(13, 0, storage_ix, storage);
|
||||
literal_ratio = BuildAndStoreLiteralPrefixCode(
|
||||
m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
|
||||
storage_ix, storage);
|
||||
goto emit_commands;
|
||||
}
|
||||
|
||||
if (is_last) {
|
||||
WriteBits(1, 1, storage_ix, storage); /* islast */
|
||||
WriteBits(1, 1, storage_ix, storage); /* isempty */
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
} else {
|
||||
/* If this is not the last block, update the command and distance prefix
|
||||
@ -699,4 +740,6 @@ next_block:
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
@ -13,8 +13,12 @@
|
||||
#define BROTLI_ENC_COMPRESS_FRAGMENT_H_
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Compresses "input" string to the "*storage" buffer as one or more complete
|
||||
meta-blocks, and updates the "*storage_ix" bit position.
|
||||
@ -35,13 +39,20 @@ namespace brotli {
|
||||
REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
|
||||
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
|
||||
REQUIRES: "table_size" is a power of two */
|
||||
void BrotliCompressFragmentFast(const uint8_t* input, size_t input_size,
|
||||
bool is_last,
|
||||
int* table, size_t table_size,
|
||||
uint8_t cmd_depth[128], uint16_t cmd_bits[128],
|
||||
size_t* cmd_code_numbits, uint8_t* cmd_code,
|
||||
size_t* storage_ix, uint8_t* storage);
|
||||
BROTLI_INTERNAL void BrotliCompressFragmentFast(MemoryManager* m,
|
||||
const uint8_t* input,
|
||||
size_t input_size,
|
||||
int is_last,
|
||||
int* table, size_t table_size,
|
||||
uint8_t cmd_depth[128],
|
||||
uint16_t cmd_bits[128],
|
||||
size_t* cmd_code_numbits,
|
||||
uint8_t* cmd_code,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_COMPRESS_FRAGMENT_H_ */
|
||||
|
@ -12,7 +12,7 @@
|
||||
|
||||
#include "./compress_fragment_two_pass.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <string.h> /* memcmp, memcpy, memset */
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./bit_cost.h"
|
||||
@ -20,10 +20,14 @@
|
||||
#include "./entropy_encode.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./find_match_length.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
#include "./write_bits.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* kHashMul32 multiplier has these properties:
|
||||
* The multiplier must be odd. Otherwise we may lose the highest bit.
|
||||
@ -33,19 +37,22 @@ namespace brotli {
|
||||
* The number has been tuned heuristically against compression benchmarks. */
|
||||
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
||||
|
||||
static inline uint32_t Hash(const uint8_t* p, size_t shift) {
|
||||
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
|
||||
const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 16) * kHashMul32;
|
||||
return static_cast<uint32_t>(h >> shift);
|
||||
return (uint32_t)(h >> shift);
|
||||
}
|
||||
|
||||
static inline uint32_t HashBytesAtOffset(uint64_t v, int offset, size_t shift) {
|
||||
static BROTLI_INLINE uint32_t HashBytesAtOffset(
|
||||
uint64_t v, int offset, size_t shift) {
|
||||
assert(offset >= 0);
|
||||
assert(offset <= 2);
|
||||
const uint64_t h = ((v >> (8 * offset)) << 16) * kHashMul32;
|
||||
return static_cast<uint32_t>(h >> shift);
|
||||
{
|
||||
const uint64_t h = ((v >> (8 * offset)) << 16) * kHashMul32;
|
||||
return (uint32_t)(h >> shift);
|
||||
}
|
||||
}
|
||||
|
||||
static inline int IsMatch(const uint8_t* p1, const uint8_t* p2) {
|
||||
static BROTLI_INLINE int IsMatch(const uint8_t* p1, const uint8_t* p2) {
|
||||
return (BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
|
||||
p1[4] == p2[4] &&
|
||||
p1[5] == p2[5]);
|
||||
@ -58,64 +65,66 @@ static void BuildAndStoreCommandPrefixCode(
|
||||
uint8_t depth[128], uint16_t bits[128],
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
|
||||
static const size_t kTreeSize = 129;
|
||||
HuffmanTree tree[kTreeSize];
|
||||
CreateHuffmanTree(histogram, 64, 15, tree, depth);
|
||||
CreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
|
||||
HuffmanTree tree[129];
|
||||
uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
|
||||
uint16_t cmd_bits[64];
|
||||
BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
|
||||
BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
|
||||
/* We have to jump through a few hoops here in order to compute
|
||||
the command bits because the symbols are in a different order than in
|
||||
the full alphabet. This looks complicated, but having the symbols
|
||||
in this order in the command bits saves a few branches in the Emit*
|
||||
functions. */
|
||||
uint8_t cmd_depth[64];
|
||||
uint16_t cmd_bits[64];
|
||||
memcpy(cmd_depth, depth + 24, 24);
|
||||
memcpy(cmd_depth + 24, depth, 8);
|
||||
memcpy(cmd_depth + 32, depth + 48, 8);
|
||||
memcpy(cmd_depth + 40, depth + 8, 8);
|
||||
memcpy(cmd_depth + 48, depth + 56, 8);
|
||||
memcpy(cmd_depth + 56, depth + 16, 8);
|
||||
ConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
|
||||
BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
|
||||
memcpy(bits, cmd_bits + 24, 16);
|
||||
memcpy(bits + 8, cmd_bits + 40, 16);
|
||||
memcpy(bits + 16, cmd_bits + 56, 16);
|
||||
memcpy(bits + 24, cmd_bits, 48);
|
||||
memcpy(bits + 48, cmd_bits + 32, 16);
|
||||
memcpy(bits + 56, cmd_bits + 48, 16);
|
||||
ConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
|
||||
BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
|
||||
{
|
||||
/* Create the bit length array for the full command alphabet. */
|
||||
uint8_t cmd_depth[704] = { 0 };
|
||||
size_t i;
|
||||
memset(cmd_depth, 0, 64); /* only 64 first values were used */
|
||||
memcpy(cmd_depth, depth + 24, 8);
|
||||
memcpy(cmd_depth + 64, depth + 32, 8);
|
||||
memcpy(cmd_depth + 128, depth + 40, 8);
|
||||
memcpy(cmd_depth + 192, depth + 48, 8);
|
||||
memcpy(cmd_depth + 384, depth + 56, 8);
|
||||
for (size_t i = 0; i < 8; ++i) {
|
||||
for (i = 0; i < 8; ++i) {
|
||||
cmd_depth[128 + 8 * i] = depth[i];
|
||||
cmd_depth[256 + 8 * i] = depth[8 + i];
|
||||
cmd_depth[448 + 8 * i] = depth[16 + i];
|
||||
}
|
||||
StoreHuffmanTree(cmd_depth, 704, tree, storage_ix, storage);
|
||||
BrotliStoreHuffmanTree(
|
||||
cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
|
||||
}
|
||||
StoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
|
||||
BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
|
||||
}
|
||||
|
||||
inline void EmitInsertLen(uint32_t insertlen, uint32_t** commands) {
|
||||
static BROTLI_INLINE void EmitInsertLen(
|
||||
uint32_t insertlen, uint32_t** commands) {
|
||||
if (insertlen < 6) {
|
||||
**commands = insertlen;
|
||||
} else if (insertlen < 130) {
|
||||
insertlen -= 2;
|
||||
const uint32_t nbits = Log2FloorNonZero(insertlen) - 1u;
|
||||
const uint32_t prefix = insertlen >> nbits;
|
||||
const uint32_t tail = insertlen - 2;
|
||||
const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
|
||||
const uint32_t prefix = tail >> nbits;
|
||||
const uint32_t inscode = (nbits << 1) + prefix + 2;
|
||||
const uint32_t extra = insertlen - (prefix << nbits);
|
||||
const uint32_t extra = tail - (prefix << nbits);
|
||||
**commands = inscode | (extra << 8);
|
||||
} else if (insertlen < 2114) {
|
||||
insertlen -= 66;
|
||||
const uint32_t nbits = Log2FloorNonZero(insertlen);
|
||||
const uint32_t tail = insertlen - 66;
|
||||
const uint32_t nbits = Log2FloorNonZero(tail);
|
||||
const uint32_t code = nbits + 10;
|
||||
const uint32_t extra = insertlen - (1 << nbits);
|
||||
const uint32_t extra = tail - (1u << nbits);
|
||||
**commands = code | (extra << 8);
|
||||
} else if (insertlen < 6210) {
|
||||
const uint32_t extra = insertlen - 2114;
|
||||
@ -130,108 +139,103 @@ inline void EmitInsertLen(uint32_t insertlen, uint32_t** commands) {
|
||||
++(*commands);
|
||||
}
|
||||
|
||||
inline void EmitCopyLen(size_t copylen, uint32_t** commands) {
|
||||
static BROTLI_INLINE void EmitCopyLen(size_t copylen, uint32_t** commands) {
|
||||
if (copylen < 10) {
|
||||
**commands = static_cast<uint32_t>(copylen + 38);
|
||||
**commands = (uint32_t)(copylen + 38);
|
||||
} else if (copylen < 134) {
|
||||
copylen -= 6;
|
||||
const size_t nbits = Log2FloorNonZero(copylen) - 1;
|
||||
const size_t prefix = copylen >> nbits;
|
||||
const size_t tail = copylen - 6;
|
||||
const size_t nbits = Log2FloorNonZero(tail) - 1;
|
||||
const size_t prefix = tail >> nbits;
|
||||
const size_t code = (nbits << 1) + prefix + 44;
|
||||
const size_t extra = copylen - (prefix << nbits);
|
||||
**commands = static_cast<uint32_t>(code | (extra << 8));
|
||||
const size_t extra = tail - (prefix << nbits);
|
||||
**commands = (uint32_t)(code | (extra << 8));
|
||||
} else if (copylen < 2118) {
|
||||
copylen -= 70;
|
||||
const size_t nbits = Log2FloorNonZero(copylen);
|
||||
const size_t tail = copylen - 70;
|
||||
const size_t nbits = Log2FloorNonZero(tail);
|
||||
const size_t code = nbits + 52;
|
||||
const size_t extra = copylen - (1 << nbits);
|
||||
**commands = static_cast<uint32_t>(code | (extra << 8));
|
||||
const size_t extra = tail - (1u << nbits);
|
||||
**commands = (uint32_t)(code | (extra << 8));
|
||||
} else {
|
||||
const size_t extra = copylen - 2118;
|
||||
**commands = static_cast<uint32_t>(63 | (extra << 8));
|
||||
**commands = (uint32_t)(63 | (extra << 8));
|
||||
}
|
||||
++(*commands);
|
||||
}
|
||||
|
||||
inline void EmitCopyLenLastDistance(size_t copylen, uint32_t** commands) {
|
||||
static BROTLI_INLINE void EmitCopyLenLastDistance(
|
||||
size_t copylen, uint32_t** commands) {
|
||||
if (copylen < 12) {
|
||||
**commands = static_cast<uint32_t>(copylen + 20);
|
||||
**commands = (uint32_t)(copylen + 20);
|
||||
++(*commands);
|
||||
} else if (copylen < 72) {
|
||||
copylen -= 8;
|
||||
const size_t nbits = Log2FloorNonZero(copylen) - 1;
|
||||
const size_t prefix = copylen >> nbits;
|
||||
const size_t tail = copylen - 8;
|
||||
const size_t nbits = Log2FloorNonZero(tail) - 1;
|
||||
const size_t prefix = tail >> nbits;
|
||||
const size_t code = (nbits << 1) + prefix + 28;
|
||||
const size_t extra = copylen - (prefix << nbits);
|
||||
**commands = static_cast<uint32_t>(code | (extra << 8));
|
||||
const size_t extra = tail - (prefix << nbits);
|
||||
**commands = (uint32_t)(code | (extra << 8));
|
||||
++(*commands);
|
||||
} else if (copylen < 136) {
|
||||
copylen -= 8;
|
||||
const size_t code = (copylen >> 5) + 54;
|
||||
const size_t extra = copylen & 31;
|
||||
**commands = static_cast<uint32_t>(code | (extra << 8));
|
||||
const size_t tail = copylen - 8;
|
||||
const size_t code = (tail >> 5) + 54;
|
||||
const size_t extra = tail & 31;
|
||||
**commands = (uint32_t)(code | (extra << 8));
|
||||
++(*commands);
|
||||
**commands = 64;
|
||||
++(*commands);
|
||||
} else if (copylen < 2120) {
|
||||
copylen -= 72;
|
||||
const size_t nbits = Log2FloorNonZero(copylen);
|
||||
const size_t tail = copylen - 72;
|
||||
const size_t nbits = Log2FloorNonZero(tail);
|
||||
const size_t code = nbits + 52;
|
||||
const size_t extra = copylen - (1 << nbits);
|
||||
**commands = static_cast<uint32_t>(code | (extra << 8));
|
||||
const size_t extra = tail - (1u << nbits);
|
||||
**commands = (uint32_t)(code | (extra << 8));
|
||||
++(*commands);
|
||||
**commands = 64;
|
||||
++(*commands);
|
||||
} else {
|
||||
const size_t extra = copylen - 2120;
|
||||
**commands = static_cast<uint32_t>(63 | (extra << 8));
|
||||
**commands = (uint32_t)(63 | (extra << 8));
|
||||
++(*commands);
|
||||
**commands = 64;
|
||||
++(*commands);
|
||||
}
|
||||
}
|
||||
|
||||
inline void EmitDistance(uint32_t distance, uint32_t** commands) {
|
||||
distance += 3;
|
||||
uint32_t nbits = Log2FloorNonZero(distance) - 1;
|
||||
const uint32_t prefix = (distance >> nbits) & 1;
|
||||
static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
|
||||
uint32_t d = distance + 3;
|
||||
uint32_t nbits = Log2FloorNonZero(d) - 1;
|
||||
const uint32_t prefix = (d >> nbits) & 1;
|
||||
const uint32_t offset = (2 + prefix) << nbits;
|
||||
const uint32_t distcode = 2 * (nbits - 1) + prefix + 80;
|
||||
uint32_t extra = distance - offset;
|
||||
uint32_t extra = d - offset;
|
||||
**commands = distcode | (extra << 8);
|
||||
++(*commands);
|
||||
}
|
||||
|
||||
/* REQUIRES: len <= 1 << 20. */
|
||||
static void StoreMetaBlockHeader(
|
||||
size_t len, bool is_uncompressed, size_t* storage_ix, uint8_t* storage) {
|
||||
static void BrotliStoreMetaBlockHeader(
|
||||
size_t len, int is_uncompressed, size_t* storage_ix, uint8_t* storage) {
|
||||
/* ISLAST */
|
||||
WriteBits(1, 0, storage_ix, storage);
|
||||
BrotliWriteBits(1, 0, storage_ix, storage);
|
||||
if (len <= (1U << 16)) {
|
||||
/* MNIBBLES is 4 */
|
||||
WriteBits(2, 0, storage_ix, storage);
|
||||
WriteBits(16, len - 1, storage_ix, storage);
|
||||
BrotliWriteBits(2, 0, storage_ix, storage);
|
||||
BrotliWriteBits(16, len - 1, storage_ix, storage);
|
||||
} else {
|
||||
/* MNIBBLES is 5 */
|
||||
WriteBits(2, 1, storage_ix, storage);
|
||||
WriteBits(20, len - 1, storage_ix, storage);
|
||||
BrotliWriteBits(2, 1, storage_ix, storage);
|
||||
BrotliWriteBits(20, len - 1, storage_ix, storage);
|
||||
}
|
||||
/* ISUNCOMPRESSED */
|
||||
WriteBits(1, is_uncompressed, storage_ix, storage);
|
||||
BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
|
||||
}
|
||||
|
||||
static void CreateCommands(const uint8_t* input, size_t block_size,
|
||||
size_t input_size, const uint8_t* base_ip,
|
||||
int* table, size_t table_size,
|
||||
uint8_t** literals, uint32_t** commands) {
|
||||
size_t input_size, const uint8_t* base_ip, int* table, size_t table_size,
|
||||
uint8_t** literals, uint32_t** commands) {
|
||||
/* "ip" is the input pointer. */
|
||||
const uint8_t* ip = input;
|
||||
assert(table_size);
|
||||
assert(table_size <= (1u << 31));
|
||||
assert((table_size & (table_size - 1)) == 0); // table must be power of two
|
||||
const size_t shift = 64u - Log2FloorNonZero(table_size);
|
||||
assert(table_size - 1 == static_cast<size_t>(
|
||||
MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
|
||||
const uint8_t* ip_end = input + block_size;
|
||||
/* "next_emit" is a pointer to the first byte that is not covered by a
|
||||
previous copy. Bytes between "next_emit" and the start of the next copy or
|
||||
@ -241,17 +245,25 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
|
||||
int last_distance = -1;
|
||||
const size_t kInputMarginBytes = 16;
|
||||
const size_t kMinMatchLen = 6;
|
||||
|
||||
assert(table_size);
|
||||
assert(table_size <= (1u << 31));
|
||||
/* table must be power of two */
|
||||
assert((table_size & (table_size - 1)) == 0);
|
||||
assert(table_size - 1 ==
|
||||
(size_t)(MAKE_UINT64_T(0xFFFFFFFF, 0xFFFFFF) >> shift));
|
||||
|
||||
if (PREDICT_TRUE(block_size >= kInputMarginBytes)) {
|
||||
/* For the last block, we need to keep a 16 bytes margin so that we can be
|
||||
sure that all distances are at most window size - 16.
|
||||
For all other blocks, we only need to keep a margin of 5 bytes so that
|
||||
we don't go over the block size with a copy. */
|
||||
const size_t len_limit = std::min(block_size - kMinMatchLen,
|
||||
input_size - kInputMarginBytes);
|
||||
const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
|
||||
input_size - kInputMarginBytes);
|
||||
const uint8_t* ip_limit = input + len_limit;
|
||||
|
||||
for (uint32_t next_hash = Hash(++ip, shift); ; ) {
|
||||
assert(next_emit < ip);
|
||||
uint32_t next_hash;
|
||||
for (next_hash = Hash(++ip, shift); ; ) {
|
||||
/* Step 1: Scan forward in the input looking for a 6-byte-long match.
|
||||
If we get close to exhausting the input then goto emit_remainder.
|
||||
|
||||
@ -271,11 +283,14 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
|
||||
|
||||
const uint8_t* next_ip = ip;
|
||||
const uint8_t* candidate;
|
||||
|
||||
assert(next_emit < ip);
|
||||
|
||||
do {
|
||||
ip = next_ip;
|
||||
uint32_t hash = next_hash;
|
||||
assert(hash == Hash(ip, shift));
|
||||
uint32_t bytes_between_hash_lookups = skip++ >> 5;
|
||||
ip = next_ip;
|
||||
assert(hash == Hash(ip, shift));
|
||||
next_ip = ip + bytes_between_hash_lookups;
|
||||
if (PREDICT_FALSE(next_ip > ip_limit)) {
|
||||
goto emit_remainder;
|
||||
@ -284,7 +299,7 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
|
||||
candidate = ip - last_distance;
|
||||
if (IsMatch(ip, candidate)) {
|
||||
if (PREDICT_TRUE(candidate < ip)) {
|
||||
table[hash] = static_cast<int>(ip - base_ip);
|
||||
table[hash] = (int)(ip - base_ip);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -292,33 +307,32 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
|
||||
assert(candidate >= base_ip);
|
||||
assert(candidate < ip);
|
||||
|
||||
table[hash] = static_cast<int>(ip - base_ip);
|
||||
table[hash] = (int)(ip - base_ip);
|
||||
} while (PREDICT_TRUE(!IsMatch(ip, candidate)));
|
||||
|
||||
/* Step 2: Emit the found match together with the literal bytes from
|
||||
"next_emit", and then see if we can find a next match immediately
|
||||
afterwards. Repeat until we find no match for the input
|
||||
without emitting some literal bytes. */
|
||||
uint64_t input_bytes;
|
||||
|
||||
{
|
||||
/* We have a 6-byte match at ip, and we need to emit bytes in
|
||||
[next_emit, ip). */
|
||||
const uint8_t* base = ip;
|
||||
size_t matched = 6 + FindMatchLengthWithLimit(
|
||||
candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
|
||||
candidate + 6, ip + 6, (size_t)(ip_end - ip) - 6);
|
||||
int distance = (int)(base - candidate); /* > 0 */
|
||||
int insert = (int)(base - next_emit);
|
||||
ip += matched;
|
||||
int distance = static_cast<int>(base - candidate); /* > 0 */
|
||||
int insert = static_cast<int>(base - next_emit);
|
||||
assert(0 == memcmp(base, candidate, matched));
|
||||
EmitInsertLen(static_cast<uint32_t>(insert), commands);
|
||||
memcpy(*literals, next_emit, static_cast<size_t>(insert));
|
||||
EmitInsertLen((uint32_t)insert, commands);
|
||||
memcpy(*literals, next_emit, (size_t)insert);
|
||||
*literals += insert;
|
||||
if (distance == last_distance) {
|
||||
**commands = 64;
|
||||
++(*commands);
|
||||
} else {
|
||||
EmitDistance(static_cast<uint32_t>(distance), commands);
|
||||
EmitDistance((uint32_t)distance, commands);
|
||||
last_distance = distance;
|
||||
}
|
||||
EmitCopyLenLastDistance(matched, commands);
|
||||
@ -327,25 +341,28 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
|
||||
if (PREDICT_FALSE(ip >= ip_limit)) {
|
||||
goto emit_remainder;
|
||||
}
|
||||
{
|
||||
/* We could immediately start working at ip now, but to improve
|
||||
compression we first update "table" with the hashes of some
|
||||
positions within the last copy. */
|
||||
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
|
||||
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 5);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 4);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
|
||||
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
|
||||
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
|
||||
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
uint32_t cur_hash;
|
||||
table[prev_hash] = (int)(ip - base_ip - 5);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 4);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 3);
|
||||
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
|
||||
cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 1);
|
||||
|
||||
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
candidate = base_ip + table[cur_hash];
|
||||
table[cur_hash] = static_cast<int>(ip - base_ip);
|
||||
candidate = base_ip + table[cur_hash];
|
||||
table[cur_hash] = (int)(ip - base_ip);
|
||||
}
|
||||
}
|
||||
|
||||
while (IsMatch(ip, candidate)) {
|
||||
@ -353,36 +370,39 @@ static void CreateCommands(const uint8_t* input, size_t block_size,
|
||||
literal bytes prior to ip. */
|
||||
const uint8_t* base = ip;
|
||||
size_t matched = 6 + FindMatchLengthWithLimit(
|
||||
candidate + 6, ip + 6, static_cast<size_t>(ip_end - ip) - 6);
|
||||
candidate + 6, ip + 6, (size_t)(ip_end - ip) - 6);
|
||||
ip += matched;
|
||||
last_distance = static_cast<int>(base - candidate); /* > 0 */
|
||||
last_distance = (int)(base - candidate); /* > 0 */
|
||||
assert(0 == memcmp(base, candidate, matched));
|
||||
EmitCopyLen(matched, commands);
|
||||
EmitDistance(static_cast<uint32_t>(last_distance), commands);
|
||||
EmitDistance((uint32_t)last_distance, commands);
|
||||
|
||||
next_emit = ip;
|
||||
if (PREDICT_FALSE(ip >= ip_limit)) {
|
||||
goto emit_remainder;
|
||||
}
|
||||
{
|
||||
/* We could immediately start working at ip now, but to improve
|
||||
compression we first update "table" with the hashes of some
|
||||
positions within the last copy. */
|
||||
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
|
||||
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 5);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 4);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 3);
|
||||
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = static_cast<int>(ip - base_ip - 1);
|
||||
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
|
||||
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
uint32_t cur_hash;
|
||||
table[prev_hash] = (int)(ip - base_ip - 5);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 4);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 3);
|
||||
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
|
||||
cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 1);
|
||||
|
||||
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
candidate = base_ip + table[cur_hash];
|
||||
table[cur_hash] = static_cast<int>(ip - base_ip);
|
||||
candidate = base_ip + table[cur_hash];
|
||||
table[cur_hash] = (int)(ip - base_ip);
|
||||
}
|
||||
}
|
||||
|
||||
next_hash = Hash(++ip, shift);
|
||||
@ -393,40 +413,17 @@ emit_remainder:
|
||||
assert(next_emit <= ip_end);
|
||||
/* Emit the remaining bytes as literals. */
|
||||
if (next_emit < ip_end) {
|
||||
const uint32_t insert = static_cast<uint32_t>(ip_end - next_emit);
|
||||
const uint32_t insert = (uint32_t)(ip_end - next_emit);
|
||||
EmitInsertLen(insert, commands);
|
||||
memcpy(*literals, next_emit, insert);
|
||||
*literals += insert;
|
||||
}
|
||||
}
|
||||
|
||||
static void StoreCommands(const uint8_t* literals, const size_t num_literals,
|
||||
static void StoreCommands(MemoryManager* m,
|
||||
const uint8_t* literals, const size_t num_literals,
|
||||
const uint32_t* commands, const size_t num_commands,
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
uint8_t lit_depths[256] = { 0 };
|
||||
uint16_t lit_bits[256] = { 0 };
|
||||
uint32_t lit_histo[256] = { 0 };
|
||||
for (size_t i = 0; i < num_literals; ++i) {
|
||||
++lit_histo[literals[i]];
|
||||
}
|
||||
BuildAndStoreHuffmanTreeFast(lit_histo, num_literals,
|
||||
/* max_bits = */ 8,
|
||||
lit_depths, lit_bits,
|
||||
storage_ix, storage);
|
||||
|
||||
uint8_t cmd_depths[128] = { 0 };
|
||||
uint16_t cmd_bits[128] = { 0 };
|
||||
uint32_t cmd_histo[128] = { 0 };
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
++cmd_histo[commands[i] & 0xff];
|
||||
}
|
||||
cmd_histo[1] += 1;
|
||||
cmd_histo[2] += 1;
|
||||
cmd_histo[64] += 1;
|
||||
cmd_histo[84] += 1;
|
||||
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
|
||||
storage_ix, storage);
|
||||
|
||||
static const uint32_t kNumExtraBits[128] = {
|
||||
0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
|
||||
@ -441,45 +438,73 @@ static void StoreCommands(const uint8_t* literals, const size_t num_literals,
|
||||
1090, 2114, 6210, 22594,
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
uint8_t lit_depths[256];
|
||||
uint16_t lit_bits[256];
|
||||
uint32_t lit_histo[256] = { 0 };
|
||||
uint8_t cmd_depths[128] = { 0 };
|
||||
uint16_t cmd_bits[128] = { 0 };
|
||||
uint32_t cmd_histo[128] = { 0 };
|
||||
size_t i;
|
||||
for (i = 0; i < num_literals; ++i) {
|
||||
++lit_histo[literals[i]];
|
||||
}
|
||||
BrotliBuildAndStoreHuffmanTreeFast(m, lit_histo, num_literals,
|
||||
/* max_bits = */ 8,
|
||||
lit_depths, lit_bits,
|
||||
storage_ix, storage);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
for (i = 0; i < num_commands; ++i) {
|
||||
++cmd_histo[commands[i] & 0xff];
|
||||
}
|
||||
cmd_histo[1] += 1;
|
||||
cmd_histo[2] += 1;
|
||||
cmd_histo[64] += 1;
|
||||
cmd_histo[84] += 1;
|
||||
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
|
||||
storage_ix, storage);
|
||||
|
||||
for (i = 0; i < num_commands; ++i) {
|
||||
const uint32_t cmd = commands[i];
|
||||
const uint32_t code = cmd & 0xff;
|
||||
const uint32_t extra = cmd >> 8;
|
||||
WriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
|
||||
WriteBits(kNumExtraBits[code], extra, storage_ix, storage);
|
||||
BrotliWriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
|
||||
BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
|
||||
if (code < 24) {
|
||||
const uint32_t insert = kInsertOffset[code] + extra;
|
||||
for (uint32_t j = 0; j < insert; ++j) {
|
||||
uint32_t j;
|
||||
for (j = 0; j < insert; ++j) {
|
||||
const uint8_t lit = *literals;
|
||||
WriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
|
||||
BrotliWriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
|
||||
++literals;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool ShouldCompress(const uint8_t* input, size_t input_size,
|
||||
size_t num_literals) {
|
||||
static const double kAcceptableLossForUncompressibleSpeedup = 0.02;
|
||||
static const double kMaxRatioOfLiterals =
|
||||
1.0 - kAcceptableLossForUncompressibleSpeedup;
|
||||
if (num_literals < kMaxRatioOfLiterals * static_cast<double>(input_size)) {
|
||||
return true;
|
||||
/* Acceptable loss for uncompressible speedup is 2% */
|
||||
#define MIN_RATIO 0.98
|
||||
#define SAMPLE_RATE 43
|
||||
|
||||
static int ShouldCompress(const uint8_t* input, size_t input_size,
|
||||
size_t num_literals) {
|
||||
double corpus_size = (double)input_size;
|
||||
if (num_literals < MIN_RATIO * corpus_size) {
|
||||
return 1;
|
||||
} else {
|
||||
uint32_t literal_histo[256] = { 0 };
|
||||
const double max_total_bit_cost = corpus_size * 8 * MIN_RATIO / SAMPLE_RATE;
|
||||
size_t i;
|
||||
for (i = 0; i < input_size; i += SAMPLE_RATE) {
|
||||
++literal_histo[input[i]];
|
||||
}
|
||||
return BitsEntropy(literal_histo, 256) < max_total_bit_cost;
|
||||
}
|
||||
uint32_t literal_histo[256] = { 0 };
|
||||
static const uint32_t kSampleRate = 43;
|
||||
static const double kMaxEntropy =
|
||||
8 * (1.0 - kAcceptableLossForUncompressibleSpeedup);
|
||||
const double max_total_bit_cost =
|
||||
static_cast<double>(input_size) * kMaxEntropy / kSampleRate;
|
||||
for (size_t i = 0; i < input_size; i += kSampleRate) {
|
||||
++literal_histo[input[i]];
|
||||
}
|
||||
return BitsEntropy(literal_histo, 256) < max_total_bit_cost;
|
||||
}
|
||||
|
||||
void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
|
||||
bool is_last,
|
||||
void BrotliCompressFragmentTwoPass(MemoryManager* m,
|
||||
const uint8_t* input, size_t input_size,
|
||||
int is_last,
|
||||
uint32_t* command_buf, uint8_t* literal_buf,
|
||||
int* table, size_t table_size,
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
@ -488,24 +513,27 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
|
||||
const uint8_t* base_ip = input;
|
||||
|
||||
while (input_size > 0) {
|
||||
size_t block_size = std::min(input_size, kCompressFragmentTwoPassBlockSize);
|
||||
size_t block_size =
|
||||
BROTLI_MIN(size_t, input_size, kCompressFragmentTwoPassBlockSize);
|
||||
uint32_t* commands = command_buf;
|
||||
uint8_t* literals = literal_buf;
|
||||
size_t num_literals;
|
||||
CreateCommands(input, block_size, input_size, base_ip, table, table_size,
|
||||
&literals, &commands);
|
||||
const size_t num_literals = static_cast<size_t>(literals - literal_buf);
|
||||
const size_t num_commands = static_cast<size_t>(commands - command_buf);
|
||||
num_literals = (size_t)(literals - literal_buf);
|
||||
if (ShouldCompress(input, block_size, num_literals)) {
|
||||
StoreMetaBlockHeader(block_size, 0, storage_ix, storage);
|
||||
const size_t num_commands = (size_t)(commands - command_buf);
|
||||
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
|
||||
/* No block splits, no contexts. */
|
||||
WriteBits(13, 0, storage_ix, storage);
|
||||
StoreCommands(literal_buf, num_literals, command_buf, num_commands,
|
||||
BrotliWriteBits(13, 0, storage_ix, storage);
|
||||
StoreCommands(m, literal_buf, num_literals, command_buf, num_commands,
|
||||
storage_ix, storage);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
} else {
|
||||
/* Since we did not find many backward references and the entropy of
|
||||
the data is close to 8 bits, we can simply emit an uncompressed block.
|
||||
This makes compression speed of uncompressible data about 3x faster. */
|
||||
StoreMetaBlockHeader(block_size, 1, storage_ix, storage);
|
||||
BrotliStoreMetaBlockHeader(block_size, 1, storage_ix, storage);
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
memcpy(&storage[*storage_ix >> 3], input, block_size);
|
||||
*storage_ix += block_size << 3;
|
||||
@ -516,10 +544,12 @@ void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
|
||||
}
|
||||
|
||||
if (is_last) {
|
||||
WriteBits(1, 1, storage_ix, storage); /* islast */
|
||||
WriteBits(1, 1, storage_ix, storage); /* isempty */
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
@ -14,8 +14,12 @@
|
||||
#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
|
||||
|
||||
@ -29,12 +33,19 @@ static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
|
||||
kCompressFragmentTwoPassBlockSize long arrays.
|
||||
REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
|
||||
REQUIRES: "table_size" is a power of two */
|
||||
void BrotliCompressFragmentTwoPass(const uint8_t* input, size_t input_size,
|
||||
bool is_last,
|
||||
uint32_t* command_buf, uint8_t* literal_buf,
|
||||
int* table, size_t table_size,
|
||||
size_t* storage_ix, uint8_t* storage);
|
||||
BROTLI_INTERNAL void BrotliCompressFragmentTwoPass(MemoryManager* m,
|
||||
const uint8_t* input,
|
||||
size_t input_size,
|
||||
int is_last,
|
||||
uint32_t* command_buf,
|
||||
uint8_t* literal_buf,
|
||||
int* table,
|
||||
size_t table_size,
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage);
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ */
|
||||
|
138
enc/compressor.cc
Normal file
138
enc/compressor.cc
Normal file
@ -0,0 +1,138 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Brotli compressor API C++ wrapper and utilities. */
|
||||
|
||||
#include "./compressor.h"
|
||||
|
||||
#include <cstdlib> /* exit */
|
||||
|
||||
namespace brotli {
|
||||
|
||||
static void ConvertParams(const BrotliParams* from, BrotliEncoderParams* to) {
|
||||
BrotliEncoderParamsSetDefault(to);
|
||||
if (from->mode == BrotliParams::MODE_TEXT) {
|
||||
to->mode = BROTLI_MODE_TEXT;
|
||||
} else if (from->mode == BrotliParams::MODE_FONT) {
|
||||
to->mode = BROTLI_MODE_FONT;
|
||||
}
|
||||
to->quality = from->quality;
|
||||
to->lgwin = from->lgwin;
|
||||
to->lgblock = from->lgblock;
|
||||
}
|
||||
|
||||
BrotliCompressor::BrotliCompressor(BrotliParams params) {
|
||||
BrotliEncoderParams encoder_params;
|
||||
ConvertParams(&params, &encoder_params);
|
||||
state_ = BrotliEncoderCreateState(&encoder_params, 0, 0, 0);
|
||||
if (state_ == 0) std::exit(EXIT_FAILURE); /* OOM */
|
||||
}
|
||||
|
||||
BrotliCompressor::~BrotliCompressor(void) { BrotliEncoderDestroyState(state_); }
|
||||
|
||||
bool BrotliCompressor::WriteMetaBlock(const size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
const bool is_last, size_t* encoded_size,
|
||||
uint8_t* encoded_buffer) {
|
||||
return !!BrotliEncoderWriteMetaBlock(state_, input_size, input_buffer,
|
||||
is_last ? 1 : 0, encoded_size,
|
||||
encoded_buffer);
|
||||
}
|
||||
|
||||
bool BrotliCompressor::WriteMetadata(const size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
const bool is_last, size_t* encoded_size,
|
||||
uint8_t* encoded_buffer) {
|
||||
return !!BrotliEncoderWriteMetadata(state_, input_size, input_buffer,
|
||||
is_last ? 1 : 0, encoded_size,
|
||||
encoded_buffer);
|
||||
}
|
||||
|
||||
bool BrotliCompressor::FinishStream(size_t* encoded_size,
|
||||
uint8_t* encoded_buffer) {
|
||||
return !!BrotliEncoderFinishStream(state_, encoded_size, encoded_buffer);
|
||||
}
|
||||
|
||||
void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
|
||||
const uint8_t* input_buffer) {
|
||||
BrotliEncoderCopyInputToRingBuffer(state_, input_size, input_buffer);
|
||||
}
|
||||
|
||||
bool BrotliCompressor::WriteBrotliData(const bool is_last,
|
||||
const bool force_flush, size_t* out_size,
|
||||
uint8_t** output) {
|
||||
return !!BrotliEncoderWriteData(
|
||||
state_, is_last ? 1 : 0, force_flush ? 1 : 0, out_size, output);
|
||||
}
|
||||
|
||||
void BrotliCompressor::BrotliSetCustomDictionary(size_t size,
|
||||
const uint8_t* dict) {
|
||||
BrotliEncoderSetCustomDictionary(state_, size, dict);
|
||||
}
|
||||
|
||||
int BrotliCompressBuffer(BrotliParams params, size_t input_size,
|
||||
const uint8_t* input_buffer, size_t* encoded_size,
|
||||
uint8_t* encoded_buffer) {
|
||||
return BrotliEncoderCompress(params.quality, params.lgwin,
|
||||
(BrotliEncoderMode)params.mode, input_size, input_buffer,
|
||||
encoded_size, encoded_buffer);
|
||||
}
|
||||
|
||||
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out) {
|
||||
return BrotliCompressWithCustomDictionary(0, 0, params, in, out);
|
||||
}
|
||||
|
||||
int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
|
||||
BrotliParams params, BrotliIn* in,
|
||||
BrotliOut* out) {
|
||||
const size_t kOutputBufferSize = 65536;
|
||||
uint8_t* output_buffer;
|
||||
bool result = true;
|
||||
size_t available_in = 0;
|
||||
const uint8_t* next_in = NULL;
|
||||
size_t total_out = 0;
|
||||
bool end_of_input = false;
|
||||
BrotliEncoderParams encoder_params;
|
||||
BrotliEncoderState* s;
|
||||
|
||||
ConvertParams(&params, &encoder_params);
|
||||
s = BrotliEncoderCreateState(&encoder_params, 0, 0, 0);
|
||||
if (!s) return 0;
|
||||
BrotliEncoderSetCustomDictionary(s, dictsize, dict);
|
||||
output_buffer = new uint8_t[kOutputBufferSize];
|
||||
|
||||
while (true) {
|
||||
if (available_in == 0 && !end_of_input) {
|
||||
next_in = reinterpret_cast<const uint8_t*>(
|
||||
in->Read(BrotliEncoderInputBlockSize(s), &available_in));
|
||||
if (!next_in) {
|
||||
end_of_input = true;
|
||||
available_in = 0;
|
||||
} else if (available_in == 0) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
size_t available_out = kOutputBufferSize;
|
||||
uint8_t* next_out = output_buffer;
|
||||
result = !!BrotliEncoderCompressStream(
|
||||
s, end_of_input ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
|
||||
&available_in, &next_in, &available_out, &next_out, &total_out);
|
||||
if (!result) break;
|
||||
size_t used_output = kOutputBufferSize - available_out;
|
||||
if (used_output != 0) {
|
||||
result = out->Write(output_buffer, used_output);
|
||||
if (!result) break;
|
||||
}
|
||||
if (BrotliEncoderIsFinished(s)) break;
|
||||
}
|
||||
|
||||
delete[] output_buffer;
|
||||
BrotliEncoderDestroyState(s);
|
||||
return result ? 1 : 0;
|
||||
}
|
||||
|
||||
|
||||
} /* namespace brotli */
|
141
enc/compressor.h
141
enc/compressor.h
@ -12,4 +12,145 @@
|
||||
#include "./encode.h"
|
||||
#include "./streams.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
struct BrotliParams {
|
||||
BrotliParams(void)
|
||||
: mode(MODE_GENERIC),
|
||||
quality(11),
|
||||
lgwin(22),
|
||||
lgblock(0),
|
||||
enable_dictionary(true),
|
||||
enable_transforms(false),
|
||||
greedy_block_split(false),
|
||||
enable_context_modeling(true) {}
|
||||
|
||||
enum Mode {
|
||||
/* Default compression mode. The compressor does not know anything in
|
||||
advance about the properties of the input. */
|
||||
MODE_GENERIC = 0,
|
||||
/* Compression mode for UTF-8 format text input. */
|
||||
MODE_TEXT = 1,
|
||||
/* Compression mode used in WOFF 2.0. */
|
||||
MODE_FONT = 2
|
||||
};
|
||||
Mode mode;
|
||||
|
||||
/* Controls the compression-speed vs compression-density tradeoffs. The higher
|
||||
the |quality|, the slower the compression. Range is 0 to 11. */
|
||||
int quality;
|
||||
/* Base 2 logarithm of the sliding window size. Range is 10 to 24. */
|
||||
int lgwin;
|
||||
/* Base 2 logarithm of the maximum input block size. Range is 16 to 24.
|
||||
If set to 0, the value will be set based on the quality. */
|
||||
int lgblock;
|
||||
|
||||
/* These settings are deprecated and will be ignored.
|
||||
All speed vs. size compromises are controlled by the |quality| param. */
|
||||
bool enable_dictionary;
|
||||
bool enable_transforms;
|
||||
bool greedy_block_split;
|
||||
bool enable_context_modeling;
|
||||
};
|
||||
|
||||
/* An instance can not be reused for multiple brotli streams. */
|
||||
class BrotliCompressor {
|
||||
public:
|
||||
explicit BrotliCompressor(BrotliParams params);
|
||||
~BrotliCompressor(void);
|
||||
|
||||
/* The maximum input size that can be processed at once. */
|
||||
size_t input_block_size(void) const {
|
||||
return BrotliEncoderInputBlockSize(state_);
|
||||
}
|
||||
|
||||
/* Encodes the data in |input_buffer| as a meta-block and writes it to
|
||||
|encoded_buffer| (|*encoded_size should| be set to the size of
|
||||
|encoded_buffer|) and sets |*encoded_size| to the number of bytes that
|
||||
was written. The |input_size| must not be greater than input_block_size().
|
||||
Returns false if there was an error and true otherwise. */
|
||||
bool WriteMetaBlock(const size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
const bool is_last,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer);
|
||||
|
||||
/* Writes a metadata meta-block containing the given input to encoded_buffer.
|
||||
|*encoded_size| should be set to the size of the encoded_buffer.
|
||||
Sets |*encoded_size| to the number of bytes that was written.
|
||||
Note that the given input data will not be part of the sliding window and
|
||||
thus no backward references can be made to this data from subsequent
|
||||
metablocks. |input_size| must not be greater than 2^24 and provided
|
||||
|*encoded_size| must not be less than |input_size| + 6.
|
||||
Returns false if there was an error and true otherwise. */
|
||||
bool WriteMetadata(const size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
const bool is_last,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer);
|
||||
|
||||
/* Writes a zero-length meta-block with end-of-input bit set to the
|
||||
internal output buffer and copies the output buffer to |encoded_buffer|
|
||||
(|*encoded_size| should be set to the size of |encoded_buffer|) and sets
|
||||
|*encoded_size| to the number of bytes written.
|
||||
Returns false if there was an error and true otherwise. */
|
||||
bool FinishStream(size_t* encoded_size, uint8_t* encoded_buffer);
|
||||
|
||||
/* Copies the given input data to the internal ring buffer of the compressor.
|
||||
No processing of the data occurs at this time and this function can be
|
||||
called multiple times before calling WriteBrotliData() to process the
|
||||
accumulated input. At most input_block_size() bytes of input data can be
|
||||
copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
|
||||
*/
|
||||
void CopyInputToRingBuffer(const size_t input_size,
|
||||
const uint8_t* input_buffer);
|
||||
|
||||
/* Processes the accumulated input data and sets |*out_size| to the length of
|
||||
the new output meta-block, or to zero if no new output meta-block has been
|
||||
created (in this case the processed input data is buffered internally).
|
||||
If |*out_size| is positive, |*output| points to the start of the output
|
||||
data. If |is_last| or |force_flush| is true, an output meta-block is always
|
||||
created. However, until |is_last| is true encoder may retain up to 7 bits
|
||||
of the last byte of output. To force encoder to dump the remaining bits
|
||||
use WriteMetadata() to append an empty meta-data block.
|
||||
Returns false if the size of the input data is larger than
|
||||
input_block_size(). */
|
||||
bool WriteBrotliData(const bool is_last, const bool force_flush,
|
||||
size_t* out_size, uint8_t** output);
|
||||
|
||||
/* Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
|
||||
e.g. for custom static dictionaries for data formats.
|
||||
Not to be confused with the built-in transformable dictionary of Brotli.
|
||||
To decode, use BrotliSetCustomDictionary() of the decoder with the same
|
||||
dictionary. */
|
||||
void BrotliSetCustomDictionary(size_t size, const uint8_t* dict);
|
||||
|
||||
/* No-op, but we keep it here for API backward-compatibility. */
|
||||
void WriteStreamHeader(void) {}
|
||||
|
||||
private:
|
||||
BrotliEncoderState* state_;
|
||||
};
|
||||
|
||||
/* Compresses the data in |input_buffer| into |encoded_buffer|, and sets
|
||||
|*encoded_size| to the compressed length.
|
||||
Returns 0 if there was an error and 1 otherwise. */
|
||||
int BrotliCompressBuffer(BrotliParams params,
|
||||
size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer);
|
||||
|
||||
/* Same as above, but uses the specified input and output classes instead
|
||||
of reading from and writing to pre-allocated memory buffers. */
|
||||
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out);
|
||||
|
||||
/* Before compressing the data, sets a custom LZ77 dictionary with
|
||||
BrotliCompressor::BrotliSetCustomDictionary. */
|
||||
int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
|
||||
BrotliParams params,
|
||||
BrotliIn* in, BrotliOut* out);
|
||||
|
||||
} /* namespace brotli */
|
||||
|
||||
#endif /* BROTLI_ENC_COMPRESSOR_H_ */
|
||||
|
@ -10,8 +10,11 @@
|
||||
#define BROTLI_ENC_CONTEXT_H_
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "../common/port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Second-order context lookup table for UTF8 byte streams.
|
||||
|
||||
@ -151,29 +154,31 @@ static const uint8_t kSigned3BitContextLookup[] = {
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7,
|
||||
};
|
||||
|
||||
enum ContextType {
|
||||
typedef enum ContextType {
|
||||
CONTEXT_LSB6 = 0,
|
||||
CONTEXT_MSB6 = 1,
|
||||
CONTEXT_UTF8 = 2,
|
||||
CONTEXT_SIGNED = 3
|
||||
};
|
||||
} ContextType;
|
||||
|
||||
static inline uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
|
||||
static BROTLI_INLINE uint8_t Context(uint8_t p1, uint8_t p2, ContextType mode) {
|
||||
switch (mode) {
|
||||
case CONTEXT_LSB6:
|
||||
return p1 & 0x3f;
|
||||
case CONTEXT_MSB6:
|
||||
return static_cast<uint8_t>(p1 >> 2);
|
||||
return (uint8_t)(p1 >> 2);
|
||||
case CONTEXT_UTF8:
|
||||
return kUTF8ContextLookup[p1] | kUTF8ContextLookup[p2 + 256];
|
||||
case CONTEXT_SIGNED:
|
||||
return static_cast<uint8_t>((kSigned3BitContextLookup[p1] << 3) +
|
||||
kSigned3BitContextLookup[p2]);
|
||||
return (uint8_t)((kSigned3BitContextLookup[p1] << 3) +
|
||||
kSigned3BitContextLookup[p2]);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_CONTEXT_H_ */
|
||||
|
@ -11,7 +11,9 @@
|
||||
|
||||
#include "../common/types.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static const uint16_t kStaticDictionaryHash[] = {
|
||||
0x7d48, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
@ -4112,6 +4114,8 @@ static const uint16_t kStaticDictionaryHash[] = {
|
||||
0x0000, 0x0000, 0x0d88, 0x4ac5, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
};
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_DICTIONARY_HASH_H_ */
|
||||
|
1692
enc/encode.c
1692
enc/encode.c
File diff suppressed because it is too large
Load Diff
370
enc/encode.h
370
enc/encode.h
@ -9,45 +9,44 @@
|
||||
#ifndef BROTLI_ENC_ENCODE_H_
|
||||
#define BROTLI_ENC_ENCODE_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./command.h"
|
||||
#include "./hash.h"
|
||||
#include "./ringbuffer.h"
|
||||
#include "./static_dict.h"
|
||||
#include "./streams.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static const int kMaxWindowBits = 24;
|
||||
static const int kMinWindowBits = 10;
|
||||
static const int kMinInputBlockBits = 16;
|
||||
static const int kMaxInputBlockBits = 24;
|
||||
static const int kBrotliMaxWindowBits = 24;
|
||||
static const int kBrotliMinWindowBits = 10;
|
||||
static const int kBrotliMinInputBlockBits = 16;
|
||||
static const int kBrotliMaxInputBlockBits = 24;
|
||||
|
||||
struct BrotliParams {
|
||||
BrotliParams(void)
|
||||
: mode(MODE_GENERIC),
|
||||
quality(11),
|
||||
lgwin(22),
|
||||
lgblock(0),
|
||||
enable_dictionary(true),
|
||||
enable_transforms(false),
|
||||
greedy_block_split(false),
|
||||
enable_context_modeling(true) {}
|
||||
|
||||
enum Mode {
|
||||
typedef enum BrotliEncoderMode {
|
||||
/* Default compression mode. The compressor does not know anything in
|
||||
advance about the properties of the input. */
|
||||
MODE_GENERIC = 0,
|
||||
BROTLI_MODE_GENERIC = 0,
|
||||
/* Compression mode for UTF-8 format text input. */
|
||||
MODE_TEXT = 1,
|
||||
BROTLI_MODE_TEXT = 1,
|
||||
/* Compression mode used in WOFF 2.0. */
|
||||
MODE_FONT = 2
|
||||
};
|
||||
Mode mode;
|
||||
BROTLI_MODE_FONT = 2
|
||||
} BrotliEncoderMode;
|
||||
|
||||
#define BROTLI_DEFAULT_QUALITY 11
|
||||
#define BROTLI_DEFAULT_WINDOW 22
|
||||
#define BROTLI_DEFAULT_MODE BROTLI_MODE_GENERIC
|
||||
|
||||
/* Operation requested through the |op| argument of
   BrotliEncoderCompressStream(). */
typedef enum BrotliEncoderOperation {
|
||||
/* Ordinary processing: neither a flush nor a finish is requested.
   NOTE(review): inferred from the two values below -- confirm. */
BROTLI_OPERATION_PROCESS = 0,
|
||||
/* Request output stream to flush. Performed when input stream is depleted
   and there is enough space in output stream. */
|
||||
BROTLI_OPERATION_FLUSH = 1,
|
||||
/* Request output stream to finish. Performed when input stream is depleted
   and there is enough space in output stream. */
|
||||
BROTLI_OPERATION_FINISH = 2
|
||||
} BrotliEncoderOperation;
|
||||
|
||||
/* DEPRECATED */
|
||||
typedef struct BrotliEncoderParams {
|
||||
BrotliEncoderMode mode;
|
||||
/* Controls the compression-speed vs compression-density tradeoffs. The higher
|
||||
the |quality|, the slower the compression. Range is 0 to 11. */
|
||||
int quality;
|
||||
@ -56,155 +55,202 @@ struct BrotliParams {
|
||||
/* Base 2 logarithm of the maximum input block size. Range is 16 to 24.
|
||||
If set to 0, the value will be set based on the quality. */
|
||||
int lgblock;
|
||||
} BrotliEncoderParams;
|
||||
|
||||
// These settings are deprecated and will be ignored.
|
||||
// All speed vs. size compromises are controlled by the quality param.
|
||||
bool enable_dictionary;
|
||||
bool enable_transforms;
|
||||
bool greedy_block_split;
|
||||
bool enable_context_modeling;
|
||||
};
|
||||
/* Selects which encoder setting a BrotliEncoderSetParameter() call changes;
   the new setting is passed separately as a uint32_t value. */
typedef enum BrotliEncoderParameter {
|
||||
/* Compression mode; the value is a BrotliEncoderMode converted to
   uint32_t. */
BROTLI_PARAM_MODE = 0,
|
||||
/* Controls the compression-speed vs compression-density tradeoffs. The higher
   the quality, the slower the compression. Range is 0 to 11. */
|
||||
BROTLI_PARAM_QUALITY = 1,
|
||||
/* Base 2 logarithm of the sliding window size. Range is 10 to 24. */
|
||||
BROTLI_PARAM_LGWIN = 2,
|
||||
/* Base 2 logarithm of the maximum input block size. Range is 16 to 24.
   If set to 0, the value will be set based on the quality. */
|
||||
BROTLI_PARAM_LGBLOCK = 3
|
||||
} BrotliEncoderParameter;
|
||||
|
||||
// An instance can not be reused for multiple brotli streams.
|
||||
class BrotliCompressor {
|
||||
public:
|
||||
explicit BrotliCompressor(BrotliParams params);
|
||||
~BrotliCompressor(void);
|
||||
/* DEPRECATED */
|
||||
void BrotliEncoderParamsSetDefault(BrotliEncoderParams* params);
|
||||
|
||||
// The maximum input size that can be processed at once.
|
||||
size_t input_block_size(void) const { return size_t(1) << params_.lgblock; }
|
||||
/* A state can not be reused for multiple brotli streams. */
|
||||
typedef struct BrotliEncoderStateStruct BrotliEncoderState;
|
||||
|
||||
// Encodes the data in input_buffer as a meta-block and writes it to
|
||||
// encoded_buffer (*encoded_size should be set to the size of
|
||||
// encoded_buffer) and sets *encoded_size to the number of bytes that
|
||||
// was written. The input_size must be <= input_block_size().
|
||||
// Returns 0 if there was an error and 1 otherwise.
|
||||
bool WriteMetaBlock(const size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
const bool is_last,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer);
|
||||
int BrotliEncoderSetParameter(
|
||||
BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value);
|
||||
|
||||
// Writes a metadata meta-block containing the given input to encoded_buffer.
|
||||
// *encoded_size should be set to the size of the encoded_buffer.
|
||||
// Sets *encoded_size to the number of bytes that was written.
|
||||
// Note that the given input data will not be part of the sliding window and
|
||||
// thus no backward references can be made to this data from subsequent
|
||||
// metablocks.
|
||||
bool WriteMetadata(const size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
const bool is_last,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer);
|
||||
/* Creates the instance of BrotliEncoderState and initializes it.
|
||||
|alloc_func| and |free_func| MUST be both zero or both non-zero. In the case
|
||||
they are both zero, default memory allocators are used. |opaque| is passed to
|
||||
|alloc_func| and |free_func| when they are called. */
|
||||
BrotliEncoderState* BrotliEncoderCreateInstance(brotli_alloc_func alloc_func,
|
||||
brotli_free_func free_func,
|
||||
void* opaque);
|
||||
/* DEPRECATED */
|
||||
static inline BrotliEncoderState* BrotliEncoderCreateState(
|
||||
const BrotliEncoderParams* params, brotli_alloc_func alloc_func,
|
||||
brotli_free_func free_func, void* opaque) {
|
||||
BrotliEncoderState* result = BrotliEncoderCreateInstance(
|
||||
alloc_func, free_func, opaque);
|
||||
if (!result) return result;
|
||||
BrotliEncoderSetParameter(
|
||||
result, BROTLI_PARAM_MODE, (uint32_t)params->mode);
|
||||
BrotliEncoderSetParameter(
|
||||
result, BROTLI_PARAM_QUALITY, (uint32_t)params->quality);
|
||||
BrotliEncoderSetParameter(
|
||||
result, BROTLI_PARAM_LGWIN, (uint32_t)params->lgwin);
|
||||
BrotliEncoderSetParameter(
|
||||
result, BROTLI_PARAM_LGBLOCK, (uint32_t)params->lgblock);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Writes a zero-length meta-block with end-of-input bit set to the
|
||||
// internal output buffer and copies the output buffer to encoded_buffer
|
||||
// (*encoded_size should be set to the size of encoded_buffer) and sets
|
||||
// *encoded_size to the number of bytes written. Returns false if there was
|
||||
// an error and true otherwise.
|
||||
bool FinishStream(size_t* encoded_size, uint8_t* encoded_buffer);
|
||||
/* Deinitializes and frees BrotliEncoderState instance. */
|
||||
void BrotliEncoderDestroyInstance(BrotliEncoderState* state);
|
||||
/* DEPRECATED: alias kept for older callers; it only forwards |state| to
   BrotliEncoderDestroyInstance(). */
|
||||
static inline void BrotliEncoderDestroyState(BrotliEncoderState* state) {
|
||||
BrotliEncoderDestroyInstance(state);
|
||||
}
|
||||
|
||||
// Copies the given input data to the internal ring buffer of the compressor.
|
||||
// No processing of the data occurs at this time and this function can be
|
||||
// called multiple times before calling WriteBrotliData() to process the
|
||||
// accumulated input. At most input_block_size() bytes of input data can be
|
||||
// copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
|
||||
void CopyInputToRingBuffer(const size_t input_size,
|
||||
const uint8_t* input_buffer);
|
||||
/* The maximum input size that can be processed at once. */
|
||||
size_t BrotliEncoderInputBlockSize(BrotliEncoderState* state);
|
||||
|
||||
// Processes the accumulated input data and sets *out_size to the length of
|
||||
// the new output meta-block, or to zero if no new output meta-block was
|
||||
// created (in this case the processed input data is buffered internally).
|
||||
// If *out_size is positive, *output points to the start of the output data.
|
||||
// If is_last or force_flush is true, an output meta-block is always created.
|
||||
// Returns false if the size of the input data is larger than
|
||||
// input_block_size().
|
||||
bool WriteBrotliData(const bool is_last, const bool force_flush,
|
||||
size_t* out_size, uint8_t** output);
|
||||
/* Encodes the data in |input_buffer| as a meta-block and writes it to
|
||||
|encoded_buffer| (|*encoded_size| should be set to the size of
|
||||
|encoded_buffer|) and sets |*encoded_size| to the number of bytes that
|
||||
was written. The |input_size| must not be greater than input_block_size().
|
||||
Returns 0 if there was an error and 1 otherwise. */
|
||||
int BrotliEncoderWriteMetaBlock(BrotliEncoderState* state,
|
||||
const size_t input_size,
|
||||
const uint8_t* input_buffer, const int is_last,
|
||||
size_t* encoded_size, uint8_t* encoded_buffer);
|
||||
|
||||
// Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
|
||||
// e.g. for custom static dictionaries for data formats.
|
||||
// Not to be confused with the built-in transformable dictionary of Brotli.
|
||||
// To decode, use BrotliSetCustomDictionary of the decoder with the same
|
||||
// dictionary.
|
||||
void BrotliSetCustomDictionary(size_t size, const uint8_t* dict);
|
||||
/* Writes a metadata meta-block containing the given input to encoded_buffer.
|
||||
|*encoded_size| should be set to the size of the encoded_buffer.
|
||||
Sets |*encoded_size| to the number of bytes that was written.
|
||||
Note that the given input data will not be part of the sliding window and
|
||||
thus no backward references can be made to this data from subsequent
|
||||
metablocks. |input_size| must not be greater than 2^24 and provided
|
||||
|*encoded_size| must not be less than |input_size| + 6.
|
||||
Returns 0 if there was an error and 1 otherwise. */
|
||||
int BrotliEncoderWriteMetadata(BrotliEncoderState* state,
|
||||
const size_t input_size,
|
||||
const uint8_t* input_buffer, const int is_last,
|
||||
size_t* encoded_size, uint8_t* encoded_buffer);
|
||||
|
||||
// No-op, but we keep it here for API backward-compatibility.
|
||||
void WriteStreamHeader(void) {}
|
||||
/* Writes a zero-length meta-block with end-of-input bit set to the
|
||||
internal output buffer and copies the output buffer to |encoded_buffer|
|
||||
(|*encoded_size| should be set to the size of |encoded_buffer|) and sets
|
||||
|*encoded_size| to the number of bytes written.
|
||||
Returns 0 if there was an error and 1 otherwise. */
|
||||
int BrotliEncoderFinishStream(BrotliEncoderState* state, size_t* encoded_size,
|
||||
uint8_t* encoded_buffer);
|
||||
|
||||
private:
|
||||
uint8_t* GetBrotliStorage(size_t size);
|
||||
/* Copies the given input data to the internal ring buffer of the compressor.
|
||||
No processing of the data occurs at this time and this function can be
|
||||
called multiple times before calling BrotliEncoderWriteData() to process the
|
||||
accumulated input. At most BrotliEncoderInputBlockSize() bytes of input can
|
||||
be copied to the ring buffer, otherwise the next BrotliEncoderWriteData()
|
||||
will fail.
|
||||
*/
|
||||
void BrotliEncoderCopyInputToRingBuffer(BrotliEncoderState* state,
|
||||
const size_t input_size,
|
||||
const uint8_t* input_buffer);
|
||||
|
||||
// Allocates and clears a hash table using memory in "*this",
|
||||
// stores the number of buckets in "*table_size" and returns a pointer to
|
||||
// the base of the hash table.
|
||||
int* GetHashTable(int quality,
|
||||
size_t input_size, size_t* table_size);
|
||||
/* Processes the accumulated input data and sets |*out_size| to the length of
|
||||
the new output meta-block, or to zero if no new output meta-block has been
|
||||
created (in this case the processed input data is buffered internally).
|
||||
If |*out_size| is positive, |*output| points to the start of the output
|
||||
data. If |is_last| or |force_flush| is 1, an output meta-block is always
|
||||
created. However, until |is_last| is 1 encoder may retain up to 7 bits
|
||||
of the last byte of output. To force encoder to dump the remaining bits
|
||||
use WriteMetadata() to append an empty meta-data block.
|
||||
Returns 0 if the size of the input data is larger than
|
||||
input_block_size(). */
|
||||
int BrotliEncoderWriteData(BrotliEncoderState* state, const int is_last,
|
||||
const int force_flush, size_t* out_size,
|
||||
uint8_t** output);
|
||||
|
||||
BrotliParams params_;
|
||||
Hashers* hashers_;
|
||||
int hash_type_;
|
||||
uint64_t input_pos_;
|
||||
RingBuffer* ringbuffer_;
|
||||
size_t cmd_alloc_size_;
|
||||
Command* commands_;
|
||||
size_t num_commands_;
|
||||
size_t num_literals_;
|
||||
size_t last_insert_len_;
|
||||
uint64_t last_flush_pos_;
|
||||
uint64_t last_processed_pos_;
|
||||
int dist_cache_[4];
|
||||
int saved_dist_cache_[4];
|
||||
uint8_t last_byte_;
|
||||
uint8_t last_byte_bits_;
|
||||
uint8_t prev_byte_;
|
||||
uint8_t prev_byte2_;
|
||||
size_t storage_size_;
|
||||
uint8_t* storage_;
|
||||
// Hash table for quality 0 mode.
|
||||
int small_table_[1 << 10]; // 2KB
|
||||
int* large_table_; // Allocated only when needed
|
||||
// Command and distance prefix codes (each 64 symbols, stored back-to-back)
|
||||
// used for the next block in quality 0. The command prefix code is over a
|
||||
// smaller alphabet with the following 64 symbols:
|
||||
// 0 - 15: insert length code 0, copy length code 0 - 15, same distance
|
||||
// 16 - 39: insert length code 0, copy length code 0 - 23
|
||||
// 40 - 63: insert length code 0 - 23, copy length code 0
|
||||
// Note that symbols 16 and 40 represent the same code in the full alphabet,
|
||||
// but we do not use either of them in quality 0.
|
||||
uint8_t cmd_depths_[128];
|
||||
uint16_t cmd_bits_[128];
|
||||
// The compressed form of the command and distance prefix codes for the next
|
||||
// block in quality 0.
|
||||
uint8_t cmd_code_[512];
|
||||
size_t cmd_code_numbits_;
|
||||
// Command and literal buffers for quality 1.
|
||||
uint32_t* command_buf_;
|
||||
uint8_t* literal_buf_;
|
||||
|
||||
int is_last_block_emitted_;
|
||||
};
|
||||
/* Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
|
||||
e.g. for custom static dictionaries for data formats.
|
||||
Not to be confused with the built-in transformable dictionary of Brotli.
|
||||
To decode, use BrotliSetCustomDictionary() of the decoder with the same
|
||||
dictionary. */
|
||||
void BrotliEncoderSetCustomDictionary(BrotliEncoderState* state, size_t size,
|
||||
const uint8_t* dict);
|
||||
|
||||
// Compresses the data in input_buffer into encoded_buffer, and sets
|
||||
// *encoded_size to the compressed length.
|
||||
// Returns 0 if there was an error and 1 otherwise.
|
||||
int BrotliCompressBuffer(BrotliParams params,
|
||||
size_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer);
|
||||
/* Returns buffer size that is large enough to contain BrotliEncoderCompress
|
||||
output for any input.
|
||||
Returns 0 if result does not fit size_t. */
|
||||
size_t BrotliEncoderMaxCompressedSize(size_t input_size);
|
||||
|
||||
// Same as above, but uses the specified input and output classes instead
|
||||
// of reading from and writing to pre-allocated memory buffers.
|
||||
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out);
|
||||
/* Compresses the data in |input_buffer| into |encoded_buffer|, and sets
|
||||
|*encoded_size| to the compressed length.
|
||||
BROTLI_DEFAULT_QUALITY, BROTLI_DEFAULT_WINDOW and BROTLI_DEFAULT_MODE should
|
||||
be used as |quality|, |lgwin| and |mode| if there are no specific
|
||||
requirements to encoder speed and compression ratio.
|
||||
If compression fails, |*encoded_size| is set to 0.
|
||||
If BrotliEncoderMaxCompressedSize(|input_size|) is not zero, then
|
||||
|*encoded_size| is never set to a value bigger than that.
|
||||
Returns 0 if there was an error and 1 otherwise. */
|
||||
int BrotliEncoderCompress(int quality, int lgwin, BrotliEncoderMode mode,
|
||||
size_t input_size, const uint8_t* input_buffer,
|
||||
size_t* encoded_size, uint8_t* encoded_buffer);
|
||||
|
||||
// Before compressing the data, sets a custom LZ77 dictionary with
|
||||
// BrotliCompressor::BrotliSetCustomDictionary.
|
||||
int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
|
||||
BrotliParams params,
|
||||
BrotliIn* in, BrotliOut* out);
|
||||
/* Progressively compress input stream and push produced bytes to output stream.
|
||||
Internally workflow consists of 3 tasks:
|
||||
* (optional) copy input data to internal buffer
|
||||
* actually compress data and (optionally) store it to internal buffer
|
||||
* (optional) copy compressed bytes from internal buffer to output stream
|
||||
Whenever all 3 tasks can't move forward anymore, or error occurs, this
|
||||
method returns.
|
||||
|
||||
|available_in| and |next_in| represent input stream; when X bytes of input
|
||||
are consumed, X is subtracted from |available_in| and added to |next_in|.
|
||||
|available_out| and |next_out| represent output stream; when Y bytes are
|
||||
pushed to output, Y is subtracted from |available_out| and added to
|
||||
|next_out|. |total_out|, if it is not a null-pointer, is assigned to the
|
||||
total amount of bytes pushed by the instance of encoder to output.
|
||||
|
||||
|op| is used to perform flush or finish the stream.
|
||||
|
||||
Flushing the stream means forcing encoding of all input passed to encoder and
|
||||
completing the current output block, so it could be fully decoded by stream
|
||||
decoder. To perform flush |op| must be set to BROTLI_OPERATION_FLUSH. Under
|
||||
some circumstances (e.g. lack of output stream capacity) this operation would
|
||||
require several calls to BrotliEncoderCompressStream. The method must be
|
||||
called again until both input stream is depleted and encoder has no more
|
||||
output (see BrotliEncoderHasMoreOutput) after the method is called.
|
||||
|
||||
Finishing the stream means encoding of all input passed to encoder and
|
||||
adding specific "final" marks, so stream decoder could determine that stream
|
||||
is complete. To perform finish |op| must be set to BROTLI_OPERATION_FINISH.
|
||||
Under some circumstances (e.g. lack of output stream capacity) this operation
|
||||
would require several calls to BrotliEncoderCompressStream. The method must
|
||||
be called again until both input stream is depleted and encoder has no more
|
||||
output (see BrotliEncoderHasMoreOutput) after the method is called.
|
||||
|
||||
WARNING: when flushing and finishing, |op| should not change until operation
|
||||
is complete; input stream should not be refilled as well.
|
||||
|
||||
Returns 0 if there was an error and 1 otherwise.
|
||||
*/
|
||||
int BrotliEncoderCompressStream(BrotliEncoderState* s,
|
||||
BrotliEncoderOperation op, size_t* available_in,
|
||||
const uint8_t** next_in, size_t* available_out,
|
||||
uint8_t** next_out, size_t* total_out);
|
||||
|
||||
/* Check if encoder is in "finished" state, i.e. no more input is acceptable and
|
||||
no more output will be produced.
|
||||
Works only with BrotliEncoderCompressStream workflow.
|
||||
Returns 1 if stream is finished and 0 otherwise. */
|
||||
int BrotliEncoderIsFinished(BrotliEncoderState* s);
|
||||
|
||||
/* Check if encoder has more output bytes in internal buffer.
|
||||
Works only with BrotliEncoderCompressStream workflow.
|
||||
Returns 1 if has more output (in internal buffer) and 0 otherwise. */
|
||||
int BrotliEncoderHasMoreOutput(BrotliEncoderState* s);
|
||||
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_ENCODE_H_ */
|
||||
|
@ -8,40 +8,34 @@
|
||||
|
||||
#include "./encode_parallel.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include "./backward_references.h"
|
||||
#include "./bit_cost.h"
|
||||
#include "./block_splitter.h"
|
||||
#include "./brotli_bit_stream.h"
|
||||
#include "./cluster.h"
|
||||
#include "./context.h"
|
||||
#include "./entropy_encode.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./hash.h"
|
||||
#include "./histogram.h"
|
||||
#include "./metablock.h"
|
||||
#include "./port.h"
|
||||
#include "./prefix.h"
|
||||
#include "./transform.h"
|
||||
#include "./utf8_util.h"
|
||||
#include "./write_bits.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
namespace {
|
||||
|
||||
void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
|
||||
uint32_t num_direct_distance_codes,
|
||||
uint32_t distance_postfix_bits) {
|
||||
static void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
|
||||
uint32_t num_direct_distance_codes,
|
||||
uint32_t distance_postfix_bits) {
|
||||
if (num_direct_distance_codes == 0 &&
|
||||
distance_postfix_bits == 0) {
|
||||
return;
|
||||
}
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
Command* cmd = &cmds[i];
|
||||
if (cmd->copy_len() && cmd->cmd_prefix_ >= 128) {
|
||||
PrefixEncodeCopyDistance(cmd->DistanceCode(),
|
||||
if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
|
||||
PrefixEncodeCopyDistance(CommandDistanceCode(cmd),
|
||||
num_direct_distance_codes,
|
||||
distance_postfix_bits,
|
||||
&cmd->dist_prefix_,
|
||||
@ -50,102 +44,115 @@ void RecomputeDistancePrefixes(Command* cmds, size_t num_commands,
|
||||
}
|
||||
}
|
||||
|
||||
bool WriteMetaBlockParallel(const BrotliParams& params,
|
||||
const uint32_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
const uint32_t prefix_size,
|
||||
const uint8_t* prefix_buffer,
|
||||
const bool is_first,
|
||||
const bool is_last,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer) {
|
||||
/* Returns 1 on success, otherwise 0. */
|
||||
int WriteMetaBlockParallel(const BrotliParams& params,
|
||||
const uint32_t input_size,
|
||||
const uint8_t* input_buffer,
|
||||
const uint32_t prefix_size,
|
||||
const uint8_t* prefix_buffer,
|
||||
const int is_first,
|
||||
const int is_last,
|
||||
size_t* encoded_size,
|
||||
uint8_t* encoded_buffer) {
|
||||
if (input_size == 0) {
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
MemoryManager memory_manager;
|
||||
MemoryManager* m = &memory_manager;
|
||||
BrotliInitMemoryManager(m, 0, 0, 0);
|
||||
|
||||
uint8_t* storage;
|
||||
size_t storage_ix;
|
||||
uint8_t first_byte;
|
||||
size_t first_byte_bits;
|
||||
size_t output_size;
|
||||
uint32_t num_direct_distance_codes;
|
||||
uint32_t distance_postfix_bits;
|
||||
ContextType literal_context_mode;
|
||||
size_t last_insert_len = 0;
|
||||
size_t num_commands = 0;
|
||||
size_t num_literals = 0;
|
||||
int dist_cache[4] = { -4, -4, -4, -4 };
|
||||
Command* commands;
|
||||
int hash_type = BROTLI_MIN(int, 10, params.quality);
|
||||
Hashers* hashers;
|
||||
int use_utf8_mode;
|
||||
uint8_t prev_byte;
|
||||
uint8_t prev_byte2;
|
||||
const uint32_t mask = BROTLI_UINT32_MAX >> 1;
|
||||
|
||||
/* Copy prefix + next input block into a continuous area. */
|
||||
uint32_t input_pos = prefix_size;
|
||||
/* CreateBackwardReferences reads up to 3 bytes past the end of input if the
|
||||
mask points past the end of input.
|
||||
FindMatchLengthWithLimit could do another 8 bytes look-forward. */
|
||||
std::vector<uint8_t> input(prefix_size + input_size + 4 + 8);
|
||||
memcpy(&input[0], prefix_buffer, prefix_size);
|
||||
memcpy(&input[input_pos], input_buffer, input_size);
|
||||
uint8_t* input = BROTLI_ALLOC(m, uint8_t, prefix_size + input_size + 4 + 8);
|
||||
if (BROTLI_IS_OOM(m)) goto oom;
|
||||
memcpy(input, prefix_buffer, prefix_size);
|
||||
memcpy(input + input_pos, input_buffer, input_size);
|
||||
/* Since we don't have a ringbuffer, masking is a no-op.
|
||||
We use one less bit than the full range because some of the code uses
|
||||
mask + 1 as the size of the ringbuffer. */
|
||||
const uint32_t mask = std::numeric_limits<uint32_t>::max() >> 1;
|
||||
|
||||
uint8_t prev_byte = input_pos > 0 ? input[(input_pos - 1) & mask] : 0;
|
||||
uint8_t prev_byte2 = input_pos > 1 ? input[(input_pos - 2) & mask] : 0;
|
||||
prev_byte = input_pos > 0 ? input[(input_pos - 1) & mask] : 0;
|
||||
prev_byte2 = input_pos > 1 ? input[(input_pos - 2) & mask] : 0;
|
||||
|
||||
/* Decide about UTF8 mode. */
|
||||
static const double kMinUTF8Ratio = 0.75;
|
||||
bool utf8_mode = IsMostlyUTF8(&input[0], input_pos, mask, input_size,
|
||||
kMinUTF8Ratio);
|
||||
use_utf8_mode = BrotliIsMostlyUTF8(
|
||||
input, input_pos, mask, input_size, kMinUTF8Ratio);
|
||||
|
||||
/* Initialize hashers. */
|
||||
int hash_type = std::min(10, params.quality);
|
||||
Hashers* hashers = new Hashers();
|
||||
hashers->Init(hash_type);
|
||||
hashers = BROTLI_ALLOC(m, Hashers, 1);
|
||||
if (BROTLI_IS_OOM(m)) goto oom;
|
||||
InitHashers(hashers);
|
||||
HashersSetup(m, hashers, hash_type);
|
||||
if (BROTLI_IS_OOM(m)) goto oom;
|
||||
|
||||
/* Compute backward references. */
|
||||
size_t last_insert_len = 0;
|
||||
size_t num_commands = 0;
|
||||
size_t num_literals = 0;
|
||||
int dist_cache[4] = { -4, -4, -4, -4 };
|
||||
Command* commands = static_cast<Command*>(
|
||||
malloc(sizeof(Command) * ((input_size + 1) >> 1)));
|
||||
if (commands == 0) {
|
||||
delete hashers;
|
||||
return false;
|
||||
}
|
||||
CreateBackwardReferences(
|
||||
input_size, input_pos, is_last,
|
||||
&input[0], mask,
|
||||
params.quality,
|
||||
params.lgwin,
|
||||
hashers,
|
||||
hash_type,
|
||||
dist_cache,
|
||||
&last_insert_len,
|
||||
commands,
|
||||
&num_commands,
|
||||
&num_literals);
|
||||
delete hashers;
|
||||
commands = BROTLI_ALLOC(m, Command, ((input_size + 1) >> 1));
|
||||
if (BROTLI_IS_OOM(m)) goto oom;
|
||||
BrotliCreateBackwardReferences(m, input_size, input_pos, is_last, input,
|
||||
mask, params.quality, params.lgwin, hashers, hash_type, dist_cache,
|
||||
&last_insert_len, commands, &num_commands, &num_literals);
|
||||
if (BROTLI_IS_OOM(m)) goto oom;
|
||||
DestroyHashers(m, hashers);
|
||||
BROTLI_FREE(m, hashers);
|
||||
if (last_insert_len > 0) {
|
||||
commands[num_commands++] = Command(last_insert_len);
|
||||
InitInsertCommand(&commands[num_commands++], last_insert_len);
|
||||
num_literals += last_insert_len;
|
||||
}
|
||||
assert(num_commands != 0);
|
||||
|
||||
/* Build the meta-block. */
|
||||
MetaBlockSplit mb;
|
||||
uint32_t num_direct_distance_codes =
|
||||
params.mode == BrotliParams::MODE_FONT ? 12 : 0;
|
||||
uint32_t distance_postfix_bits =
|
||||
params.mode == BrotliParams::MODE_FONT ? 1 : 0;
|
||||
ContextType literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
|
||||
InitMetaBlockSplit(&mb);
|
||||
num_direct_distance_codes = params.mode == BrotliParams::MODE_FONT ? 12 : 0;
|
||||
distance_postfix_bits = params.mode == BrotliParams::MODE_FONT ? 1 : 0;
|
||||
literal_context_mode = use_utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
|
||||
RecomputeDistancePrefixes(commands, num_commands,
|
||||
num_direct_distance_codes,
|
||||
distance_postfix_bits);
|
||||
if (params.quality <= 9) {
|
||||
BuildMetaBlockGreedy(&input[0], input_pos, mask,
|
||||
commands, num_commands,
|
||||
&mb);
|
||||
BrotliBuildMetaBlockGreedy(m, input, input_pos, mask,
|
||||
commands, num_commands,
|
||||
&mb);
|
||||
if (BROTLI_IS_OOM(m)) goto oom;
|
||||
} else {
|
||||
BuildMetaBlock(&input[0], input_pos, mask,
|
||||
prev_byte, prev_byte2,
|
||||
commands, num_commands,
|
||||
literal_context_mode,
|
||||
&mb);
|
||||
BrotliBuildMetaBlock(m, input, input_pos, mask, params.quality,
|
||||
prev_byte, prev_byte2,
|
||||
commands, num_commands,
|
||||
literal_context_mode,
|
||||
&mb);
|
||||
if (BROTLI_IS_OOM(m)) goto oom;
|
||||
}
|
||||
|
||||
/* Set up the temporary output storage. */
|
||||
const size_t max_out_size = 2 * input_size + 500;
|
||||
std::vector<uint8_t> storage(max_out_size);
|
||||
uint8_t first_byte = 0;
|
||||
size_t first_byte_bits = 0;
|
||||
storage = BROTLI_ALLOC(m, uint8_t, 2 * input_size + 500);
|
||||
if (BROTLI_IS_OOM(m)) goto oom;
|
||||
first_byte = 0;
|
||||
first_byte_bits = 0;
|
||||
if (is_first) {
|
||||
if (params.lgwin == 16) {
|
||||
first_byte = 0;
|
||||
@ -159,45 +166,55 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
|
||||
}
|
||||
}
|
||||
storage[0] = static_cast<uint8_t>(first_byte);
|
||||
size_t storage_ix = first_byte_bits;
|
||||
storage_ix = first_byte_bits;
|
||||
|
||||
/* Store the meta-block to the temporary output. */
|
||||
StoreMetaBlock(&input[0], input_pos, input_size, mask,
|
||||
prev_byte, prev_byte2,
|
||||
is_last,
|
||||
num_direct_distance_codes,
|
||||
distance_postfix_bits,
|
||||
literal_context_mode,
|
||||
commands, num_commands,
|
||||
mb,
|
||||
&storage_ix, &storage[0]);
|
||||
free(commands);
|
||||
BrotliStoreMetaBlock(m, input, input_pos, input_size, mask,
|
||||
prev_byte, prev_byte2,
|
||||
is_last,
|
||||
num_direct_distance_codes,
|
||||
distance_postfix_bits,
|
||||
literal_context_mode,
|
||||
commands, num_commands,
|
||||
&mb,
|
||||
&storage_ix, storage);
|
||||
if (BROTLI_IS_OOM(m)) goto oom;
|
||||
DestroyMetaBlockSplit(m, &mb);
|
||||
BROTLI_FREE(m, commands);
|
||||
|
||||
/* If this is not the last meta-block, store an empty metadata
|
||||
meta-block so that the meta-block will end at a byte boundary. */
|
||||
if (!is_last) {
|
||||
StoreSyncMetaBlock(&storage_ix, &storage[0]);
|
||||
BrotliStoreSyncMetaBlock(&storage_ix, storage);
|
||||
}
|
||||
|
||||
/* If the compressed data is too large, fall back to an uncompressed
|
||||
meta-block. */
|
||||
size_t output_size = storage_ix >> 3;
|
||||
output_size = storage_ix >> 3;
|
||||
if (input_size + 4 < output_size) {
|
||||
storage[0] = static_cast<uint8_t>(first_byte);
|
||||
storage_ix = first_byte_bits;
|
||||
StoreUncompressedMetaBlock(is_last, &input[0], input_pos, mask,
|
||||
input_size,
|
||||
&storage_ix, &storage[0]);
|
||||
BrotliStoreUncompressedMetaBlock(is_last, input, input_pos, mask,
|
||||
input_size,
|
||||
&storage_ix, storage);
|
||||
output_size = storage_ix >> 3;
|
||||
}
|
||||
|
||||
/* Copy the temporary output with size-check to the output. */
|
||||
if (output_size > *encoded_size) {
|
||||
return false;
|
||||
BROTLI_FREE(m, storage);
|
||||
BROTLI_FREE(m, input);
|
||||
return 0;
|
||||
}
|
||||
memcpy(encoded_buffer, &storage[0], output_size);
|
||||
memcpy(encoded_buffer, storage, output_size);
|
||||
*encoded_size = output_size;
|
||||
return true;
|
||||
BROTLI_FREE(m, storage);
|
||||
BROTLI_FREE(m, input);
|
||||
return 1;
|
||||
|
||||
oom:
|
||||
BrotliWipeOutMemoryManager(m);
|
||||
return 0;
|
||||
}
|
||||
|
||||
} /* namespace */
|
||||
@ -217,20 +234,20 @@ int BrotliCompressBufferParallel(BrotliParams params,
|
||||
}
|
||||
|
||||
/* Sanitize params. */
|
||||
if (params.lgwin < kMinWindowBits) {
|
||||
params.lgwin = kMinWindowBits;
|
||||
} else if (params.lgwin > kMaxWindowBits) {
|
||||
params.lgwin = kMaxWindowBits;
|
||||
if (params.lgwin < kBrotliMinWindowBits) {
|
||||
params.lgwin = kBrotliMinWindowBits;
|
||||
} else if (params.lgwin > kBrotliMaxWindowBits) {
|
||||
params.lgwin = kBrotliMaxWindowBits;
|
||||
}
|
||||
if (params.lgblock == 0) {
|
||||
params.lgblock = 16;
|
||||
if (params.quality >= 9 && params.lgwin > params.lgblock) {
|
||||
params.lgblock = std::min(21, params.lgwin);
|
||||
params.lgblock = BROTLI_MIN(int, 21, params.lgwin);
|
||||
}
|
||||
} else if (params.lgblock < kMinInputBlockBits) {
|
||||
params.lgblock = kMinInputBlockBits;
|
||||
} else if (params.lgblock > kMaxInputBlockBits) {
|
||||
params.lgblock = kMaxInputBlockBits;
|
||||
} else if (params.lgblock < kBrotliMinInputBlockBits) {
|
||||
params.lgblock = kBrotliMinInputBlockBits;
|
||||
} else if (params.lgblock > kBrotliMaxInputBlockBits) {
|
||||
params.lgblock = kBrotliMaxInputBlockBits;
|
||||
}
|
||||
size_t max_input_block_size = 1 << params.lgblock;
|
||||
size_t max_prefix_size = 1u << params.lgwin;
|
||||
@ -239,10 +256,10 @@ int BrotliCompressBufferParallel(BrotliParams params,
|
||||
|
||||
/* Compress block-by-block independently. */
|
||||
for (size_t pos = 0; pos < input_size; ) {
|
||||
uint32_t input_block_size =
|
||||
static_cast<uint32_t>(std::min(max_input_block_size, input_size - pos));
|
||||
uint32_t input_block_size = static_cast<uint32_t>(
|
||||
BROTLI_MIN(size_t, max_input_block_size, input_size - pos));
|
||||
uint32_t prefix_size =
|
||||
static_cast<uint32_t>(std::min(max_prefix_size, pos));
|
||||
static_cast<uint32_t>(BROTLI_MIN(size_t, max_prefix_size, pos));
|
||||
size_t out_size = input_block_size + (input_block_size >> 3) + 1024;
|
||||
std::vector<uint8_t> out(out_size);
|
||||
if (!WriteMetaBlockParallel(params,
|
||||
@ -250,11 +267,11 @@ int BrotliCompressBufferParallel(BrotliParams params,
|
||||
&input_buffer[pos],
|
||||
prefix_size,
|
||||
&input_buffer[pos - prefix_size],
|
||||
pos == 0,
|
||||
pos + input_block_size == input_size,
|
||||
(pos == 0) ? 1 : 0,
|
||||
(pos + input_block_size == input_size) ? 1 : 0,
|
||||
&out_size,
|
||||
&out[0])) {
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
out.resize(out_size);
|
||||
compressed_pieces.push_back(out);
|
||||
@ -266,14 +283,14 @@ int BrotliCompressBufferParallel(BrotliParams params,
|
||||
for (size_t i = 0; i < compressed_pieces.size(); ++i) {
|
||||
const std::vector<uint8_t>& out = compressed_pieces[i];
|
||||
if (out_pos + out.size() > *encoded_size) {
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
memcpy(&encoded_buffer[out_pos], &out[0], out.size());
|
||||
out_pos += out.size();
|
||||
}
|
||||
*encoded_size = out_pos;
|
||||
|
||||
return true;
|
||||
return 1;
|
||||
}
|
||||
|
||||
} /* namespace brotli */
|
||||
|
@ -12,7 +12,7 @@
|
||||
#define BROTLI_ENC_ENCODE_PARALLEL_H_
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./encode.h"
|
||||
#include "./compressor.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
|
@ -8,36 +8,46 @@
|
||||
|
||||
#include "./entropy_encode.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <limits>
|
||||
#include <string.h> /* memset */
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include "../common/types.h"
|
||||
#include "./histogram.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void SetDepth(const HuffmanTree &p,
|
||||
HuffmanTree *pool,
|
||||
uint8_t *depth,
|
||||
uint8_t level) {
|
||||
if (p.index_left_ >= 0) {
|
||||
++level;
|
||||
SetDepth(pool[p.index_left_], pool, depth, level);
|
||||
SetDepth(pool[p.index_right_or_value_], pool, depth, level);
|
||||
} else {
|
||||
depth[p.index_right_or_value_] = level;
|
||||
/* Walks the tree rooted at pool[p0] without recursion and stores, for every
   leaf, the level at which it was found into depth[leaf value].
   A node is treated as internal when its index_left_ is >= 0; otherwise it is
   a leaf and index_right_or_value_ is used as the index into depth[].
   stack[level] holds the right child still to be visited at that level, or
   -1 when nothing is pending there.
   Returns 1 when the whole tree was visited, or 0 as soon as a node deeper
   than max_depth is reached (depth[] may then be only partially written). */
int BrotliSetDepth(int p0, HuffmanTree* pool, uint8_t* depth, int max_depth) {
|
||||
int stack[16];
|
||||
int level = 0;
|
||||
int p = p0;
|
||||
/* stack[] has 16 slots and is indexed by level, so levels 0..15 only. */
assert(max_depth <= 15);
|
||||
stack[0] = -1;
|
||||
while (1) {
|
||||
if (pool[p].index_left_ >= 0) {
|
||||
/* Internal node: remember the right child, descend into the left one. */
level++;
|
||||
if (level > max_depth) return 0;
|
||||
stack[level] = pool[p].index_right_or_value_;
|
||||
p = pool[p].index_left_;
|
||||
continue;
|
||||
} else {
|
||||
/* Leaf: record the current level for this value. */
depth[pool[p].index_right_or_value_] = (uint8_t)level;
|
||||
}
|
||||
/* Climb back up to the nearest level that still has a pending right child. */
while (level >= 0 && stack[level] == -1) level--;
|
||||
/* Nothing pending anywhere: traversal is complete. */
if (level < 0) return 1;
|
||||
p = stack[level];
|
||||
stack[level] = -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Sort the root nodes, least popular first. */
|
||||
static inline bool SortHuffmanTree(const HuffmanTree& v0,
|
||||
const HuffmanTree& v1) {
|
||||
if (v0.total_count_ != v1.total_count_) {
|
||||
return v0.total_count_ < v1.total_count_;
|
||||
static inline int SortHuffmanTree(const HuffmanTree* v0,
|
||||
const HuffmanTree* v1) {
|
||||
if (v0->total_count_ != v1->total_count_) {
|
||||
return (v0->total_count_ < v1->total_count_) ? 1 : 0;
|
||||
}
|
||||
return v0.index_right_or_value_ > v1.index_right_or_value_;
|
||||
return (v0->index_right_or_value_ > v1->index_right_or_value_) ? 1 : 0;
|
||||
}
|
||||
|
||||
/* This function will create a Huffman tree.
|
||||
@ -55,31 +65,37 @@ static inline bool SortHuffmanTree(const HuffmanTree& v0,
|
||||
we are not planning to use this with extremely long blocks.
|
||||
|
||||
See http://en.wikipedia.org/wiki/Huffman_coding */
|
||||
void CreateHuffmanTree(const uint32_t *data,
|
||||
const size_t length,
|
||||
const int tree_limit,
|
||||
HuffmanTree* tree,
|
||||
uint8_t *depth) {
|
||||
void BrotliCreateHuffmanTree(const uint32_t *data,
|
||||
const size_t length,
|
||||
const int tree_limit,
|
||||
HuffmanTree* tree,
|
||||
uint8_t *depth) {
|
||||
uint32_t count_limit;
|
||||
HuffmanTree sentinel;
|
||||
InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
|
||||
/* For block sizes below 64 kB, we never need to do a second iteration
|
||||
of this loop. Probably all of our block sizes will be smaller than
|
||||
that, so this loop is mostly of academic interest. If we actually
|
||||
would need this, we would be better off with the Katajainen algorithm. */
|
||||
for (uint32_t count_limit = 1; ; count_limit *= 2) {
|
||||
for (count_limit = 1; ; count_limit *= 2) {
|
||||
size_t n = 0;
|
||||
for (size_t i = length; i != 0;) {
|
||||
size_t i;
|
||||
size_t j;
|
||||
size_t k;
|
||||
for (i = length; i != 0;) {
|
||||
--i;
|
||||
if (data[i]) {
|
||||
const uint32_t count = std::max(data[i], count_limit);
|
||||
tree[n++] = HuffmanTree(count, -1, static_cast<int16_t>(i));
|
||||
const uint32_t count = BROTLI_MAX(uint32_t, data[i], count_limit);
|
||||
InitHuffmanTree(&tree[n++], count, -1, (int16_t)i);
|
||||
}
|
||||
}
|
||||
|
||||
if (n == 1) {
|
||||
depth[tree[0].index_right_or_value_] = 1; // Only one element.
|
||||
depth[tree[0].index_right_or_value_] = 1; /* Only one element. */
|
||||
break;
|
||||
}
|
||||
|
||||
std::sort(tree, tree + n, SortHuffmanTree);
|
||||
SortHuffmanTreeItems(tree, n, SortHuffmanTree);
|
||||
|
||||
/* The nodes are:
|
||||
[0, n): the sorted leaf nodes that we start with.
|
||||
@ -88,13 +104,12 @@ void CreateHuffmanTree(const uint32_t *data,
|
||||
(n+1). These are naturally in ascending order.
|
||||
[2n]: we add a sentinel at the end as well.
|
||||
There will be (2n+1) elements at the end. */
|
||||
const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
|
||||
tree[n] = sentinel;
|
||||
tree[n + 1] = sentinel;
|
||||
|
||||
size_t i = 0; /* Points to the next leaf node. */
|
||||
size_t j = n + 1; /* Points to the next non-leaf node. */
|
||||
for (size_t k = n - 1; k != 0; --k) {
|
||||
i = 0; /* Points to the next leaf node. */
|
||||
j = n + 1; /* Points to the next non-leaf node. */
|
||||
for (k = n - 1; k != 0; --k) {
|
||||
size_t left, right;
|
||||
if (tree[i].total_count_ <= tree[j].total_count_) {
|
||||
left = i;
|
||||
@ -111,21 +126,21 @@ void CreateHuffmanTree(const uint32_t *data,
|
||||
++j;
|
||||
}
|
||||
|
||||
{
|
||||
/* The sentinel node becomes the parent node. */
|
||||
size_t j_end = 2 * n - k;
|
||||
tree[j_end].total_count_ =
|
||||
tree[left].total_count_ + tree[right].total_count_;
|
||||
tree[j_end].index_left_ = static_cast<int16_t>(left);
|
||||
tree[j_end].index_right_or_value_ = static_cast<int16_t>(right);
|
||||
size_t j_end = 2 * n - k;
|
||||
tree[j_end].total_count_ =
|
||||
tree[left].total_count_ + tree[right].total_count_;
|
||||
tree[j_end].index_left_ = (int16_t)left;
|
||||
tree[j_end].index_right_or_value_ = (int16_t)right;
|
||||
|
||||
/* Add back the last sentinel node. */
|
||||
tree[j_end + 1] = sentinel;
|
||||
tree[j_end + 1] = sentinel;
|
||||
}
|
||||
}
|
||||
SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
|
||||
|
||||
if (BrotliSetDepth((int)(2 * n - 1), &tree[0], depth, tree_limit)) {
|
||||
/* We need to pack the Huffman tree in tree_limit bits. If this was not
|
||||
successful, add fake entities to the lowest values and retry. */
|
||||
if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -142,7 +157,7 @@ static void Reverse(uint8_t* v, size_t start, size_t end) {
|
||||
}
|
||||
}
|
||||
|
||||
static void WriteHuffmanTreeRepetitions(
|
||||
static void BrotliWriteHuffmanTreeRepetitions(
|
||||
const uint8_t previous_value,
|
||||
const uint8_t value,
|
||||
size_t repetitions,
|
||||
@ -163,16 +178,17 @@ static void WriteHuffmanTreeRepetitions(
|
||||
--repetitions;
|
||||
}
|
||||
if (repetitions < 3) {
|
||||
for (size_t i = 0; i < repetitions; ++i) {
|
||||
size_t i;
|
||||
for (i = 0; i < repetitions; ++i) {
|
||||
tree[*tree_size] = value;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
}
|
||||
} else {
|
||||
repetitions -= 3;
|
||||
size_t start = *tree_size;
|
||||
while (true) {
|
||||
tree[*tree_size] = 16;
|
||||
repetitions -= 3;
|
||||
while (1) {
|
||||
tree[*tree_size] = BROTLI_REPEAT_PREVIOUS_CODE_LENGTH;
|
||||
extra_bits_data[*tree_size] = repetitions & 0x3;
|
||||
++(*tree_size);
|
||||
repetitions >>= 2;
|
||||
@ -186,7 +202,7 @@ static void WriteHuffmanTreeRepetitions(
|
||||
}
|
||||
}
|
||||
|
||||
static void WriteHuffmanTreeRepetitionsZeros(
|
||||
static void BrotliWriteHuffmanTreeRepetitionsZeros(
|
||||
size_t repetitions,
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
@ -198,16 +214,17 @@ static void WriteHuffmanTreeRepetitionsZeros(
|
||||
--repetitions;
|
||||
}
|
||||
if (repetitions < 3) {
|
||||
for (size_t i = 0; i < repetitions; ++i) {
|
||||
size_t i;
|
||||
for (i = 0; i < repetitions; ++i) {
|
||||
tree[*tree_size] = 0;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
}
|
||||
} else {
|
||||
repetitions -= 3;
|
||||
size_t start = *tree_size;
|
||||
while (true) {
|
||||
tree[*tree_size] = 17;
|
||||
repetitions -= 3;
|
||||
while (1) {
|
||||
tree[*tree_size] = BROTLI_REPEAT_ZERO_CODE_LENGTH;
|
||||
extra_bits_data[*tree_size] = repetitions & 0x7;
|
||||
++(*tree_size);
|
||||
repetitions >>= 3;
|
||||
@ -221,8 +238,8 @@ static void WriteHuffmanTreeRepetitionsZeros(
|
||||
}
|
||||
}
|
||||
|
||||
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
|
||||
uint8_t* good_for_rle) {
|
||||
void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
|
||||
uint8_t* good_for_rle) {
|
||||
size_t nonzero_count = 0;
|
||||
size_t stride;
|
||||
size_t limit;
|
||||
@ -260,8 +277,8 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
|
||||
/* Small histogram will model it well. */
|
||||
return;
|
||||
}
|
||||
size_t zeros = length - nonzeros;
|
||||
if (smallest_nonzero < 4) {
|
||||
size_t zeros = length - nonzeros;
|
||||
if (zeros < 6) {
|
||||
for (i = 1; i < length - 1; ++i) {
|
||||
if (counts[i - 1] != 0 && counts[i] == 0 && counts[i + 1] != 0) {
|
||||
@ -324,7 +341,7 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
|
||||
for (k = 0; k < stride; ++k) {
|
||||
/* We don't want to change value at counts[i],
|
||||
that is already belonging to the next stride. Thus - 1. */
|
||||
counts[i - k - 1] = static_cast<uint32_t>(count);
|
||||
counts[i - k - 1] = (uint32_t)count;
|
||||
}
|
||||
}
|
||||
stride = 0;
|
||||
@ -353,16 +370,18 @@ void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
|
||||
}
|
||||
|
||||
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
|
||||
bool *use_rle_for_non_zero,
|
||||
bool *use_rle_for_zero) {
|
||||
int *use_rle_for_non_zero,
|
||||
int *use_rle_for_zero) {
|
||||
size_t total_reps_zero = 0;
|
||||
size_t total_reps_non_zero = 0;
|
||||
size_t count_reps_zero = 1;
|
||||
size_t count_reps_non_zero = 1;
|
||||
for (size_t i = 0; i < length;) {
|
||||
size_t i;
|
||||
for (i = 0; i < length;) {
|
||||
const uint8_t value = depth[i];
|
||||
size_t reps = 1;
|
||||
for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
|
||||
size_t k;
|
||||
for (k = i + 1; k < length && depth[k] == value; ++k) {
|
||||
++reps;
|
||||
}
|
||||
if (reps >= 3 && value == 0) {
|
||||
@ -375,20 +394,24 @@ static void DecideOverRleUse(const uint8_t* depth, const size_t length,
|
||||
}
|
||||
i += reps;
|
||||
}
|
||||
*use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
|
||||
*use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
|
||||
*use_rle_for_non_zero =
|
||||
(total_reps_non_zero > count_reps_non_zero * 2) ? 1 : 0;
|
||||
*use_rle_for_zero = (total_reps_zero > count_reps_zero * 2) ? 1 : 0;
|
||||
}
|
||||
|
||||
void WriteHuffmanTree(const uint8_t* depth,
|
||||
size_t length,
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data) {
|
||||
uint8_t previous_value = 8;
|
||||
void BrotliWriteHuffmanTree(const uint8_t* depth,
|
||||
size_t length,
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data) {
|
||||
uint8_t previous_value = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
|
||||
size_t i;
|
||||
int use_rle_for_non_zero = 0;
|
||||
int use_rle_for_zero = 0;
|
||||
|
||||
/* Throw away trailing zeros. */
|
||||
size_t new_length = length;
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
for (i = 0; i < length; ++i) {
|
||||
if (depth[length - i - 1] == 0) {
|
||||
--new_length;
|
||||
} else {
|
||||
@ -397,8 +420,6 @@ void WriteHuffmanTree(const uint8_t* depth,
|
||||
}
|
||||
|
||||
/* First gather statistics on if it is a good idea to do rle. */
|
||||
bool use_rle_for_non_zero = false;
|
||||
bool use_rle_for_zero = false;
|
||||
if (length > 50) {
|
||||
/* Find rle coding for longer codes.
|
||||
Shorter codes seem not to benefit from rle. */
|
||||
@ -407,73 +428,73 @@ void WriteHuffmanTree(const uint8_t* depth,
|
||||
}
|
||||
|
||||
/* Actual rle coding. */
|
||||
for (size_t i = 0; i < new_length;) {
|
||||
for (i = 0; i < new_length;) {
|
||||
const uint8_t value = depth[i];
|
||||
size_t reps = 1;
|
||||
if ((value != 0 && use_rle_for_non_zero) ||
|
||||
(value == 0 && use_rle_for_zero)) {
|
||||
for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
|
||||
size_t k;
|
||||
for (k = i + 1; k < new_length && depth[k] == value; ++k) {
|
||||
++reps;
|
||||
}
|
||||
}
|
||||
if (value == 0) {
|
||||
WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
|
||||
BrotliWriteHuffmanTreeRepetitionsZeros(
|
||||
reps, tree_size, tree, extra_bits_data);
|
||||
} else {
|
||||
WriteHuffmanTreeRepetitions(previous_value,
|
||||
value, reps, tree_size,
|
||||
tree, extra_bits_data);
|
||||
BrotliWriteHuffmanTreeRepetitions(previous_value,
|
||||
value, reps, tree_size,
|
||||
tree, extra_bits_data);
|
||||
previous_value = value;
|
||||
}
|
||||
i += reps;
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
uint16_t ReverseBits(int num_bits, uint16_t bits) {
|
||||
static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) {
|
||||
static const size_t kLut[16] = { /* Pre-reversed 4-bit values. */
|
||||
0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
|
||||
0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
|
||||
};
|
||||
size_t retval = kLut[bits & 0xf];
|
||||
for (int i = 4; i < num_bits; i += 4) {
|
||||
size_t i;
|
||||
for (i = 4; i < num_bits; i += 4) {
|
||||
retval <<= 4;
|
||||
bits = static_cast<uint16_t>(bits >> 4);
|
||||
bits = (uint16_t)(bits >> 4);
|
||||
retval |= kLut[bits & 0xf];
|
||||
}
|
||||
retval >>= (-num_bits & 0x3);
|
||||
return static_cast<uint16_t>(retval);
|
||||
return (uint16_t)retval;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
/* 0..15 are values for bits */
|
||||
#define MAX_HUFFMAN_BITS 16
|
||||
|
||||
void ConvertBitDepthsToSymbols(const uint8_t *depth,
|
||||
size_t len,
|
||||
uint16_t *bits) {
|
||||
void BrotliConvertBitDepthsToSymbols(const uint8_t *depth,
|
||||
size_t len,
|
||||
uint16_t *bits) {
|
||||
/* In Brotli, all bit depths are [1..15]
|
||||
0 bit depth means that the symbol does not exist. */
|
||||
const int kMaxBits = 16; // 0..15 are values for bits
|
||||
uint16_t bl_count[kMaxBits] = { 0 };
|
||||
{
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
++bl_count[depth[i]];
|
||||
}
|
||||
bl_count[0] = 0;
|
||||
uint16_t bl_count[MAX_HUFFMAN_BITS] = { 0 };
|
||||
uint16_t next_code[MAX_HUFFMAN_BITS];
|
||||
size_t i;
|
||||
int code = 0;
|
||||
for (i = 0; i < len; ++i) {
|
||||
++bl_count[depth[i]];
|
||||
}
|
||||
uint16_t next_code[kMaxBits];
|
||||
bl_count[0] = 0;
|
||||
next_code[0] = 0;
|
||||
{
|
||||
int code = 0;
|
||||
for (int bits = 1; bits < kMaxBits; ++bits) {
|
||||
code = (code + bl_count[bits - 1]) << 1;
|
||||
next_code[bits] = static_cast<uint16_t>(code);
|
||||
}
|
||||
for (i = 1; i < MAX_HUFFMAN_BITS; ++i) {
|
||||
code = (code + bl_count[i - 1]) << 1;
|
||||
next_code[i] = (uint16_t)code;
|
||||
}
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
for (i = 0; i < len; ++i) {
|
||||
if (depth[i]) {
|
||||
bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
|
||||
bits[i] = BrotliReverseBits(depth[i], next_code[depth[i]]++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
@ -9,29 +9,30 @@
|
||||
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
|
||||
#define BROTLI_ENC_ENTROPY_ENCODE_H_
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./histogram.h"
|
||||
#include "./prefix.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* A node of a Huffman tree. */
|
||||
struct HuffmanTree {
|
||||
HuffmanTree() {}
|
||||
HuffmanTree(uint32_t count, int16_t left, int16_t right)
|
||||
: total_count_(count),
|
||||
index_left_(left),
|
||||
index_right_or_value_(right) {
|
||||
}
|
||||
typedef struct HuffmanTree {
|
||||
uint32_t total_count_;
|
||||
int16_t index_left_;
|
||||
int16_t index_right_or_value_;
|
||||
};
|
||||
} HuffmanTree;
|
||||
|
||||
void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
|
||||
uint8_t *depth, uint8_t level);
|
||||
/* Plain-C replacement for a constructor: assigns all three fields of *self.
   count -> total_count_, left -> index_left_, right -> index_right_or_value_.
   No validation is performed on the arguments. */
static BROTLI_INLINE void InitHuffmanTree(HuffmanTree* self, uint32_t count,
|
||||
int16_t left, int16_t right) {
|
||||
self->total_count_ = count;
|
||||
self->index_left_ = left;
|
||||
self->index_right_or_value_ = right;
|
||||
}
|
||||
|
||||
/* Returns 1 if assignment of depths succeeded, otherwise 0. */
|
||||
BROTLI_INTERNAL int BrotliSetDepth(
|
||||
int p, HuffmanTree* pool, uint8_t* depth, int max_depth);
|
||||
|
||||
/* This function will create a Huffman tree.
|
||||
|
||||
@ -45,11 +46,11 @@ void SetDepth(const HuffmanTree &p, HuffmanTree *pool,
|
||||
be at least 2 * length + 1 long.
|
||||
|
||||
See http://en.wikipedia.org/wiki/Huffman_coding */
|
||||
void CreateHuffmanTree(const uint32_t *data,
|
||||
const size_t length,
|
||||
const int tree_limit,
|
||||
HuffmanTree* tree,
|
||||
uint8_t *depth);
|
||||
BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t *data,
|
||||
const size_t length,
|
||||
const int tree_limit,
|
||||
HuffmanTree* tree,
|
||||
uint8_t *depth);
|
||||
|
||||
/* Change the population counts in a way that the consequent
|
||||
Huffman tree compression, especially its rle-part will be more
|
||||
@ -58,48 +59,63 @@ void CreateHuffmanTree(const uint32_t *data,
|
||||
length contains the size of the histogram.
|
||||
counts contains the population counts.
|
||||
good_for_rle is a buffer of at least length size */
|
||||
void OptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
|
||||
uint8_t* good_for_rle);
|
||||
BROTLI_INTERNAL void BrotliOptimizeHuffmanCountsForRle(
|
||||
size_t length, uint32_t* counts, uint8_t* good_for_rle);
|
||||
|
||||
/* Write a Huffman tree from bit depths into the bitstream representation
|
||||
of a Huffman tree. The generated Huffman tree is to be compressed once
|
||||
more using a Huffman tree */
|
||||
void WriteHuffmanTree(const uint8_t* depth,
|
||||
size_t num,
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data);
|
||||
BROTLI_INTERNAL void BrotliWriteHuffmanTree(const uint8_t* depth,
|
||||
size_t num,
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data);
|
||||
|
||||
/* Get the actual bit values for a tree of bit depths. */
|
||||
void ConvertBitDepthsToSymbols(const uint8_t *depth,
|
||||
size_t len,
|
||||
uint16_t *bits);
|
||||
BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t *depth,
|
||||
size_t len,
|
||||
uint16_t *bits);
|
||||
|
||||
template<int kSize>
|
||||
struct EntropyCode {
|
||||
// How many bits for symbol.
|
||||
uint8_t depth_[kSize];
|
||||
// Actual bits used to represent the symbol.
|
||||
uint16_t bits_[kSize];
|
||||
// How many non-zero depth.
|
||||
int count_;
|
||||
// First four symbols with non-zero depth.
|
||||
int symbols_[4];
|
||||
};
|
||||
/* Input size optimized Shell sort. */
|
||||
typedef int (*HuffmanTreeComparator)(const HuffmanTree*, const HuffmanTree*);
|
||||
/* Sorts items[0..n) in place.
   comparator(a, b) must return non-zero when *a has to be placed before *b;
   an element is moved towards the front for as long as that holds.
   Arrays with fewer than 13 elements use a plain insertion sort; larger ones
   use a Shell sort over the fixed gap table below. */
static BROTLI_INLINE void SortHuffmanTreeItems(HuffmanTree* items,
|
||||
const size_t n, HuffmanTreeComparator comparator) {
|
||||
/* Shell sort gaps in decreasing order; the final gap of 1 is a full
   insertion pass. */
static const size_t gaps[] = {132, 57, 23, 10, 4, 1};
|
||||
if (n < 13) {
|
||||
/* Insertion sort. */
|
||||
size_t i;
|
||||
for (i = 1; i < n; ++i) {
|
||||
HuffmanTree tmp = items[i];
|
||||
/* k is the slot tmp will be written to; j is the candidate to its left. */
size_t k = i;
|
||||
size_t j = i - 1;
|
||||
while (comparator(&tmp, &items[j])) {
|
||||
items[k] = items[j];
|
||||
k = j;
|
||||
/* j is unsigned: leave the loop right after index 0 was handled so the
   wrapped-around value is never used as an index. */
if (!j--) break;
|
||||
}
|
||||
items[k] = tmp;
|
||||
}
|
||||
return;
|
||||
} else {
|
||||
/* Shell sort. */
|
||||
/* For n < 57 the two largest gaps (132, 57) are skipped and the first
   gap used is 23. */
int g = n < 57 ? 2 : 0;
|
||||
for (; g < 6; ++g) {
|
||||
size_t gap = gaps[g];
|
||||
size_t i;
|
||||
for (i = gap; i < n; ++i) {
|
||||
size_t j = i;
|
||||
HuffmanTree tmp = items[i];
|
||||
/* Gapped insertion: shift elements that must follow tmp by one gap. */
for (; j >= gap && comparator(&tmp, &items[j - gap]); j -= gap) {
|
||||
items[j] = items[j - gap];
|
||||
}
|
||||
items[j] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static const int kCodeLengthCodes = 18;
|
||||
|
||||
// Literal entropy code.
|
||||
typedef EntropyCode<256> EntropyCodeLiteral;
|
||||
// Prefix entropy codes.
|
||||
typedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
|
||||
typedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
|
||||
typedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
|
||||
// Context map entropy code, 256 Huffman tree indexes + 16 run length codes.
|
||||
typedef EntropyCode<272> EntropyCodeContextMap;
|
||||
// Block type entropy code, 256 block types + 2 special symbols.
|
||||
typedef EntropyCode<258> EntropyCodeBlockType;
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_ENTROPY_ENCODE_H_ */
|
||||
|
@ -9,17 +9,20 @@
|
||||
#ifndef BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
|
||||
#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include "../common/port.h"
|
||||
#include "../common/types.h"
|
||||
#include "./prefix.h"
|
||||
#include "./write_bits.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static const uint8_t kCodeLengthDepth[18] = {
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 0, 4, 4,
|
||||
};
|
||||
|
||||
static const uint8_t kStaticCommandCodeDepth[kNumCommandPrefixes] = {
|
||||
static const uint8_t kStaticCommandCodeDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
@ -77,11 +80,13 @@ static const uint32_t kCodeLengthBits[18] = {
|
||||
0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 15, 31, 0, 11, 7,
|
||||
};
|
||||
|
||||
inline void StoreStaticCodeLengthCode(size_t* storage_ix, uint8_t* storage) {
|
||||
WriteBits(40, MAKE_UINT64_T(0xff, 0x55555554), storage_ix, storage);
|
||||
static BROTLI_INLINE void StoreStaticCodeLengthCode(
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
BrotliWriteBits(
|
||||
40, MAKE_UINT64_T(0x0000ffU, 0x55555554U), storage_ix, storage);
|
||||
}
|
||||
|
||||
static const uint64_t kZeroRepsBits[704] = {
|
||||
static const uint64_t kZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000007, 0x00000017, 0x00000027,
|
||||
0x00000037, 0x00000047, 0x00000057, 0x00000067, 0x00000077, 0x00000770,
|
||||
0x00000b87, 0x00001387, 0x00001b87, 0x00002387, 0x00002b87, 0x00003387,
|
||||
@ -202,7 +207,7 @@ static const uint64_t kZeroRepsBits[704] = {
|
||||
0x06f9cb87, 0x08f9cb87,
|
||||
};
|
||||
|
||||
static const uint32_t kZeroRepsDepth[704] = {
|
||||
static const uint32_t kZeroRepsDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
|
||||
0, 4, 8, 7, 7, 7, 7, 7, 7, 7, 7, 11, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
|
||||
@ -249,7 +254,7 @@ static const uint32_t kZeroRepsDepth[704] = {
|
||||
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
|
||||
};
|
||||
|
||||
static const uint64_t kNonZeroRepsBits[704] = {
|
||||
static const uint64_t kNonZeroRepsBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
|
||||
0x0000000b, 0x0000001b, 0x0000002b, 0x0000003b, 0x000002cb, 0x000006cb,
|
||||
0x00000acb, 0x00000ecb, 0x000002db, 0x000006db, 0x00000adb, 0x00000edb,
|
||||
0x000002eb, 0x000006eb, 0x00000aeb, 0x00000eeb, 0x000002fb, 0x000006fb,
|
||||
@ -370,7 +375,7 @@ static const uint64_t kNonZeroRepsBits[704] = {
|
||||
0x2baeb6db, 0x3baeb6db,
|
||||
};
|
||||
|
||||
static const uint32_t kNonZeroRepsDepth[704] = {
|
||||
static const uint32_t kNonZeroRepsDepth[BROTLI_NUM_COMMAND_SYMBOLS] = {
|
||||
6, 6, 6, 6, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
|
||||
12, 12, 12, 12, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
|
||||
@ -417,47 +422,7 @@ static const uint32_t kNonZeroRepsDepth[704] = {
|
||||
30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
};
|
||||
|
||||
static const uint16_t kStaticLiteralCodeBits[256] = {
|
||||
0, 128, 64, 192, 32, 160, 96, 224,
|
||||
16, 144, 80, 208, 48, 176, 112, 240,
|
||||
8, 136, 72, 200, 40, 168, 104, 232,
|
||||
24, 152, 88, 216, 56, 184, 120, 248,
|
||||
4, 132, 68, 196, 36, 164, 100, 228,
|
||||
20, 148, 84, 212, 52, 180, 116, 244,
|
||||
12, 140, 76, 204, 44, 172, 108, 236,
|
||||
28, 156, 92, 220, 60, 188, 124, 252,
|
||||
2, 130, 66, 194, 34, 162, 98, 226,
|
||||
18, 146, 82, 210, 50, 178, 114, 242,
|
||||
10, 138, 74, 202, 42, 170, 106, 234,
|
||||
26, 154, 90, 218, 58, 186, 122, 250,
|
||||
6, 134, 70, 198, 38, 166, 102, 230,
|
||||
22, 150, 86, 214, 54, 182, 118, 246,
|
||||
14, 142, 78, 206, 46, 174, 110, 238,
|
||||
30, 158, 94, 222, 62, 190, 126, 254,
|
||||
1, 129, 65, 193, 33, 161, 97, 225,
|
||||
17, 145, 81, 209, 49, 177, 113, 241,
|
||||
9, 137, 73, 201, 41, 169, 105, 233,
|
||||
25, 153, 89, 217, 57, 185, 121, 249,
|
||||
5, 133, 69, 197, 37, 165, 101, 229,
|
||||
21, 149, 85, 213, 53, 181, 117, 245,
|
||||
13, 141, 77, 205, 45, 173, 109, 237,
|
||||
29, 157, 93, 221, 61, 189, 125, 253,
|
||||
3, 131, 67, 195, 35, 163, 99, 227,
|
||||
19, 147, 83, 211, 51, 179, 115, 243,
|
||||
11, 139, 75, 203, 43, 171, 107, 235,
|
||||
27, 155, 91, 219, 59, 187, 123, 251,
|
||||
7, 135, 71, 199, 39, 167, 103, 231,
|
||||
23, 151, 87, 215, 55, 183, 119, 247,
|
||||
15, 143, 79, 207, 47, 175, 111, 239,
|
||||
31, 159, 95, 223, 63, 191, 127, 255,
|
||||
};
|
||||
|
||||
inline void StoreStaticLiteralHuffmanTree(size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
WriteBits(32, 0x00010003U, storage_ix, storage);
|
||||
}
|
||||
|
||||
static const uint16_t kStaticCommandCodeBits[kNumCommandPrefixes] = {
|
||||
static const uint16_t kStaticCommandCodeBits[BROTLI_NUM_COMMAND_SYMBOLS] = {
|
||||
0, 256, 128, 384, 64, 320, 192, 448,
|
||||
32, 288, 160, 416, 96, 352, 224, 480,
|
||||
16, 272, 144, 400, 80, 336, 208, 464,
|
||||
@ -548,10 +513,11 @@ static const uint16_t kStaticCommandCodeBits[kNumCommandPrefixes] = {
|
||||
255, 1279, 767, 1791, 511, 1535, 1023, 2047,
|
||||
};
|
||||
|
||||
inline void StoreStaticCommandHuffmanTree(size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
WriteBits(28, 0x0000000006307003U, storage_ix, storage);
|
||||
WriteBits(31, 0x0000000009262441U, storage_ix, storage);
|
||||
static BROTLI_INLINE void StoreStaticCommandHuffmanTree(
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
BrotliWriteBits(
|
||||
56, MAKE_UINT64_T(0x926244U, 0x16307003U), storage_ix, storage);
|
||||
BrotliWriteBits(3, 0x00000000U, storage_ix, storage);
|
||||
}
|
||||
|
||||
static const uint16_t kStaticDistanceCodeBits[64] = {
|
||||
@ -561,12 +527,13 @@ static const uint16_t kStaticDistanceCodeBits[64] = {
|
||||
3, 35, 19, 51, 11, 43, 27, 59, 7, 39, 23, 55, 15, 47, 31, 63,
|
||||
};
|
||||
|
||||
inline void StoreStaticDistanceHuffmanTree(size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
WriteBits(18, 0x000000000001dc03U, storage_ix, storage);
|
||||
WriteBits(10, 0x00000000000000daU, storage_ix, storage);
|
||||
static BROTLI_INLINE void StoreStaticDistanceHuffmanTree(
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
BrotliWriteBits(28, 0x0369dc03U, storage_ix, storage);
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_ */
|
||||
|
@ -9,16 +9,18 @@
|
||||
#ifndef BROTLI_ENC_FAST_LOG_H_
|
||||
#define BROTLI_ENC_FAST_LOG_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "../common/port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static inline uint32_t Log2FloorNonZero(size_t n) {
|
||||
static BROTLI_INLINE uint32_t Log2FloorNonZero(size_t n) {
|
||||
#ifdef __GNUC__
|
||||
return 31u ^ static_cast<uint32_t>(__builtin_clz(static_cast<uint32_t>(n)));
|
||||
return 31u ^ (uint32_t)__builtin_clz((uint32_t)n);
|
||||
#else
|
||||
uint32_t result = 0;
|
||||
while (n >>= 1) result++;
|
||||
@ -120,7 +122,7 @@ static const float kLog2Table[] = {
|
||||
};
|
||||
|
||||
/* Faster logarithm for small integers, with the property of log2(0) == 0. */
|
||||
static inline double FastLog2(size_t v) {
|
||||
static BROTLI_INLINE double FastLog2(size_t v) {
|
||||
if (v < sizeof(kLog2Table) / sizeof(kLog2Table[0])) {
|
||||
return kLog2Table[v];
|
||||
}
|
||||
@ -129,12 +131,14 @@ static inline double FastLog2(size_t v) {
|
||||
/* Visual Studio 2010 and Android API levels < 18 do not have the log2()
|
||||
* function defined, so we use log() and a multiplication instead. */
|
||||
static const double kLog2Inv = 1.4426950408889634f;
|
||||
return log(static_cast<double>(v)) * kLog2Inv;
|
||||
return log((double)v) * kLog2Inv;
|
||||
#else
|
||||
return log2(static_cast<double>(v));
|
||||
return log2((double)v);
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_FAST_LOG_H_ */
|
||||
|
@ -12,14 +12,16 @@
|
||||
#include "../common/types.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Separate implementation for little-endian 64-bit targets, for speed. */
|
||||
#if defined(__GNUC__) && defined(_LP64) && defined(IS_LITTLE_ENDIAN)
|
||||
|
||||
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
|
||||
const uint8_t* s2,
|
||||
size_t limit) {
|
||||
static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
|
||||
const uint8_t* s2,
|
||||
size_t limit) {
|
||||
size_t matched = 0;
|
||||
size_t limit2 = (limit >> 3) + 1; /* + 1 is for pre-decrement in while */
|
||||
while (PREDICT_TRUE(--limit2)) {
|
||||
@ -30,7 +32,7 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
|
||||
} else {
|
||||
uint64_t x =
|
||||
BROTLI_UNALIGNED_LOAD64(s2) ^ BROTLI_UNALIGNED_LOAD64(s1 + matched);
|
||||
size_t matching_bits = static_cast<size_t>(__builtin_ctzll(x));
|
||||
size_t matching_bits = (size_t)__builtin_ctzll(x);
|
||||
matched += matching_bits >> 3;
|
||||
return matched;
|
||||
}
|
||||
@ -47,9 +49,9 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
|
||||
return matched;
|
||||
}
|
||||
#else
|
||||
static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
|
||||
const uint8_t* s2,
|
||||
size_t limit) {
|
||||
static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1,
|
||||
const uint8_t* s2,
|
||||
size_t limit) {
|
||||
size_t matched = 0;
|
||||
const uint8_t* s2_limit = s2 + limit;
|
||||
const uint8_t* s2_ptr = s2;
|
||||
@ -71,6 +73,8 @@ static inline size_t FindMatchLengthWithLimit(const uint8_t* s1,
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_FIND_MATCH_LENGTH_H_ */
|
||||
|
1334
enc/hash.h
1334
enc/hash.h
File diff suppressed because it is too large
Load Diff
285
enc/hash_longest_match_inc.h
Normal file
285
enc/hash_longest_match_inc.h
Normal file
@ -0,0 +1,285 @@
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: FN, BUCKET_BITS, BLOCK_BITS,
|
||||
NUM_LAST_DISTANCES_TO_CHECK */
|
||||
|
||||
/* A (forgetful) hash table to the data seen by the compressor, to
|
||||
help create backward references to previous data.
|
||||
|
||||
This is a hash map of fixed size (BUCKET_SIZE) to a ring buffer of
|
||||
fixed size (BLOCK_SIZE). The ring buffer contains the last BLOCK_SIZE
|
||||
index positions of the given hash key in the compressed data. */
|
||||
|
||||
/* Type-name alias: every use of HashLongestMatch in this file expands to
   HASHER(), which the including file must define (presumably to a name that
   is unique per instantiation - confirm against hash.h). */
#define HashLongestMatch HASHER()
|
||||
|
||||
/* Number of hash buckets. */
|
||||
#define BUCKET_SIZE (1 << BUCKET_BITS)
|
||||
|
||||
/* Only BLOCK_SIZE newest backward references are kept,
|
||||
and the older are forgotten. */
|
||||
#define BLOCK_SIZE (1u << BLOCK_BITS)
|
||||
|
||||
/* Mask for accessing entries in a block (in a ringbuffer manner). */
|
||||
#define BLOCK_MASK ((1 << BLOCK_BITS) - 1)
|
||||
|
||||
/* Twice the bucket count; FN(Init) derives from it the input-size threshold
   below which only the touched buckets are cleared. */
#define HASH_MAP_SIZE (2 << BUCKET_BITS)
|
||||
|
||||
/* Reports 4 as this hasher's hash type length; FN(HashBytes) loads 4 bytes
   per key. */
static BROTLI_INLINE size_t FN(HashTypeLength)(void) {
  return 4;
}
|
||||
/* Reports 4 as this hasher's store lookahead (presumably the number of
   readable bytes FN(Store) needs at a position - confirm with callers). */
static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
  return 4;
}
|
||||
|
||||
/* HashBytes is the function that chooses the bucket to place
|
||||
the address in. The HashLongestMatch and HashLongestMatchQuickly
|
||||
classes have separate, different implementations of hashing. */
|
||||
static uint32_t FN(HashBytes)(const uint8_t *data) {
|
||||
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
|
||||
/* The higher bits contain more mixture from the multiplication,
|
||||
so we take our results from there. */
|
||||
return h >> (32 - BUCKET_BITS);
|
||||
}
|
||||
|
||||
/* Hasher state: per-bucket counters plus, for every bucket, a ring of
   BLOCK_SIZE stored positions. */
typedef struct HashLongestMatch {
|
||||
/* Number of entries stored so far in a particular bucket. The value masked
   with BLOCK_MASK selects the next slot to write in that bucket's block. */
|
||||
uint16_t num_[BUCKET_SIZE];
|
||||
|
||||
/* Buckets containing BLOCK_SIZE of backward references. The block of
   bucket |key| starts at index key << BLOCK_BITS. */
|
||||
uint32_t buckets_[BLOCK_SIZE << BUCKET_BITS];
|
||||
|
||||
/* True if num_ array needs to be initialized. */
|
||||
int is_dirty_;
|
||||
|
||||
/* Static-dictionary statistics: probes made and probes that produced a new
   best match. FN(FindLongestMatch) stops consulting the dictionary once
   matches fall below lookups / 128. */
size_t num_dict_lookups_;
|
||||
size_t num_dict_matches_;
|
||||
} HashLongestMatch;
|
||||
|
||||
/* Returns the hasher to its starting state: dictionary statistics are zeroed
   and num_ is flagged stale so that the next FN(InitEmpty) clears it. */
static void FN(Reset)(HashLongestMatch* self) {
  self->num_dict_matches_ = 0;
  self->num_dict_lookups_ = 0;
  self->is_dirty_ = 1;
}
|
||||
|
||||
/* Zeroes every bucket counter, unless the table is already known to be
   clean. */
static void FN(InitEmpty)(HashLongestMatch* self) {
  if (!self->is_dirty_) {
    return;
  }
  memset(self->num_, 0, sizeof(self->num_));
  self->is_dirty_ = 0;
}
|
||||
|
||||
/* Partial initialization: zeroes only the counters of the buckets that the
   first |num| positions of |data| hash to. The dirty flag is cleared only if
   at least one position was processed. */
static void FN(InitForData)(HashLongestMatch* self, const uint8_t* data,
                            size_t num) {
  size_t remaining = num;
  const uint8_t* cursor = data;
  if (num == 0) {
    return;
  }
  for (; remaining != 0; --remaining, ++cursor) {
    self->num_[FN(HashBytes)(cursor)] = 0;
  }
  self->is_dirty_ = 0;
}
|
||||
|
||||
static void FN(Init)(
|
||||
MemoryManager* m, HashLongestMatch* self, const uint8_t* data, int lgwin,
|
||||
size_t position, size_t bytes, int is_last) {
|
||||
/* Choose which init method is faster.
|
||||
Init() is about 100 times faster than InitForData(). */
|
||||
const size_t kMaxBytesForPartialHashInit = HASH_MAP_SIZE >> 7;
|
||||
BROTLI_UNUSED(m);
|
||||
BROTLI_UNUSED(lgwin);
|
||||
if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) {
|
||||
FN(InitForData)(self, data, bytes);
|
||||
} else {
|
||||
FN(InitEmpty)(self);
|
||||
}
|
||||
}
|
||||
|
||||
/* Look at 4 bytes at &data[ix & mask].
|
||||
Compute a hash from these, and store the value of ix at that position. */
|
||||
static BROTLI_INLINE void FN(Store)(HashLongestMatch* self, const uint8_t *data,
|
||||
const size_t mask, const size_t ix) {
|
||||
const uint32_t key = FN(HashBytes)(&data[ix & mask]);
|
||||
const size_t minor_ix = self->num_[key] & BLOCK_MASK;
|
||||
self->buckets_[minor_ix + (key << BLOCK_BITS)] = (uint32_t)ix;
|
||||
++self->num_[key];
|
||||
}
|
||||
|
||||
/* Stores every position in the half-open range [ix_start, ix_end). */
static BROTLI_INLINE void FN(StoreRange)(HashLongestMatch* self,
    const uint8_t *data, const size_t mask, const size_t ix_start,
    const size_t ix_end) {
  size_t pos = ix_start;
  while (pos < ix_end) {
    FN(Store)(self, data, mask, pos);
    ++pos;
  }
}
|
||||
|
||||
/* Stores the last three positions of the previous write. Their hashes span
   the boundary between the previous and the current block, so they could
   not be computed before the current block arrived. Does nothing if the new
   block is shorter than HashTypeLength() - 1 bytes or fewer than 3 bytes
   precede |position|. */
static BROTLI_INLINE void FN(StitchToPreviousBlock)(HashLongestMatch* self,
    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
    size_t ringbuffer_mask) {
  size_t back;
  if (num_bytes < FN(HashTypeLength)() - 1 || position < 3) {
    return;
  }
  for (back = 3; back != 0; --back) {
    FN(Store)(self, ringbuffer, ringbuffer_mask, position - back);
  }
}
|
||||
|
||||
/* Find a longest backward match of &data[cur_ix] up to the length of
|
||||
max_length and stores the position cur_ix in the hash table.
|
||||
|
||||
Does not look for matches longer than max_length.
|
||||
Does not look for matches further away than max_backward.
|
||||
Writes the best found match length into best_len_out.
|
||||
Writes the index (&data[index]) offset from the start of the best match
|
||||
into best_distance_out.
|
||||
Write the score of the best match into best_score_out.
|
||||
Returns 1 when match is found, otherwise 0. */
|
||||
static BROTLI_INLINE int FN(FindLongestMatch)(HashLongestMatch* self,
|
||||
const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
|
||||
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
|
||||
const size_t max_length, const size_t max_backward,
|
||||
size_t* BROTLI_RESTRICT best_len_out,
|
||||
size_t* BROTLI_RESTRICT best_len_code_out,
|
||||
size_t* BROTLI_RESTRICT best_distance_out,
|
||||
double* BROTLI_RESTRICT best_score_out) {
|
||||
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
||||
int is_match_found = 0;
|
||||
/* Don't accept a short copy from far away. */
|
||||
double best_score = *best_score_out;
|
||||
size_t best_len = *best_len_out;
|
||||
size_t i;
|
||||
*best_len_code_out = 0;
|
||||
*best_len_out = 0;
|
||||
/* Try last distance first. */
|
||||
for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
|
||||
const size_t idx = kDistanceCacheIndex[i];
|
||||
const size_t backward =
|
||||
(size_t)(distance_cache[idx] + kDistanceCacheOffset[i]);
|
||||
size_t prev_ix = (size_t)(cur_ix - backward);
|
||||
if (prev_ix >= cur_ix) {
|
||||
continue;
|
||||
}
|
||||
if (PREDICT_FALSE(backward > max_backward)) {
|
||||
continue;
|
||||
}
|
||||
prev_ix &= ring_buffer_mask;
|
||||
|
||||
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
||||
prev_ix + best_len > ring_buffer_mask ||
|
||||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
{
|
||||
const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
|
||||
&data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 3 || (len == 2 && i < 2)) {
|
||||
/* Comparing for >= 2 does not change the semantics, but just saves for
|
||||
a few unnecessary binary logarithms in backward reference score,
|
||||
since we are not interested in such short matches. */
|
||||
double score = BackwardReferenceScoreUsingLastDistance(len, i);
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
*best_len_out = best_len;
|
||||
*best_len_code_out = best_len;
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = best_score;
|
||||
is_match_found = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]);
|
||||
const uint32_t * BROTLI_RESTRICT const bucket =
|
||||
&self->buckets_[key << BLOCK_BITS];
|
||||
const size_t down =
|
||||
(self->num_[key] > BLOCK_SIZE) ? (self->num_[key] - BLOCK_SIZE) : 0u;
|
||||
for (i = self->num_[key]; i > down;) {
|
||||
size_t prev_ix = bucket[--i & BLOCK_MASK];
|
||||
const size_t backward = cur_ix - prev_ix;
|
||||
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
||||
break;
|
||||
}
|
||||
prev_ix &= ring_buffer_mask;
|
||||
if (cur_ix_masked + best_len > ring_buffer_mask ||
|
||||
prev_ix + best_len > ring_buffer_mask ||
|
||||
data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
{
|
||||
const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
|
||||
&data[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 4) {
|
||||
/* Comparing for >= 3 does not change the semantics, but just saves
|
||||
for a few unnecessary binary logarithms in backward reference
|
||||
score, since we are not interested in such short matches. */
|
||||
double score = BackwardReferenceScore(len, backward);
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
*best_len_out = best_len;
|
||||
*best_len_code_out = best_len;
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = best_score;
|
||||
is_match_found = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
self->buckets_[(key << BLOCK_BITS) + (self->num_[key] & BLOCK_MASK)] =
|
||||
(uint32_t)cur_ix;
|
||||
++self->num_[key];
|
||||
}
|
||||
if (!is_match_found &&
|
||||
self->num_dict_matches_ >= (self->num_dict_lookups_ >> 7)) {
|
||||
size_t dict_key = Hash14(&data[cur_ix_masked]) << 1;
|
||||
int k;
|
||||
for (k = 0; k < 2; ++k, ++dict_key) {
|
||||
const uint16_t v = kStaticDictionaryHash[dict_key];
|
||||
++self->num_dict_lookups_;
|
||||
if (v > 0) {
|
||||
const size_t len = v & 31;
|
||||
const size_t dist = v >> 5;
|
||||
const size_t offset =
|
||||
kBrotliDictionaryOffsetsByLength[len] + len * dist;
|
||||
if (len <= max_length) {
|
||||
const size_t matchlen =
|
||||
FindMatchLengthWithLimit(&data[cur_ix_masked],
|
||||
&kBrotliDictionary[offset], len);
|
||||
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
|
||||
const size_t transform_id = kCutoffTransforms[len - matchlen];
|
||||
const size_t word_id = dist +
|
||||
transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]);
|
||||
const size_t backward = max_backward + word_id + 1;
|
||||
double score = BackwardReferenceScore(matchlen, backward);
|
||||
if (best_score < score) {
|
||||
++self->num_dict_matches_;
|
||||
best_score = score;
|
||||
best_len = matchlen;
|
||||
*best_len_out = best_len;
|
||||
*best_len_code_out = len;
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = best_score;
|
||||
is_match_found = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return is_match_found;
|
||||
}
|
||||
|
||||
#undef HASH_MAP_SIZE
|
||||
#undef BLOCK_MASK
|
||||
#undef BLOCK_SIZE
|
||||
#undef BUCKET_SIZE
|
||||
|
||||
#undef HashLongestMatch
|
268
enc/hash_longest_match_quickly_inc.h
Normal file
268
enc/hash_longest_match_quickly_inc.h
Normal file
@ -0,0 +1,268 @@
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: FN, BUCKET_BITS, BUCKET_SWEEP, USE_DICTIONARY */
|
||||
|
||||
/* Type-name alias: every use of HashLongestMatchQuickly in this file expands
   to HASHER(), which the including file must define (presumably to a name
   that is unique per instantiation - confirm against hash.h). */
#define HashLongestMatchQuickly HASHER()
|
||||
|
||||
/* Number of distinct hash keys produced by FN(HashBytes). */
#define BUCKET_SIZE (1 << BUCKET_BITS)
|
||||
|
||||
/* Four times the key count; FN(Init) derives from it the input-size
   threshold below which only the touched buckets are cleared. */
#define HASH_MAP_SIZE (4 << BUCKET_BITS)
|
||||
|
||||
/* Reports 8 as this hasher's hash type length; FN(HashBytes) performs a
   64-bit load per key. */
static BROTLI_INLINE size_t FN(HashTypeLength)(void) {
  return 8;
}
|
||||
/* Reports 8 as this hasher's store lookahead (presumably the number of
   readable bytes FN(Store) needs at a position - confirm with callers). */
static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
  return 8;
}
|
||||
|
||||
/* HashBytes is the function that chooses the bucket to place
|
||||
the address in. The HashLongestMatch and HashLongestMatchQuickly
|
||||
classes have separate, different implementations of hashing. */
|
||||
static uint32_t FN(HashBytes)(const uint8_t *data) {
|
||||
/* Computing a hash based on 5 bytes works much better for
|
||||
qualities 1 and 3, where the next hash value is likely to replace */
|
||||
uint64_t h = (BROTLI_UNALIGNED_LOAD64(data) << 24) * kHashMul32;
|
||||
/* The higher bits contain more mixture from the multiplication,
|
||||
so we take our results from there. */
|
||||
return (uint32_t)(h >> (64 - BUCKET_BITS));
|
||||
}
|
||||
|
||||
/* A (forgetful) hash table to the data seen by the compressor, to
|
||||
help create backward references to previous data.
|
||||
|
||||
This is a hash map of fixed size (BUCKET_SIZE). Starting from the
|
||||
given index, BUCKET_SWEEP buckets are used to store values of a key. */
|
||||
typedef struct HashLongestMatchQuickly {
|
||||
/* Stored positions. A key occupies the BUCKET_SWEEP consecutive slots that
   start at index |key|; the extra BUCKET_SWEEP entries keep the sweep of the
   highest key inside the array. */
uint32_t buckets_[BUCKET_SIZE + BUCKET_SWEEP];
|
||||
/* True if buckets_ array needs to be initialized. */
|
||||
int is_dirty_;
|
||||
/* Static-dictionary statistics: probes made and probes that produced a new
   best match. FN(FindLongestMatch) stops consulting the dictionary once
   matches fall below lookups / 128. */
size_t num_dict_lookups_;
|
||||
size_t num_dict_matches_;
|
||||
} HashLongestMatchQuickly;
|
||||
|
||||
/* Returns the hasher to its starting state: dictionary statistics are zeroed
   and buckets_ is flagged stale so that the next FN(InitEmpty) clears it. */
static void FN(Reset)(HashLongestMatchQuickly* self) {
  self->num_dict_matches_ = 0;
  self->num_dict_lookups_ = 0;
  self->is_dirty_ = 1;
}
|
||||
|
||||
/* Zeroes the whole bucket array, unless it is already known to be clean. */
static void FN(InitEmpty)(HashLongestMatchQuickly* self) {
  if (!self->is_dirty_) {
    return;
  }
  /* Per the original note, clearing is not strictly required: stale entries
     would still give correct output, but they could yield accidental good
     backward references and so make the compression result stochastic. */
  memset(&self->buckets_[0], 0, sizeof(self->buckets_));
  self->is_dirty_ = 0;
}
|
||||
|
||||
/* Partial initialization: zeroes only the BUCKET_SWEEP slots of each key that
   the first |num| positions of |data| hash to. The dirty flag is cleared only
   if at least one position was processed. */
static void FN(InitForData)(HashLongestMatchQuickly* self, const uint8_t* data,
                            size_t num) {
  size_t remaining = num;
  const uint8_t* cursor = data;
  if (num == 0) {
    return;
  }
  for (; remaining != 0; --remaining, ++cursor) {
    uint32_t* const sweep = &self->buckets_[FN(HashBytes)(cursor)];
    memset(sweep, 0, BUCKET_SWEEP * sizeof(self->buckets_[0]));
  }
  self->is_dirty_ = 0;
}
|
||||
|
||||
static void FN(Init)(
|
||||
MemoryManager* m, HashLongestMatchQuickly* self, const uint8_t* data,
|
||||
int lgwin, size_t position, size_t bytes, int is_last) {
|
||||
/* Choose which init method is faster.
|
||||
Init() is about 100 times faster than InitForData(). */
|
||||
const size_t kMaxBytesForPartialHashInit = HASH_MAP_SIZE >> 7;
|
||||
BROTLI_UNUSED(m);
|
||||
BROTLI_UNUSED(lgwin);
|
||||
if (position == 0 && is_last && bytes <= kMaxBytesForPartialHashInit) {
|
||||
FN(InitForData)(self, data, bytes);
|
||||
} else {
|
||||
FN(InitEmpty)(self);
|
||||
}
|
||||
}
|
||||
|
||||
/* Look at 5 bytes at &data[ix & mask].
|
||||
Compute a hash from these, and store the value somewhere within
|
||||
[ix .. ix+3]. */
|
||||
static BROTLI_INLINE void FN(Store)(HashLongestMatchQuickly* self,
|
||||
const uint8_t *data, const size_t mask, const size_t ix) {
|
||||
const uint32_t key = FN(HashBytes)(&data[ix & mask]);
|
||||
/* Wiggle the value with the bucket sweep range. */
|
||||
const uint32_t off = (ix >> 3) % BUCKET_SWEEP;
|
||||
self->buckets_[key + off] = (uint32_t)ix;
|
||||
}
|
||||
|
||||
/* Stores every position in the half-open range [ix_start, ix_end). */
static BROTLI_INLINE void FN(StoreRange)(HashLongestMatchQuickly* self,
    const uint8_t *data, const size_t mask, const size_t ix_start,
    const size_t ix_end) {
  size_t pos = ix_start;
  while (pos < ix_end) {
    FN(Store)(self, data, mask, pos);
    ++pos;
  }
}
|
||||
|
||||
static BROTLI_INLINE void FN(StitchToPreviousBlock)(
|
||||
HashLongestMatchQuickly* self, size_t num_bytes, size_t position,
|
||||
const uint8_t* ringbuffer, size_t ringbuffer_mask) {
|
||||
if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
|
||||
/* Prepare the hashes for three last bytes of the last write.
|
||||
These could not be calculated before, since they require knowledge
|
||||
of both the previous and the current block. */
|
||||
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 3);
|
||||
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 2);
|
||||
FN(Store)(self, ringbuffer, ringbuffer_mask, position - 1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask]
|
||||
up to the length of max_length and stores the position cur_ix in the
|
||||
hash table.
|
||||
|
||||
Does not look for matches longer than max_length.
|
||||
Does not look for matches further away than max_backward.
|
||||
Writes the best found match length into best_len_out.
|
||||
Writes the index (&data[index]) of the start of the best match into
|
||||
best_distance_out.
|
||||
Returns 1 if match is found, otherwise 0. */
|
||||
static BROTLI_INLINE int FN(FindLongestMatch)(HashLongestMatchQuickly* self,
|
||||
const uint8_t* BROTLI_RESTRICT ring_buffer, const size_t ring_buffer_mask,
|
||||
const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
|
||||
const size_t max_length, const size_t max_backward,
|
||||
size_t* BROTLI_RESTRICT best_len_out,
|
||||
size_t* BROTLI_RESTRICT best_len_code_out,
|
||||
size_t* BROTLI_RESTRICT best_distance_out,
|
||||
double* BROTLI_RESTRICT best_score_out) {
|
||||
const size_t best_len_in = *best_len_out;
|
||||
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
|
||||
const uint32_t key = FN(HashBytes)(&ring_buffer[cur_ix_masked]);
|
||||
int compare_char = ring_buffer[cur_ix_masked + best_len_in];
|
||||
double best_score = *best_score_out;
|
||||
size_t best_len = best_len_in;
|
||||
size_t cached_backward = (size_t)distance_cache[0];
|
||||
size_t prev_ix = cur_ix - cached_backward;
|
||||
int is_match_found = 0;
|
||||
if (prev_ix < cur_ix) {
|
||||
prev_ix &= (uint32_t)ring_buffer_mask;
|
||||
if (compare_char == ring_buffer[prev_ix + best_len]) {
|
||||
size_t len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
|
||||
&ring_buffer[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 4) {
|
||||
best_score = BackwardReferenceScoreUsingLastDistance(len, 0);
|
||||
best_len = len;
|
||||
*best_len_out = len;
|
||||
*best_len_code_out = len;
|
||||
*best_distance_out = cached_backward;
|
||||
*best_score_out = best_score;
|
||||
compare_char = ring_buffer[cur_ix_masked + best_len];
|
||||
if (BUCKET_SWEEP == 1) {
|
||||
self->buckets_[key] = (uint32_t)cur_ix;
|
||||
return 1;
|
||||
} else {
|
||||
is_match_found = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (BUCKET_SWEEP == 1) {
|
||||
size_t backward;
|
||||
size_t len;
|
||||
/* Only one to look for, don't bother to prepare for a loop. */
|
||||
prev_ix = self->buckets_[key];
|
||||
self->buckets_[key] = (uint32_t)cur_ix;
|
||||
backward = cur_ix - prev_ix;
|
||||
prev_ix &= (uint32_t)ring_buffer_mask;
|
||||
if (compare_char != ring_buffer[prev_ix + best_len_in]) {
|
||||
return 0;
|
||||
}
|
||||
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
||||
return 0;
|
||||
}
|
||||
len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
|
||||
&ring_buffer[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 4) {
|
||||
*best_len_out = len;
|
||||
*best_len_code_out = len;
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = BackwardReferenceScore(len, backward);
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
uint32_t *bucket = self->buckets_ + key;
|
||||
int i;
|
||||
prev_ix = *bucket++;
|
||||
for (i = 0; i < BUCKET_SWEEP; ++i, prev_ix = *bucket++) {
|
||||
const size_t backward = cur_ix - prev_ix;
|
||||
size_t len;
|
||||
prev_ix &= (uint32_t)ring_buffer_mask;
|
||||
if (compare_char != ring_buffer[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
if (PREDICT_FALSE(backward == 0 || backward > max_backward)) {
|
||||
continue;
|
||||
}
|
||||
len = FindMatchLengthWithLimit(&ring_buffer[prev_ix],
|
||||
&ring_buffer[cur_ix_masked],
|
||||
max_length);
|
||||
if (len >= 4) {
|
||||
const double score = BackwardReferenceScore(len, backward);
|
||||
if (best_score < score) {
|
||||
best_score = score;
|
||||
best_len = len;
|
||||
*best_len_out = best_len;
|
||||
*best_len_code_out = best_len;
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = score;
|
||||
compare_char = ring_buffer[cur_ix_masked + best_len];
|
||||
is_match_found = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (USE_DICTIONARY && !is_match_found &&
|
||||
self->num_dict_matches_ >= (self->num_dict_lookups_ >> 7)) {
|
||||
const uint32_t dict_key = Hash14(&ring_buffer[cur_ix_masked]) << 1;
|
||||
const uint16_t v = kStaticDictionaryHash[dict_key];
|
||||
++self->num_dict_lookups_;
|
||||
if (v > 0) {
|
||||
const uint32_t len = v & 31;
|
||||
const uint32_t dist = v >> 5;
|
||||
const size_t offset =
|
||||
kBrotliDictionaryOffsetsByLength[len] + len * dist;
|
||||
if (len <= max_length) {
|
||||
const size_t matchlen =
|
||||
FindMatchLengthWithLimit(&ring_buffer[cur_ix_masked],
|
||||
&kBrotliDictionary[offset], len);
|
||||
if (matchlen + kCutoffTransformsCount > len && matchlen > 0) {
|
||||
const size_t transform_id = kCutoffTransforms[len - matchlen];
|
||||
const size_t word_id = dist +
|
||||
transform_id * (1u << kBrotliDictionarySizeBitsByLength[len]);
|
||||
const size_t backward = max_backward + word_id + 1;
|
||||
const double score = BackwardReferenceScore(matchlen, backward);
|
||||
if (best_score < score) {
|
||||
++self->num_dict_matches_;
|
||||
best_score = score;
|
||||
best_len = matchlen;
|
||||
*best_len_out = best_len;
|
||||
*best_len_code_out = len;
|
||||
*best_distance_out = backward;
|
||||
*best_score_out = best_score;
|
||||
is_match_found = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
self->buckets_[key + ((cur_ix >> 3) % BUCKET_SWEEP)] = (uint32_t)cur_ix;
|
||||
return is_match_found;
|
||||
}
|
||||
|
||||
#undef HASH_MAP_SIZE
|
||||
#undef BUCKET_SIZE
|
||||
|
||||
#undef HashLongestMatchQuickly
|
106
enc/histogram.c
106
enc/histogram.c
@ -8,60 +8,88 @@
|
||||
|
||||
#include "./histogram.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "./block_splitter.h"
|
||||
#include "./command.h"
|
||||
#include "./context.h"
|
||||
#include "./prefix.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void BuildHistograms(
|
||||
const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const BlockSplit& literal_split,
|
||||
const BlockSplit& insert_and_copy_split,
|
||||
const BlockSplit& dist_split,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t start_pos,
|
||||
size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
const std::vector<ContextType>& context_modes,
|
||||
std::vector<HistogramLiteral>* literal_histograms,
|
||||
std::vector<HistogramCommand>* insert_and_copy_histograms,
|
||||
std::vector<HistogramDistance>* copy_dist_histograms) {
|
||||
typedef struct BlockSplitIterator {
|
||||
const BlockSplit* split_; /* Not owned. */
|
||||
size_t idx_;
|
||||
size_t type_;
|
||||
size_t length_;
|
||||
} BlockSplitIterator;
|
||||
|
||||
static void InitBlockSplitIterator(BlockSplitIterator* self,
|
||||
const BlockSplit* split) {
|
||||
self->split_ = split;
|
||||
self->idx_ = 0;
|
||||
self->type_ = 0;
|
||||
self->length_ = split->lengths ? split->lengths[0] : 0;
|
||||
}
|
||||
|
||||
/* Consumes one symbol. When the current block is exhausted, first moves on to
   the next block and reloads its type and length from the split.
   No bounds check is made against the number of blocks in the split. */
static void BlockSplitIteratorNext(BlockSplitIterator* self) {
  if (self->length_ == 0) {
    const size_t next = self->idx_ + 1;
    self->idx_ = next;
    self->type_ = self->split_->types[next];
    self->length_ = self->split_->lengths[next];
  }
  self->length_ -= 1;
}
|
||||
|
||||
void BrotliBuildHistogramsWithContext(
|
||||
const Command* cmds, const size_t num_commands,
|
||||
const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
|
||||
const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t start_pos,
|
||||
size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
|
||||
const ContextType* context_modes, HistogramLiteral* literal_histograms,
|
||||
HistogramCommand* insert_and_copy_histograms,
|
||||
HistogramDistance* copy_dist_histograms) {
|
||||
size_t pos = start_pos;
|
||||
BlockSplitIterator literal_it(literal_split);
|
||||
BlockSplitIterator insert_and_copy_it(insert_and_copy_split);
|
||||
BlockSplitIterator dist_it(dist_split);
|
||||
for (size_t i = 0; i < num_commands; ++i) {
|
||||
const Command &cmd = cmds[i];
|
||||
insert_and_copy_it.Next();
|
||||
(*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
|
||||
cmd.cmd_prefix_);
|
||||
for (size_t j = cmd.insert_len_; j != 0; --j) {
|
||||
literal_it.Next();
|
||||
size_t context = (literal_it.type_ << kLiteralContextBits) +
|
||||
BlockSplitIterator literal_it;
|
||||
BlockSplitIterator insert_and_copy_it;
|
||||
BlockSplitIterator dist_it;
|
||||
size_t i;
|
||||
|
||||
InitBlockSplitIterator(&literal_it, literal_split);
|
||||
InitBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split);
|
||||
InitBlockSplitIterator(&dist_it, dist_split);
|
||||
for (i = 0; i < num_commands; ++i) {
|
||||
const Command* cmd = &cmds[i];
|
||||
size_t j;
|
||||
BlockSplitIteratorNext(&insert_and_copy_it);
|
||||
HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
|
||||
cmd->cmd_prefix_);
|
||||
for (j = cmd->insert_len_; j != 0; --j) {
|
||||
size_t context;
|
||||
BlockSplitIteratorNext(&literal_it);
|
||||
context = (literal_it.type_ << BROTLI_LITERAL_CONTEXT_BITS) +
|
||||
Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
|
||||
(*literal_histograms)[context].Add(ringbuffer[pos & mask]);
|
||||
HistogramAddLiteral(&literal_histograms[context],
|
||||
ringbuffer[pos & mask]);
|
||||
prev_byte2 = prev_byte;
|
||||
prev_byte = ringbuffer[pos & mask];
|
||||
++pos;
|
||||
}
|
||||
pos += cmd.copy_len();
|
||||
if (cmd.copy_len()) {
|
||||
pos += CommandCopyLen(cmd);
|
||||
if (CommandCopyLen(cmd)) {
|
||||
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
||||
prev_byte = ringbuffer[(pos - 1) & mask];
|
||||
if (cmd.cmd_prefix_ >= 128) {
|
||||
dist_it.Next();
|
||||
size_t context = (dist_it.type_ << kDistanceContextBits) +
|
||||
cmd.DistanceContext();
|
||||
(*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
|
||||
if (cmd->cmd_prefix_ >= 128) {
|
||||
size_t context;
|
||||
BlockSplitIteratorNext(&dist_it);
|
||||
context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
|
||||
CommandDistanceContext(cmd);
|
||||
HistogramAddDistance(&copy_dist_histograms[context],
|
||||
cmd->dist_prefix_);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
109
enc/histogram.h
109
enc/histogram.h
@ -9,87 +9,52 @@
|
||||
#ifndef BROTLI_ENC_HISTOGRAM_H_
|
||||
#define BROTLI_ENC_HISTOGRAM_H_
|
||||
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
#include <string.h> /* memset */
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include "../common/types.h"
|
||||
#include "./block_splitter.h"
|
||||
#include "./command.h"
|
||||
#include "./context.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./prefix.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct BlockSplit;
|
||||
#define FN(X) X ## Literal
|
||||
#define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS
|
||||
#define DataType uint8_t
|
||||
#include "./histogram_inc.h" /* NOLINT(build/include) */
|
||||
#undef DataType
|
||||
#undef DATA_SIZE
|
||||
#undef FN
|
||||
|
||||
// A simple container for histograms of data in blocks.
|
||||
template<int kDataSize>
|
||||
struct Histogram {
|
||||
Histogram(void) {
|
||||
Clear();
|
||||
}
|
||||
void Clear(void) {
|
||||
memset(data_, 0, sizeof(data_));
|
||||
total_count_ = 0;
|
||||
bit_cost_ = std::numeric_limits<double>::infinity();
|
||||
}
|
||||
void Add(size_t val) {
|
||||
++data_[val];
|
||||
++total_count_;
|
||||
}
|
||||
void Remove(size_t val) {
|
||||
--data_[val];
|
||||
--total_count_;
|
||||
}
|
||||
template<typename DataType>
|
||||
void Add(const DataType *p, size_t n) {
|
||||
total_count_ += n;
|
||||
n += 1;
|
||||
while(--n) ++data_[*p++];
|
||||
}
|
||||
void AddHistogram(const Histogram& v) {
|
||||
total_count_ += v.total_count_;
|
||||
for (size_t i = 0; i < kDataSize; ++i) {
|
||||
data_[i] += v.data_[i];
|
||||
}
|
||||
}
|
||||
#define FN(X) X ## Command
|
||||
#define DataType uint16_t
|
||||
#define DATA_SIZE BROTLI_NUM_COMMAND_SYMBOLS
|
||||
#include "./histogram_inc.h" /* NOLINT(build/include) */
|
||||
#undef DATA_SIZE
|
||||
#undef FN
|
||||
|
||||
uint32_t data_[kDataSize];
|
||||
size_t total_count_;
|
||||
double bit_cost_;
|
||||
};
|
||||
#define FN(X) X ## Distance
|
||||
#define DATA_SIZE BROTLI_NUM_DISTANCE_SYMBOLS
|
||||
#include "./histogram_inc.h" /* NOLINT(build/include) */
|
||||
#undef DataType
|
||||
#undef DATA_SIZE
|
||||
#undef FN
|
||||
|
||||
// Literal histogram.
|
||||
typedef Histogram<256> HistogramLiteral;
|
||||
// Prefix histograms.
|
||||
typedef Histogram<kNumCommandPrefixes> HistogramCommand;
|
||||
typedef Histogram<kNumDistancePrefixes> HistogramDistance;
|
||||
typedef Histogram<kNumBlockLenPrefixes> HistogramBlockLength;
|
||||
// Context map histogram, 256 Huffman tree indexes + 16 run length codes.
|
||||
typedef Histogram<272> HistogramContextMap;
|
||||
// Block type histogram, 256 block types + 2 special symbols.
|
||||
typedef Histogram<258> HistogramBlockType;
|
||||
BROTLI_INTERNAL void BrotliBuildHistogramsWithContext(
|
||||
const Command* cmds, const size_t num_commands,
|
||||
const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
|
||||
const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t pos,
|
||||
size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
|
||||
const ContextType* context_modes, HistogramLiteral* literal_histograms,
|
||||
HistogramCommand* insert_and_copy_histograms,
|
||||
HistogramDistance* copy_dist_histograms);
|
||||
|
||||
static const size_t kLiteralContextBits = 6;
|
||||
static const size_t kDistanceContextBits = 2;
|
||||
|
||||
void BuildHistograms(
|
||||
const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const BlockSplit& literal_split,
|
||||
const BlockSplit& insert_and_copy_split,
|
||||
const BlockSplit& dist_split,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
const std::vector<ContextType>& context_modes,
|
||||
std::vector<HistogramLiteral>* literal_histograms,
|
||||
std::vector<HistogramCommand>* insert_and_copy_histograms,
|
||||
std::vector<HistogramDistance>* copy_dist_histograms);
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_HISTOGRAM_H_ */
|
||||
|
51
enc/histogram_inc.h
Normal file
51
enc/histogram_inc.h
Normal file
@ -0,0 +1,51 @@
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: FN, DATA_SIZE, DataType */
|
||||
|
||||
/* A simple container for histograms of data in blocks. */
|
||||
|
||||
/* One histogram instantiation; FN() appends the alphabet suffix to the name
   and DATA_SIZE is the number of symbols in that alphabet. */
typedef struct FN(Histogram) {
  /* Occurrence count per symbol. */
  uint32_t data_[DATA_SIZE];
  /* Number of samples added so far (kept in step with data_ by the Add
     functions). */
  size_t total_count_;
  /* Reset to HUGE_VAL by HistogramClear; presumably a cached bit cost that is
     filled in elsewhere -- TODO confirm against the users of this field. */
  double bit_cost_;
} FN(Histogram);
|
||||
|
||||
/* Resets |self| to the empty state: all counts zero, no samples, and the bit
   cost set to HUGE_VAL. */
static BROTLI_INLINE void FN(HistogramClear)(FN(Histogram)* self) {
  self->bit_cost_ = HUGE_VAL;
  self->total_count_ = 0;
  memset(self->data_, 0, sizeof(self->data_));
}
|
||||
|
||||
static BROTLI_INLINE void FN(ClearHistograms)(
|
||||
FN(Histogram)* array, size_t length) {
|
||||
size_t i;
|
||||
for (i = 0; i < length; ++i) FN(HistogramClear)(array + i);
|
||||
}
|
||||
|
||||
/* Records one occurrence of symbol |val|. |val| is used directly as an index
   into data_, with no range check. */
static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
  self->data_[val] += 1;
  self->total_count_ += 1;
}
|
||||
|
||||
static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
|
||||
const DataType *p, size_t n) {
|
||||
self->total_count_ += n;
|
||||
n += 1;
|
||||
while (--n) ++self->data_[*p++];
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void FN(HistogramAddHistogram)(FN(Histogram)* self,
|
||||
const FN(Histogram)* v) {
|
||||
size_t i;
|
||||
self->total_count_ += v->total_count_;
|
||||
for (i = 0; i < DATA_SIZE; ++i) {
|
||||
self->data_[i] += v->data_[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns the alphabet size (DATA_SIZE) of this histogram instantiation. */
static BROTLI_INLINE size_t FN(HistogramDataSize)(void) {
  return DATA_SIZE;
}
|
@ -9,27 +9,26 @@
|
||||
|
||||
#include "./literal_cost.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./port.h"
|
||||
#include "./utf8_util.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
|
||||
if (c < 128) {
|
||||
return 0; /* Next one is the 'Byte 1' again. */
|
||||
} else if (c >= 192) { /* Next one is the 'Byte 2' of utf-8 encoding. */
|
||||
return std::min<size_t>(1, clamp);
|
||||
return BROTLI_MIN(size_t, 1, clamp);
|
||||
} else {
|
||||
/* Let's decide over the last byte if this ends the sequence. */
|
||||
if (last < 0xe0) {
|
||||
return 0; /* Completed two or three byte coding. */
|
||||
} else { /* Next one is the 'Byte 3' of utf-8 encoding. */
|
||||
return std::min<size_t>(2, clamp);
|
||||
return BROTLI_MIN(size_t, 2, clamp);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -40,7 +39,8 @@ static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
|
||||
size_t max_utf8 = 1; /* should be 2, but 1 compresses better. */
|
||||
size_t last_c = 0;
|
||||
size_t utf8_pos = 0;
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
size_t i;
|
||||
for (i = 0; i < len; ++i) {
|
||||
size_t c = data[(pos + i) & mask];
|
||||
utf8_pos = UTF8Position(last_c, c, 2);
|
||||
++counts[utf8_pos];
|
||||
@ -62,28 +62,31 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
||||
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
|
||||
size_t histogram[3][256] = { { 0 } };
|
||||
size_t window_half = 495;
|
||||
size_t in_window = std::min(window_half, len);
|
||||
size_t in_window = BROTLI_MIN(size_t, window_half, len);
|
||||
size_t in_window_utf8[3] = { 0 };
|
||||
|
||||
/* Bootstrap histograms. */
|
||||
size_t last_c = 0;
|
||||
size_t utf8_pos = 0;
|
||||
for (size_t i = 0; i < in_window; ++i) {
|
||||
size_t c = data[(pos + i) & mask];
|
||||
++histogram[utf8_pos][c];
|
||||
++in_window_utf8[utf8_pos];
|
||||
utf8_pos = UTF8Position(last_c, c, max_utf8);
|
||||
last_c = c;
|
||||
|
||||
size_t i;
|
||||
{ /* Bootstrap histograms. */
|
||||
size_t last_c = 0;
|
||||
size_t utf8_pos = 0;
|
||||
for (i = 0; i < in_window; ++i) {
|
||||
size_t c = data[(pos + i) & mask];
|
||||
++histogram[utf8_pos][c];
|
||||
++in_window_utf8[utf8_pos];
|
||||
utf8_pos = UTF8Position(last_c, c, max_utf8);
|
||||
last_c = c;
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute bit costs with sliding window. */
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
for (i = 0; i < len; ++i) {
|
||||
if (i >= window_half) {
|
||||
/* Remove a byte in the past. */
|
||||
size_t c = i < window_half + 1 ?
|
||||
0 : data[(pos + i - window_half - 1) & mask];
|
||||
size_t last_c = i < window_half + 2 ?
|
||||
0 : data[(pos + i - window_half - 2) & mask];
|
||||
size_t c =
|
||||
i < window_half + 1 ? 0 : data[(pos + i - window_half - 1) & mask];
|
||||
size_t last_c =
|
||||
i < window_half + 2 ? 0 : data[(pos + i - window_half - 2) & mask];
|
||||
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
||||
--histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
|
||||
--in_window_utf8[utf8_pos2];
|
||||
@ -96,71 +99,80 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
||||
++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
|
||||
++in_window_utf8[utf8_pos2];
|
||||
}
|
||||
size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
|
||||
size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
|
||||
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
|
||||
size_t masked_pos = (pos + i) & mask;
|
||||
size_t histo = histogram[utf8_pos][data[masked_pos]];
|
||||
if (histo == 0) {
|
||||
histo = 1;
|
||||
}
|
||||
double lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
|
||||
lit_cost += 0.02905;
|
||||
if (lit_cost < 1.0) {
|
||||
lit_cost *= 0.5;
|
||||
lit_cost += 0.5;
|
||||
}
|
||||
{
|
||||
size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
|
||||
size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
|
||||
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
|
||||
size_t masked_pos = (pos + i) & mask;
|
||||
size_t histo = histogram[utf8_pos][data[masked_pos]];
|
||||
double lit_cost;
|
||||
if (histo == 0) {
|
||||
histo = 1;
|
||||
}
|
||||
lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
|
||||
lit_cost += 0.02905;
|
||||
if (lit_cost < 1.0) {
|
||||
lit_cost *= 0.5;
|
||||
lit_cost += 0.5;
|
||||
}
|
||||
/* Make the first bytes more expensive -- seems to help, not sure why.
|
||||
Perhaps because the entropy source is changing its properties
|
||||
rapidly in the beginning of the file, perhaps because the beginning
|
||||
of the data is a statistical "anomaly". */
|
||||
if (i < 2000) {
|
||||
lit_cost += 0.7 - (static_cast<double>(2000 - i) / 2000.0 * 0.35);
|
||||
if (i < 2000) {
|
||||
lit_cost += 0.7 - ((double)(2000 - i) / 2000.0 * 0.35);
|
||||
}
|
||||
cost[i] = (float)lit_cost;
|
||||
}
|
||||
cost[i] = static_cast<float>(lit_cost);
|
||||
}
|
||||
}
|
||||
|
||||
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
||||
const uint8_t *data, float *cost) {
|
||||
if (IsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
|
||||
void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
||||
const uint8_t *data, float *cost) {
|
||||
if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
|
||||
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
|
||||
return;
|
||||
}
|
||||
size_t histogram[256] = { 0 };
|
||||
size_t window_half = 2000;
|
||||
size_t in_window = std::min(window_half, len);
|
||||
} else {
|
||||
size_t histogram[256] = { 0 };
|
||||
size_t window_half = 2000;
|
||||
size_t in_window = BROTLI_MIN(size_t, window_half, len);
|
||||
|
||||
/* Bootstrap histogram. */
|
||||
for (size_t i = 0; i < in_window; ++i) {
|
||||
++histogram[data[(pos + i) & mask]];
|
||||
}
|
||||
size_t i;
|
||||
for (i = 0; i < in_window; ++i) {
|
||||
++histogram[data[(pos + i) & mask]];
|
||||
}
|
||||
|
||||
/* Compute bit costs with sliding window. */
|
||||
for (size_t i = 0; i < len; ++i) {
|
||||
if (i >= window_half) {
|
||||
for (i = 0; i < len; ++i) {
|
||||
size_t histo;
|
||||
if (i >= window_half) {
|
||||
/* Remove a byte in the past. */
|
||||
--histogram[data[(pos + i - window_half) & mask]];
|
||||
--in_window;
|
||||
}
|
||||
if (i + window_half < len) {
|
||||
--histogram[data[(pos + i - window_half) & mask]];
|
||||
--in_window;
|
||||
}
|
||||
if (i + window_half < len) {
|
||||
/* Add a byte in the future. */
|
||||
++histogram[data[(pos + i + window_half) & mask]];
|
||||
++in_window;
|
||||
++histogram[data[(pos + i + window_half) & mask]];
|
||||
++in_window;
|
||||
}
|
||||
histo = histogram[data[(pos + i) & mask]];
|
||||
if (histo == 0) {
|
||||
histo = 1;
|
||||
}
|
||||
{
|
||||
double lit_cost = FastLog2(in_window) - FastLog2(histo);
|
||||
lit_cost += 0.029;
|
||||
if (lit_cost < 1.0) {
|
||||
lit_cost *= 0.5;
|
||||
lit_cost += 0.5;
|
||||
}
|
||||
cost[i] = (float)lit_cost;
|
||||
}
|
||||
}
|
||||
size_t histo = histogram[data[(pos + i) & mask]];
|
||||
if (histo == 0) {
|
||||
histo = 1;
|
||||
}
|
||||
double lit_cost = FastLog2(in_window) - FastLog2(histo);
|
||||
lit_cost += 0.029;
|
||||
if (lit_cost < 1.0) {
|
||||
lit_cost *= 0.5;
|
||||
lit_cost += 0.5;
|
||||
}
|
||||
cost[i] = static_cast<float>(lit_cost);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
@ -11,15 +11,20 @@
|
||||
#define BROTLI_ENC_LITERAL_COST_H_
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Estimates how many bits the literals in the interval [pos, pos + len) in the
|
||||
ringbuffer (data, mask) will take entropy coded and writes these estimates
|
||||
to the cost[0..len) array. */
|
||||
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
||||
const uint8_t *data, float *cost);
|
||||
BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals(
|
||||
size_t pos, size_t len, size_t mask, const uint8_t *data, float *cost);
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_LITERAL_COST_H_ */
|
||||
|
181
enc/memory.c
Normal file
181
enc/memory.c
Normal file
@ -0,0 +1,181 @@
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Memory manager: allocation wrappers and tracking of live blocks for OOM. */
|
||||
|
||||
#include "./memory.h"
|
||||
|
||||
#include <assert.h>
#include <stdlib.h>  /* exit, free, malloc */
#include <string.h>  /* memcpy */
#include <strings.h>
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MAX_PERM_ALLOCATED 128
|
||||
#define MAX_NEW_ALLOCATED 64
|
||||
#define MAX_NEW_FREED 64
|
||||
|
||||
#define PERM_ALLOCATED_OFFSET 0
|
||||
#define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED
|
||||
#define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED)
|
||||
|
||||
/* Allocation callback used when the caller supplies none: plain malloc.
   |opaque| is ignored. */
static void* DefaultAllocFunc(void* opaque, size_t size) {
  void* block = malloc(size);
  BROTLI_UNUSED(opaque);
  return block;
}
|
||||
|
||||
/* Release callback paired with DefaultAllocFunc: plain free.
   |opaque| is ignored. */
static void DefaultFreeFunc(void* opaque, void* address) {
  free(address);
  BROTLI_UNUSED(opaque);
}
|
||||
|
||||
void BrotliInitMemoryManager(
|
||||
MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
|
||||
void* opaque) {
|
||||
if (!alloc_func) {
|
||||
m->alloc_func = DefaultAllocFunc;
|
||||
m->free_func = DefaultFreeFunc;
|
||||
m->opaque = 0;
|
||||
} else {
|
||||
m->alloc_func = alloc_func;
|
||||
m->free_func = free_func;
|
||||
m->opaque = opaque;
|
||||
}
|
||||
#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
|
||||
m->is_oom = 0;
|
||||
m->perm_allocated = 0;
|
||||
m->new_allocated = 0;
|
||||
m->new_freed = 0;
|
||||
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
|
||||
}
|
||||
|
||||
#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
|
||||
|
||||
/* Exit-on-OOM variant: requests |n| bytes from the configured allocator and
   terminates the process with EXIT_FAILURE if the request fails, so the
   returned pointer is never NULL. */
void* BrotliAllocate(MemoryManager* m, size_t n) {
  void* block = m->alloc_func(m->opaque, n);
  if (block == NULL) {
    exit(EXIT_FAILURE);
  }
  return block;
}
|
||||
|
||||
/* Exit-on-OOM variant: hands |p| straight to the configured free callback.
   No bookkeeping is done and |p| is not checked for NULL here. */
void BrotliFree(MemoryManager* m, void* p) {
  void* const opaque = m->opaque;
  m->free_func(opaque, p);
}
|
||||
|
||||
/* Exit-on-OOM variant: allocations are not tracked in this mode, so there is
   nothing to release and the call is a no-op. */
void BrotliWipeOutMemoryManager(MemoryManager* m) {
  BROTLI_UNUSED(m);
}
|
||||
|
||||
#else /* BROTLI_ENCODER_EXIT_ON_OOM */
|
||||
|
||||
/* Sorts the |n| pointers in |items| into ascending address order, in place.
   Uses Shell sort with a fixed gap sequence; the final gap of 1 is a plain
   insertion sort, which guarantees a fully sorted result for any |n|. */
static void SortPointers(void** items, const size_t n) {
  static const size_t gaps[] = {23, 10, 4, 1};
  /* Derive the pass count from the table so the two cannot drift apart. */
  const size_t num_gaps = sizeof(gaps) / sizeof(gaps[0]);
  size_t g;
  for (g = 0; g < num_gaps; ++g) {
    const size_t gap = gaps[g];
    size_t i;
    for (i = gap; i < n; ++i) {
      size_t j = i;
      void* tmp = items[i];
      /* Shift larger elements of this gap-chain up to make room for tmp. */
      for (; j >= gap && tmp < items[j - gap]; j -= gap) {
        items[j] = items[j - gap];
      }
      items[j] = tmp;
    }
  }
}
|
||||
|
||||
/* Cancels out pointers that occur in both |a| and |b|.
   Both arrays are walked in step as for a merge, so they are expected to be
   sorted ascending. Each pointer found in both is dropped from both; the
   survivors are compacted to the front of their own array, order preserved.
   Returns the number of cancelled pairs, so the new lengths are
   a_len - result and b_len - result. */
static size_t Annihilate(void** a, size_t a_len, void** b, size_t b_len) {
  size_t a_in = 0;
  size_t b_in = 0;
  size_t a_out = 0;
  size_t b_out = 0;
  size_t matched = 0;
  while (a_in < a_len && b_in < b_len) {
    void* lhs = a[a_in];
    void* rhs = b[b_in];
    if (lhs == rhs) {
      /* Present on both sides: drop from both. */
      ++a_in;
      ++b_in;
      ++matched;
      continue;
    }
    if (lhs < rhs) {
      a[a_out++] = lhs;
      ++a_in;
    } else {
      b[b_out++] = rhs;
      ++b_in;
    }
  }
  /* At most one of the two tails is non-empty; keep it as is. */
  for (; a_in < a_len; ++a_in) a[a_out++] = a[a_in];
  for (; b_in < b_len; ++b_in) b[b_out++] = b[b_in];
  return matched;
}
|
||||
|
||||
/* Compacts the pointer bookkeeping of |m|.
   1. Sorts the recently-allocated and recently-freed lists and cancels the
      pointers that appear in both.
   2. Cancels the remaining freed pointers against the long-lived list; every
      freed pointer is expected to be accounted for after this step.
   3. Appends the remaining recent allocations to the long-lived list and
      re-sorts it.
   Afterwards both "recent" lists are empty. */
static void CollectGarbagePointers(MemoryManager* m) {
  void** const perm = m->pointers + PERM_ALLOCATED_OFFSET;
  void** const fresh = m->pointers + NEW_ALLOCATED_OFFSET;
  void** const freed = m->pointers + NEW_FREED_OFFSET;
  size_t cancelled;

  SortPointers(fresh, m->new_allocated);
  SortPointers(freed, m->new_freed);
  cancelled = Annihilate(fresh, m->new_allocated, freed, m->new_freed);
  m->new_allocated -= cancelled;
  m->new_freed -= cancelled;

  if (m->new_freed != 0) {
    cancelled = Annihilate(perm, m->perm_allocated, freed, m->new_freed);
    m->perm_allocated -= cancelled;
    m->new_freed -= cancelled;
    assert(m->new_freed == 0);
  }

  if (m->new_allocated != 0) {
    /* The long-lived region must have room for everything still alive. */
    assert(m->perm_allocated + m->new_allocated <= MAX_PERM_ALLOCATED);
    memcpy(perm + m->perm_allocated, fresh, sizeof(void*) * m->new_allocated);
    m->perm_allocated += m->new_allocated;
    m->new_allocated = 0;
    SortPointers(perm, m->perm_allocated);
  }
}
|
||||
|
||||
/* Tracking variant: requests |n| bytes from the configured allocator.
   On failure sets the OOM flag of |m| and returns NULL. On success the new
   pointer is recorded in the recently-allocated list, which is compacted
   first if it is already full. */
void* BrotliAllocate(MemoryManager* m, size_t n) {
  void* block = m->alloc_func(m->opaque, n);
  if (block == NULL) {
    m->is_oom = 1;
    return NULL;
  }
  if (m->new_allocated == MAX_NEW_ALLOCATED) {
    CollectGarbagePointers(m);
  }
  m->pointers[NEW_ALLOCATED_OFFSET + m->new_allocated] = block;
  ++m->new_allocated;
  return block;
}
|
||||
|
||||
/* Tracking variant: releases |p| through the configured free callback and
   records it in the recently-freed list, compacting the lists first if that
   list is already full. A NULL |p| is ignored. */
void BrotliFree(MemoryManager* m, void* p) {
  if (p != NULL) {
    m->free_func(m->opaque, p);
    if (m->new_freed == MAX_NEW_FREED) {
      CollectGarbagePointers(m);
    }
    m->pointers[NEW_FREED_OFFSET + m->new_freed] = p;
    ++m->new_freed;
  }
}
|
||||
|
||||
/* Tracking variant: releases every block that was allocated through |m| and
   not yet freed. The bookkeeping is compacted first so that all live pointers
   sit in the long-lived list, which is then freed front to back and emptied. */
void BrotliWipeOutMemoryManager(MemoryManager* m) {
  void** live = m->pointers + PERM_ALLOCATED_OFFSET;
  size_t remaining;
  CollectGarbagePointers(m);
  for (remaining = m->perm_allocated; remaining != 0; --remaining) {
    m->free_func(m->opaque, *live);
    ++live;
  }
  m->perm_allocated = 0;
}
|
||||
|
||||
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
62
enc/memory.h
Normal file
62
enc/memory.h
Normal file
@ -0,0 +1,62 @@
|
||||
/* Copyright 2016 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Macros for memory management. */
|
||||
|
||||
#ifndef BROTLI_ENC_MEMORY_H_
|
||||
#define BROTLI_ENC_MEMORY_H_
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if !defined(BROTLI_ENCODER_CLEANUP_ON_OOM) && \
|
||||
!defined(BROTLI_ENCODER_EXIT_ON_OOM)
|
||||
#define BROTLI_ENCODER_EXIT_ON_OOM
|
||||
#endif
|
||||
|
||||
typedef struct MemoryManager {
|
||||
brotli_alloc_func alloc_func;
|
||||
brotli_free_func free_func;
|
||||
void* opaque;
|
||||
#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
|
||||
int is_oom;
|
||||
size_t perm_allocated;
|
||||
size_t new_allocated;
|
||||
size_t new_freed;
|
||||
void* pointers[256];
|
||||
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
|
||||
} MemoryManager;
|
||||
|
||||
BROTLI_INTERNAL void BrotliInitMemoryManager(
|
||||
MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
|
||||
void* opaque);
|
||||
|
||||
BROTLI_INTERNAL void* BrotliAllocate(MemoryManager* m, size_t n);
|
||||
#define BROTLI_ALLOC(M, T, N) ((T*)BrotliAllocate((M), (N) * sizeof(T)))
|
||||
|
||||
BROTLI_INTERNAL void BrotliFree(MemoryManager* m, void* p);
|
||||
#define BROTLI_FREE(M, P) { \
|
||||
BrotliFree((M), (P)); \
|
||||
P = NULL; \
|
||||
}
|
||||
|
||||
#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
|
||||
#define BROTLI_IS_OOM(M) (!!0)
|
||||
#else /* BROTLI_ENCODER_EXIT_ON_OOM */
|
||||
#define BROTLI_IS_OOM(M) (!!(M)->is_oom)
|
||||
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
|
||||
|
||||
BROTLI_INTERNAL void BrotliWipeOutMemoryManager(MemoryManager* m);
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_MEMORY_H_ */
|
846
enc/metablock.c
846
enc/metablock.c
@ -9,212 +9,199 @@
|
||||
|
||||
#include "./metablock.h"
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include "../common/types.h"
|
||||
#include "./bit_cost.h"
|
||||
#include "./block_splitter.h"
|
||||
#include "./cluster.h"
|
||||
#include "./context.h"
|
||||
#include "./entropy_encode.h"
|
||||
#include "./histogram.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
void BuildMetaBlock(const uint8_t* ringbuffer,
|
||||
const size_t pos,
|
||||
const size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
const Command* cmds,
|
||||
size_t num_commands,
|
||||
ContextType literal_context_mode,
|
||||
MetaBlockSplit* mb) {
|
||||
SplitBlock(cmds, num_commands,
|
||||
ringbuffer, pos, mask,
|
||||
&mb->literal_split,
|
||||
&mb->command_split,
|
||||
&mb->distance_split);
|
||||
|
||||
std::vector<ContextType> literal_context_modes(mb->literal_split.num_types,
|
||||
literal_context_mode);
|
||||
|
||||
size_t num_literal_contexts =
|
||||
mb->literal_split.num_types << kLiteralContextBits;
|
||||
size_t num_distance_contexts =
|
||||
mb->distance_split.num_types << kDistanceContextBits;
|
||||
std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
|
||||
mb->command_histograms.resize(mb->command_split.num_types);
|
||||
std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
|
||||
BuildHistograms(cmds, num_commands,
|
||||
mb->literal_split,
|
||||
mb->command_split,
|
||||
mb->distance_split,
|
||||
ringbuffer,
|
||||
pos,
|
||||
mask,
|
||||
prev_byte,
|
||||
prev_byte2,
|
||||
literal_context_modes,
|
||||
&literal_histograms,
|
||||
&mb->command_histograms,
|
||||
&distance_histograms);
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void BrotliBuildMetaBlock(MemoryManager* m,
|
||||
const uint8_t* ringbuffer,
|
||||
const size_t pos,
|
||||
const size_t mask,
|
||||
const int quality,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
const Command* cmds,
|
||||
size_t num_commands,
|
||||
ContextType literal_context_mode,
|
||||
MetaBlockSplit* mb) {
|
||||
/* Histogram ids need to fit in one byte. */
|
||||
static const size_t kMaxNumberOfHistograms = 256;
|
||||
HistogramDistance* distance_histograms;
|
||||
HistogramLiteral* literal_histograms;
|
||||
ContextType* literal_context_modes;
|
||||
size_t num_literal_contexts;
|
||||
size_t num_distance_contexts;
|
||||
size_t i;
|
||||
|
||||
ClusterHistograms(literal_histograms,
|
||||
1u << kLiteralContextBits,
|
||||
mb->literal_split.num_types,
|
||||
kMaxNumberOfHistograms,
|
||||
&mb->literal_histograms,
|
||||
&mb->literal_context_map);
|
||||
BrotliSplitBlock(m, cmds, num_commands,
|
||||
ringbuffer, pos, mask, quality,
|
||||
&mb->literal_split,
|
||||
&mb->command_split,
|
||||
&mb->distance_split);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
ClusterHistograms(distance_histograms,
|
||||
1u << kDistanceContextBits,
|
||||
mb->distance_split.num_types,
|
||||
kMaxNumberOfHistograms,
|
||||
&mb->distance_histograms,
|
||||
&mb->distance_context_map);
|
||||
literal_context_modes =
|
||||
BROTLI_ALLOC(m, ContextType, mb->literal_split.num_types);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
for (i = 0; i < mb->literal_split.num_types; ++i) {
|
||||
literal_context_modes[i] = literal_context_mode;
|
||||
}
|
||||
|
||||
num_literal_contexts =
|
||||
mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
|
||||
num_distance_contexts =
|
||||
mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
|
||||
literal_histograms = BROTLI_ALLOC(m, HistogramLiteral, num_literal_contexts);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
ClearHistogramsLiteral(literal_histograms, num_literal_contexts);
|
||||
|
||||
assert(mb->command_histograms == 0);
|
||||
mb->command_histograms_size = mb->command_split.num_types;
|
||||
mb->command_histograms =
|
||||
BROTLI_ALLOC(m, HistogramCommand, mb->command_histograms_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
ClearHistogramsCommand(mb->command_histograms, mb->command_histograms_size);
|
||||
distance_histograms =
|
||||
BROTLI_ALLOC(m, HistogramDistance, num_distance_contexts);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
ClearHistogramsDistance(distance_histograms, num_distance_contexts);
|
||||
BrotliBuildHistogramsWithContext(cmds, num_commands,
|
||||
&mb->literal_split, &mb->command_split, &mb->distance_split,
|
||||
ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes,
|
||||
literal_histograms, mb->command_histograms, distance_histograms);
|
||||
BROTLI_FREE(m, literal_context_modes);
|
||||
|
||||
assert(mb->literal_context_map == 0);
|
||||
mb->literal_context_map_size =
|
||||
mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
|
||||
mb->literal_context_map =
|
||||
BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
assert(mb->literal_histograms == 0);
|
||||
mb->literal_histograms_size = mb->literal_context_map_size;
|
||||
mb->literal_histograms =
|
||||
BROTLI_ALLOC(m, HistogramLiteral, mb->literal_histograms_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BrotliClusterHistogramsLiteral(m, literal_histograms,
|
||||
mb->literal_context_map_size,
|
||||
kMaxNumberOfHistograms,
|
||||
mb->literal_histograms,
|
||||
&mb->literal_histograms_size,
|
||||
mb->literal_context_map);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BROTLI_FREE(m, literal_histograms);
|
||||
|
||||
assert(mb->distance_context_map == 0);
|
||||
mb->distance_context_map_size =
|
||||
mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
|
||||
mb->distance_context_map =
|
||||
BROTLI_ALLOC(m, uint32_t, mb->distance_context_map_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
assert(mb->distance_histograms == 0);
|
||||
mb->distance_histograms_size = mb->distance_context_map_size;
|
||||
mb->distance_histograms =
|
||||
BROTLI_ALLOC(m, HistogramDistance, mb->distance_histograms_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BrotliClusterHistogramsDistance(m, distance_histograms,
|
||||
mb->distance_context_map_size,
|
||||
kMaxNumberOfHistograms,
|
||||
mb->distance_histograms,
|
||||
&mb->distance_histograms_size,
|
||||
mb->distance_context_map);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BROTLI_FREE(m, distance_histograms);
|
||||
}
|
||||
|
||||
// Greedy block splitter for one block category (literal, command or distance).
|
||||
template<typename HistogramType>
|
||||
class BlockSplitter {
|
||||
public:
|
||||
BlockSplitter(size_t alphabet_size,
|
||||
size_t min_block_size,
|
||||
double split_threshold,
|
||||
size_t num_symbols,
|
||||
BlockSplit* split,
|
||||
std::vector<HistogramType>* histograms)
|
||||
: alphabet_size_(alphabet_size),
|
||||
min_block_size_(min_block_size),
|
||||
split_threshold_(split_threshold),
|
||||
num_blocks_(0),
|
||||
split_(split),
|
||||
histograms_(histograms),
|
||||
target_block_size_(min_block_size),
|
||||
block_size_(0),
|
||||
curr_histogram_ix_(0),
|
||||
merge_last_count_(0) {
|
||||
size_t max_num_blocks = num_symbols / min_block_size + 1;
|
||||
// We have to allocate one more histogram than the maximum number of block
|
||||
// types for the current histogram when the meta-block is too big.
|
||||
size_t max_num_types = std::min<size_t>(max_num_blocks, kMaxBlockTypes + 1);
|
||||
split_->lengths.resize(max_num_blocks);
|
||||
split_->types.resize(max_num_blocks);
|
||||
histograms_->resize(max_num_types);
|
||||
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
|
||||
#define FN(X) X ## Literal
|
||||
#include "./metablock_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Command
|
||||
#include "./metablock_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Distance
|
||||
#include "./metablock_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
void BrotliBuildMetaBlockGreedy(MemoryManager* m,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
const Command *commands,
|
||||
size_t n_commands,
|
||||
MetaBlockSplit* mb) {
|
||||
BlockSplitterLiteral lit_blocks;
|
||||
BlockSplitterCommand cmd_blocks;
|
||||
BlockSplitterDistance dist_blocks;
|
||||
size_t num_literals = 0;
|
||||
size_t i;
|
||||
for (i = 0; i < n_commands; ++i) {
|
||||
num_literals += commands[i].insert_len_;
|
||||
}
|
||||
|
||||
// Adds the next symbol to the current histogram. When the current histogram
|
||||
// reaches the target size, decides on merging the block.
|
||||
void AddSymbol(size_t symbol) {
|
||||
(*histograms_)[curr_histogram_ix_].Add(symbol);
|
||||
++block_size_;
|
||||
if (block_size_ == target_block_size_) {
|
||||
FinishBlock(/* is_final = */ false);
|
||||
InitBlockSplitterLiteral(m, &lit_blocks, 256, 512, 400.0, num_literals,
|
||||
&mb->literal_split, &mb->literal_histograms,
|
||||
&mb->literal_histograms_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024,
|
||||
500.0, n_commands, &mb->command_split, &mb->command_histograms,
|
||||
&mb->command_histograms_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands,
|
||||
&mb->distance_split, &mb->distance_histograms,
|
||||
&mb->distance_histograms_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
for (i = 0; i < n_commands; ++i) {
|
||||
const Command cmd = commands[i];
|
||||
size_t j;
|
||||
BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_);
|
||||
for (j = cmd.insert_len_; j != 0; --j) {
|
||||
BlockSplitterAddSymbolLiteral(&lit_blocks, ringbuffer[pos & mask]);
|
||||
++pos;
|
||||
}
|
||||
pos += CommandCopyLen(&cmd);
|
||||
if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) {
|
||||
BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_);
|
||||
}
|
||||
}
|
||||
|
||||
// Does one of three things:
|
||||
// (1) emits the current block with a new block type;
|
||||
// (2) emits the current block with the type of the second last block;
|
||||
// (3) merges the current block with the last block.
|
||||
void FinishBlock(bool is_final) {
|
||||
if (block_size_ < min_block_size_) {
|
||||
block_size_ = min_block_size_;
|
||||
}
|
||||
if (num_blocks_ == 0) {
|
||||
// Create first block.
|
||||
split_->lengths[0] = static_cast<uint32_t>(block_size_);
|
||||
split_->types[0] = 0;
|
||||
last_entropy_[0] =
|
||||
BitsEntropy(&(*histograms_)[0].data_[0], alphabet_size_);
|
||||
last_entropy_[1] = last_entropy_[0];
|
||||
++num_blocks_;
|
||||
++split_->num_types;
|
||||
++curr_histogram_ix_;
|
||||
block_size_ = 0;
|
||||
} else if (block_size_ > 0) {
|
||||
double entropy = BitsEntropy(&(*histograms_)[curr_histogram_ix_].data_[0],
|
||||
alphabet_size_);
|
||||
HistogramType combined_histo[2];
|
||||
double combined_entropy[2];
|
||||
double diff[2];
|
||||
for (size_t j = 0; j < 2; ++j) {
|
||||
size_t last_histogram_ix = last_histogram_ix_[j];
|
||||
combined_histo[j] = (*histograms_)[curr_histogram_ix_];
|
||||
combined_histo[j].AddHistogram((*histograms_)[last_histogram_ix]);
|
||||
combined_entropy[j] = BitsEntropy(
|
||||
&combined_histo[j].data_[0], alphabet_size_);
|
||||
diff[j] = combined_entropy[j] - entropy - last_entropy_[j];
|
||||
}
|
||||
|
||||
if (split_->num_types < kMaxBlockTypes &&
|
||||
diff[0] > split_threshold_ &&
|
||||
diff[1] > split_threshold_) {
|
||||
// Create new block.
|
||||
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
|
||||
split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
|
||||
last_histogram_ix_[1] = last_histogram_ix_[0];
|
||||
last_histogram_ix_[0] = static_cast<uint8_t>(split_->num_types);
|
||||
last_entropy_[1] = last_entropy_[0];
|
||||
last_entropy_[0] = entropy;
|
||||
++num_blocks_;
|
||||
++split_->num_types;
|
||||
++curr_histogram_ix_;
|
||||
block_size_ = 0;
|
||||
merge_last_count_ = 0;
|
||||
target_block_size_ = min_block_size_;
|
||||
} else if (diff[1] < diff[0] - 20.0) {
|
||||
// Combine this block with second last block.
|
||||
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
|
||||
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
|
||||
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
|
||||
(*histograms_)[last_histogram_ix_[0]] = combined_histo[1];
|
||||
last_entropy_[1] = last_entropy_[0];
|
||||
last_entropy_[0] = combined_entropy[1];
|
||||
++num_blocks_;
|
||||
block_size_ = 0;
|
||||
(*histograms_)[curr_histogram_ix_].Clear();
|
||||
merge_last_count_ = 0;
|
||||
target_block_size_ = min_block_size_;
|
||||
} else {
|
||||
// Combine this block with last block.
|
||||
split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
|
||||
(*histograms_)[last_histogram_ix_[0]] = combined_histo[0];
|
||||
last_entropy_[0] = combined_entropy[0];
|
||||
if (split_->num_types == 1) {
|
||||
last_entropy_[1] = last_entropy_[0];
|
||||
}
|
||||
block_size_ = 0;
|
||||
(*histograms_)[curr_histogram_ix_].Clear();
|
||||
if (++merge_last_count_ > 1) {
|
||||
target_block_size_ += min_block_size_;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (is_final) {
|
||||
(*histograms_).resize(split_->num_types);
|
||||
split_->types.resize(num_blocks_);
|
||||
split_->lengths.resize(num_blocks_);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static const uint16_t kMaxBlockTypes = 256;
|
||||
BlockSplitterFinishBlockLiteral(&lit_blocks, /* is_final = */ 1);
|
||||
BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ 1);
|
||||
BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ 1);
|
||||
}
|
||||
|
||||
/* Greedy block splitter for one block category (literal, command or distance).
|
||||
Gathers histograms for all context buckets. */
|
||||
typedef struct ContextBlockSplitter {
|
||||
/* Alphabet size of particular block category. */
|
||||
const size_t alphabet_size_;
|
||||
size_t alphabet_size_;
|
||||
size_t num_contexts_;
|
||||
size_t max_block_types_;
|
||||
/* We collect at least this many symbols for each block. */
|
||||
const size_t min_block_size_;
|
||||
size_t min_block_size_;
|
||||
/* We merge histograms A and B if
|
||||
entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
|
||||
where A is the current histogram and B is the histogram of the last or the
|
||||
second last block type. */
|
||||
const double split_threshold_;
|
||||
double split_threshold_;
|
||||
|
||||
size_t num_blocks_;
|
||||
BlockSplit* split_; /* not owned */
|
||||
std::vector<HistogramType>* histograms_; /* not owned */
|
||||
HistogramLiteral* histograms_; /* not owned */
|
||||
size_t* histograms_size_; /* not owned */
|
||||
|
||||
/* The number of symbols that we want to collect before deciding on whether
|
||||
or not to merge the block with a previous one or emit a new block. */
|
||||
@ -226,315 +213,302 @@ class BlockSplitter {
|
||||
/* Offset of the histograms of the previous two block types. */
|
||||
size_t last_histogram_ix_[2];
|
||||
/* Entropy of the previous two block types. */
|
||||
double last_entropy_[2];
|
||||
double* last_entropy_;
|
||||
/* The number of times we merged the current block with the last one. */
|
||||
size_t merge_last_count_;
|
||||
};
|
||||
} ContextBlockSplitter;
|
||||
|
||||
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
const Command *commands,
|
||||
size_t n_commands,
|
||||
MetaBlockSplit* mb) {
|
||||
size_t num_literals = 0;
|
||||
for (size_t i = 0; i < n_commands; ++i) {
|
||||
num_literals += commands[i].insert_len_;
|
||||
}
|
||||
static void InitContextBlockSplitter(
|
||||
MemoryManager* m, ContextBlockSplitter* self, size_t alphabet_size,
|
||||
size_t num_contexts, size_t min_block_size, double split_threshold,
|
||||
size_t num_symbols, BlockSplit* split, HistogramLiteral** histograms,
|
||||
size_t* histograms_size) {
|
||||
size_t max_num_blocks = num_symbols / min_block_size + 1;
|
||||
size_t max_num_types;
|
||||
|
||||
BlockSplitter<HistogramLiteral> lit_blocks(
|
||||
256, 512, 400.0, num_literals,
|
||||
&mb->literal_split, &mb->literal_histograms);
|
||||
BlockSplitter<HistogramCommand> cmd_blocks(
|
||||
kNumCommandPrefixes, 1024, 500.0, n_commands,
|
||||
&mb->command_split, &mb->command_histograms);
|
||||
BlockSplitter<HistogramDistance> dist_blocks(
|
||||
64, 512, 100.0, n_commands,
|
||||
&mb->distance_split, &mb->distance_histograms);
|
||||
self->alphabet_size_ = alphabet_size;
|
||||
self->num_contexts_ = num_contexts;
|
||||
self->max_block_types_ = BROTLI_MAX_NUMBER_OF_BLOCK_TYPES / num_contexts;
|
||||
self->min_block_size_ = min_block_size;
|
||||
self->split_threshold_ = split_threshold;
|
||||
self->num_blocks_ = 0;
|
||||
self->split_ = split;
|
||||
self->histograms_size_ = histograms_size;
|
||||
self->target_block_size_ = min_block_size;
|
||||
self->block_size_ = 0;
|
||||
self->curr_histogram_ix_ = 0;
|
||||
self->merge_last_count_ = 0;
|
||||
|
||||
for (size_t i = 0; i < n_commands; ++i) {
|
||||
const Command cmd = commands[i];
|
||||
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
|
||||
for (size_t j = cmd.insert_len_; j != 0; --j) {
|
||||
lit_blocks.AddSymbol(ringbuffer[pos & mask]);
|
||||
++pos;
|
||||
}
|
||||
pos += cmd.copy_len();
|
||||
if (cmd.copy_len() && cmd.cmd_prefix_ >= 128) {
|
||||
dist_blocks.AddSymbol(cmd.dist_prefix_);
|
||||
}
|
||||
}
|
||||
|
||||
lit_blocks.FinishBlock(/* is_final = */ true);
|
||||
cmd_blocks.FinishBlock(/* is_final = */ true);
|
||||
dist_blocks.FinishBlock(/* is_final = */ true);
|
||||
/* We have to allocate one more histogram than the maximum number of block
|
||||
types for the current histogram when the meta-block is too big. */
|
||||
max_num_types =
|
||||
BROTLI_MIN(size_t, max_num_blocks, self->max_block_types_ + 1);
|
||||
BROTLI_ENSURE_CAPACITY(m, uint8_t,
|
||||
split->types, split->types_alloc_size, max_num_blocks);
|
||||
BROTLI_ENSURE_CAPACITY(m, uint32_t,
|
||||
split->lengths, split->lengths_alloc_size, max_num_blocks);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
split->num_blocks = max_num_blocks;
|
||||
self->last_entropy_ = BROTLI_ALLOC(m, double, 2 * num_contexts);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
assert(*histograms == 0);
|
||||
*histograms_size = max_num_types * num_contexts;
|
||||
*histograms = BROTLI_ALLOC(m, HistogramLiteral, *histograms_size);
|
||||
self->histograms_ = *histograms;
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
/* Clear only current histogram. */
|
||||
ClearHistogramsLiteral(&self->histograms_[0], num_contexts);
|
||||
self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
|
||||
}
|
||||
|
||||
// Greedy block splitter for one block category (literal, command or distance).
|
||||
// Gathers histograms for all context buckets.
|
||||
template<typename HistogramType>
|
||||
class ContextBlockSplitter {
|
||||
public:
|
||||
ContextBlockSplitter(size_t alphabet_size,
|
||||
size_t num_contexts,
|
||||
size_t min_block_size,
|
||||
double split_threshold,
|
||||
size_t num_symbols,
|
||||
BlockSplit* split,
|
||||
std::vector<HistogramType>* histograms)
|
||||
: alphabet_size_(alphabet_size),
|
||||
num_contexts_(num_contexts),
|
||||
max_block_types_(kMaxBlockTypes / num_contexts),
|
||||
min_block_size_(min_block_size),
|
||||
split_threshold_(split_threshold),
|
||||
num_blocks_(0),
|
||||
split_(split),
|
||||
histograms_(histograms),
|
||||
target_block_size_(min_block_size),
|
||||
block_size_(0),
|
||||
curr_histogram_ix_(0),
|
||||
last_entropy_(2 * num_contexts),
|
||||
merge_last_count_(0) {
|
||||
size_t max_num_blocks = num_symbols / min_block_size + 1;
|
||||
// We have to allocate one more histogram than the maximum number of block
|
||||
// types for the current histogram when the meta-block is too big.
|
||||
size_t max_num_types = std::min(max_num_blocks, max_block_types_ + 1);
|
||||
split_->lengths.resize(max_num_blocks);
|
||||
split_->types.resize(max_num_blocks);
|
||||
histograms_->resize(max_num_types * num_contexts);
|
||||
last_histogram_ix_[0] = last_histogram_ix_[1] = 0;
|
||||
}
|
||||
|
||||
// Adds the next symbol to the current block type and context. When the
|
||||
// current block reaches the target size, decides on merging the block.
|
||||
void AddSymbol(size_t symbol, size_t context) {
|
||||
(*histograms_)[curr_histogram_ix_ + context].Add(symbol);
|
||||
++block_size_;
|
||||
if (block_size_ == target_block_size_) {
|
||||
FinishBlock(/* is_final = */ false);
|
||||
}
|
||||
}
|
||||
static void CleanupContextBlockSplitter(
|
||||
MemoryManager* m, ContextBlockSplitter* self) {
|
||||
BROTLI_FREE(m, self->last_entropy_);
|
||||
}
|
||||
|
||||
/* Does one of three things:
|
||||
(1) emits the current block with a new block type;
|
||||
(2) emits the current block with the type of the second last block;
|
||||
(3) merges the current block with the last block. */
|
||||
void FinishBlock(bool is_final) {
|
||||
if (block_size_ < min_block_size_) {
|
||||
block_size_ = min_block_size_;
|
||||
static void ContextBlockSplitterFinishBlock(
|
||||
MemoryManager* m, ContextBlockSplitter* self, int is_final) {
|
||||
BlockSplit* split = self->split_;
|
||||
const size_t num_contexts = self->num_contexts_;
|
||||
double* last_entropy = self->last_entropy_;
|
||||
HistogramLiteral* histograms = self->histograms_;
|
||||
|
||||
if (self->block_size_ < self->min_block_size_) {
|
||||
self->block_size_ = self->min_block_size_;
|
||||
}
|
||||
if (self->num_blocks_ == 0) {
|
||||
size_t i;
|
||||
/* Create first block. */
|
||||
split->lengths[0] = (uint32_t)self->block_size_;
|
||||
split->types[0] = 0;
|
||||
|
||||
for (i = 0; i < num_contexts; ++i) {
|
||||
last_entropy[i] =
|
||||
BitsEntropy(histograms[i].data_, self->alphabet_size_);
|
||||
last_entropy[num_contexts + i] = last_entropy[i];
|
||||
}
|
||||
if (num_blocks_ == 0) {
|
||||
// Create first block.
|
||||
split_->lengths[0] = static_cast<uint32_t>(block_size_);
|
||||
split_->types[0] = 0;
|
||||
for (size_t i = 0; i < num_contexts_; ++i) {
|
||||
last_entropy_[i] =
|
||||
BitsEntropy(&(*histograms_)[i].data_[0], alphabet_size_);
|
||||
last_entropy_[num_contexts_ + i] = last_entropy_[i];
|
||||
}
|
||||
++num_blocks_;
|
||||
++split_->num_types;
|
||||
curr_histogram_ix_ += num_contexts_;
|
||||
block_size_ = 0;
|
||||
} else if (block_size_ > 0) {
|
||||
++self->num_blocks_;
|
||||
++split->num_types;
|
||||
self->curr_histogram_ix_ += num_contexts;
|
||||
if (self->curr_histogram_ix_ < *self->histograms_size_) {
|
||||
ClearHistogramsLiteral(
|
||||
&self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
|
||||
}
|
||||
self->block_size_ = 0;
|
||||
} else if (self->block_size_ > 0) {
|
||||
/* Try merging the set of histograms for the current block type with the
|
||||
respective set of histograms for the last and second last block types.
|
||||
Decide over the split based on the total reduction of entropy across
|
||||
all contexts. */
|
||||
std::vector<double> entropy(num_contexts_);
|
||||
std::vector<HistogramType> combined_histo(2 * num_contexts_);
|
||||
std::vector<double> combined_entropy(2 * num_contexts_);
|
||||
double diff[2] = { 0.0 };
|
||||
for (size_t i = 0; i < num_contexts_; ++i) {
|
||||
size_t curr_histo_ix = curr_histogram_ix_ + i;
|
||||
entropy[i] = BitsEntropy(&(*histograms_)[curr_histo_ix].data_[0],
|
||||
alphabet_size_);
|
||||
for (size_t j = 0; j < 2; ++j) {
|
||||
size_t jx = j * num_contexts_ + i;
|
||||
size_t last_histogram_ix = last_histogram_ix_[j] + i;
|
||||
combined_histo[jx] = (*histograms_)[curr_histo_ix];
|
||||
combined_histo[jx].AddHistogram((*histograms_)[last_histogram_ix]);
|
||||
combined_entropy[jx] = BitsEntropy(
|
||||
&combined_histo[jx].data_[0], alphabet_size_);
|
||||
diff[j] += combined_entropy[jx] - entropy[i] - last_entropy_[jx];
|
||||
}
|
||||
double* entropy = BROTLI_ALLOC(m, double, num_contexts);
|
||||
HistogramLiteral* combined_histo =
|
||||
BROTLI_ALLOC(m, HistogramLiteral, 2 * num_contexts);
|
||||
double* combined_entropy = BROTLI_ALLOC(m, double, 2 * num_contexts);
|
||||
double diff[2] = { 0.0 };
|
||||
size_t i;
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
for (i = 0; i < num_contexts; ++i) {
|
||||
size_t curr_histo_ix = self->curr_histogram_ix_ + i;
|
||||
size_t j;
|
||||
entropy[i] = BitsEntropy(histograms[curr_histo_ix].data_,
|
||||
self->alphabet_size_);
|
||||
for (j = 0; j < 2; ++j) {
|
||||
size_t jx = j * num_contexts + i;
|
||||
size_t last_histogram_ix = self->last_histogram_ix_[j] + i;
|
||||
combined_histo[jx] = histograms[curr_histo_ix];
|
||||
HistogramAddHistogramLiteral(&combined_histo[jx],
|
||||
&histograms[last_histogram_ix]);
|
||||
combined_entropy[jx] = BitsEntropy(
|
||||
&combined_histo[jx].data_[0], self->alphabet_size_);
|
||||
diff[j] += combined_entropy[jx] - entropy[i] - last_entropy[jx];
|
||||
}
|
||||
}
|
||||
|
||||
if (split_->num_types < max_block_types_ &&
|
||||
diff[0] > split_threshold_ &&
|
||||
diff[1] > split_threshold_) {
|
||||
// Create new block.
|
||||
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
|
||||
split_->types[num_blocks_] = static_cast<uint8_t>(split_->num_types);
|
||||
last_histogram_ix_[1] = last_histogram_ix_[0];
|
||||
last_histogram_ix_[0] = split_->num_types * num_contexts_;
|
||||
for (size_t i = 0; i < num_contexts_; ++i) {
|
||||
last_entropy_[num_contexts_ + i] = last_entropy_[i];
|
||||
last_entropy_[i] = entropy[i];
|
||||
}
|
||||
++num_blocks_;
|
||||
++split_->num_types;
|
||||
curr_histogram_ix_ += num_contexts_;
|
||||
block_size_ = 0;
|
||||
merge_last_count_ = 0;
|
||||
target_block_size_ = min_block_size_;
|
||||
} else if (diff[1] < diff[0] - 20.0) {
|
||||
// Combine this block with second last block.
|
||||
split_->lengths[num_blocks_] = static_cast<uint32_t>(block_size_);
|
||||
split_->types[num_blocks_] = split_->types[num_blocks_ - 2];
|
||||
std::swap(last_histogram_ix_[0], last_histogram_ix_[1]);
|
||||
for (size_t i = 0; i < num_contexts_; ++i) {
|
||||
(*histograms_)[last_histogram_ix_[0] + i] =
|
||||
combined_histo[num_contexts_ + i];
|
||||
last_entropy_[num_contexts_ + i] = last_entropy_[i];
|
||||
last_entropy_[i] = combined_entropy[num_contexts_ + i];
|
||||
(*histograms_)[curr_histogram_ix_ + i].Clear();
|
||||
}
|
||||
++num_blocks_;
|
||||
block_size_ = 0;
|
||||
merge_last_count_ = 0;
|
||||
target_block_size_ = min_block_size_;
|
||||
} else {
|
||||
// Combine this block with last block.
|
||||
split_->lengths[num_blocks_ - 1] += static_cast<uint32_t>(block_size_);
|
||||
for (size_t i = 0; i < num_contexts_; ++i) {
|
||||
(*histograms_)[last_histogram_ix_[0] + i] = combined_histo[i];
|
||||
last_entropy_[i] = combined_entropy[i];
|
||||
if (split_->num_types == 1) {
|
||||
last_entropy_[num_contexts_ + i] = last_entropy_[i];
|
||||
}
|
||||
(*histograms_)[curr_histogram_ix_ + i].Clear();
|
||||
}
|
||||
block_size_ = 0;
|
||||
if (++merge_last_count_ > 1) {
|
||||
target_block_size_ += min_block_size_;
|
||||
if (split->num_types < self->max_block_types_ &&
|
||||
diff[0] > self->split_threshold_ &&
|
||||
diff[1] > self->split_threshold_) {
|
||||
/* Create new block. */
|
||||
split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
|
||||
split->types[self->num_blocks_] = (uint8_t)split->num_types;
|
||||
self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
|
||||
self->last_histogram_ix_[0] = split->num_types * num_contexts;
|
||||
for (i = 0; i < num_contexts; ++i) {
|
||||
last_entropy[num_contexts + i] = last_entropy[i];
|
||||
last_entropy[i] = entropy[i];
|
||||
}
|
||||
++self->num_blocks_;
|
||||
++split->num_types;
|
||||
self->curr_histogram_ix_ += num_contexts;
|
||||
if (self->curr_histogram_ix_ < *self->histograms_size_) {
|
||||
ClearHistogramsLiteral(
|
||||
&self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
|
||||
}
|
||||
self->block_size_ = 0;
|
||||
self->merge_last_count_ = 0;
|
||||
self->target_block_size_ = self->min_block_size_;
|
||||
} else if (diff[1] < diff[0] - 20.0) {
|
||||
/* Combine this block with second last block. */
|
||||
split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
|
||||
split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
|
||||
BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
|
||||
for (i = 0; i < num_contexts; ++i) {
|
||||
histograms[self->last_histogram_ix_[0] + i] =
|
||||
combined_histo[num_contexts + i];
|
||||
last_entropy[num_contexts + i] = last_entropy[i];
|
||||
last_entropy[i] = combined_entropy[num_contexts + i];
|
||||
HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
|
||||
}
|
||||
++self->num_blocks_;
|
||||
self->block_size_ = 0;
|
||||
self->merge_last_count_ = 0;
|
||||
self->target_block_size_ = self->min_block_size_;
|
||||
} else {
|
||||
/* Combine this block with last block. */
|
||||
split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
|
||||
for (i = 0; i < num_contexts; ++i) {
|
||||
histograms[self->last_histogram_ix_[0] + i] = combined_histo[i];
|
||||
last_entropy[i] = combined_entropy[i];
|
||||
if (split->num_types == 1) {
|
||||
last_entropy[num_contexts + i] = last_entropy[i];
|
||||
}
|
||||
HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
|
||||
}
|
||||
self->block_size_ = 0;
|
||||
if (++self->merge_last_count_ > 1) {
|
||||
self->target_block_size_ += self->min_block_size_;
|
||||
}
|
||||
}
|
||||
if (is_final) {
|
||||
(*histograms_).resize(split_->num_types * num_contexts_);
|
||||
split_->types.resize(num_blocks_);
|
||||
split_->lengths.resize(num_blocks_);
|
||||
}
|
||||
BROTLI_FREE(m, combined_entropy);
|
||||
BROTLI_FREE(m, combined_histo);
|
||||
BROTLI_FREE(m, entropy);
|
||||
}
|
||||
if (is_final) {
|
||||
*self->histograms_size_ = split->num_types * num_contexts;
|
||||
split->num_blocks = self->num_blocks_;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static const int kMaxBlockTypes = 256;
|
||||
/* Adds the next symbol to the current block type and context. When the
|
||||
current block reaches the target size, decides on merging the block. */
|
||||
static void ContextBlockSplitterAddSymbol(MemoryManager* m,
|
||||
ContextBlockSplitter* self, size_t symbol, size_t context) {
|
||||
HistogramAddLiteral(&self->histograms_[self->curr_histogram_ix_ + context],
|
||||
symbol);
|
||||
++self->block_size_;
|
||||
if (self->block_size_ == self->target_block_size_) {
|
||||
ContextBlockSplitterFinishBlock(m, self, /* is_final = */ 0);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
}
|
||||
}
|
||||
|
||||
// Alphabet size of particular block category.
|
||||
const size_t alphabet_size_;
|
||||
const size_t num_contexts_;
|
||||
const size_t max_block_types_;
|
||||
// We collect at least this many symbols for each block.
|
||||
const size_t min_block_size_;
|
||||
// We merge histograms A and B if
|
||||
// entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
|
||||
// where A is the current histogram and B is the histogram of the last or the
|
||||
// second last block type.
|
||||
const double split_threshold_;
|
||||
|
||||
size_t num_blocks_;
|
||||
BlockSplit* split_; // not owned
|
||||
std::vector<HistogramType>* histograms_; // not owned
|
||||
|
||||
// The number of symbols that we want to collect before deciding on whether
|
||||
// or not to merge the block with a previous one or emit a new block.
|
||||
size_t target_block_size_;
|
||||
// The number of symbols in the current histogram.
|
||||
size_t block_size_;
|
||||
// Offset of the current histogram.
|
||||
size_t curr_histogram_ix_;
|
||||
// Offset of the histograms of the previous two block types.
|
||||
size_t last_histogram_ix_[2];
|
||||
// Entropy of the previous two block types.
|
||||
std::vector<double> last_entropy_;
|
||||
// The number of times we merged the current block with the last one.
|
||||
size_t merge_last_count_;
|
||||
};
|
||||
|
||||
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
ContextType literal_context_mode,
|
||||
size_t num_contexts,
|
||||
const uint32_t* static_context_map,
|
||||
const Command *commands,
|
||||
size_t n_commands,
|
||||
MetaBlockSplit* mb) {
|
||||
void BrotliBuildMetaBlockGreedyWithContexts(MemoryManager* m,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
ContextType literal_context_mode,
|
||||
size_t num_contexts,
|
||||
const uint32_t* static_context_map,
|
||||
const Command *commands,
|
||||
size_t n_commands,
|
||||
MetaBlockSplit* mb) {
|
||||
ContextBlockSplitter lit_blocks;
|
||||
BlockSplitterCommand cmd_blocks;
|
||||
BlockSplitterDistance dist_blocks;
|
||||
size_t num_literals = 0;
|
||||
for (size_t i = 0; i < n_commands; ++i) {
|
||||
size_t i;
|
||||
for (i = 0; i < n_commands; ++i) {
|
||||
num_literals += commands[i].insert_len_;
|
||||
}
|
||||
|
||||
ContextBlockSplitter<HistogramLiteral> lit_blocks(
|
||||
256, num_contexts, 512, 400.0, num_literals,
|
||||
&mb->literal_split, &mb->literal_histograms);
|
||||
BlockSplitter<HistogramCommand> cmd_blocks(
|
||||
kNumCommandPrefixes, 1024, 500.0, n_commands,
|
||||
&mb->command_split, &mb->command_histograms);
|
||||
BlockSplitter<HistogramDistance> dist_blocks(
|
||||
64, 512, 100.0, n_commands,
|
||||
&mb->distance_split, &mb->distance_histograms);
|
||||
InitContextBlockSplitter(m, &lit_blocks, 256, num_contexts, 512, 400.0,
|
||||
num_literals, &mb->literal_split, &mb->literal_histograms,
|
||||
&mb->literal_histograms_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024,
|
||||
500.0, n_commands, &mb->command_split, &mb->command_histograms,
|
||||
&mb->command_histograms_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands,
|
||||
&mb->distance_split, &mb->distance_histograms,
|
||||
&mb->distance_histograms_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
for (size_t i = 0; i < n_commands; ++i) {
|
||||
for (i = 0; i < n_commands; ++i) {
|
||||
const Command cmd = commands[i];
|
||||
cmd_blocks.AddSymbol(cmd.cmd_prefix_);
|
||||
for (size_t j = cmd.insert_len_; j != 0; --j) {
|
||||
size_t j;
|
||||
BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_);
|
||||
for (j = cmd.insert_len_; j != 0; --j) {
|
||||
size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
|
||||
uint8_t literal = ringbuffer[pos & mask];
|
||||
lit_blocks.AddSymbol(literal, static_context_map[context]);
|
||||
ContextBlockSplitterAddSymbol(
|
||||
m, &lit_blocks, literal, static_context_map[context]);
|
||||
prev_byte2 = prev_byte;
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
prev_byte = literal;
|
||||
++pos;
|
||||
}
|
||||
pos += cmd.copy_len();
|
||||
if (cmd.copy_len()) {
|
||||
pos += CommandCopyLen(&cmd);
|
||||
if (CommandCopyLen(&cmd)) {
|
||||
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
||||
prev_byte = ringbuffer[(pos - 1) & mask];
|
||||
if (cmd.cmd_prefix_ >= 128) {
|
||||
dist_blocks.AddSymbol(cmd.dist_prefix_);
|
||||
BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lit_blocks.FinishBlock(/* is_final = */ true);
|
||||
cmd_blocks.FinishBlock(/* is_final = */ true);
|
||||
dist_blocks.FinishBlock(/* is_final = */ true);
|
||||
ContextBlockSplitterFinishBlock(m, &lit_blocks, /* is_final = */ 1);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
CleanupContextBlockSplitter(m, &lit_blocks);
|
||||
BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ 1);
|
||||
BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ 1);
|
||||
|
||||
mb->literal_context_map.resize(
|
||||
mb->literal_split.num_types << kLiteralContextBits);
|
||||
for (size_t i = 0; i < mb->literal_split.num_types; ++i) {
|
||||
for (size_t j = 0; j < (1u << kLiteralContextBits); ++j) {
|
||||
mb->literal_context_map[(i << kLiteralContextBits) + j] =
|
||||
static_cast<uint32_t>(i * num_contexts) + static_context_map[j];
|
||||
assert(mb->literal_context_map == 0);
|
||||
mb->literal_context_map_size =
|
||||
mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
|
||||
mb->literal_context_map =
|
||||
BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
for (i = 0; i < mb->literal_split.num_types; ++i) {
|
||||
size_t j;
|
||||
for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS); ++j) {
|
||||
mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
|
||||
(uint32_t)(i * num_contexts) + static_context_map[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OptimizeHistograms(size_t num_direct_distance_codes,
|
||||
size_t distance_postfix_bits,
|
||||
MetaBlockSplit* mb) {
|
||||
uint8_t* good_for_rle = new uint8_t[kNumCommandPrefixes];
|
||||
for (size_t i = 0; i < mb->literal_histograms.size(); ++i) {
|
||||
OptimizeHuffmanCountsForRle(256, &mb->literal_histograms[i].data_[0],
|
||||
good_for_rle);
|
||||
void BrotliOptimizeHistograms(size_t num_direct_distance_codes,
|
||||
size_t distance_postfix_bits,
|
||||
MetaBlockSplit* mb) {
|
||||
uint8_t good_for_rle[BROTLI_NUM_COMMAND_SYMBOLS];
|
||||
size_t num_distance_codes;
|
||||
size_t i;
|
||||
for (i = 0; i < mb->literal_histograms_size; ++i) {
|
||||
BrotliOptimizeHuffmanCountsForRle(256, mb->literal_histograms[i].data_,
|
||||
good_for_rle);
|
||||
}
|
||||
for (size_t i = 0; i < mb->command_histograms.size(); ++i) {
|
||||
OptimizeHuffmanCountsForRle(kNumCommandPrefixes,
|
||||
&mb->command_histograms[i].data_[0],
|
||||
good_for_rle);
|
||||
for (i = 0; i < mb->command_histograms_size; ++i) {
|
||||
BrotliOptimizeHuffmanCountsForRle(BROTLI_NUM_COMMAND_SYMBOLS,
|
||||
mb->command_histograms[i].data_,
|
||||
good_for_rle);
|
||||
}
|
||||
size_t num_distance_codes =
|
||||
kNumDistanceShortCodes + num_direct_distance_codes +
|
||||
(48u << distance_postfix_bits);
|
||||
for (size_t i = 0; i < mb->distance_histograms.size(); ++i) {
|
||||
OptimizeHuffmanCountsForRle(num_distance_codes,
|
||||
&mb->distance_histograms[i].data_[0],
|
||||
good_for_rle);
|
||||
num_distance_codes = BROTLI_NUM_DISTANCE_SHORT_CODES +
|
||||
num_direct_distance_codes + (48u << distance_postfix_bits);
|
||||
for (i = 0; i < mb->distance_histograms_size; ++i) {
|
||||
BrotliOptimizeHuffmanCountsForRle(num_distance_codes,
|
||||
mb->distance_histograms[i].data_,
|
||||
good_for_rle);
|
||||
}
|
||||
delete[] good_for_rle;
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
124
enc/metablock.h
124
enc/metablock.h
@ -10,72 +10,100 @@
|
||||
#ifndef BROTLI_ENC_METABLOCK_H_
|
||||
#define BROTLI_ENC_METABLOCK_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./block_splitter.h"
|
||||
#include "./command.h"
|
||||
#include "./context.h"
|
||||
#include "./histogram.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct BlockSplit {
|
||||
BlockSplit(void) : num_types(0) {}
|
||||
|
||||
size_t num_types;
|
||||
std::vector<uint8_t> types;
|
||||
std::vector<uint32_t> lengths;
|
||||
};
|
||||
|
||||
struct MetaBlockSplit {
|
||||
typedef struct MetaBlockSplit {
|
||||
BlockSplit literal_split;
|
||||
BlockSplit command_split;
|
||||
BlockSplit distance_split;
|
||||
std::vector<uint32_t> literal_context_map;
|
||||
std::vector<uint32_t> distance_context_map;
|
||||
std::vector<HistogramLiteral> literal_histograms;
|
||||
std::vector<HistogramCommand> command_histograms;
|
||||
std::vector<HistogramDistance> distance_histograms;
|
||||
};
|
||||
uint32_t* literal_context_map;
|
||||
size_t literal_context_map_size;
|
||||
uint32_t* distance_context_map;
|
||||
size_t distance_context_map_size;
|
||||
HistogramLiteral* literal_histograms;
|
||||
size_t literal_histograms_size;
|
||||
HistogramCommand* command_histograms;
|
||||
size_t command_histograms_size;
|
||||
HistogramDistance* distance_histograms;
|
||||
size_t distance_histograms_size;
|
||||
} MetaBlockSplit;
|
||||
|
||||
/* Puts |mb| into its empty state: each of the three block splits is handed to
   BrotliInitBlockSplit, and every context-map / histogram array is set to a
   null pointer with a zero element count. */
static BROTLI_INLINE void InitMetaBlockSplit(MetaBlockSplit* mb) {
  /* Per-category block splits. */
  BrotliInitBlockSplit(&mb->literal_split);
  BrotliInitBlockSplit(&mb->command_split);
  BrotliInitBlockSplit(&mb->distance_split);

  /* Context maps. */
  mb->literal_context_map_size = 0;
  mb->literal_context_map = 0;
  mb->distance_context_map_size = 0;
  mb->distance_context_map = 0;

  /* Histogram arrays. */
  mb->literal_histograms_size = 0;
  mb->literal_histograms = 0;
  mb->command_histograms_size = 0;
  mb->command_histograms = 0;
  mb->distance_histograms_size = 0;
  mb->distance_histograms = 0;
}
|
||||
|
||||
static BROTLI_INLINE void DestroyMetaBlockSplit(
|
||||
MemoryManager* m, MetaBlockSplit* mb) {
|
||||
BrotliDestroyBlockSplit(m, &mb->literal_split);
|
||||
BrotliDestroyBlockSplit(m, &mb->command_split);
|
||||
BrotliDestroyBlockSplit(m, &mb->distance_split);
|
||||
BROTLI_FREE(m, mb->literal_context_map);
|
||||
BROTLI_FREE(m, mb->distance_context_map);
|
||||
BROTLI_FREE(m, mb->literal_histograms);
|
||||
BROTLI_FREE(m, mb->command_histograms);
|
||||
BROTLI_FREE(m, mb->distance_histograms);
|
||||
}
|
||||
|
||||
/* Uses the slow shortest-path block splitter and does context clustering. */
|
||||
void BuildMetaBlock(const uint8_t* ringbuffer,
|
||||
const size_t pos,
|
||||
const size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
const Command* cmds,
|
||||
size_t num_commands,
|
||||
ContextType literal_context_mode,
|
||||
MetaBlockSplit* mb);
|
||||
BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
|
||||
const uint8_t* ringbuffer,
|
||||
const size_t pos,
|
||||
const size_t mask,
|
||||
const int quality,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
const Command* cmds,
|
||||
size_t num_commands,
|
||||
ContextType literal_context_mode,
|
||||
MetaBlockSplit* mb);
|
||||
|
||||
/* Uses a fast greedy block splitter that tries to merge current block with the
|
||||
last or the second last block and does not do any context modeling. */
|
||||
void BuildMetaBlockGreedy(const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
const Command *commands,
|
||||
size_t n_commands,
|
||||
MetaBlockSplit* mb);
|
||||
BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(MemoryManager* m,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
const Command* commands,
|
||||
size_t n_commands,
|
||||
MetaBlockSplit* mb);
|
||||
|
||||
/* Uses a fast greedy block splitter that tries to merge current block with the
|
||||
last or the second last block and uses a static context clustering which
|
||||
is the same for all block types. */
|
||||
void BuildMetaBlockGreedyWithContexts(const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
ContextType literal_context_mode,
|
||||
size_t num_contexts,
|
||||
const uint32_t* static_context_map,
|
||||
const Command *commands,
|
||||
size_t n_commands,
|
||||
MetaBlockSplit* mb);
|
||||
BROTLI_INTERNAL void BrotliBuildMetaBlockGreedyWithContexts(
|
||||
MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
|
||||
uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode,
|
||||
size_t num_contexts, const uint32_t* static_context_map,
|
||||
const Command* commands, size_t n_commands, MetaBlockSplit* mb);
|
||||
|
||||
void OptimizeHistograms(size_t num_direct_distance_codes,
|
||||
size_t distance_postfix_bits,
|
||||
MetaBlockSplit* mb);
|
||||
BROTLI_INTERNAL void BrotliOptimizeHistograms(size_t num_direct_distance_codes,
|
||||
size_t distance_postfix_bits,
|
||||
MetaBlockSplit* mb);
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_METABLOCK_H_ */
|
||||
|
183
enc/metablock_inc.h
Normal file
183
enc/metablock_inc.h
Normal file
@ -0,0 +1,183 @@
|
||||
/* NOLINT(build/header_guard) */
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* template parameters: FN */
|
||||
|
||||
#define HistogramType FN(Histogram)
|
||||
|
||||
/* Greedy block splitter for one block category (literal, command or distance).
|
||||
*/
|
||||
typedef struct FN(BlockSplitter) {
|
||||
/* Alphabet size of particular block category. */
|
||||
size_t alphabet_size_;
|
||||
/* We collect at least this many symbols for each block. */
|
||||
size_t min_block_size_;
|
||||
/* We merge histograms A and B if
|
||||
entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
|
||||
where A is the current histogram and B is the histogram of the last or the
|
||||
second last block type. */
|
||||
double split_threshold_;
|
||||
|
||||
size_t num_blocks_;
|
||||
BlockSplit* split_; /* not owned */
|
||||
HistogramType* histograms_; /* not owned */
|
||||
size_t* histograms_size_; /* not owned */
|
||||
|
||||
/* The number of symbols that we want to collect before deciding on whether
|
||||
or not to merge the block with a previous one or emit a new block. */
|
||||
size_t target_block_size_;
|
||||
/* The number of symbols in the current histogram. */
|
||||
size_t block_size_;
|
||||
/* Offset of the current histogram. */
|
||||
size_t curr_histogram_ix_;
|
||||
/* Offset of the histograms of the previous two block types. */
|
||||
size_t last_histogram_ix_[2];
|
||||
/* Entropy of the previous two block types. */
|
||||
double last_entropy_[2];
|
||||
/* The number of times we merged the current block with the last one. */
|
||||
size_t merge_last_count_;
|
||||
} FN(BlockSplitter);
|
||||
|
||||
static void FN(InitBlockSplitter)(
|
||||
MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size,
|
||||
size_t min_block_size, double split_threshold, size_t num_symbols,
|
||||
BlockSplit* split, HistogramType** histograms, size_t* histograms_size) {
|
||||
size_t max_num_blocks = num_symbols / min_block_size + 1;
|
||||
/* We have to allocate one more histogram than the maximum number of block
|
||||
types for the current histogram when the meta-block is too big. */
|
||||
size_t max_num_types =
|
||||
BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1);
|
||||
self->alphabet_size_ = alphabet_size;
|
||||
self->min_block_size_ = min_block_size;
|
||||
self->split_threshold_ = split_threshold;
|
||||
self->num_blocks_ = 0;
|
||||
self->split_ = split;
|
||||
self->histograms_size_ = histograms_size;
|
||||
self->target_block_size_ = min_block_size;
|
||||
self->block_size_ = 0;
|
||||
self->curr_histogram_ix_ = 0;
|
||||
self->merge_last_count_ = 0;
|
||||
BROTLI_ENSURE_CAPACITY(m, uint8_t,
|
||||
split->types, split->types_alloc_size, max_num_blocks);
|
||||
BROTLI_ENSURE_CAPACITY(m, uint32_t,
|
||||
split->lengths, split->lengths_alloc_size, max_num_blocks);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
self->split_->num_blocks = max_num_blocks;
|
||||
assert(*histograms == 0);
|
||||
*histograms_size = max_num_types;
|
||||
*histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size);
|
||||
self->histograms_ = *histograms;
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
/* Clear only current histogram. */
|
||||
FN(HistogramClear)(&self->histograms_[0]);
|
||||
self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
|
||||
}
|
||||
|
||||
/* Does either of three things:
|
||||
(1) emits the current block with a new block type;
|
||||
(2) emits the current block with the type of the second last block;
|
||||
(3) merges the current block with the last block. */
|
||||
static void FN(BlockSplitterFinishBlock)(FN(BlockSplitter)* self,
|
||||
int is_final) {
|
||||
BlockSplit* split = self->split_;
|
||||
double* last_entropy = self->last_entropy_;
|
||||
HistogramType* histograms = self->histograms_;
|
||||
self->block_size_ =
|
||||
BROTLI_MAX(size_t, self->block_size_, self->min_block_size_);
|
||||
if (self->num_blocks_ == 0) {
|
||||
/* Create first block. */
|
||||
split->lengths[0] = (uint32_t)self->block_size_;
|
||||
split->types[0] = 0;
|
||||
last_entropy[0] =
|
||||
BitsEntropy(histograms[0].data_, self->alphabet_size_);
|
||||
last_entropy[1] = last_entropy[0];
|
||||
++self->num_blocks_;
|
||||
++split->num_types;
|
||||
++self->curr_histogram_ix_;
|
||||
if (self->curr_histogram_ix_ < *self->histograms_size_)
|
||||
FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
|
||||
self->block_size_ = 0;
|
||||
} else if (self->block_size_ > 0) {
|
||||
double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_,
|
||||
self->alphabet_size_);
|
||||
HistogramType combined_histo[2];
|
||||
double combined_entropy[2];
|
||||
double diff[2];
|
||||
size_t j;
|
||||
for (j = 0; j < 2; ++j) {
|
||||
size_t last_histogram_ix = self->last_histogram_ix_[j];
|
||||
combined_histo[j] = histograms[self->curr_histogram_ix_];
|
||||
FN(HistogramAddHistogram)(&combined_histo[j],
|
||||
&histograms[last_histogram_ix]);
|
||||
combined_entropy[j] = BitsEntropy(
|
||||
&combined_histo[j].data_[0], self->alphabet_size_);
|
||||
diff[j] = combined_entropy[j] - entropy - last_entropy[j];
|
||||
}
|
||||
|
||||
if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES &&
|
||||
diff[0] > self->split_threshold_ &&
|
||||
diff[1] > self->split_threshold_) {
|
||||
/* Create new block. */
|
||||
split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
|
||||
split->types[self->num_blocks_] = (uint8_t)split->num_types;
|
||||
self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
|
||||
self->last_histogram_ix_[0] = (uint8_t)split->num_types;
|
||||
last_entropy[1] = last_entropy[0];
|
||||
last_entropy[0] = entropy;
|
||||
++self->num_blocks_;
|
||||
++split->num_types;
|
||||
++self->curr_histogram_ix_;
|
||||
if (self->curr_histogram_ix_ < *self->histograms_size_)
|
||||
FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
|
||||
self->block_size_ = 0;
|
||||
self->merge_last_count_ = 0;
|
||||
self->target_block_size_ = self->min_block_size_;
|
||||
} else if (diff[1] < diff[0] - 20.0) {
|
||||
/* Combine this block with second last block. */
|
||||
split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
|
||||
split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
|
||||
BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
|
||||
histograms[self->last_histogram_ix_[0]] = combined_histo[1];
|
||||
last_entropy[1] = last_entropy[0];
|
||||
last_entropy[0] = combined_entropy[1];
|
||||
++self->num_blocks_;
|
||||
self->block_size_ = 0;
|
||||
FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
|
||||
self->merge_last_count_ = 0;
|
||||
self->target_block_size_ = self->min_block_size_;
|
||||
} else {
|
||||
/* Combine this block with last block. */
|
||||
split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
|
||||
histograms[self->last_histogram_ix_[0]] = combined_histo[0];
|
||||
last_entropy[0] = combined_entropy[0];
|
||||
if (split->num_types == 1) {
|
||||
last_entropy[1] = last_entropy[0];
|
||||
}
|
||||
self->block_size_ = 0;
|
||||
FN(HistogramClear)(&histograms[self->curr_histogram_ix_]);
|
||||
if (++self->merge_last_count_ > 1) {
|
||||
self->target_block_size_ += self->min_block_size_;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (is_final) {
|
||||
*self->histograms_size_ = split->num_types;
|
||||
split->num_blocks = self->num_blocks_;
|
||||
}
|
||||
}
|
||||
|
||||
/* Adds the next symbol to the current histogram. When the current histogram
|
||||
reaches the target size, decides on merging the block. */
|
||||
static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) {
|
||||
FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol);
|
||||
++self->block_size_;
|
||||
if (self->block_size_ == self->target_block_size_) {
|
||||
FN(BlockSplitterFinishBlock)(self, /* is_final = */ 0);
|
||||
}
|
||||
}
|
||||
|
||||
#undef HistogramType
|
61
enc/port.h
61
enc/port.h
@ -62,13 +62,13 @@
|
||||
but note: the FPU still sends unaligned loads and stores to a trap handler!
|
||||
*/
|
||||
|
||||
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
|
||||
#define BROTLI_UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64_t *>(_p))
|
||||
#define BROTLI_UNALIGNED_LOAD32(_p) (*(const uint32_t *)(_p))
|
||||
#define BROTLI_UNALIGNED_LOAD64(_p) (*(const uint64_t *)(_p))
|
||||
|
||||
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
|
||||
(*reinterpret_cast<uint32_t *>(_p) = (_val))
|
||||
(*(uint32_t *)(_p) = (_val))
|
||||
#define BROTLI_UNALIGNED_STORE64(_p, _val) \
|
||||
(*reinterpret_cast<uint64_t *>(_p) = (_val))
|
||||
(*(uint64_t *)(_p) = (_val))
|
||||
|
||||
#elif defined(__arm__) && \
|
||||
!defined(__ARM_ARCH_5__) && \
|
||||
@ -87,17 +87,17 @@
|
||||
do an unaligned read and rotate the words around a bit, or do the reads very
|
||||
slowly (trip through kernel mode). */
|
||||
|
||||
#define BROTLI_UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32_t *>(_p))
|
||||
#define BROTLI_UNALIGNED_LOAD32(_p) (*(const uint32_t *)(_p))
|
||||
#define BROTLI_UNALIGNED_STORE32(_p, _val) \
|
||||
(*reinterpret_cast<uint32_t *>(_p) = (_val))
|
||||
(*(uint32_t *)(_p) = (_val))
|
||||
|
||||
static inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
|
||||
static BROTLI_INLINE uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
|
||||
uint64_t t;
|
||||
memcpy(&t, p, sizeof t);
|
||||
return t;
|
||||
}
|
||||
|
||||
static inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
|
||||
static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
|
||||
memcpy(p, &v, sizeof v);
|
||||
}
|
||||
|
||||
@ -106,26 +106,63 @@ static inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
|
||||
/* These functions are provided for architectures that don't support */
|
||||
/* unaligned loads and stores. */
|
||||
|
||||
static inline uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
|
||||
static BROTLI_INLINE uint32_t BROTLI_UNALIGNED_LOAD32(const void *p) {
|
||||
uint32_t t;
|
||||
memcpy(&t, p, sizeof t);
|
||||
return t;
|
||||
}
|
||||
|
||||
static inline uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
|
||||
static BROTLI_INLINE uint64_t BROTLI_UNALIGNED_LOAD64(const void *p) {
|
||||
uint64_t t;
|
||||
memcpy(&t, p, sizeof t);
|
||||
return t;
|
||||
}
|
||||
|
||||
static inline void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
|
||||
static BROTLI_INLINE void BROTLI_UNALIGNED_STORE32(void *p, uint32_t v) {
|
||||
memcpy(p, &v, sizeof v);
|
||||
}
|
||||
|
||||
static inline void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
|
||||
static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64(void *p, uint64_t v) {
|
||||
memcpy(p, &v, sizeof v);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__cplusplus) && !defined(c_plusplus) && __STDC_VERSION__ >= 199901L
|
||||
#define BROTLI_RESTRICT restrict
|
||||
#elif BROTLI_GCC_VERSION > 295 || defined(__llvm__)
|
||||
#define BROTLI_RESTRICT __restrict
|
||||
#else
|
||||
#define BROTLI_RESTRICT
|
||||
#endif
|
||||
|
||||
#define _TEMPLATE(T) \
|
||||
static BROTLI_INLINE T brotli_min_ ## T (T a, T b) { return a < b ? a : b; } \
|
||||
static BROTLI_INLINE T brotli_max_ ## T (T a, T b) { return a > b ? a : b; }
|
||||
_TEMPLATE(double) _TEMPLATE(float) _TEMPLATE(int)
|
||||
_TEMPLATE(size_t) _TEMPLATE(uint32_t) _TEMPLATE(uint8_t)
|
||||
#undef _TEMPLATE
|
||||
#define BROTLI_MIN(T, A, B) (brotli_min_ ## T((A), (B)))
|
||||
#define BROTLI_MAX(T, A, B) (brotli_max_ ## T((A), (B)))
|
||||
|
||||
/* Exchanges elements I and J of array A, whose elements have type T.
   Expands to a braced block, so it is used as a statement. A, I and J are
   evaluated more than once and must therefore be free of side effects.
   The temporary avoids a double-underscore name, which is reserved for the
   implementation. */
#define BROTLI_SWAP(T, A, I, J) {    \
  T brotli_swap_tmp_ = (A)[(I)];     \
  (A)[(I)] = (A)[(J)];               \
  (A)[(J)] = brotli_swap_tmp_;       \
}
|
||||
|
||||
#define BROTLI_ENSURE_CAPACITY(M, T, A, C, R) { \
|
||||
if (C < (R)) { \
|
||||
size_t _new_size = (C == 0) ? (R) : C; \
|
||||
T* new_array; \
|
||||
while (_new_size < (R)) _new_size *= 2; \
|
||||
new_array = BROTLI_ALLOC((M), T, _new_size); \
|
||||
if (!BROTLI_IS_OOM(m)) \
|
||||
memcpy(new_array, A, C * sizeof(T)); \
|
||||
BROTLI_FREE((M), A); \
|
||||
A = new_array; \
|
||||
C = _new_size; \
|
||||
} \
|
||||
}
|
||||
|
||||
#endif /* BROTLI_ENC_PORT_H_ */
|
||||
|
85
enc/prefix.h
85
enc/prefix.h
@ -10,70 +10,43 @@
|
||||
#ifndef BROTLI_ENC_PREFIX_H_
|
||||
#define BROTLI_ENC_PREFIX_H_
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include "../common/port.h"
|
||||
#include "../common/types.h"
|
||||
#include "./fast_log.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static const uint32_t kNumInsertLenPrefixes = 24;
|
||||
static const uint32_t kNumCopyLenPrefixes = 24;
|
||||
static const uint32_t kNumCommandPrefixes = 704;
|
||||
static const uint32_t kNumBlockLenPrefixes = 26;
|
||||
static const uint32_t kNumDistanceShortCodes = 16;
|
||||
static const uint32_t kNumDistancePrefixes = 520;
|
||||
|
||||
// Represents the range of values belonging to a prefix code:
|
||||
// [offset, offset + 2^nbits)
|
||||
struct PrefixCodeRange {
|
||||
uint32_t offset;
|
||||
uint32_t nbits;
|
||||
};
|
||||
|
||||
static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
|
||||
{ 1, 2}, { 5, 2}, { 9, 2}, { 13, 2},
|
||||
{ 17, 3}, { 25, 3}, { 33, 3}, { 41, 3},
|
||||
{ 49, 4}, { 65, 4}, { 81, 4}, { 97, 4},
|
||||
{ 113, 5}, { 145, 5}, { 177, 5}, { 209, 5},
|
||||
{ 241, 6}, { 305, 6}, { 369, 7}, { 497, 8},
|
||||
{ 753, 9}, { 1265, 10}, {2289, 11}, {4337, 12},
|
||||
{8433, 13}, {16625, 24}
|
||||
};
|
||||
|
||||
inline void GetBlockLengthPrefixCode(uint32_t len, uint32_t* code,
|
||||
uint32_t* n_extra, uint32_t* extra) {
|
||||
*code = 0;
|
||||
while (*code < 25 && len >= kBlockLengthPrefixCode[*code + 1].offset) {
|
||||
++(*code);
|
||||
}
|
||||
*n_extra = kBlockLengthPrefixCode[*code].nbits;
|
||||
*extra = len - kBlockLengthPrefixCode[*code].offset;
|
||||
}
|
||||
|
||||
inline void PrefixEncodeCopyDistance(size_t distance_code,
|
||||
size_t num_direct_codes,
|
||||
size_t postfix_bits,
|
||||
uint16_t* code,
|
||||
uint32_t* extra_bits) {
|
||||
if (distance_code < kNumDistanceShortCodes + num_direct_codes) {
|
||||
*code = static_cast<uint16_t>(distance_code);
|
||||
static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
|
||||
size_t num_direct_codes,
|
||||
size_t postfix_bits,
|
||||
uint16_t* code,
|
||||
uint32_t* extra_bits) {
|
||||
if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes) {
|
||||
*code = (uint16_t)distance_code;
|
||||
*extra_bits = 0;
|
||||
return;
|
||||
} else {
|
||||
size_t dist = (1u << (postfix_bits + 2u)) +
|
||||
(distance_code - BROTLI_NUM_DISTANCE_SHORT_CODES - num_direct_codes);
|
||||
size_t bucket = Log2FloorNonZero(dist) - 1;
|
||||
size_t postfix_mask = (1u << postfix_bits) - 1;
|
||||
size_t postfix = dist & postfix_mask;
|
||||
size_t prefix = (dist >> bucket) & 1;
|
||||
size_t offset = (2 + prefix) << bucket;
|
||||
size_t nbits = bucket - postfix_bits;
|
||||
*code = (uint16_t)(
|
||||
(BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes +
|
||||
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
|
||||
*extra_bits = (uint32_t)(
|
||||
(nbits << 24) | ((dist - offset) >> postfix_bits));
|
||||
}
|
||||
distance_code -= kNumDistanceShortCodes + num_direct_codes; /* >= 0 */
|
||||
distance_code += (1u << (postfix_bits + 2u)); /* > 0 */
|
||||
size_t bucket = Log2FloorNonZero(distance_code) - 1;
|
||||
size_t postfix_mask = (1 << postfix_bits) - 1;
|
||||
size_t postfix = distance_code & postfix_mask;
|
||||
size_t prefix = (distance_code >> bucket) & 1;
|
||||
size_t offset = (2 + prefix) << bucket;
|
||||
size_t nbits = bucket - postfix_bits;
|
||||
*code = static_cast<uint16_t>(
|
||||
(kNumDistanceShortCodes + num_direct_codes +
|
||||
((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
|
||||
*extra_bits = static_cast<uint32_t>(
|
||||
(nbits << 24) | ((distance_code - offset) >> postfix_bits));
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_PREFIX_H_ */
|
||||
|
228
enc/ringbuffer.h
228
enc/ringbuffer.h
@ -9,12 +9,15 @@
|
||||
#ifndef BROTLI_ENC_RINGBUFFER_H_
|
||||
#define BROTLI_ENC_RINGBUFFER_H_
|
||||
|
||||
#include <cstdlib> /* free, realloc */
|
||||
#include <string.h> /* memcpy */
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of
|
||||
data in a circular manner: writing a byte writes it to:
|
||||
@ -25,121 +28,130 @@ namespace brotli {
|
||||
and another copy of the last two bytes:
|
||||
buffer_[-1] == buffer_[(1 << window_bits) - 1] and
|
||||
buffer_[-2] == buffer_[(1 << window_bits) - 2]. */
|
||||
class RingBuffer {
|
||||
public:
|
||||
RingBuffer(int window_bits, int tail_bits)
|
||||
: size_(1u << window_bits),
|
||||
mask_((1u << window_bits) - 1),
|
||||
tail_size_(1u << tail_bits),
|
||||
total_size_(size_ + tail_size_),
|
||||
cur_size_(0),
|
||||
pos_(0),
|
||||
data_(0),
|
||||
buffer_(0) {}
|
||||
|
||||
~RingBuffer(void) {
|
||||
free(data_);
|
||||
}
|
||||
|
||||
/* Allocates or re-allocates data_ to the given length + plus some slack
|
||||
region before and after. Fills the slack regions with zeros. */
|
||||
inline void InitBuffer(const uint32_t buflen) {
|
||||
static const size_t kSlackForEightByteHashingEverywhere = 7;
|
||||
cur_size_ = buflen;
|
||||
data_ = static_cast<uint8_t*>(realloc(
|
||||
data_, 2 + buflen + kSlackForEightByteHashingEverywhere));
|
||||
buffer_ = data_ + 2;
|
||||
buffer_[-2] = buffer_[-1] = 0;
|
||||
for (size_t i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
|
||||
buffer_[cur_size_ + i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Push bytes into the ring buffer. */
|
||||
void Write(const uint8_t *bytes, size_t n) {
|
||||
if (pos_ == 0 && n < tail_size_) {
|
||||
/* Special case for the first write: to process the first block, we don't
|
||||
need to allocate the whole ringbuffer and we don't need the tail
|
||||
either. However, we do this memory usage optimization only if the
|
||||
first write is less than the tail size, which is also the input block
|
||||
size, otherwise it is likely that other blocks will follow and we
|
||||
will need to reallocate to the full size anyway. */
|
||||
pos_ = static_cast<uint32_t>(n);
|
||||
InitBuffer(pos_);
|
||||
memcpy(buffer_, bytes, n);
|
||||
return;
|
||||
}
|
||||
if (cur_size_ < total_size_) {
|
||||
/* Lazily allocate the full buffer. */
|
||||
InitBuffer(total_size_);
|
||||
/* Initialize the last two bytes to zero, so that we don't have to worry
|
||||
later when we copy the last two bytes to the first two positions. */
|
||||
buffer_[size_ - 2] = 0;
|
||||
buffer_[size_ - 1] = 0;
|
||||
}
|
||||
const size_t masked_pos = pos_ & mask_;
|
||||
/* The length of the writes is limited so that we do not need to worry
|
||||
about a write */
|
||||
WriteTail(bytes, n);
|
||||
if (PREDICT_TRUE(masked_pos + n <= size_)) {
|
||||
/* A single write fits. */
|
||||
memcpy(&buffer_[masked_pos], bytes, n);
|
||||
} else {
|
||||
/* Split into two writes.
|
||||
Copy into the end of the buffer, including the tail buffer. */
|
||||
memcpy(&buffer_[masked_pos], bytes,
|
||||
std::min(n, total_size_ - masked_pos));
|
||||
/* Copy into the beginning of the buffer */
|
||||
memcpy(&buffer_[0], bytes + (size_ - masked_pos),
|
||||
n - (size_ - masked_pos));
|
||||
}
|
||||
buffer_[-2] = buffer_[size_ - 2];
|
||||
buffer_[-1] = buffer_[size_ - 1];
|
||||
pos_ += static_cast<uint32_t>(n);
|
||||
if (pos_ > (1u << 30)) { /* Wrap, but preserve not-a-first-lap feature. */
|
||||
pos_ = (pos_ & ((1u << 30) - 1)) | (1u << 30);
|
||||
}
|
||||
}
|
||||
|
||||
void Reset(void) {
|
||||
pos_ = 0;
|
||||
}
|
||||
|
||||
// Logical cursor position in the ring buffer.
|
||||
uint32_t position(void) const { return pos_; }
|
||||
|
||||
// Bit mask for getting the physical position for a logical position.
|
||||
uint32_t mask(void) const { return mask_; }
|
||||
|
||||
uint8_t *start(void) { return &buffer_[0]; }
|
||||
const uint8_t *start(void) const { return &buffer_[0]; }
|
||||
|
||||
private:
|
||||
void WriteTail(const uint8_t *bytes, size_t n) {
|
||||
const size_t masked_pos = pos_ & mask_;
|
||||
if (PREDICT_FALSE(masked_pos < tail_size_)) {
|
||||
// Just fill the tail buffer with the beginning data.
|
||||
const size_t p = size_ + masked_pos;
|
||||
memcpy(&buffer_[p], bytes, std::min(n, tail_size_ - masked_pos));
|
||||
}
|
||||
}
|
||||
|
||||
// Size of the ringbuffer is (1 << window_bits) + tail_size_.
|
||||
typedef struct RingBuffer {
|
||||
/* Size of the ringbuffer is (1 << window_bits) + tail_size_. */
|
||||
const uint32_t size_;
|
||||
const uint32_t mask_;
|
||||
const uint32_t tail_size_;
|
||||
const uint32_t total_size_;
|
||||
|
||||
uint32_t cur_size_;
|
||||
// Position to write in the ring buffer.
|
||||
/* Position to write in the ring buffer. */
|
||||
uint32_t pos_;
|
||||
// The actual ring buffer containing the copy of the last two bytes, the data,
|
||||
// and the copy of the beginning as a tail.
|
||||
/* The actual ring buffer containing the copy of the last two bytes, the data,
|
||||
and the copy of the beginning as a tail. */
|
||||
uint8_t *data_;
|
||||
// The start of the ringbuffer.
|
||||
/* The start of the ringbuffer. */
|
||||
uint8_t *buffer_;
|
||||
};
|
||||
} RingBuffer;
|
||||
|
||||
} // namespace brotli
|
||||
/* Resets the mutable part of |rb|: no storage attached, nothing written.
   The size/mask fields are not touched here; they are filled in by
   RingBufferSetup. */
static BROTLI_INLINE void RingBufferInit(RingBuffer* rb) {
  rb->data_ = 0;
  rb->buffer_ = 0;
  rb->pos_ = 0;
  rb->cur_size_ = 0;
}
|
||||
|
||||
/* Derives the geometry of |rb| from |window_bits| and |tail_bits|:
     size_       = 1 << window_bits
     mask_       = size_ - 1
     tail_size_  = 1 << tail_bits
     total_size_ = size_ + tail_size_
   NOTE(review): these members are declared const in the struct and are
   written here through casts that drop the qualifier; confirm this is
   acceptable for all supported compilers. */
static BROTLI_INLINE void RingBufferSetup(
    int window_bits, int tail_bits, RingBuffer* rb) {
  const uint32_t window_size = 1u << window_bits;
  const uint32_t tail_size = 1u << tail_bits;
  *(uint32_t*)&rb->size_ = window_size;
  *(uint32_t*)&rb->mask_ = window_size - 1;
  *(uint32_t*)&rb->tail_size_ = tail_size;
  *(uint32_t*)&rb->total_size_ = window_size + tail_size;
}
|
||||
|
||||
/* Hands the backing storage of |rb| (data_) to BROTLI_FREE on manager |m|.
   The remaining fields of |rb| are left as they are. */
static BROTLI_INLINE void RingBufferFree(MemoryManager* m, RingBuffer* rb) {
  BROTLI_FREE(m, rb->data_);
}
|
||||
|
||||
/* Allocates or re-allocates data_ to the given length + plus some slack
|
||||
region before and after. Fills the slack regions with zeros. */
|
||||
static BROTLI_INLINE void RingBufferInitBuffer(
|
||||
MemoryManager* m, const uint32_t buflen, RingBuffer* rb) {
|
||||
static const size_t kSlackForEightByteHashingEverywhere = 7;
|
||||
uint8_t* new_data = BROTLI_ALLOC(
|
||||
m, uint8_t, 2 + buflen + kSlackForEightByteHashingEverywhere);
|
||||
size_t i;
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
if (rb->data_) {
|
||||
memcpy(new_data, rb->data_,
|
||||
2 + rb->cur_size_ + kSlackForEightByteHashingEverywhere);
|
||||
BROTLI_FREE(m, rb->data_);
|
||||
}
|
||||
rb->data_ = new_data;
|
||||
rb->cur_size_ = buflen;
|
||||
rb->buffer_ = rb->data_ + 2;
|
||||
rb->buffer_[-2] = rb->buffer_[-1] = 0;
|
||||
for (i = 0; i < kSlackForEightByteHashingEverywhere; ++i) {
|
||||
rb->buffer_[rb->cur_size_ + i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Mirrors the start of a write into the tail region: when the masked write
   position lies inside the first tail_size_ bytes, up to
   (tail_size_ - masked position) of the |n| input bytes are also copied to
   the same offset past the end of the main window (size_ + masked position).
   Does nothing otherwise. */
static BROTLI_INLINE void RingBufferWriteTail(
    const uint8_t *bytes, size_t n, RingBuffer* rb) {
  const size_t offset = rb->pos_ & rb->mask_;
  if (PREDICT_FALSE(offset < rb->tail_size_)) {
    const size_t room = rb->tail_size_ - offset;
    const size_t count = BROTLI_MIN(size_t, n, room);
    memcpy(rb->buffer_ + rb->size_ + offset, bytes, count);
  }
}
|
||||
|
||||
/* Push bytes into the ring buffer. */
|
||||
static BROTLI_INLINE void RingBufferWrite(
|
||||
MemoryManager* m, const uint8_t *bytes, size_t n, RingBuffer* rb) {
|
||||
if (rb->pos_ == 0 && n < rb->tail_size_) {
|
||||
/* Special case for the first write: to process the first block, we don't
|
||||
need to allocate the whole ringbuffer and we don't need the tail
|
||||
either. However, we do this memory usage optimization only if the
|
||||
first write is less than the tail size, which is also the input block
|
||||
size, otherwise it is likely that other blocks will follow and we
|
||||
will need to reallocate to the full size anyway. */
|
||||
rb->pos_ = (uint32_t)n;
|
||||
RingBufferInitBuffer(m, rb->pos_, rb);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
memcpy(rb->buffer_, bytes, n);
|
||||
return;
|
||||
}
|
||||
if (rb->cur_size_ < rb->total_size_) {
|
||||
/* Lazily allocate the full buffer. */
|
||||
RingBufferInitBuffer(m, rb->total_size_, rb);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
/* Initialize the last two bytes to zero, so that we don't have to worry
|
||||
later when we copy the last two bytes to the first two positions. */
|
||||
rb->buffer_[rb->size_ - 2] = 0;
|
||||
rb->buffer_[rb->size_ - 1] = 0;
|
||||
}
|
||||
{
|
||||
const size_t masked_pos = rb->pos_ & rb->mask_;
|
||||
/* The length of the writes is limited so that we do not need to worry
|
||||
about a write */
|
||||
RingBufferWriteTail(bytes, n, rb);
|
||||
if (PREDICT_TRUE(masked_pos + n <= rb->size_)) {
|
||||
/* A single write fits. */
|
||||
memcpy(&rb->buffer_[masked_pos], bytes, n);
|
||||
} else {
|
||||
/* Split into two writes.
|
||||
Copy into the end of the buffer, including the tail buffer. */
|
||||
memcpy(&rb->buffer_[masked_pos], bytes,
|
||||
BROTLI_MIN(size_t, n, rb->total_size_ - masked_pos));
|
||||
/* Copy into the beginning of the buffer */
|
||||
memcpy(&rb->buffer_[0], bytes + (rb->size_ - masked_pos),
|
||||
n - (rb->size_ - masked_pos));
|
||||
}
|
||||
}
|
||||
rb->buffer_[-2] = rb->buffer_[rb->size_ - 2];
|
||||
rb->buffer_[-1] = rb->buffer_[rb->size_ - 1];
|
||||
rb->pos_ += (uint32_t)n;
|
||||
if (rb->pos_ > (1u << 30)) {
|
||||
/* Wrap, but preserve not-a-first-lap feature. */
|
||||
rb->pos_ = (rb->pos_ & ((1u << 30) - 1)) | (1u << 30);
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_RINGBUFFER_H_ */
|
||||
|
@ -6,86 +6,102 @@
|
||||
|
||||
#include "./static_dict.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "../common/dictionary.h"
|
||||
#include "./find_match_length.h"
|
||||
#include "./port.h"
|
||||
#include "./static_dict_lut.h"
|
||||
#include "./transform.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
inline uint32_t Hash(const uint8_t *data) {
|
||||
static const uint8_t kUppercaseFirst = 10;
|
||||
static const uint8_t kOmitLastNTransforms[10] = {
|
||||
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
|
||||
};
|
||||
|
||||
/* Hashes the 32-bit value loaded from data: the value is multiplied by
   kDictHashMul32 and only the top kDictNumBits bits of the product are
   returned. In this file the result is used as an index into
   kStaticDictionaryBuckets. */
static BROTLI_INLINE uint32_t Hash(const uint8_t *data) {
|
||||
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
|
||||
/* The higher bits contain more mixture from the multiplication,
|
||||
so we take our results from there. */
|
||||
return h >> (32 - kDictNumBits);
|
||||
}
|
||||
|
||||
inline void AddMatch(size_t distance, size_t len, size_t len_code,
|
||||
uint32_t* matches) {
|
||||
uint32_t match = static_cast<uint32_t>((distance << 5) + len_code);
|
||||
matches[len] = std::min(matches[len], match);
|
||||
/* Records a candidate match of length len in matches[len].
   The candidate is packed as (distance << 5) + len_code, and the slot keeps
   the smaller of the packed candidate and the value it already holds. */
static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
|
||||
uint32_t* matches) {
|
||||
uint32_t match = (uint32_t)((distance << 5) + len_code);
|
||||
matches[len] = BROTLI_MIN(uint32_t, matches[len], match);
|
||||
}
|
||||
|
||||
inline size_t DictMatchLength(const uint8_t* data,
|
||||
size_t id,
|
||||
size_t len,
|
||||
size_t maxlen) {
|
||||
static BROTLI_INLINE size_t DictMatchLength(const uint8_t* data,
|
||||
size_t id,
|
||||
size_t len,
|
||||
size_t maxlen) {
|
||||
const size_t offset = kBrotliDictionaryOffsetsByLength[len] + len * id;
|
||||
return FindMatchLengthWithLimit(&kBrotliDictionary[offset], data,
|
||||
std::min(len, maxlen));
|
||||
BROTLI_MIN(size_t, len, maxlen));
|
||||
}
|
||||
|
||||
inline bool IsMatch(DictWord w, const uint8_t* data, size_t max_length) {
|
||||
if (w.len > max_length) return false;
|
||||
const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] + w.len * w.idx;
|
||||
const uint8_t* dict = &kBrotliDictionary[offset];
|
||||
if (w.transform == 0) {
|
||||
static BROTLI_INLINE int IsMatch(
|
||||
DictWord w, const uint8_t* data, size_t max_length) {
|
||||
if (w.len > max_length) {
|
||||
return 0;
|
||||
} else {
|
||||
const size_t offset = kBrotliDictionaryOffsetsByLength[w.len] +
|
||||
(size_t)w.len * (size_t)w.idx;
|
||||
const uint8_t* dict = &kBrotliDictionary[offset];
|
||||
if (w.transform == 0) {
|
||||
/* Match against base dictionary word. */
|
||||
return FindMatchLengthWithLimit(dict, data, w.len) == w.len;
|
||||
} else if (w.transform == 10) {
|
||||
return FindMatchLengthWithLimit(dict, data, w.len) == w.len;
|
||||
} else if (w.transform == 10) {
|
||||
/* Match against uppercase first transform.
|
||||
Note that there are only ASCII uppercase words in the lookup table. */
|
||||
return (dict[0] >= 'a' && dict[0] <= 'z' &&
|
||||
(dict[0] ^ 32) == data[0] &&
|
||||
FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
|
||||
w.len - 1u);
|
||||
} else {
|
||||
return (dict[0] >= 'a' && dict[0] <= 'z' &&
|
||||
(dict[0] ^ 32) == data[0] &&
|
||||
FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
|
||||
w.len - 1u);
|
||||
} else {
|
||||
/* Match against uppercase all transform.
|
||||
Note that there are only ASCII uppercase words in the lookup table. */
|
||||
for (size_t i = 0; i < w.len; ++i) {
|
||||
if (dict[i] >= 'a' && dict[i] <= 'z') {
|
||||
if ((dict[i] ^ 32) != data[i]) return false;
|
||||
} else {
|
||||
if (dict[i] != data[i]) return false;
|
||||
size_t i;
|
||||
for (i = 0; i < w.len; ++i) {
|
||||
if (dict[i] >= 'a' && dict[i] <= 'z') {
|
||||
if ((dict[i] ^ 32) != data[i]) return 0;
|
||||
} else {
|
||||
if (dict[i] != data[i]) return 0;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
||||
size_t min_length,
|
||||
size_t max_length,
|
||||
uint32_t* matches) {
|
||||
bool found_match = false;
|
||||
size_t key = Hash(data);
|
||||
size_t bucket = kStaticDictionaryBuckets[key];
|
||||
if (bucket != 0) {
|
||||
size_t num = bucket & 0xff;
|
||||
size_t offset = bucket >> 8;
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
int BrotliFindAllStaticDictionaryMatches(const uint8_t* data,
|
||||
size_t min_length,
|
||||
size_t max_length,
|
||||
uint32_t* matches) {
|
||||
int has_found_match = 0;
|
||||
size_t key0 = Hash(data);
|
||||
size_t bucket0 = kStaticDictionaryBuckets[key0];
|
||||
if (bucket0 != 0) {
|
||||
size_t num = bucket0 & 0xff;
|
||||
size_t offset = bucket0 >> 8;
|
||||
size_t i;
|
||||
for (i = 0; i < num; ++i) {
|
||||
const DictWord w = kStaticDictionaryWords[offset + i];
|
||||
const size_t l = w.len;
|
||||
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
|
||||
const size_t id = w.idx;
|
||||
if (w.transform == 0) {
|
||||
const size_t matchlen = DictMatchLength(data, id, l, max_length);
|
||||
const uint8_t* s;
|
||||
size_t minlen;
|
||||
size_t maxlen;
|
||||
size_t len;
|
||||
/* Transform "" + kIdentity + "" */
|
||||
if (matchlen == l) {
|
||||
AddMatch(id, l, l, matches);
|
||||
found_match = true;
|
||||
has_found_match = 1;
|
||||
}
|
||||
/* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */
|
||||
if (matchlen >= l - 1) {
|
||||
@ -95,20 +111,20 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
||||
data[l + 2] == ' ') {
|
||||
AddMatch(id + 49 * n, l + 3, l, matches);
|
||||
}
|
||||
found_match = true;
|
||||
has_found_match = 1;
|
||||
}
|
||||
/* Transform "" + kOmitLastN + "" (N = 2 .. 9) */
|
||||
size_t minlen = min_length;
|
||||
if (l > 9) minlen = std::max(minlen, l - 9);
|
||||
size_t maxlen = std::min(matchlen, l - 2);
|
||||
for (size_t len = minlen; len <= maxlen; ++len) {
|
||||
minlen = min_length;
|
||||
if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
|
||||
maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
|
||||
for (len = minlen; len <= maxlen; ++len) {
|
||||
AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
|
||||
found_match = true;
|
||||
has_found_match = 1;
|
||||
}
|
||||
if (matchlen < l || l + 6 >= max_length) {
|
||||
continue;
|
||||
}
|
||||
const uint8_t* s = &data[l];
|
||||
s = &data[l];
|
||||
/* Transforms "" + kIdentity + <suffix> */
|
||||
if (s[0] == ' ') {
|
||||
AddMatch(id + n, l + 1, l, matches);
|
||||
@ -258,44 +274,45 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
||||
} else {
|
||||
/* Set is_all_caps=0 for kUppercaseFirst and
|
||||
is_all_caps=1 otherwise (kUppercaseAll) transform. */
|
||||
const bool t = w.transform != kUppercaseFirst;
|
||||
const int is_all_caps = (w.transform != kUppercaseFirst) ? 1 : 0;
|
||||
const uint8_t* s;
|
||||
if (!IsMatch(w, data, max_length)) {
|
||||
continue;
|
||||
}
|
||||
/* Transform "" + kUppercase{First,All} + "" */
|
||||
AddMatch(id + (t ? 44 : 9) * n, l, l, matches);
|
||||
found_match = true;
|
||||
AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
|
||||
has_found_match = 1;
|
||||
if (l + 1 >= max_length) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms "" + kUppercase{First,All} + <suffix> */
|
||||
const uint8_t* s = &data[l];
|
||||
s = &data[l];
|
||||
if (s[0] == ' ') {
|
||||
AddMatch(id + (t ? 68 : 4) * n, l + 1, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
|
||||
} else if (s[0] == '"') {
|
||||
AddMatch(id + (t ? 87 : 66) * n, l + 1, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
|
||||
if (s[1] == '>') {
|
||||
AddMatch(id + (t ? 97 : 69) * n, l + 2, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == '.') {
|
||||
AddMatch(id + (t ? 101 : 79) * n, l + 1, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + (t ? 114 : 88) * n, l + 2, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == ',') {
|
||||
AddMatch(id + (t ? 112 : 99) * n, l + 1, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + (t ? 107 : 58) * n, l + 2, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == '\'') {
|
||||
AddMatch(id + (t ? 94 : 74) * n, l + 1, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
|
||||
} else if (s[0] == '(') {
|
||||
AddMatch(id + (t ? 113 : 78) * n, l + 1, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
|
||||
} else if (s[0] == '=') {
|
||||
if (s[1] == '"') {
|
||||
AddMatch(id + (t ? 105 : 104) * n, l + 2, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
|
||||
} else if (s[1] == '\'') {
|
||||
AddMatch(id + (t ? 116 : 108) * n, l + 2, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -303,29 +320,31 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
||||
}
|
||||
/* Transforms with prefixes " " and "." */
|
||||
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
|
||||
bool is_space = (data[0] == ' ');
|
||||
key = Hash(&data[1]);
|
||||
bucket = kStaticDictionaryBuckets[key];
|
||||
size_t num = bucket & 0xff;
|
||||
size_t offset = bucket >> 8;
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
int is_space = (data[0] == ' ') ? 1 : 0;
|
||||
size_t key1 = Hash(&data[1]);
|
||||
size_t bucket1 = kStaticDictionaryBuckets[key1];
|
||||
size_t num = bucket1 & 0xff;
|
||||
size_t offset = bucket1 >> 8;
|
||||
size_t i;
|
||||
for (i = 0; i < num; ++i) {
|
||||
const DictWord w = kStaticDictionaryWords[offset + i];
|
||||
const size_t l = w.len;
|
||||
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
|
||||
const size_t id = w.idx;
|
||||
if (w.transform == 0) {
|
||||
const uint8_t* s;
|
||||
if (!IsMatch(w, &data[1], max_length - 1)) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms " " + kIdentity + "" and "." + kIdentity + "" */
|
||||
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
|
||||
found_match = true;
|
||||
has_found_match = 1;
|
||||
if (l + 2 >= max_length) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
|
||||
*/
|
||||
const uint8_t* s = &data[l + 1];
|
||||
s = &data[l + 1];
|
||||
if (s[0] == ' ') {
|
||||
AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
|
||||
} else if (s[0] == '(') {
|
||||
@ -352,37 +371,38 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
||||
} else if (is_space) {
|
||||
/* Set is_all_caps=0 for kUppercaseFirst and
|
||||
is_all_caps=1 otherwise (kUppercaseAll) transform. */
|
||||
const bool t = w.transform != kUppercaseFirst;
|
||||
const int is_all_caps = (w.transform != kUppercaseFirst) ? 1 : 0;
|
||||
const uint8_t* s;
|
||||
if (!IsMatch(w, &data[1], max_length - 1)) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms " " + kUppercase{First,All} + "" */
|
||||
AddMatch(id + (t ? 85 : 30) * n, l + 1, l, matches);
|
||||
found_match = true;
|
||||
AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
|
||||
has_found_match = 1;
|
||||
if (l + 2 >= max_length) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms " " + kUppercase{First,All} + <suffix> */
|
||||
const uint8_t* s = &data[l + 1];
|
||||
s = &data[l + 1];
|
||||
if (s[0] == ' ') {
|
||||
AddMatch(id + (t ? 83 : 15) * n, l + 2, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
|
||||
} else if (s[0] == ',') {
|
||||
if (!t) {
|
||||
if (!is_all_caps) {
|
||||
AddMatch(id + 109 * n, l + 2, l, matches);
|
||||
}
|
||||
}
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + (t ? 111 : 65) * n, l + 3, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == '.') {
|
||||
AddMatch(id + (t ? 115 : 96) * n, l + 2, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + (t ? 117 : 91) * n, l + 3, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == '=') {
|
||||
if (s[1] == '"') {
|
||||
AddMatch(id + (t ? 110 : 118) * n, l + 3, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
|
||||
} else if (s[1] == '\'') {
|
||||
AddMatch(id + (t ? 119 : 120) * n, l + 3, l, matches);
|
||||
AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -393,11 +413,12 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
||||
if ((data[1] == ' ' &&
|
||||
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
|
||||
(data[0] == 0xc2 && data[1] == 0xa0)) {
|
||||
key = Hash(&data[2]);
|
||||
bucket = kStaticDictionaryBuckets[key];
|
||||
size_t num = bucket & 0xff;
|
||||
size_t offset = bucket >> 8;
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
size_t key2 = Hash(&data[2]);
|
||||
size_t bucket2 = kStaticDictionaryBuckets[key2];
|
||||
size_t num = bucket2 & 0xff;
|
||||
size_t offset = bucket2 >> 8;
|
||||
size_t i;
|
||||
for (i = 0; i < num; ++i) {
|
||||
const DictWord w = kStaticDictionaryWords[offset + i];
|
||||
const size_t l = w.len;
|
||||
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
|
||||
@ -405,11 +426,11 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
||||
if (w.transform == 0 && IsMatch(w, &data[2], max_length - 2)) {
|
||||
if (data[0] == 0xc2) {
|
||||
AddMatch(id + 102 * n, l + 2, l, matches);
|
||||
found_match = true;
|
||||
has_found_match = 1;
|
||||
} else if (l + 2 < max_length && data[l + 2] == ' ') {
|
||||
size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
|
||||
AddMatch(id + t * n, l + 3, l, matches);
|
||||
found_match = true;
|
||||
has_found_match = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -421,18 +442,19 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
||||
data[3] == 'e' && data[4] == ' ') ||
|
||||
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
|
||||
data[3] == 'm' && data[4] == '/')) {
|
||||
key = Hash(&data[5]);
|
||||
bucket = kStaticDictionaryBuckets[key];
|
||||
size_t num = bucket & 0xff;
|
||||
size_t offset = bucket >> 8;
|
||||
for (size_t i = 0; i < num; ++i) {
|
||||
size_t key5 = Hash(&data[5]);
|
||||
size_t bucket5 = kStaticDictionaryBuckets[key5];
|
||||
size_t num = bucket5 & 0xff;
|
||||
size_t offset = bucket5 >> 8;
|
||||
size_t i;
|
||||
for (i = 0; i < num; ++i) {
|
||||
const DictWord w = kStaticDictionaryWords[offset + i];
|
||||
const size_t l = w.len;
|
||||
const size_t n = 1u << kBrotliDictionarySizeBitsByLength[l];
|
||||
const size_t id = w.idx;
|
||||
if (w.transform == 0 && IsMatch(w, &data[5], max_length - 5)) {
|
||||
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
|
||||
found_match = true;
|
||||
has_found_match = 1;
|
||||
if (l + 5 < max_length) {
|
||||
const uint8_t* s = &data[l + 5];
|
||||
if (data[0] == ' ') {
|
||||
@ -450,7 +472,9 @@ bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
||||
}
|
||||
}
|
||||
}
|
||||
return found_match;
|
||||
return has_found_match;
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
@ -10,10 +10,13 @@
|
||||
#define BROTLI_ENC_STATIC_DICT_H_
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static const size_t kMaxDictionaryMatchLen = 37;
|
||||
#define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37
|
||||
static const uint32_t kInvalidMatch = 0xfffffff;
|
||||
|
||||
/* Matches data against static dictionary words, and for each length l,
|
||||
@ -23,11 +26,13 @@ static const uint32_t kInvalidMatch = 0xfffffff;
|
||||
Prerequisites:
|
||||
matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
|
||||
all elements are initialized to kInvalidMatch */
|
||||
bool FindAllStaticDictionaryMatches(const uint8_t* data,
|
||||
size_t min_length,
|
||||
size_t max_length,
|
||||
uint32_t* matches);
|
||||
BROTLI_INTERNAL int BrotliFindAllStaticDictionaryMatches(const uint8_t* data,
|
||||
size_t min_length,
|
||||
size_t max_length,
|
||||
uint32_t* matches);
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_STATIC_DICT_H_ */
|
||||
|
@ -6,21 +6,23 @@
|
||||
|
||||
/* Lookup table for static dictionary and transforms. */
|
||||
|
||||
#ifndef BROTLI_ENC_DICTIONARY_LUT_H_
|
||||
#define BROTLI_ENC_DICTIONARY_LUT_H_
|
||||
#ifndef BROTLI_ENC_STATIC_DICT_LUT_H_
|
||||
#define BROTLI_ENC_STATIC_DICT_LUT_H_
|
||||
|
||||
#include "../common/types.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static const int kDictNumBits = 15;
|
||||
static const uint32_t kDictHashMul32 = 0x1e35a7bd;
|
||||
|
||||
struct DictWord {
|
||||
typedef struct DictWord {
|
||||
uint8_t len;
|
||||
uint8_t transform;
|
||||
uint16_t idx;
|
||||
};
|
||||
} DictWord;
|
||||
|
||||
static const uint32_t kStaticDictionaryBuckets[] = {
|
||||
0x000002, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000,
|
||||
@ -12050,6 +12052,8 @@ static const DictWord kStaticDictionaryWords[] = {
|
||||
{ 12, 10, 542 }, { 14, 11, 410 }, { 9, 11, 660 }, { 10, 11, 347 },
|
||||
};
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_STATIC_DICT_LUT_H_ */
|
||||
|
@ -13,7 +13,6 @@
|
||||
#include <string>
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
|
248
enc/transform.h
248
enc/transform.h
@ -1,248 +0,0 @@
|
||||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
// Transformations on dictionary words.
|
||||
|
||||
#ifndef BROTLI_ENC_TRANSFORM_H_
|
||||
#define BROTLI_ENC_TRANSFORM_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "../common/dictionary.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
// Kinds of transformation that TransformWord can apply to a dictionary word.
// The numeric values matter: TransformWord derives the number of bytes to
// drop directly from them.
//   kIdentity            - the word is copied unchanged.
//   kOmitLast1..9        - the last N bytes are dropped (N == enum value).
//   kUppercaseFirst/All  - ToUpperCase is applied to the first character,
//                          or to every character in turn.
//   kOmitFirst1..9       - the first N bytes are dropped
//                          (N == enum value - (kOmitFirst1 - 1)).
enum WordTransformType {
|
||||
kIdentity = 0,
|
||||
kOmitLast1 = 1,
|
||||
kOmitLast2 = 2,
|
||||
kOmitLast3 = 3,
|
||||
kOmitLast4 = 4,
|
||||
kOmitLast5 = 5,
|
||||
kOmitLast6 = 6,
|
||||
kOmitLast7 = 7,
|
||||
kOmitLast8 = 8,
|
||||
kOmitLast9 = 9,
|
||||
kUppercaseFirst = 10,
|
||||
kUppercaseAll = 11,
|
||||
kOmitFirst1 = 12,
|
||||
kOmitFirst2 = 13,
|
||||
kOmitFirst3 = 14,
|
||||
kOmitFirst4 = 15,
|
||||
kOmitFirst5 = 16,
|
||||
kOmitFirst6 = 17,
|
||||
kOmitFirst7 = 18,
|
||||
kOmitFirst8 = 19,
|
||||
kOmitFirst9 = 20
|
||||
};
|
||||
|
||||
// One entry of the transform table. ApplyTransform produces
// prefix + TransformWord(word_transform, word) + suffix.
struct Transform {
|
||||
// Text placed before the transformed word.
const char* prefix;
|
||||
// How the dictionary word itself is modified.
WordTransformType word_transform;
|
||||
// Text placed after the transformed word.
const char* suffix;
|
||||
};
|
||||
|
||||
// Table of all transforms as { prefix, word transform, suffix }.
// The position of an entry is its transform index:
// GetTransformedDictionaryWord selects an entry with
// word_id / (number of words of that length), and kOmitLastNTransforms
// stores indices into this table, so the order must not be changed.
static const Transform kTransforms[] = {
|
||||
{ "", kIdentity, "" },
|
||||
{ "", kIdentity, " " },
|
||||
{ " ", kIdentity, " " },
|
||||
{ "", kOmitFirst1, "" },
|
||||
{ "", kUppercaseFirst, " " },
|
||||
{ "", kIdentity, " the " },
|
||||
{ " ", kIdentity, "" },
|
||||
{ "s ", kIdentity, " " },
|
||||
{ "", kIdentity, " of " },
|
||||
{ "", kUppercaseFirst, "" },
|
||||
{ "", kIdentity, " and " },
|
||||
{ "", kOmitFirst2, "" },
|
||||
{ "", kOmitLast1, "" },
|
||||
{ ", ", kIdentity, " " },
|
||||
{ "", kIdentity, ", " },
|
||||
{ " ", kUppercaseFirst, " " },
|
||||
{ "", kIdentity, " in " },
|
||||
{ "", kIdentity, " to " },
|
||||
{ "e ", kIdentity, " " },
|
||||
{ "", kIdentity, "\"" },
|
||||
{ "", kIdentity, "." },
|
||||
{ "", kIdentity, "\">" },
|
||||
{ "", kIdentity, "\n" },
|
||||
{ "", kOmitLast3, "" },
|
||||
{ "", kIdentity, "]" },
|
||||
{ "", kIdentity, " for " },
|
||||
{ "", kOmitFirst3, "" },
|
||||
{ "", kOmitLast2, "" },
|
||||
{ "", kIdentity, " a " },
|
||||
{ "", kIdentity, " that " },
|
||||
{ " ", kUppercaseFirst, "" },
|
||||
{ "", kIdentity, ". " },
|
||||
{ ".", kIdentity, "" },
|
||||
{ " ", kIdentity, ", " },
|
||||
{ "", kOmitFirst4, "" },
|
||||
{ "", kIdentity, " with " },
|
||||
{ "", kIdentity, "'" },
|
||||
{ "", kIdentity, " from " },
|
||||
{ "", kIdentity, " by " },
|
||||
{ "", kOmitFirst5, "" },
|
||||
{ "", kOmitFirst6, "" },
|
||||
{ " the ", kIdentity, "" },
|
||||
{ "", kOmitLast4, "" },
|
||||
{ "", kIdentity, ". The " },
|
||||
{ "", kUppercaseAll, "" },
|
||||
{ "", kIdentity, " on " },
|
||||
{ "", kIdentity, " as " },
|
||||
{ "", kIdentity, " is " },
|
||||
{ "", kOmitLast7, "" },
|
||||
{ "", kOmitLast1, "ing " },
|
||||
{ "", kIdentity, "\n\t" },
|
||||
{ "", kIdentity, ":" },
|
||||
{ " ", kIdentity, ". " },
|
||||
{ "", kIdentity, "ed " },
|
||||
{ "", kOmitFirst9, "" },
|
||||
{ "", kOmitFirst7, "" },
|
||||
{ "", kOmitLast6, "" },
|
||||
{ "", kIdentity, "(" },
|
||||
{ "", kUppercaseFirst, ", " },
|
||||
{ "", kOmitLast8, "" },
|
||||
{ "", kIdentity, " at " },
|
||||
{ "", kIdentity, "ly " },
|
||||
{ " the ", kIdentity, " of " },
|
||||
{ "", kOmitLast5, "" },
|
||||
{ "", kOmitLast9, "" },
|
||||
{ " ", kUppercaseFirst, ", " },
|
||||
{ "", kUppercaseFirst, "\"" },
|
||||
{ ".", kIdentity, "(" },
|
||||
{ "", kUppercaseAll, " " },
|
||||
{ "", kUppercaseFirst, "\">" },
|
||||
{ "", kIdentity, "=\"" },
|
||||
{ " ", kIdentity, "." },
|
||||
{ ".com/", kIdentity, "" },
|
||||
{ " the ", kIdentity, " of the " },
|
||||
{ "", kUppercaseFirst, "'" },
|
||||
{ "", kIdentity, ". This " },
|
||||
{ "", kIdentity, "," },
|
||||
{ ".", kIdentity, " " },
|
||||
{ "", kUppercaseFirst, "(" },
|
||||
{ "", kUppercaseFirst, "." },
|
||||
{ "", kIdentity, " not " },
|
||||
{ " ", kIdentity, "=\"" },
|
||||
{ "", kIdentity, "er " },
|
||||
{ " ", kUppercaseAll, " " },
|
||||
{ "", kIdentity, "al " },
|
||||
{ " ", kUppercaseAll, "" },
|
||||
{ "", kIdentity, "='" },
|
||||
{ "", kUppercaseAll, "\"" },
|
||||
{ "", kUppercaseFirst, ". " },
|
||||
{ " ", kIdentity, "(" },
|
||||
{ "", kIdentity, "ful " },
|
||||
{ " ", kUppercaseFirst, ". " },
|
||||
{ "", kIdentity, "ive " },
|
||||
{ "", kIdentity, "less " },
|
||||
{ "", kUppercaseAll, "'" },
|
||||
{ "", kIdentity, "est " },
|
||||
{ " ", kUppercaseFirst, "." },
|
||||
{ "", kUppercaseAll, "\">" },
|
||||
{ " ", kIdentity, "='" },
|
||||
{ "", kUppercaseFirst, "," },
|
||||
{ "", kIdentity, "ize " },
|
||||
{ "", kUppercaseAll, "." },
|
||||
{ "\xc2\xa0", kIdentity, "" },
|
||||
{ " ", kIdentity, "," },
|
||||
{ "", kUppercaseFirst, "=\"" },
|
||||
{ "", kUppercaseAll, "=\"" },
|
||||
{ "", kIdentity, "ous " },
|
||||
{ "", kUppercaseAll, ", " },
|
||||
{ "", kUppercaseFirst, "='" },
|
||||
{ " ", kUppercaseFirst, "," },
|
||||
{ " ", kUppercaseAll, "=\"" },
|
||||
{ " ", kUppercaseAll, ", " },
|
||||
{ "", kUppercaseAll, "," },
|
||||
{ "", kUppercaseAll, "(" },
|
||||
{ "", kUppercaseAll, ". " },
|
||||
{ " ", kUppercaseAll, "." },
|
||||
{ "", kUppercaseAll, "='" },
|
||||
{ " ", kUppercaseAll, ". " },
|
||||
{ " ", kUppercaseFirst, "=\"" },
|
||||
{ " ", kUppercaseAll, "='" },
|
||||
{ " ", kUppercaseFirst, "='" },
|
||||
};
|
||||
|
||||
// Number of entries in kTransforms, computed from the array itself.
static const size_t kNumTransforms =
|
||||
sizeof(kTransforms) / sizeof(kTransforms[0]);
|
||||
|
||||
// kOmitLastNTransforms[N] is the index in kTransforms of the entry
// { "", kOmitLastN, "" } for N = 1..9; element 0 is the index of the plain
// identity transform { "", kIdentity, "" }.
static const size_t kOmitLastNTransforms[10] = {
|
||||
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
|
||||
};
|
||||
|
||||
// Upper-cases, in place, the single character that starts at p and returns
// how many bytes that character occupied (1, 2 or 3), so a caller can step
// to the next character. len is the number of bytes available at p.
// The width is chosen from the first byte: below 0xc0 -> 1 byte,
// below 0xe0 -> 2 bytes, otherwise 3 bytes (these thresholds look like UTF-8
// lead-byte ranges; presumably the input is UTF-8 - confirm with the caller).
// Note that p[0] is read unconditionally, including when len is 0.
static size_t ToUpperCase(uint8_t *p, size_t len) {
|
||||
// One-byte case (or only one byte available): only 'a'..'z' is modified,
// by flipping bit 5; every other byte is left as is.
if (len == 1 || p[0] < 0xc0) {
|
||||
if (p[0] >= 'a' && p[0] <= 'z') {
|
||||
p[0] ^= 32;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
// Two-byte case: bit 5 of the second byte is flipped unconditionally.
if (p[0] < 0xe0) {
|
||||
p[1] ^= 32;
|
||||
return 2;
|
||||
}
|
||||
// Three-byte lead byte but only two bytes available: consume both bytes
// without modifying them, so p[2] is never touched out of range.
if (len == 2) {
|
||||
return 2;
|
||||
}
|
||||
// Three-byte case: the third byte is XOR-ed with 5.
p[2] ^= 5;
|
||||
return 3;
|
||||
}
|
||||
|
||||
// Returns a copy of the len bytes at word with transform_type applied.
//   kIdentity / kOmitLastN : the last N bytes are removed (N = 0 for
//                            kIdentity); empty string if len <= N.
//   kOmitFirstN            : the first N bytes are removed; empty string if
//                            len <= N.
//   kUppercaseFirst        : ToUpperCase is applied once, at the start.
//   kUppercaseAll          : ToUpperCase is applied repeatedly until the
//                            whole word has been walked.
inline std::string TransformWord(
|
||||
WordTransformType transform_type, const uint8_t* word, size_t len) {
|
||||
// kIdentity (0) and kOmitLast1..9: the enum value is the number of
// trailing bytes to drop.
if (transform_type <= kOmitLast9) {
|
||||
if (len <= static_cast<size_t>(transform_type)) {
|
||||
return std::string();
|
||||
}
|
||||
return std::string(word, word + len - transform_type);
|
||||
}
|
||||
|
||||
// kOmitFirst1..9: map the enum value back to the number of leading bytes
// to drop (kOmitFirst1 -> 1, ..., kOmitFirst9 -> 9).
if (transform_type >= kOmitFirst1) {
|
||||
const size_t skip = transform_type - (kOmitFirst1 - 1);
|
||||
if (len <= skip) {
|
||||
return std::string();
|
||||
}
|
||||
return std::string(word + skip, word + len);
|
||||
}
|
||||
|
||||
// Remaining cases are the uppercase transforms: copy the word, then modify
// the copy in place through a byte pointer.
std::string ret = std::string(word, word + len);
|
||||
uint8_t *uppercase = reinterpret_cast<uint8_t*>(&ret[0]);
|
||||
if (transform_type == kUppercaseFirst) {
|
||||
ToUpperCase(uppercase, len);
|
||||
} else if (transform_type == kUppercaseAll) {
|
||||
size_t position = 0;
|
||||
// Advance by the number of bytes ToUpperCase reports it consumed, passing
// the remaining byte count so it does not write past the end.
while (position < len) {
|
||||
size_t step = ToUpperCase(uppercase, len - position);
|
||||
uppercase += step;
|
||||
position += step;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Applies the full transform t to the len bytes at word and returns
// t.prefix + TransformWord(t.word_transform, word, len) + t.suffix.
inline std::string ApplyTransform(
|
||||
const Transform& t, const uint8_t* word, size_t len) {
|
||||
return std::string(t.prefix) +
|
||||
TransformWord(t.word_transform, word, len) + std::string(t.suffix);
|
||||
}
|
||||
|
||||
// Returns the transformed dictionary word identified by (len_code, word_id).
// len_code is used both as the index into the per-length dictionary tables
// and as the word length in bytes. word_id combines two values:
//   word_id / num_words  -> index into kTransforms
//   word_id % num_words  -> index of the word among words of that length
// where num_words = 1 << kBrotliDictionarySizeBitsByLength[len_code].
// The transform index is not range-checked against kNumTransforms here.
inline std::string GetTransformedDictionaryWord(size_t len_code,
|
||||
size_t word_id) {
|
||||
size_t num_words = 1u << kBrotliDictionarySizeBitsByLength[len_code];
|
||||
size_t offset = kBrotliDictionaryOffsetsByLength[len_code];
|
||||
size_t t = word_id / num_words;
|
||||
size_t word_idx = word_id % num_words;
|
||||
// Words of one length are stored back to back, len_code bytes each.
offset += len_code * word_idx;
|
||||
const uint8_t* word = &kBrotliDictionary[offset];
|
||||
return ApplyTransform(kTransforms[t], word, len_code);
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_TRANSFORM_H_
|
@ -10,11 +10,12 @@
|
||||
|
||||
#include "../common/types.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
|
||||
static size_t BrotliParseAsUTF8(
|
||||
int* symbol, const uint8_t* input, size_t size) {
|
||||
/* ASCII */
|
||||
if ((input[0] & 0x80) == 0) {
|
||||
*symbol = input[0];
|
||||
@ -63,21 +64,21 @@ size_t ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/* Returns 1 if at least min_fraction of the data is UTF8-encoded.*/
|
||||
bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
|
||||
const size_t length, const double min_fraction) {
|
||||
int BrotliIsMostlyUTF8(const uint8_t* data, const size_t pos,
|
||||
const size_t mask, const size_t length, const double min_fraction) {
|
||||
size_t size_utf8 = 0;
|
||||
size_t i = 0;
|
||||
while (i < length) {
|
||||
int symbol;
|
||||
size_t bytes_read = ParseAsUTF8(
|
||||
&symbol, &data[(pos + i) & mask], length - i);
|
||||
size_t bytes_read =
|
||||
BrotliParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);
|
||||
i += bytes_read;
|
||||
if (symbol < 0x110000) size_utf8 += bytes_read;
|
||||
}
|
||||
return size_utf8 > min_fraction * static_cast<double>(length);
|
||||
return (size_utf8 > min_fraction * (double)length) ? 1 : 0;
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
@ -10,17 +10,23 @@
|
||||
#define BROTLI_ENC_UTF8_UTIL_H_
|
||||
|
||||
#include "../common/types.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static const double kMinUTF8Ratio = 0.75;
|
||||
|
||||
/* Returns 1 if at least min_fraction of the bytes between pos and
|
||||
pos + length in the (data, mask) ringbuffer is UTF8-encoded, otherwise
|
||||
returns 0. */
|
||||
bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
|
||||
const size_t length, const double min_fraction);
|
||||
BROTLI_INTERNAL int BrotliIsMostlyUTF8(
|
||||
const uint8_t* data, const size_t pos, const size_t mask,
|
||||
const size_t length, const double min_fraction);
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_UTF8_UTIL_H_ */
|
||||
|
@ -15,7 +15,9 @@
|
||||
#include "../common/types.h"
|
||||
#include "./port.h"
|
||||
|
||||
namespace brotli {
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*#define BIT_WRITER_DEBUG */
|
||||
|
||||
@ -34,15 +36,10 @@ namespace brotli {
|
||||
|
||||
For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
|
||||
and locate the rest in BYTE+1, BYTE+2, etc. */
|
||||
inline void WriteBits(size_t n_bits,
|
||||
uint64_t bits,
|
||||
size_t * __restrict pos,
|
||||
uint8_t * __restrict array) {
|
||||
#ifdef BIT_WRITER_DEBUG
|
||||
printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos);
|
||||
#endif
|
||||
assert((bits >> n_bits) == 0);
|
||||
assert(n_bits <= 56);
|
||||
static BROTLI_INLINE void BrotliWriteBits(size_t n_bits,
|
||||
uint64_t bits,
|
||||
size_t * BROTLI_RESTRICT pos,
|
||||
uint8_t * BROTLI_RESTRICT array) {
|
||||
#ifdef IS_LITTLE_ENDIAN
|
||||
/* This branch of the code can write up to 56 bits at a time,
|
||||
7 bits are lost by being perhaps already in *p and at least
|
||||
@ -51,6 +48,11 @@ inline void WriteBits(size_t n_bits,
|
||||
access a byte that was never initialized). */
|
||||
uint8_t *p = &array[*pos >> 3];
|
||||
uint64_t v = *p;
|
||||
#ifdef BIT_WRITER_DEBUG
|
||||
printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos);
|
||||
#endif
|
||||
assert((bits >> n_bits) == 0);
|
||||
assert(n_bits <= 56);
|
||||
v |= bits << (*pos & 7);
|
||||
BROTLI_UNALIGNED_STORE64(p, v); /* Set some bits. */
|
||||
*pos += n_bits;
|
||||
@ -59,19 +61,20 @@ inline void WriteBits(size_t n_bits,
|
||||
uint8_t *array_pos = &array[*pos >> 3];
|
||||
const size_t bits_reserved_in_first_byte = (*pos & 7);
|
||||
bits <<= bits_reserved_in_first_byte;
|
||||
*array_pos++ |= static_cast<uint8_t>(bits);
|
||||
*array_pos++ |= (uint8_t)bits;
|
||||
for (size_t bits_left_to_write = n_bits + bits_reserved_in_first_byte;
|
||||
bits_left_to_write >= 9;
|
||||
bits_left_to_write -= 8) {
|
||||
bits >>= 8;
|
||||
*array_pos++ = static_cast<uint8_t>(bits);
|
||||
*array_pos++ = (uint8_t)bits;
|
||||
}
|
||||
*array_pos = 0;
|
||||
*pos += n_bits;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void WriteBitsPrepareStorage(size_t pos, uint8_t *array) {
|
||||
static BROTLI_INLINE void BrotliWriteBitsPrepareStorage(
|
||||
size_t pos, uint8_t *array) {
|
||||
#ifdef BIT_WRITER_DEBUG
|
||||
printf("WriteBitsPrepareStorage %10d\n", pos);
|
||||
#endif
|
||||
@ -79,6 +82,8 @@ inline void WriteBitsPrepareStorage(size_t pos, uint8_t *array) {
|
||||
array[pos >> 3] = 0;
|
||||
}
|
||||
|
||||
} // namespace brotli
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* BROTLI_ENC_WRITE_BITS_H_ */
|
||||
|
@ -12,7 +12,7 @@ testdata/alice29.txt
|
||||
testdata/asyoulik.txt
|
||||
testdata/lcet10.txt
|
||||
testdata/plrabn12.txt
|
||||
../enc/encode.cc
|
||||
../enc/encode.c
|
||||
../common/dictionary.h
|
||||
../dec/decode.c
|
||||
%s
|
||||
|
@ -10,7 +10,7 @@ testdata/alice29.txt
|
||||
testdata/asyoulik.txt
|
||||
testdata/lcet10.txt
|
||||
testdata/plrabn12.txt
|
||||
../enc/encode.cc
|
||||
../enc/encode.c
|
||||
../common/dictionary.h
|
||||
../dec/decode.c
|
||||
$BRO
|
||||
|
159
tools/bro.cc
159
tools/bro.cc
@ -15,9 +15,10 @@
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "../dec/decode.h"
|
||||
#include "../enc/compressor.h"
|
||||
#include "../enc/encode.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
@ -52,7 +53,6 @@ static inline int ms_open(const char *filename, int oflag, int pmode) {
|
||||
}
|
||||
#endif /* WIN32 */
|
||||
|
||||
|
||||
static bool ParseQuality(const char* s, int* quality) {
|
||||
if (s[0] >= '0' && s[0] <= '9') {
|
||||
*quality = s[0] - '0';
|
||||
@ -68,6 +68,7 @@ static bool ParseQuality(const char* s, int* quality) {
|
||||
static void ParseArgv(int argc, char **argv,
|
||||
char **input_path,
|
||||
char **output_path,
|
||||
char **dictionary_path,
|
||||
int *force,
|
||||
int *quality,
|
||||
int *decompress,
|
||||
@ -125,6 +126,13 @@ static void ParseArgv(int argc, char **argv,
|
||||
*output_path = argv[k + 1];
|
||||
++k;
|
||||
continue;
|
||||
} else if (!strcmp("--custom-dictionary", argv[k])) {
|
||||
if (*dictionary_path != 0) {
|
||||
goto error;
|
||||
}
|
||||
*dictionary_path = argv[k + 1];
|
||||
++k;
|
||||
continue;
|
||||
} else if (!strcmp("--quality", argv[k]) ||
|
||||
!strcmp("-q", argv[k])) {
|
||||
if (!ParseQuality(argv[k + 1], quality)) {
|
||||
@ -158,7 +166,7 @@ error:
|
||||
fprintf(stderr,
|
||||
"Usage: %s [--force] [--quality n] [--decompress]"
|
||||
" [--input filename] [--output filename] [--repeat iters]"
|
||||
" [--verbose] [--window n]\n",
|
||||
" [--verbose] [--window n] [--custom-dictionary filename]\n",
|
||||
argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
@ -196,7 +204,7 @@ static FILE *OpenOutputFile(const char *output_path, const int force) {
|
||||
return fdopen(fd, "wb");
|
||||
}
|
||||
|
||||
static int64_t FileSize(char *path) {
|
||||
static int64_t FileSize(const char *path) {
|
||||
FILE *f = fopen(path, "rb");
|
||||
if (f == NULL) {
|
||||
return -1;
|
||||
@ -212,13 +220,50 @@ static int64_t FileSize(char *path) {
|
||||
return retval;
|
||||
}
|
||||
|
||||
static std::vector<uint8_t> ReadDictionary(const char* path) {
|
||||
FILE *f = fopen(path, "rb");
|
||||
if (f == NULL) {
|
||||
perror("fopen");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int64_t file_size = FileSize(path);
|
||||
if (file_size == -1) {
|
||||
fprintf(stderr, "could not get size of dictionary file");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
static const int kMaxDictionarySize = (1 << 24) - 16;
|
||||
if (file_size > kMaxDictionarySize) {
|
||||
fprintf(stderr, "dictionary is larger than maximum allowed: %d\n",
|
||||
kMaxDictionarySize);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::vector<uint8_t> buffer;
|
||||
buffer.resize(static_cast<size_t>(file_size));
|
||||
size_t bytes_read = fread(buffer.data(), sizeof(uint8_t), buffer.size(), f);
|
||||
if (bytes_read != buffer.size()) {
|
||||
fprintf(stderr, "could not read dictionary\n");
|
||||
exit(1);
|
||||
}
|
||||
fclose(f);
|
||||
return buffer;
|
||||
}
|
||||
|
||||
static const size_t kFileBufferSize = 65536;
|
||||
|
||||
static void Decompresss(FILE* fin, FILE* fout) {
|
||||
static int Decompress(FILE* fin, FILE* fout, const char* dictionary_path) {
|
||||
/* Dictionary should be kept during first rounds of decompression. */
|
||||
std::vector<uint8_t> dictionary;
|
||||
BrotliState* s = BrotliCreateState(NULL, NULL, NULL);
|
||||
if (!s) {
|
||||
fprintf(stderr, "out of memory\n");
|
||||
exit(1);
|
||||
return 0;
|
||||
}
|
||||
if (dictionary_path != NULL) {
|
||||
dictionary = ReadDictionary(dictionary_path);
|
||||
BrotliSetCustomDictionary(dictionary.size(), dictionary.data(), s);
|
||||
}
|
||||
uint8_t* input = new uint8_t[kFileBufferSize];
|
||||
uint8_t* output = new uint8_t[kFileBufferSize];
|
||||
@ -259,47 +304,109 @@ static void Decompresss(FILE* fin, FILE* fout) {
|
||||
BrotliDestroyState(s);
|
||||
if ((result == BROTLI_RESULT_NEEDS_MORE_OUTPUT) || ferror(fout)) {
|
||||
fprintf(stderr, "failed to write output\n");
|
||||
exit(1);
|
||||
return 0;
|
||||
} else if (result != BROTLI_RESULT_SUCCESS) { /* Error or needs more input. */
|
||||
fprintf(stderr, "corrupt input\n");
|
||||
exit(1);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int Compress(int quality, int lgwin, FILE* fin, FILE* fout,
|
||||
const char *dictionary_path) {
|
||||
BrotliEncoderState* s = BrotliEncoderCreateInstance(0, 0, 0);
|
||||
uint8_t* buffer = reinterpret_cast<uint8_t*>(malloc(kFileBufferSize << 1));
|
||||
uint8_t* input = buffer;
|
||||
uint8_t* output = buffer + kFileBufferSize;
|
||||
size_t available_in = 0;
|
||||
const uint8_t* next_in = NULL;
|
||||
size_t available_out = kFileBufferSize;
|
||||
uint8_t* next_out = output;
|
||||
int is_eof = 0;
|
||||
int is_ok = 1;
|
||||
|
||||
if (!s || !buffer) {
|
||||
is_ok = 0;
|
||||
goto finish;
|
||||
}
|
||||
|
||||
BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
|
||||
BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
|
||||
if (dictionary_path != NULL) {
|
||||
std::vector<uint8_t> dictionary = ReadDictionary(dictionary_path);
|
||||
BrotliEncoderSetCustomDictionary(s, dictionary.size(),
|
||||
reinterpret_cast<const uint8_t*>(dictionary.data()));
|
||||
}
|
||||
|
||||
while (1) {
|
||||
if (available_in == 0 && !is_eof) {
|
||||
available_in = fread(input, 1, kFileBufferSize, fin);
|
||||
next_in = input;
|
||||
if (ferror(fin)) break;
|
||||
is_eof = feof(fin);
|
||||
}
|
||||
|
||||
if (!BrotliEncoderCompressStream(s,
|
||||
is_eof ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
|
||||
&available_in, &next_in, &available_out, &next_out, NULL)) {
|
||||
is_ok = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (available_out != kFileBufferSize) {
|
||||
size_t out_size = kFileBufferSize - available_out;
|
||||
fwrite(output, 1, out_size, fout);
|
||||
if (ferror(fout)) break;
|
||||
available_out = kFileBufferSize;
|
||||
next_out = output;
|
||||
}
|
||||
|
||||
if (BrotliEncoderIsFinished(s)) break;
|
||||
}
|
||||
|
||||
finish:
|
||||
free(buffer);
|
||||
BrotliEncoderDestroyInstance(s);
|
||||
|
||||
if (!is_ok) {
|
||||
/* Should detect OOM? */
|
||||
fprintf(stderr, "failed to compress data\n");
|
||||
return 0;
|
||||
} else if (ferror(fout)) {
|
||||
fprintf(stderr, "failed to write output\n");
|
||||
return 0;
|
||||
} else if (ferror(fin)) {
|
||||
fprintf(stderr, "failed to read input\n");
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
char *input_path = 0;
|
||||
char *output_path = 0;
|
||||
char *dictionary_path = 0;
|
||||
int force = 0;
|
||||
int quality = 11;
|
||||
int decompress = 0;
|
||||
int repeat = 1;
|
||||
int verbose = 0;
|
||||
int lgwin = 0;
|
||||
ParseArgv(argc, argv, &input_path, &output_path, &force,
|
||||
ParseArgv(argc, argv, &input_path, &output_path, &dictionary_path, &force,
|
||||
&quality, &decompress, &repeat, &verbose, &lgwin);
|
||||
const clock_t clock_start = clock();
|
||||
for (int i = 0; i < repeat; ++i) {
|
||||
FILE* fin = OpenInputFile(input_path);
|
||||
FILE* fout = OpenOutputFile(output_path, force);
|
||||
int is_ok = false;
|
||||
if (decompress) {
|
||||
Decompresss(fin, fout);
|
||||
is_ok = Decompress(fin, fout, dictionary_path);
|
||||
} else {
|
||||
brotli::BrotliParams params;
|
||||
params.lgwin = lgwin;
|
||||
params.quality = quality;
|
||||
try {
|
||||
brotli::BrotliFileIn in(fin, 1 << 16);
|
||||
brotli::BrotliFileOut out(fout);
|
||||
if (!BrotliCompress(params, &in, &out)) {
|
||||
fprintf(stderr, "compression failed\n");
|
||||
unlink(output_path);
|
||||
exit(1);
|
||||
}
|
||||
} catch (std::bad_alloc&) {
|
||||
fprintf(stderr, "not enough memory\n");
|
||||
unlink(output_path);
|
||||
exit(1);
|
||||
}
|
||||
is_ok = Compress(quality, lgwin, fin, fout, dictionary_path);
|
||||
}
|
||||
if (!is_ok) {
|
||||
unlink(output_path);
|
||||
exit(1);
|
||||
}
|
||||
if (fclose(fin) != 0) {
|
||||
perror("fclose");
|
||||
|
Loading…
Reference in New Issue
Block a user