Update encoder (#497)

 * pad dictionary LUTs to length 32, etc. (#493)
 * avoid using INFINITY constant (#496)
 * make dictionary_hash.h more compact
 * add "disable literal context modeling" parameter
This commit is contained in:
Eugene Kliuchnikov 2017-01-26 11:32:18 +01:00 committed by GitHub
parent 7e347a7c84
commit 8d3fdc1dfe
12 changed files with 1204 additions and 4153 deletions

View File

@ -10,17 +10,32 @@
extern "C" {
#endif
const uint32_t kBrotliDictionaryOffsetsByLength[] = {
0, 0, 0, 0, 0, 4096, 9216, 21504, 35840, 44032, 53248, 63488, 74752, 87040,
93696, 100864, 104704, 106752, 108928, 113536, 115968, 118528, 119872, 121280,
122016
/**
 * Width, in bits, of a dictionary word index within its length bucket.
 *
 * Specification: Appendix A. Static Dictionary Data
 *
 * Dictionary words are grouped into buckets by word length; the entry at
 * index @c len applies to the bucket of words that are @c len bytes long.
 * An entry of @c 0 means the dictionary has no words of that length.
 *
 * Only lengths in [4..24] are populated. Entries at [0..3] and [25..31]
 * are padding and should not be addressed.
 */
const uint8_t kBrotliDictionarySizeBitsByLength[32] = {
  /*  0.. 3 */  0,  0,  0,  0,
  /*  4.. 7 */ 10, 10, 11, 11,
  /*  8..11 */ 10, 10, 10, 10,
  /* 12..15 */ 10,  9,  9,  8,
  /* 16..19 */  7,  7,  8,  7,
  /* 20..23 */  7,  6,  6,  5,
  /* 24..27 */  5,  0,  0,  0,
  /* 28..31 */  0,  0,  0,  0
};
const uint8_t kBrotliDictionarySizeBitsByLength[] = {
0, 0, 0, 0, 10, 10, 11, 11, 10, 10, 10, 10, 10,
9, 9, 8, 7, 7, 8, 7, 7, 6, 6, 5, 5,
/*
 * Per-length offsets into the dictionary data, indexed by word length.
 *
 * Consecutive entries satisfy
 *   assert(offset[i + 1] == offset[i] + (bits[i] ? (i << bits[i]) : 0))
 * where bits[] is kBrotliDictionarySizeBitsByLength. Entries past the last
 * populated length all repeat the final offset.
 */
const uint32_t kBrotliDictionaryOffsetsByLength[32] = {
  /*  0.. 3 */      0,      0,      0,      0,
  /*  4.. 7 */      0,   4096,   9216,  21504,
  /*  8..11 */  35840,  44032,  53248,  63488,
  /* 12..15 */  74752,  87040,  93696, 100864,
  /* 16..19 */ 104704, 106752, 108928, 113536,
  /* 20..23 */ 115968, 118528, 119872, 121280,
  /* 24..27 */ 122016, 122784, 122784, 122784,
  /* 28..31 */ 122784, 122784, 122784, 122784
};
/* assert(sizeof(kBrotliDictionary) == offset[31]) */
const uint8_t kBrotliDictionary[122784] = {
0x74, 0x69, 0x6d, 0x65, 0x64, 0x6f, 0x77, 0x6e, 0x6c, 0x69, 0x66, 0x65, 0x6c,
0x65, 0x66, 0x74, 0x62, 0x61, 0x63, 0x6b, 0x63, 0x6f, 0x64, 0x65, 0x64, 0x61,

View File

@ -17,8 +17,8 @@ extern "C" {
#endif
BROTLI_COMMON_API extern const uint8_t kBrotliDictionary[122784];
BROTLI_COMMON_API extern const uint32_t kBrotliDictionaryOffsetsByLength[25];
BROTLI_COMMON_API extern const uint8_t kBrotliDictionarySizeBitsByLength[25];
BROTLI_COMMON_API extern const uint32_t kBrotliDictionaryOffsetsByLength[32];
BROTLI_COMMON_API extern const uint8_t kBrotliDictionarySizeBitsByLength[32];
#define BROTLI_MIN_DICTIONARY_WORD_LENGTH 4
#define BROTLI_MAX_DICTIONARY_WORD_LENGTH 24

View File

@ -8,7 +8,6 @@
#include "./backward_references.h"
#include <math.h> /* INFINITY */
#include <string.h> /* memcpy, memset */
#include "../common/constants.h"
@ -26,11 +25,7 @@
extern "C" {
#endif
#ifdef INFINITY
static const float kInfinity = INFINITY;
#else
static const float kInfinity = 3.4028e38f;
#endif
/* Large finite float used in place of the INFINITY macro from <math.h>, so
   this constant is defined the same way whether or not that macro exists. */
static const float kInfinity = 1.7e38f; /* ~= 2 ^ 127 */
void BrotliInitZopfliNodes(ZopfliNode* array, size_t length) {
ZopfliNode stub;

View File

@ -458,7 +458,9 @@ static void StoreCommands(MemoryManager* m,
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < num_commands; ++i) {
++cmd_histo[commands[i] & 0xff];
const uint32_t code = commands[i] & 0xFF;
assert(code < 128);
++cmd_histo[code];
}
cmd_histo[1] += 1;
cmd_histo[2] += 1;
@ -469,8 +471,9 @@ static void StoreCommands(MemoryManager* m,
for (i = 0; i < num_commands; ++i) {
const uint32_t cmd = commands[i];
const uint32_t code = cmd & 0xff;
const uint32_t code = cmd & 0xFF;
const uint32_t extra = cmd >> 8;
assert(code < 128);
BrotliWriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
if (code < 24) {

File diff suppressed because it is too large Load Diff

View File

@ -151,6 +151,11 @@ BROTLI_BOOL BrotliEncoderSetParameter(
state->params.lgblock = (int)value;
return BROTLI_TRUE;
case BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING:
if ((value != 0) && (value != 1)) return BROTLI_FALSE;
state->params.disable_literal_context_modeling = TO_BROTLI_BOOL(!!value);
return BROTLI_TRUE;
default: return BROTLI_FALSE;
}
}
@ -495,12 +500,11 @@ static void WriteMetaBlockInternal(MemoryManager* m,
if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
size_t num_literal_contexts = 1;
const uint32_t* literal_context_map = NULL;
DecideOverLiteralContextModeling(data, wrapped_last_flush_pos,
bytes, mask,
params->quality,
&literal_context_mode,
&num_literal_contexts,
&literal_context_map);
if (!params->disable_literal_context_modeling) {
DecideOverLiteralContextModeling(
data, wrapped_last_flush_pos, bytes, mask, params->quality,
&literal_context_mode, &num_literal_contexts, &literal_context_map);
}
BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
prev_byte, prev_byte2, literal_context_mode, num_literal_contexts,
literal_context_map, commands, num_commands, &mb);
@ -584,6 +588,7 @@ static void BrotliEncoderInitState(BrotliEncoderState* s) {
s->params.quality = BROTLI_DEFAULT_QUALITY;
s->params.lgwin = BROTLI_DEFAULT_WINDOW;
s->params.lgblock = 0;
s->params.disable_literal_context_modeling = BROTLI_FALSE;
s->input_pos_ = 0;
s->num_commands_ = 0;
@ -1031,6 +1036,7 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
params.quality = 10;
params.lgwin = lgwin;
params.lgblock = 0;
params.disable_literal_context_modeling = BROTLI_FALSE;
SanitizeParams(&params);
params.lgblock = ComputeLgBlock(&params);
max_block_size = (size_t)1 << params.lgblock;

View File

@ -141,7 +141,7 @@ static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
size_t matchlen;
size_t backward;
score_t score;
len = item & 31;
len = item & 0x1F;
dist = item >> 5;
offset = kBrotliDictionaryOffsetsByLength[len] + len * dist;
if (len > max_length) {

View File

@ -66,8 +66,10 @@ void BrotliBuildHistogramsWithContext(
for (j = cmd->insert_len_; j != 0; --j) {
size_t context;
BlockSplitIteratorNext(&literal_it);
context = (literal_it.type_ << BROTLI_LITERAL_CONTEXT_BITS) +
Context(prev_byte, prev_byte2, context_modes[literal_it.type_]);
context = context_modes ?
((literal_it.type_ << BROTLI_LITERAL_CONTEXT_BITS) +
Context(prev_byte, prev_byte2, context_modes[literal_it.type_])) :
literal_it.type_;
HistogramAddLiteral(&literal_histograms[context],
ringbuffer[pos & mask]);
prev_byte2 = prev_byte;

View File

@ -40,10 +40,11 @@ void BrotliBuildMetaBlock(MemoryManager* m,
static const size_t kMaxNumberOfHistograms = 256;
HistogramDistance* distance_histograms;
HistogramLiteral* literal_histograms;
ContextType* literal_context_modes;
size_t num_literal_contexts;
size_t num_distance_contexts;
ContextType* literal_context_modes = NULL;
size_t literal_histograms_size;
size_t distance_histograms_size;
size_t i;
size_t literal_context_multiplier = 1;
BrotliSplitBlock(m, cmds, num_commands,
ringbuffer, pos, mask, params,
@ -52,20 +53,29 @@ void BrotliBuildMetaBlock(MemoryManager* m,
&mb->distance_split);
if (BROTLI_IS_OOM(m)) return;
literal_context_modes =
BROTLI_ALLOC(m, ContextType, mb->literal_split.num_types);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < mb->literal_split.num_types; ++i) {
literal_context_modes[i] = literal_context_mode;
if (!params->disable_literal_context_modeling) {
literal_context_multiplier = 1 << BROTLI_LITERAL_CONTEXT_BITS;
literal_context_modes =
BROTLI_ALLOC(m, ContextType, mb->literal_split.num_types);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < mb->literal_split.num_types; ++i) {
literal_context_modes[i] = literal_context_mode;
}
}
num_literal_contexts =
mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
num_distance_contexts =
mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
literal_histograms = BROTLI_ALLOC(m, HistogramLiteral, num_literal_contexts);
literal_histograms_size =
mb->literal_split.num_types * literal_context_multiplier;
literal_histograms =
BROTLI_ALLOC(m, HistogramLiteral, literal_histograms_size);
if (BROTLI_IS_OOM(m)) return;
ClearHistogramsLiteral(literal_histograms, num_literal_contexts);
ClearHistogramsLiteral(literal_histograms, literal_histograms_size);
distance_histograms_size =
mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
distance_histograms =
BROTLI_ALLOC(m, HistogramDistance, distance_histograms_size);
if (BROTLI_IS_OOM(m)) return;
ClearHistogramsDistance(distance_histograms, distance_histograms_size);
assert(mb->command_histograms == 0);
mb->command_histograms_size = mb->command_split.num_types;
@ -73,10 +83,7 @@ void BrotliBuildMetaBlock(MemoryManager* m,
BROTLI_ALLOC(m, HistogramCommand, mb->command_histograms_size);
if (BROTLI_IS_OOM(m)) return;
ClearHistogramsCommand(mb->command_histograms, mb->command_histograms_size);
distance_histograms =
BROTLI_ALLOC(m, HistogramDistance, num_distance_contexts);
if (BROTLI_IS_OOM(m)) return;
ClearHistogramsDistance(distance_histograms, num_distance_contexts);
BrotliBuildHistogramsWithContext(cmds, num_commands,
&mb->literal_split, &mb->command_split, &mb->distance_split,
ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes,
@ -89,31 +96,44 @@ void BrotliBuildMetaBlock(MemoryManager* m,
mb->literal_context_map =
BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
if (BROTLI_IS_OOM(m)) return;
assert(mb->literal_histograms == 0);
mb->literal_histograms_size = mb->literal_context_map_size;
mb->literal_histograms =
BROTLI_ALLOC(m, HistogramLiteral, mb->literal_histograms_size);
if (BROTLI_IS_OOM(m)) return;
BrotliClusterHistogramsLiteral(m, literal_histograms,
mb->literal_context_map_size,
kMaxNumberOfHistograms,
mb->literal_histograms,
&mb->literal_histograms_size,
mb->literal_context_map);
BrotliClusterHistogramsLiteral(m, literal_histograms, literal_histograms_size,
kMaxNumberOfHistograms, mb->literal_histograms,
&mb->literal_histograms_size, mb->literal_context_map);
if (BROTLI_IS_OOM(m)) return;
BROTLI_FREE(m, literal_histograms);
if (params->disable_literal_context_modeling) {
/* Distribute assignment to all contexts. */
for (i = mb->literal_split.num_types; i != 0;) {
size_t j = 0;
i--;
for (; j < (1 << BROTLI_LITERAL_CONTEXT_BITS); j++) {
mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
mb->literal_context_map[i];
}
}
}
assert(mb->distance_context_map == 0);
mb->distance_context_map_size =
mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
mb->distance_context_map =
BROTLI_ALLOC(m, uint32_t, mb->distance_context_map_size);
if (BROTLI_IS_OOM(m)) return;
assert(mb->distance_histograms == 0);
mb->distance_histograms_size = mb->distance_context_map_size;
mb->distance_histograms =
BROTLI_ALLOC(m, HistogramDistance, mb->distance_histograms_size);
if (BROTLI_IS_OOM(m)) return;
BrotliClusterHistogramsDistance(m, distance_histograms,
mb->distance_context_map_size,
kMaxNumberOfHistograms,

View File

@ -37,6 +37,7 @@ typedef struct BrotliEncoderParams {
int quality;
int lgwin;
int lgblock;
BROTLI_BOOL disable_literal_context_modeling;
} BrotliEncoderParams;
/* Returns hash-table size for quality levels 0 and 1. */

View File

@ -86,7 +86,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
BROTLI_BOOL end = !offset;
while (!end) {
DictWord w = kStaticDictionaryWords[offset++];
const size_t l = w.len & 0x7F;
const size_t l = w.len & 0x1F;
const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
@ -325,7 +325,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
BROTLI_BOOL end = !offset;
while (!end) {
DictWord w = kStaticDictionaryWords[offset++];
const size_t l = w.len & 0x7F;
const size_t l = w.len & 0x1F;
const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
@ -417,7 +417,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
BROTLI_BOOL end = !offset;
while (!end) {
DictWord w = kStaticDictionaryWords[offset++];
const size_t l = w.len & 0x7F;
const size_t l = w.len & 0x1F;
const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
@ -445,7 +445,7 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
BROTLI_BOOL end = !offset;
while (!end) {
DictWord w = kStaticDictionaryWords[offset++];
const size_t l = w.len & 0x7F;
const size_t l = w.len & 0x1F;
const size_t n = (size_t)1 << kBrotliDictionarySizeBitsByLength[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);

View File

@ -165,7 +165,13 @@ typedef enum BrotliEncoderParameter {
* memory. \n The rough formula of memory used for temporary input
* storage is `3 << lgBlock`.
*/
BROTLI_PARAM_LGBLOCK = 3
BROTLI_PARAM_LGBLOCK = 3,
/**
 * Flag that affects usage of "literal context modeling" format feature.
 *
 * This flag is a "decoding-speed vs compression ratio" trade-off.
 *
 * Accepted values are @c 0 and @c 1; ::BrotliEncoderSetParameter returns
 * ::BROTLI_FALSE for any other value. Setting @c 1 disables literal context
 * modeling. A freshly initialized encoder has this flag cleared (@c 0).
 */
BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING = 4
} BrotliEncoderParameter;
/**