Merge-in SharedDictionary feature (#916)

Co-authored-by: Eugene Kliuchnikov <eustas@chromium.org>
This commit is contained in:
Eugene Kliuchnikov 2021-08-04 14:42:02 +02:00 committed by GitHub
parent 630b5084ee
commit 19d86fb9a6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
37 changed files with 3066 additions and 38 deletions

View File

@ -0,0 +1,515 @@
/* Copyright 2017 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Shared Dictionary definition and utilities. */
#include <brotli/shared_dictionary.h>
#include <memory.h>
#include <stdlib.h> /* malloc, free */
#include <stdio.h>
#include "./dictionary.h"
#include "./platform.h"
#include "./shared_dictionary_internal.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#define BROTLI_NUM_ENCODED_LENGTHS (SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH \
- SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH + 1)
/* Max allowed by spec */
#define BROTLI_MAX_SIZE_BITS 15u
/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
static BROTLI_BOOL ReadBool(const uint8_t* encoded, size_t size, size_t* pos,
BROTLI_BOOL* result) {
uint8_t value;
size_t position = *pos;
if (position >= size) return BROTLI_FALSE; /* past file end */
value = encoded[position++];
if (value > 1) return BROTLI_FALSE; /* invalid bool */
*result = TO_BROTLI_BOOL(value);
*pos = position;
return BROTLI_TRUE; /* success */
}
/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
static BROTLI_BOOL ReadUint8(const uint8_t* encoded, size_t size, size_t* pos,
uint8_t* result) {
size_t position = *pos;
if (position + sizeof(uint8_t) > size) return BROTLI_FALSE;
*result = encoded[position++];
*pos = position;
return BROTLI_TRUE;
}
/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
static BROTLI_BOOL ReadUint16(const uint8_t* encoded, size_t size, size_t* pos,
uint16_t* result) {
size_t position = *pos;
if (position + sizeof(uint16_t) > size) return BROTLI_FALSE;
*result = BROTLI_UNALIGNED_LOAD16LE(&encoded[position]);
position += 2;
*pos = position;
return BROTLI_TRUE;
}
/* Reads a varint into a uint32_t, and returns error if it's too large */
/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
static BROTLI_BOOL ReadVarint32(const uint8_t* encoded, size_t size,
size_t* pos, uint32_t* result) {
int num = 0;
uint8_t byte;
*result = 0;
for (;;) {
if (*pos >= size) return BROTLI_FALSE;
byte = encoded[(*pos)++];
if (num == 4 && byte > 15) return BROTLI_FALSE;
*result |= (uint32_t)(byte & 127) << (num * 7);
if (byte < 128) return BROTLI_TRUE;
num++;
}
}
/* Returns the total length of word list. */
static size_t BrotliSizeBitsToOffsets(const uint8_t* size_bits_by_length,
uint32_t* offsets_by_length) {
uint32_t pos = 0;
uint32_t i;
for (i = 0; i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {
offsets_by_length[i] = pos;
if (size_bits_by_length[i] != 0) {
pos += i << size_bits_by_length[i];
}
}
return pos;
}
static BROTLI_BOOL ParseWordList(size_t size, const uint8_t* encoded,
size_t* pos, BrotliDictionary* out) {
size_t offset;
size_t i;
size_t position = *pos;
if (position + BROTLI_NUM_ENCODED_LENGTHS > size) {
return BROTLI_FALSE;
}
memset(out->size_bits_by_length, 0, SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH);
memcpy(out->size_bits_by_length + SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH,
&encoded[position], BROTLI_NUM_ENCODED_LENGTHS);
for (i = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {
if (out->size_bits_by_length[i] > BROTLI_MAX_SIZE_BITS) {
return BROTLI_FALSE;
}
}
position += BROTLI_NUM_ENCODED_LENGTHS;
offset = BrotliSizeBitsToOffsets(
out->size_bits_by_length, out->offsets_by_length);
out->data = &encoded[position];
out->data_size = offset;
position += offset;
if (position > size) return BROTLI_FALSE;
*pos = position;
return BROTLI_TRUE;
}
/* Computes the cutOffTransforms of a BrotliTransforms which already has the
transforms data correctly filled in. */
static void ComputeCutoffTransforms(BrotliTransforms* transforms) {
uint32_t i;
for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) {
transforms->cutOffTransforms[i] = -1;
}
for (i = 0; i < transforms->num_transforms; i++) {
const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, i);
uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, i);
const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, i);
if (type <= BROTLI_TRANSFORM_OMIT_LAST_9 && *prefix == 0 && *suffix == 0 &&
transforms->cutOffTransforms[type] == -1) {
transforms->cutOffTransforms[type] = (int16_t)i;
}
}
}
static BROTLI_BOOL ParsePrefixSuffixTable(size_t size, const uint8_t* encoded,
size_t* pos, BrotliTransforms* out, uint16_t* out_table,
size_t* out_table_size) {
size_t position = *pos;
size_t offset = 0;
size_t stringlet_count = 0; /* NUM_PREFIX_SUFFIX */
size_t data_length = 0;
/* PREFIX_SUFFIX_LENGTH */
if (!ReadUint16(encoded, size, &position, &out->prefix_suffix_size)) {
return BROTLI_FALSE;
}
data_length = out->prefix_suffix_size;
/* Must at least have space for null terminator. */
if (data_length < 1) return BROTLI_FALSE;
out->prefix_suffix = &encoded[position];
if (position + data_length >= size) return BROTLI_FALSE;
while (BROTLI_TRUE) {
/* STRING_LENGTH */
size_t stringlet_len = encoded[position + offset];
out_table[stringlet_count] = (uint16_t)offset;
stringlet_count++;
offset++;
if (stringlet_len == 0) {
if (offset == data_length) {
break;
} else {
return BROTLI_FALSE;
}
}
if (stringlet_count > 255) return BROTLI_FALSE;
offset += stringlet_len;
if (offset >= data_length) return BROTLI_FALSE;
}
position += data_length;
*pos = position;
*out_table_size = (uint16_t)stringlet_count;
return BROTLI_TRUE;
}
static BROTLI_BOOL ParseTransformsList(size_t size, const uint8_t* encoded,
size_t* pos, BrotliTransforms* out, uint16_t* prefix_suffix_table,
size_t* prefix_suffix_count) {
uint32_t i;
BROTLI_BOOL has_params = BROTLI_FALSE;
BROTLI_BOOL prefix_suffix_ok = BROTLI_FALSE;
size_t position = *pos;
size_t stringlet_cnt = 0;
if (position >= size) return BROTLI_FALSE;
prefix_suffix_ok = ParsePrefixSuffixTable(
size, encoded, &position, out, prefix_suffix_table, &stringlet_cnt);
if (!prefix_suffix_ok) return BROTLI_FALSE;
out->prefix_suffix_map = prefix_suffix_table;
*prefix_suffix_count = stringlet_cnt;
out->num_transforms = encoded[position++];
out->transforms = &encoded[position];
position += (size_t)out->num_transforms * 3;
if (position > size) return BROTLI_FALSE;
/* Check for errors and read extra parameters. */
for (i = 0; i < out->num_transforms; i++) {
uint8_t prefix_id = BROTLI_TRANSFORM_PREFIX_ID(out, i);
uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
uint8_t suffix_id = BROTLI_TRANSFORM_SUFFIX_ID(out, i);
if (prefix_id >= stringlet_cnt) return BROTLI_FALSE;
if (type >= BROTLI_NUM_TRANSFORM_TYPES) return BROTLI_FALSE;
if (suffix_id >= stringlet_cnt) return BROTLI_FALSE;
if (type == BROTLI_TRANSFORM_SHIFT_FIRST ||
type == BROTLI_TRANSFORM_SHIFT_ALL) {
has_params = BROTLI_TRUE;
}
}
if (has_params) {
out->params = &encoded[position];
position += (size_t)out->num_transforms * 2;
if (position > size) return BROTLI_FALSE;
for (i = 0; i < out->num_transforms; i++) {
uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
if (type != BROTLI_TRANSFORM_SHIFT_FIRST &&
type != BROTLI_TRANSFORM_SHIFT_ALL) {
if (out->params[i * 2] != 0 || out->params[i * 2 + 1] != 0) {
return BROTLI_FALSE;
}
}
}
} else {
out->params = NULL;
}
ComputeCutoffTransforms(out);
*pos = position;
return BROTLI_TRUE;
}
static BROTLI_BOOL DryParseDictionary(const uint8_t* encoded,
size_t size, uint32_t* num_prefix, BROTLI_BOOL* is_custom_static_dict) {
size_t pos = 0;
uint32_t chunk_size = 0;
uint8_t num_word_lists;
uint8_t num_transform_lists;
*is_custom_static_dict = BROTLI_FALSE;
*num_prefix = 0;
/* Skip magic header bytes. */
pos += 2;
/* LZ77_DICTIONARY_LENGTH */
if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
if (chunk_size != 0) {
/* This limitation is not specified but the 32-bit Brotli decoder for now */
if (chunk_size > 1073741823) return BROTLI_FALSE;
*num_prefix = 1;
if (pos + chunk_size > size) return BROTLI_FALSE;
pos += chunk_size;
}
if (!ReadUint8(encoded, size, &pos, &num_word_lists)) {
return BROTLI_FALSE;
}
if (!ReadUint8(encoded, size, &pos, &num_transform_lists)) {
return BROTLI_FALSE;
}
if (num_word_lists > 0 || num_transform_lists > 0) {
*is_custom_static_dict = BROTLI_TRUE;
}
return BROTLI_TRUE;
}
static BROTLI_BOOL ParseDictionary(const uint8_t* encoded, size_t size,
BrotliSharedDictionary* dict) {
uint32_t i;
size_t pos = 0;
uint32_t chunk_size = 0;
size_t total_prefix_suffix_count = 0;
size_t trasform_list_start[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
uint16_t temporary_prefix_suffix_table[256];
/* Skip magic header bytes. */
pos += 2;
/* LZ77_DICTIONARY_LENGTH */
if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
if (chunk_size != 0) {
if (pos + chunk_size > size) return BROTLI_FALSE;
dict->prefix_size[dict->num_prefix] = chunk_size;
dict->prefix[dict->num_prefix] = &encoded[pos];
dict->num_prefix++;
/* LZ77_DICTIONARY_LENGTH bytes. */
pos += chunk_size;
}
/* NUM_WORD_LISTS */
if (!ReadUint8(encoded, size, &pos, &dict->num_word_lists)) {
return BROTLI_FALSE;
}
if (dict->num_word_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
return BROTLI_FALSE;
}
if (dict->num_word_lists != 0) {
dict->words_instances = (BrotliDictionary*)dict->alloc_func(
dict->memory_manager_opaque,
dict->num_word_lists * sizeof(*dict->words_instances));
if (!dict->words_instances) return BROTLI_FALSE; /* OOM */
}
for (i = 0; i < dict->num_word_lists; i++) {
if (!ParseWordList(size, encoded, &pos, &dict->words_instances[i])) {
return BROTLI_FALSE;
}
}
/* NUM_TRANSFORM_LISTS */
if (!ReadUint8(encoded, size, &pos, &dict->num_transform_lists)) {
return BROTLI_FALSE;
}
if (dict->num_transform_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
return BROTLI_FALSE;
}
if (dict->num_transform_lists != 0) {
dict->transforms_instances = (BrotliTransforms*)dict->alloc_func(
dict->memory_manager_opaque,
dict->num_transform_lists * sizeof(*dict->transforms_instances));
if (!dict->transforms_instances) return BROTLI_FALSE; /* OOM */
}
for (i = 0; i < dict->num_transform_lists; i++) {
BROTLI_BOOL ok = BROTLI_FALSE;
size_t prefix_suffix_count = 0;
trasform_list_start[i] = pos;
dict->transforms_instances[i].prefix_suffix_map =
temporary_prefix_suffix_table;
ok = ParseTransformsList(
size, encoded, &pos, &dict->transforms_instances[i],
temporary_prefix_suffix_table, &prefix_suffix_count);
if (!ok) return BROTLI_FALSE;
total_prefix_suffix_count += prefix_suffix_count;
}
if (total_prefix_suffix_count != 0) {
dict->prefix_suffix_maps = (uint16_t*)dict->alloc_func(
dict->memory_manager_opaque,
total_prefix_suffix_count * sizeof(*dict->prefix_suffix_maps));
if (!dict->prefix_suffix_maps) return BROTLI_FALSE; /* OOM */
}
total_prefix_suffix_count = 0;
for (i = 0; i < dict->num_transform_lists; i++) {
size_t prefix_suffix_count = 0;
size_t position = trasform_list_start[i];
uint16_t* prefix_suffix_map =
&dict->prefix_suffix_maps[total_prefix_suffix_count];
BROTLI_BOOL ok = ParsePrefixSuffixTable(
size, encoded, &position, &dict->transforms_instances[i],
prefix_suffix_map, &prefix_suffix_count);
if (!ok) return BROTLI_FALSE;
dict->transforms_instances[i].prefix_suffix_map = prefix_suffix_map;
total_prefix_suffix_count += prefix_suffix_count;
}
if (dict->num_word_lists != 0 || dict->num_transform_lists != 0) {
if (!ReadUint8(encoded, size, &pos, &dict->num_dictionaries)) {
return BROTLI_FALSE;
}
if (dict->num_dictionaries == 0 ||
dict->num_dictionaries > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
return BROTLI_FALSE;
}
for (i = 0; i < dict->num_dictionaries; i++) {
uint8_t words_index;
uint8_t transforms_index;
if (!ReadUint8(encoded, size, &pos, &words_index)) {
return BROTLI_FALSE;
}
if (words_index > dict->num_word_lists) return BROTLI_FALSE;
if (!ReadUint8(encoded, size, &pos, &transforms_index)) {
return BROTLI_FALSE;
}
if (transforms_index > dict->num_transform_lists) return BROTLI_FALSE;
dict->words[i] = words_index == dict->num_word_lists ?
BrotliGetDictionary() : &dict->words_instances[words_index];
dict->transforms[i] = transforms_index == dict->num_transform_lists ?
BrotliGetTransforms(): &dict->transforms_instances[transforms_index];
}
/* CONTEXT_ENABLED */
if (!ReadBool(encoded, size, &pos, &dict->context_based)) {
return BROTLI_FALSE;
}
/* CONTEXT_MAP */
if (dict->context_based) {
for (i = 0; i < SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS; i++) {
if (!ReadUint8(encoded, size, &pos, &dict->context_map[i])) {
return BROTLI_FALSE;
}
if (dict->context_map[i] >= dict->num_dictionaries) {
return BROTLI_FALSE;
}
}
}
} else {
dict->context_based = BROTLI_FALSE;
dict->num_dictionaries = 1;
dict->words[0] = BrotliGetDictionary();
dict->transforms[0] = BrotliGetTransforms();
}
return BROTLI_TRUE;
}
/* Decodes shared dictionary and verifies correctness.
Returns BROTLI_TRUE if dictionary is valid, BROTLI_FALSE otherwise.
The BrotliSharedDictionary must already have been initialized. If the
BrotliSharedDictionary already contains data, compound dictionaries
will be appended, but an error will be returned if it already has
custom words or transforms.
TODO: link to RFC for shared brotli once published. */
static BROTLI_BOOL DecodeSharedDictionary(
const uint8_t* encoded, size_t size, BrotliSharedDictionary* dict) {
uint32_t num_prefix = 0;
BROTLI_BOOL is_custom_static_dict = BROTLI_FALSE;
BROTLI_BOOL has_custom_static_dict =
dict->num_word_lists > 0 || dict->num_transform_lists > 0;
/* Check magic header bytes. */
if (size < 2) return BROTLI_FALSE;
if (encoded[0] != 0x91 || encoded[1] != 0) return BROTLI_FALSE;
if (!DryParseDictionary(encoded, size, &num_prefix, &is_custom_static_dict)) {
return BROTLI_FALSE;
}
if (num_prefix + dict->num_prefix > SHARED_BROTLI_MAX_COMPOUND_DICTS) {
return BROTLI_FALSE;
}
/* Cannot combine different static dictionaries, only prefix dictionaries */
if (has_custom_static_dict && is_custom_static_dict) return BROTLI_FALSE;
return ParseDictionary(encoded, size, dict);
}
void BrotliSharedDictionaryDestroyInstance(
BrotliSharedDictionary* dict) {
if (!dict) {
return;
} else {
brotli_free_func free_func = dict->free_func;
void* opaque = dict->memory_manager_opaque;
/* Cleanup. */
free_func(opaque, dict->words_instances);
free_func(opaque, dict->transforms_instances);
free_func(opaque, dict->prefix_suffix_maps);
/* Self-destruction. */
free_func(opaque, dict);
}
}
BROTLI_BOOL BrotliSharedDictionaryAttach(
BrotliSharedDictionary* dict, BrotliSharedDictionaryType type,
size_t data_size, const uint8_t* data) {
if (!dict) {
return BROTLI_FALSE;
}
if (type == BROTLI_SHARED_DICTIONARY_SERIALIZED) {
return DecodeSharedDictionary(data, data_size, dict);
} else if (type == BROTLI_SHARED_DICTIONARY_RAW) {
if (dict->num_prefix >= SHARED_BROTLI_MAX_COMPOUND_DICTS) {
return BROTLI_FALSE;
}
dict->prefix_size[dict->num_prefix] = data_size;
dict->prefix[dict->num_prefix] = data;
dict->num_prefix++;
return BROTLI_TRUE;
} else {
return BROTLI_FALSE;
}
}
BrotliSharedDictionary* BrotliSharedDictionaryCreateInstance(
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
BrotliSharedDictionary* dict = 0;
if (!alloc_func && !free_func) {
dict = (BrotliSharedDictionary*)malloc(sizeof(BrotliSharedDictionary));
} else if (alloc_func && free_func) {
dict = (BrotliSharedDictionary*)alloc_func(
opaque, sizeof(BrotliSharedDictionary));
}
if (dict == 0) {
return 0;
}
/* TODO: explicitly initialize all the fields? */
memset(dict, 0, sizeof(BrotliSharedDictionary));
dict->context_based = BROTLI_FALSE;
dict->num_dictionaries = 1;
dict->num_word_lists = 0;
dict->num_transform_lists = 0;
dict->words[0] = BrotliGetDictionary();
dict->transforms[0] = BrotliGetTransforms();
dict->alloc_func = alloc_func ? alloc_func : BrotliDefaultAllocFunc;
dict->free_func = free_func ? free_func : BrotliDefaultFreeFunc;
dict->memory_manager_opaque = opaque;
return dict;
}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@ -0,0 +1,74 @@
/* Copyright 2017 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* (Transparent) Shared Dictionary definition. */
#ifndef BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_
#define BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_
#include "./dictionary.h"
#include <brotli/shared_dictionary.h>
#include "./transform.h"
#include <brotli/types.h>
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
struct BrotliSharedDictionaryStruct {
/* LZ77 prefixes (compound dictionary). */
uint32_t num_prefix; /* max SHARED_BROTLI_MAX_COMPOUND_DICTS */
size_t prefix_size[SHARED_BROTLI_MAX_COMPOUND_DICTS];
const uint8_t* prefix[SHARED_BROTLI_MAX_COMPOUND_DICTS];
/* If set, the context map is used to select word and transform list from 64
contexts, if not set, the context map is not used and only words[0] and
transforms[0] are to be used. */
BROTLI_BOOL context_based;
uint8_t context_map[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
/* Amount of word_list+transform_list combinations. */
uint8_t num_dictionaries;
/* Must use num_dictionaries values. */
const BrotliDictionary* words[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
/* Must use num_dictionaries values. */
const BrotliTransforms* transforms[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
/* Amount of custom word lists. May be 0 if only Brotli's built-in is used */
uint8_t num_word_lists;
/* Contents of the custom words lists. Must be NULL if num_word_lists is 0. */
BrotliDictionary* words_instances;
/* Amount of custom transform lists. May be 0 if only Brotli's built-in is
used */
uint8_t num_transform_lists;
/* Contents of the custom transform lists. Must be NULL if num_transform_lists
is 0. */
BrotliTransforms* transforms_instances;
/* Concatenated prefix_suffix_maps of the custom transform lists. Must be NULL
if num_transform_lists is 0. */
uint16_t* prefix_suffix_maps;
/* Memory management */
brotli_alloc_func alloc_func;
brotli_free_func free_func;
void* memory_manager_opaque;
};
typedef struct BrotliSharedDictionaryStruct BrotliSharedDictionaryInternal;
#define BrotliSharedDictionary BrotliSharedDictionaryInternal
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_ */

View File

@ -13,6 +13,7 @@
#include "../common/context.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
#include "../common/shared_dictionary_internal.h"
#include "../common/transform.h"
#include "../common/version.h"
#include "./bit_reader.h"
@ -42,8 +43,8 @@ extern "C" {
/* We need the slack region for the following reasons:
- doing up to two 16-byte copies for fast backward copying
- inserting transformed dictionary word:
5 prefix + 24 base + 8 suffix */
static const uint32_t kRingBufferWriteAheadSlack = 42;
255 prefix + 32 base + 255 suffix */
static const uint32_t kRingBufferWriteAheadSlack = 542;
static const uint8_t kCodeLengthCodeOrder[BROTLI_CODE_LENGTH_CODES] = {
1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -1403,6 +1404,114 @@ static BrotliDecoderErrorCode BROTLI_NOINLINE CopyUncompressedBlockToOutput(
BROTLI_DCHECK(0); /* Unreachable */
}
static BROTLI_BOOL AttachCompoundDictionary(
BrotliDecoderState* state, const uint8_t* data, size_t size) {
BrotliDecoderCompoundDictionary* addon = state->compound_dictionary;
if (state->state != BROTLI_STATE_UNINITED) return BROTLI_FALSE;
if (!addon) {
addon = (BrotliDecoderCompoundDictionary*)BROTLI_DECODER_ALLOC(
state, sizeof(BrotliDecoderCompoundDictionary));
if (!addon) return BROTLI_FALSE;
addon->num_chunks = 0;
addon->total_size = 0;
addon->br_length = 0;
addon->br_copied = 0;
addon->block_bits = -1;
addon->chunk_offsets[0] = 0;
state->compound_dictionary = addon;
}
if (addon->num_chunks == 15) return BROTLI_FALSE;
addon->chunks[addon->num_chunks] = data;
addon->num_chunks++;
addon->total_size += (int)size;
addon->chunk_offsets[addon->num_chunks] = addon->total_size;
return BROTLI_TRUE;
}
static void EnsureCoumpoundDictionaryInitialized(BrotliDecoderState* state) {
BrotliDecoderCompoundDictionary* addon = state->compound_dictionary;
/* 256 = (1 << 8) slots in block map. */
int block_bits = 8;
int cursor = 0;
int index = 0;
if (addon->block_bits != -1) return;
while (((addon->total_size - 1) >> block_bits) != 0) block_bits++;
block_bits -= 8;
addon->block_bits = block_bits;
while (cursor < addon->total_size) {
while (addon->chunk_offsets[index + 1] < cursor) index++;
addon->block_map[cursor >> block_bits] = (uint8_t)index;
cursor += 1 << block_bits;
}
}
static BROTLI_BOOL InitializeCompoundDictionaryCopy(BrotliDecoderState* s,
int address, int length) {
BrotliDecoderCompoundDictionary* addon = s->compound_dictionary;
int index;
EnsureCoumpoundDictionaryInitialized(s);
index = addon->block_map[address >> addon->block_bits];
while (address >= addon->chunk_offsets[index + 1]) index++;
if (addon->total_size < address + length) return BROTLI_FALSE;
/* Update the recent distances cache. */
s->dist_rb[s->dist_rb_idx & 3] = s->distance_code;
++s->dist_rb_idx;
s->meta_block_remaining_len -= length;
addon->br_index = index;
addon->br_offset = address - addon->chunk_offsets[index];
addon->br_length = length;
addon->br_copied = 0;
return BROTLI_TRUE;
}
static int GetCompoundDictionarySize(BrotliDecoderState* s) {
return s->compound_dictionary ? s->compound_dictionary->total_size : 0;
}
static int CopyFromCompoundDictionary(BrotliDecoderState* s, int pos) {
BrotliDecoderCompoundDictionary* addon = s->compound_dictionary;
int orig_pos = pos;
while (addon->br_length != addon->br_copied) {
uint8_t* copy_dst = &s->ringbuffer[pos];
const uint8_t* copy_src =
addon->chunks[addon->br_index] + addon->br_offset;
int space = s->ringbuffer_size - pos;
int rem_chunk_length = (addon->chunk_offsets[addon->br_index + 1] -
addon->chunk_offsets[addon->br_index]) - addon->br_offset;
int length = addon->br_length - addon->br_copied;
if (length > rem_chunk_length) length = rem_chunk_length;
if (length > space) length = space;
memcpy(copy_dst, copy_src, (size_t)length);
pos += length;
addon->br_offset += length;
addon->br_copied += length;
if (length == rem_chunk_length) {
addon->br_index++;
addon->br_offset = 0;
}
if (pos == s->ringbuffer_size) break;
}
return pos - orig_pos;
}
BROTLI_BOOL BrotliDecoderAttachDictionary(BrotliDecoderState* state,
BrotliSharedDictionaryType type, size_t data_size, const uint8_t* data) {
uint32_t i;
uint32_t num_prefix_before = state->dictionary->num_prefix;
if (state->state != BROTLI_STATE_UNINITED) return BROTLI_FALSE;
if (!BrotliSharedDictionaryAttach(state->dictionary, type, data_size, data)) {
return BROTLI_FALSE;
}
for (i = num_prefix_before; i < state->dictionary->num_prefix; i++) {
if (!AttachCompoundDictionary(
state, state->dictionary->prefix[i],
state->dictionary->prefix_size[i])) {
return BROTLI_FALSE;
}
}
return BROTLI_TRUE;
}
/* Calculates the smallest feasible ring buffer.
If we know the data size is small, do not allocate more ring buffer
@ -1737,6 +1846,7 @@ static BROTLI_INLINE BrotliDecoderErrorCode ProcessCommandsInternal(
int i = s->loop_counter;
BrotliDecoderErrorCode result = BROTLI_DECODER_SUCCESS;
BrotliBitReader* br = &s->br;
int compound_dictionary_size = GetCompoundDictionarySize(s);
if (!CheckInputAmount(safe, br, 28)) {
result = BROTLI_DECODER_NEEDS_MORE_INPUT;
@ -1903,20 +2013,75 @@ CommandPostDecodeLiterals:
pos, s->distance_code, i, s->meta_block_remaining_len));
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_DISTANCE);
}
if (i >= BROTLI_MIN_DICTIONARY_WORD_LENGTH &&
i <= BROTLI_MAX_DICTIONARY_WORD_LENGTH) {
int address = s->distance_code - s->max_distance - 1;
const BrotliDictionary* words = s->dictionary;
const BrotliTransforms* transforms = s->transforms;
int offset = (int)s->dictionary->offsets_by_length[i];
uint32_t shift = s->dictionary->size_bits_by_length[i];
if (s->distance_code - s->max_distance - 1 < compound_dictionary_size) {
int address = compound_dictionary_size -
(s->distance_code - s->max_distance);
if (!InitializeCompoundDictionaryCopy(s, address, i)) {
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_COMPOUND_DICTIONARY);
}
pos += CopyFromCompoundDictionary(s, pos);
if (pos >= s->ringbuffer_size) {
s->state = BROTLI_STATE_COMMAND_POST_WRITE_1;
goto saveStateAndReturn;
}
} else if (i >= SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH &&
i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH) {
uint8_t p1 = s->ringbuffer[(pos - 1) & s->ringbuffer_mask];
uint8_t p2 = s->ringbuffer[(pos - 2) & s->ringbuffer_mask];
uint8_t dict_id = s->dictionary->context_based ?
s->dictionary->context_map[BROTLI_CONTEXT(p1, p2, s->context_lookup)]
: 0;
const BrotliDictionary* words = s->dictionary->words[dict_id];
const BrotliTransforms* transforms = s->dictionary->transforms[dict_id];
int offset = (int)words->offsets_by_length[i];
uint32_t shift = words->size_bits_by_length[i];
int address =
s->distance_code - s->max_distance - 1 - compound_dictionary_size;
int mask = (int)BitMask(shift);
int word_idx = address & mask;
int transform_idx = address >> shift;
/* Compensate double distance-ring-buffer roll. */
s->dist_rb_idx += s->distance_context;
offset += word_idx * i;
/* If the distance is out of bound, select a next static dictionary if
there exist multiple. */
if ((transform_idx >= (int)transforms->num_transforms ||
words->size_bits_by_length[i] == 0) &&
s->dictionary->num_dictionaries > 1) {
uint8_t dict_id2;
int dist_remaining = address -
(int)(((1u << shift) & ~1u)) * (int)transforms->num_transforms;
for (dict_id2 = 0; dict_id2 < s->dictionary->num_dictionaries;
dict_id2++) {
const BrotliDictionary* words2 = s->dictionary->words[dict_id2];
if (dict_id2 != dict_id && words2->size_bits_by_length[i] != 0) {
const BrotliTransforms* transforms2 =
s->dictionary->transforms[dict_id2];
uint32_t shift2 = words2->size_bits_by_length[i];
int num = (int)((1u << shift2) & ~1u) *
(int)transforms2->num_transforms;
if (dist_remaining < num) {
dict_id = dict_id2;
words = words2;
transforms = transforms2;
address = dist_remaining;
shift = shift2;
mask = (int)BitMask(shift);
word_idx = address & mask;
transform_idx = address >> shift;
offset = (int)words->offsets_by_length[i] + word_idx * i;
break;
}
dist_remaining -= num;
}
}
}
if (BROTLI_PREDICT_FALSE(words->size_bits_by_length[i] == 0)) {
BROTLI_LOG(("Invalid backward reference. pos: %d distance: %d "
"len: %d bytes left: %d\n",
pos, s->distance_code, i, s->meta_block_remaining_len));
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_DICTIONARY);
}
if (BROTLI_PREDICT_FALSE(!words->data)) {
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET);
}
@ -1933,6 +2098,10 @@ CommandPostDecodeLiterals:
BROTLI_LOG(("[ProcessCommandsInternal] dictionary word: [%.*s],"
" transform_idx = %d, transformed: [%.*s]\n",
i, word, transform_idx, len, &s->ringbuffer[pos]));
if (len == 0 && s->distance_code <= 120) {
BROTLI_LOG(("Invalid length-0 dictionary word after transform\n"));
return BROTLI_FAILURE(BROTLI_DECODER_ERROR_FORMAT_TRANSFORM);
}
}
pos += len;
s->meta_block_remaining_len -= len;
@ -2483,6 +2652,11 @@ BrotliDecoderResult BrotliDecoderDecompressStream(
s->max_distance = s->max_backward_distance;
}
if (s->state == BROTLI_STATE_COMMAND_POST_WRITE_1) {
BrotliDecoderCompoundDictionary* addon = s->compound_dictionary;
if (addon && (addon->br_length != addon->br_copied)) {
s->pos += CopyFromCompoundDictionary(s, s->pos);
if (s->pos >= s->ringbuffer_size) continue;
}
if (s->meta_block_remaining_len == 0) {
/* Next metablock, if any. */
s->state = BROTLI_STATE_METABLOCK_DONE;

View File

@ -8,6 +8,7 @@
#include <stdlib.h> /* free, malloc */
#include "../common/dictionary.h"
#include <brotli/types.h>
#include "./huffman.h"
@ -81,8 +82,10 @@ BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
s->mtf_upper_bound = 63;
s->dictionary = BrotliGetDictionary();
s->transforms = BrotliGetTransforms();
s->compound_dictionary = NULL;
s->dictionary =
BrotliSharedDictionaryCreateInstance(alloc_func, free_func, opaque);
if (!s->dictionary) return BROTLI_FALSE;
return BROTLI_TRUE;
}
@ -129,6 +132,9 @@ void BrotliDecoderStateCleanupAfterMetablock(BrotliDecoderState* s) {
void BrotliDecoderStateCleanup(BrotliDecoderState* s) {
BrotliDecoderStateCleanupAfterMetablock(s);
BROTLI_DECODER_FREE(s, s->compound_dictionary);
BrotliSharedDictionaryDestroyInstance(s->dictionary);
s->dictionary = NULL;
BROTLI_DECODER_FREE(s, s->ringbuffer);
BROTLI_DECODER_FREE(s, s->block_type_trees);
}

View File

@ -12,6 +12,7 @@
#include "../common/constants.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
#include <brotli/shared_dictionary.h>
#include "../common/transform.h"
#include <brotli/types.h>
#include "./bit_reader.h"
@ -189,6 +190,20 @@ typedef enum {
BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX
} BrotliRunningReadBlockLengthState;
/* BrotliDecoderState addon, used for Compound Dictionary functionality. */
typedef struct BrotliDecoderCompoundDictionary {
int num_chunks;
int total_size;
int br_index;
int br_offset;
int br_length;
int br_copied;
const uint8_t* chunks[16];
int chunk_offsets[16];
int block_bits;
uint8_t block_map[256];
} BrotliDecoderCompoundDictionary;
typedef struct BrotliMetablockHeaderArena {
BrotliRunningTreeGroupState substate_tree_group;
BrotliRunningContextMapState substate_context_map;
@ -327,8 +342,8 @@ struct BrotliDecoderStateStruct {
uint8_t* context_map;
uint8_t* context_modes;
const BrotliDictionary* dictionary;
const BrotliTransforms* transforms;
BrotliSharedDictionary* dictionary;
BrotliDecoderCompoundDictionary* compound_dictionary;
uint32_t trivial_literal_contexts[8]; /* 256 bits */

View File

@ -9,12 +9,13 @@
#include "./backward_references.h"
#include "../common/constants.h"
#include "../common/context.h"
#include "../common/dictionary.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./command.h"
#include "./compound_dictionary.h"
#include "./dictionary_hash.h"
#include "./encoder_dict.h"
#include "./memory.h"
#include "./quality.h"
@ -52,6 +53,7 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
#define EXPORT_FN(X) EXPAND_CAT(X, EXPAND_CAT(PREFIX(), HASHER()))
#define PREFIX() N
#define ENABLE_COMPOUND_DICTIONARY 0
#define HASHER() H2
/* NOLINTNEXTLINE(build/include) */
@ -113,6 +115,41 @@ static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
#include "./backward_references_inc.h"
#undef HASHER
#undef ENABLE_COMPOUND_DICTIONARY
#undef PREFIX
#define PREFIX() D
#define ENABLE_COMPOUND_DICTIONARY 1
#define HASHER() H5
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H6
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H40
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H41
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H42
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H55
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H65
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#undef ENABLE_COMPOUND_DICTIONARY
#undef PREFIX
#undef EXPORT_FN
@ -125,6 +162,28 @@ void BrotliCreateBackwardReferences(size_t num_bytes,
ContextLut literal_context_lut, const BrotliEncoderParams* params,
Hasher* hasher, int* dist_cache, size_t* last_insert_len,
Command* commands, size_t* num_commands, size_t* num_literals) {
if (params->dictionary.compound.num_chunks != 0) {
switch (params->hasher.type) {
#define CASE_(N) \
case N: \
CreateBackwardReferencesDH ## N(num_bytes, \
position, ringbuffer, ringbuffer_mask, \
literal_context_lut, params, hasher, dist_cache, \
last_insert_len, commands, num_commands, num_literals); \
return;
CASE_(5)
CASE_(6)
CASE_(40)
CASE_(41)
CASE_(42)
CASE_(55)
CASE_(65)
#undef CASE_
default:
break;
}
}
switch (params->hasher.type) {
#define CASE_(N) \
case N: \

View File

@ -11,10 +11,11 @@
#include <string.h> /* memcpy, memset */
#include "../common/constants.h"
#include "../common/context.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./command.h"
#include "./compound_dictionary.h"
#include "./encoder_dict.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./literal_cost.h"
@ -430,7 +431,8 @@ static size_t UpdateNodes(
size_t min_len;
size_t result = 0;
size_t k;
size_t gap = 0;
const CompoundDictionary* addon = &params->dictionary.compound;
size_t gap = addon->total_size;
EvaluateNode(block_start + stream_offset, pos, max_backward_limit, gap,
starting_dist_cache, model, queue, nodes);
@ -484,6 +486,24 @@ static size_t UpdateNodes(
len = FindMatchLengthWithLimit(&ringbuffer[prev_ix],
&ringbuffer[cur_ix_masked],
max_len);
} else if (backward > dictionary_start) {
size_t d = 0;
size_t offset;
size_t limit;
const uint8_t* source;
offset = dictionary_start + 1 + addon->total_size - 1;
while (offset >= backward + addon->chunk_offsets[d + 1]) d++;
source = addon->chunk_source[d];
offset = offset - addon->chunk_offsets[d] - backward;
limit = addon->chunk_offsets[d + 1] - addon->chunk_offsets[d] - offset;
limit = limit > max_len ? max_len : limit;
if (best_len >= limit ||
continuation != source[offset + best_len]) {
continue;
}
len = FindMatchLengthWithLimit(&source[offset],
&ringbuffer[cur_ix_masked],
limit);
} else {
/* "Gray" area. It is addressable by decoder, but this encoder
instance does not have that data -> should not touch it. */
@ -589,7 +609,7 @@ void BrotliZopfliCreateCommands(const size_t num_bytes,
size_t pos = 0;
uint32_t offset = nodes[0].u.next;
size_t i;
size_t gap = 0;
size_t gap = params->dictionary.compound.total_size;
for (i = 0; offset != BROTLI_UINT32_MAX; i++) {
const ZopfliNode* next = &nodes[pos + offset];
size_t copy_length = ZopfliNodeCopyLength(next);
@ -665,6 +685,23 @@ static size_t ZopfliIterate(size_t num_bytes, size_t position,
return ComputeShortestPathFromNodes(num_bytes, nodes);
}
static void MergeMatches(BackwardMatch* dst,
BackwardMatch* src1, size_t len1, BackwardMatch* src2, size_t len2) {
while (len1 > 0 && len2 > 0) {
size_t l1 = BackwardMatchLength(src1);
size_t l2 = BackwardMatchLength(src2);
if (l1 < l2 || ((l1 == l2) && (src1->distance < src2->distance))) {
*dst++ = *src1++;
len1--;
} else {
*dst++ = *src2++;
len2--;
}
}
while (len1-- > 0) *dst++ = *src1++;
while (len2-- > 0) *dst++ = *src2++;
}
/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
@ -679,10 +716,11 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
const size_t store_end = num_bytes >= StoreLookaheadH10() ?
position + num_bytes - StoreLookaheadH10() + 1 : position;
size_t i;
size_t gap = 0;
size_t lz_matches_offset = 0;
const CompoundDictionary* addon = &params->dictionary.compound;
size_t gap = addon->total_size;
size_t lz_matches_offset =
(addon->num_chunks != 0) ? (MAX_NUM_MATCHES_H10 + 128) : 0;
ZopfliCostModel* model = BROTLI_ALLOC(m, ZopfliCostModel, 1);
BROTLI_UNUSED(literal_context_lut);
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(model) || BROTLI_IS_NULL(matches)) {
return 0;
}
@ -700,10 +738,28 @@ size_t BrotliZopfliComputeShortestPath(MemoryManager* m, size_t num_bytes,
pos + stream_offset, max_backward_limit);
size_t skip;
size_t num_matches;
int dict_id = 0;
if (params->dictionary.contextual.context_based) {
uint8_t p1 = pos >= 1 ?
ringbuffer[(size_t)(pos - 1) & ringbuffer_mask] : 0;
uint8_t p2 = pos >= 2 ?
ringbuffer[(size_t)(pos - 2) & ringbuffer_mask] : 0;
dict_id = params->dictionary.contextual.context_map[
BROTLI_CONTEXT(p1, p2, literal_context_lut)];
}
num_matches = FindAllMatchesH10(&hasher->privat._H10,
&params->dictionary,
params->dictionary.contextual.dict[dict_id],
ringbuffer, ringbuffer_mask, pos, num_bytes - i, max_distance,
dictionary_start + gap, params, &matches[lz_matches_offset]);
if (addon->num_chunks != 0) {
size_t cd_matches = LookupAllCompoundDictionaryMatches(addon,
ringbuffer, ringbuffer_mask, pos, 3, num_bytes - i,
dictionary_start, params->dist.max_distance,
&matches[lz_matches_offset - 64], 64);
MergeMatches(matches, &matches[lz_matches_offset - 64], cd_matches,
&matches[lz_matches_offset], num_matches);
num_matches += cd_matches;
}
if (num_matches > 0 &&
BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
matches[0] = matches[num_matches - 1];
@ -774,9 +830,10 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
ZopfliCostModel* model = BROTLI_ALLOC(m, ZopfliCostModel, 1);
ZopfliNode* nodes;
BackwardMatch* matches = BROTLI_ALLOC(m, BackwardMatch, matches_size);
size_t gap = 0;
size_t shadow_matches = 0;
BROTLI_UNUSED(literal_context_lut);
const CompoundDictionary* addon = &params->dictionary.compound;
size_t gap = addon->total_size;
size_t shadow_matches =
(addon->num_chunks != 0) ? (MAX_NUM_MATCHES_H10 + 128) : 0;
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(model) ||
BROTLI_IS_NULL(num_matches) || BROTLI_IS_NULL(matches)) {
return;
@ -790,15 +847,34 @@ void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m, size_t num_bytes,
size_t num_found_matches;
size_t cur_match_end;
size_t j;
int dict_id = 0;
if (params->dictionary.contextual.context_based) {
uint8_t p1 = pos >= 1 ?
ringbuffer[(size_t)(pos - 1) & ringbuffer_mask] : 0;
uint8_t p2 = pos >= 2 ?
ringbuffer[(size_t)(pos - 2) & ringbuffer_mask] : 0;
dict_id = params->dictionary.contextual.context_map[
BROTLI_CONTEXT(p1, p2, literal_context_lut)];
}
/* Ensure that we have enough free slots. */
BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
cur_match_pos + MAX_NUM_MATCHES_H10 + shadow_matches);
if (BROTLI_IS_OOM(m)) return;
num_found_matches = FindAllMatchesH10(&hasher->privat._H10,
&params->dictionary,
params->dictionary.contextual.dict[dict_id],
ringbuffer, ringbuffer_mask, pos, max_length,
max_distance, dictionary_start + gap, params,
&matches[cur_match_pos + shadow_matches]);
if (addon->num_chunks != 0) {
size_t cd_matches = LookupAllCompoundDictionaryMatches(addon,
ringbuffer, ringbuffer_mask, pos, 3, max_length,
dictionary_start, params->dist.max_distance,
&matches[cur_match_pos + shadow_matches - 64], 64);
MergeMatches(&matches[cur_match_pos],
&matches[cur_match_pos + shadow_matches - 64], cd_matches,
&matches[cur_match_pos + shadow_matches], num_found_matches);
num_found_matches += cd_matches;
}
cur_match_end = cur_match_pos + num_found_matches;
for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {
BROTLI_DCHECK(BackwardMatchLength(&matches[j]) <=

View File

@ -28,13 +28,11 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
const size_t random_heuristics_window_size =
LiteralSpreeLengthForSparseSearch(params);
size_t apply_random_heuristics = position + random_heuristics_window_size;
const size_t gap = 0;
const size_t gap = params->dictionary.compound.total_size;
/* Minimum score to accept a backward reference. */
const score_t kMinScore = BROTLI_SCORE_BASE + 100;
BROTLI_UNUSED(literal_context_lut);
FN(PrepareDistanceCache)(privat, dist_cache);
while (position + FN(HashTypeLength)() < pos_end) {
@ -43,13 +41,29 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
size_t dictionary_start = BROTLI_MIN(size_t,
position + position_offset, max_backward_limit);
HasherSearchResult sr;
int dict_id = 0;
uint8_t p1 = 0;
uint8_t p2 = 0;
if (params->dictionary.contextual.context_based) {
p1 = position >= 1 ?
ringbuffer[(size_t)(position - 1) & ringbuffer_mask] : 0;
p2 = position >= 2 ?
ringbuffer[(size_t)(position - 2) & ringbuffer_mask] : 0;
dict_id = params->dictionary.contextual.context_map[
BROTLI_CONTEXT(p1, p2, literal_context_lut)];
}
sr.len = 0;
sr.len_code_delta = 0;
sr.distance = 0;
sr.score = kMinScore;
FN(FindLongestMatch)(privat, &params->dictionary,
FN(FindLongestMatch)(privat, params->dictionary.contextual.dict[dict_id],
ringbuffer, ringbuffer_mask, dist_cache, position, max_length,
max_distance, dictionary_start + gap, params->dist.max_distance, &sr);
if (ENABLE_COMPOUND_DICTIONARY) {
LookupCompoundDictionaryMatch(&params->dictionary.compound, ringbuffer,
ringbuffer_mask, dist_cache, position, max_length,
dictionary_start, params->dist.max_distance, &sr);
}
if (sr.score > kMinScore) {
/* Found a match. Let's look for something even better ahead. */
int delayed_backward_references_in_row = 0;
@ -65,11 +79,23 @@ static BROTLI_NOINLINE void EXPORT_FN(CreateBackwardReferences)(
max_distance = BROTLI_MIN(size_t, position + 1, max_backward_limit);
dictionary_start = BROTLI_MIN(size_t,
position + 1 + position_offset, max_backward_limit);
if (params->dictionary.contextual.context_based) {
p2 = p1;
p1 = ringbuffer[position & ringbuffer_mask];
dict_id = params->dictionary.contextual.context_map[
BROTLI_CONTEXT(p1, p2, literal_context_lut)];
}
FN(FindLongestMatch)(privat,
&params->dictionary,
params->dictionary.contextual.dict[dict_id],
ringbuffer, ringbuffer_mask, dist_cache, position + 1, max_length,
max_distance, dictionary_start + gap, params->dist.max_distance,
&sr2);
if (ENABLE_COMPOUND_DICTIONARY) {
LookupCompoundDictionaryMatch(
&params->dictionary.compound, ringbuffer,
ringbuffer_mask, dist_cache, position + 1, max_length,
dictionary_start, params->dist.max_distance, &sr2);
}
if (sr2.score >= sr.score + cost_diff_lazy) {
/* Ok, let's just write one byte for now and start a match from the
next byte. */

200
c/enc/compound_dictionary.c Normal file
View File

@ -0,0 +1,200 @@
/* Copyright 2017 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
#include "./compound_dictionary.h"
#include "../common/platform.h"
#include <brotli/types.h>
#include "./memory.h"
#include "./quality.h"
static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m,
const uint8_t* source, size_t source_size, uint32_t bucket_bits,
uint32_t slot_bits, uint32_t hash_bits, uint16_t bucket_limit) {
/* Step 1: create "bloated" hasher. */
uint32_t num_slots = 1u << slot_bits;
uint32_t num_buckets = 1u << bucket_bits;
uint32_t hash_shift = 64u - bucket_bits;
uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
uint32_t slot_mask = num_slots - 1;
size_t alloc_size = (sizeof(uint32_t) << slot_bits) +
(sizeof(uint32_t) << slot_bits) +
(sizeof(uint16_t) << bucket_bits) +
(sizeof(uint32_t) << bucket_bits) +
(sizeof(uint32_t) * source_size);
uint8_t* flat = NULL;
PreparedDictionary* result = NULL;
uint16_t* num = NULL;
uint32_t* bucket_heads = NULL;
uint32_t* next_bucket = NULL;
uint32_t* slot_offsets = NULL;
uint16_t* heads = NULL;
uint32_t* items = NULL;
uint8_t* source_copy = NULL;
uint32_t i;
uint32_t* slot_size = NULL;
uint32_t* slot_limit = NULL;
uint32_t total_items = 0;
if (slot_bits > 16) return NULL;
if (slot_bits > bucket_bits) return NULL;
if (bucket_bits - slot_bits >= 16) return NULL;
flat = BROTLI_ALLOC(m, uint8_t, alloc_size);
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(flat)) return NULL;
slot_size = (uint32_t*)flat;
slot_limit = (uint32_t*)(&slot_size[num_slots]);
num = (uint16_t*)(&slot_limit[num_slots]);
bucket_heads = (uint32_t*)(&num[num_buckets]);
next_bucket = (uint32_t*)(&bucket_heads[num_buckets]);
memset(num, 0, num_buckets * sizeof(num[0]));
/* TODO: apply custom "store" order. */
for (i = 0; i + 7 < source_size; ++i) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(&source[i]) & hash_mask) *
kPreparedDictionaryHashMul64Long;
const uint32_t key = (uint32_t)(h >> hash_shift);
uint16_t count = num[key];
next_bucket[i] = (count == 0) ? ((uint32_t)(-1)) : bucket_heads[key];
bucket_heads[key] = i;
count++;
if (count > bucket_limit) count = bucket_limit;
num[key] = count;
}
/* Step 2: find slot limits. */
for (i = 0; i < num_slots; ++i) {
BROTLI_BOOL overflow = BROTLI_FALSE;
slot_limit[i] = bucket_limit;
while (BROTLI_TRUE) {
uint32_t limit = slot_limit[i];
size_t j;
uint32_t count = 0;
overflow = BROTLI_FALSE;
for (j = i; j < num_buckets; j += num_slots) {
uint32_t size = num[j];
/* Last chain may span behind 64K limit; overflow happens only if
we are about to use 0xFFFF+ as item offset. */
if (count >= 0xFFFF) {
overflow = BROTLI_TRUE;
break;
}
if (size > limit) size = limit;
count += size;
}
if (!overflow) {
slot_size[i] = count;
total_items += count;
break;
}
slot_limit[i]--;
}
}
/* Step 3: transfer data to "slim" hasher. */
alloc_size = sizeof(PreparedDictionary) + (sizeof(uint32_t) << slot_bits) +
(sizeof(uint16_t) << bucket_bits) + (sizeof(uint32_t) * total_items) +
source_size;
result = (PreparedDictionary*)BROTLI_ALLOC(m, uint8_t, alloc_size);
if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(result)) {
BROTLI_FREE(m, flat);
return NULL;
}
slot_offsets = (uint32_t*)(&result[1]);
heads = (uint16_t*)(&slot_offsets[num_slots]);
items = (uint32_t*)(&heads[num_buckets]);
source_copy = (uint8_t*)(&items[total_items]);
result->magic = kPreparedDictionaryMagic;
result->source_offset = total_items;
result->source_size = (uint32_t)source_size;
result->hash_bits = hash_bits;
result->bucket_bits = bucket_bits;
result->slot_bits = slot_bits;
total_items = 0;
for (i = 0; i < num_slots; ++i) {
slot_offsets[i] = total_items;
total_items += slot_size[i];
slot_size[i] = 0;
}
for (i = 0; i < num_buckets; ++i) {
uint32_t slot = i & slot_mask;
uint32_t count = num[i];
uint32_t pos;
size_t j;
size_t cursor = slot_size[slot];
if (count > slot_limit[slot]) count = slot_limit[slot];
if (count == 0) {
heads[i] = 0xFFFF;
continue;
}
heads[i] = (uint16_t)cursor;
cursor += slot_offsets[slot];
slot_size[slot] += count;
pos = bucket_heads[i];
for (j = 0; j < count; j++) {
items[cursor++] = pos;
pos = next_bucket[pos];
}
items[cursor - 1] |= 0x80000000;
}
BROTLI_FREE(m, flat);
memcpy(source_copy, source, source_size);
return result;
}
PreparedDictionary* CreatePreparedDictionary(MemoryManager* m,
const uint8_t* source, size_t source_size) {
uint32_t bucket_bits = 17;
uint32_t slot_bits = 7;
uint32_t hash_bits = 40;
uint16_t bucket_limit = 32;
size_t volume = 16u << bucket_bits;
/* Tune parameters to fit dictionary size. */
while (volume < source_size && bucket_bits < 22) {
bucket_bits++;
slot_bits++;
volume <<= 1;
}
return CreatePreparedDictionaryWithParams(m,
source, source_size, bucket_bits, slot_bits, hash_bits, bucket_limit);
}
void DestroyPreparedDictionary(MemoryManager* m,
PreparedDictionary* dictionary) {
if (!dictionary) return;
BROTLI_FREE(m, dictionary);
}
BROTLI_BOOL AttachPreparedDictionary(
CompoundDictionary* compound, const PreparedDictionary* dictionary) {
size_t length = 0;
size_t index = 0;
if (compound->num_chunks == SHARED_BROTLI_MAX_COMPOUND_DICTS) {
return BROTLI_FALSE;
}
if (!dictionary) return BROTLI_FALSE;
length = dictionary->source_size;
index = compound->num_chunks;
compound->total_size += length;
compound->chunks[index] = dictionary;
compound->chunk_offsets[index + 1] = compound->total_size;
{
uint32_t* slot_offsets = (uint32_t*)(&dictionary[1]);
uint16_t* heads = (uint16_t*)(&slot_offsets[1u << dictionary->slot_bits]);
uint32_t* items = (uint32_t*)(&heads[1u << dictionary->bucket_bits]);
compound->chunk_source[index] =
(const uint8_t*)(&items[dictionary->source_offset]);
}
compound->num_chunks++;
return BROTLI_TRUE;
}

View File

@ -0,0 +1,60 @@
/* Copyright 2017 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
#ifndef BROTLI_ENC_PREPARED_DICTIONARY_H_
#define BROTLI_ENC_PREPARED_DICTIONARY_H_
#include "../common/platform.h"
#include "../common/constants.h"
#include <brotli/shared_dictionary.h>
#include <brotli/types.h>
#include "./memory.h"
static const uint32_t kPreparedDictionaryMagic = 0xDEBCEDE0;
static const uint64_t kPreparedDictionaryHashMul64Long =
BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
typedef struct PreparedDictionary {
uint32_t magic;
uint32_t source_offset;
uint32_t source_size;
uint32_t hash_bits;
uint32_t bucket_bits;
uint32_t slot_bits;
/* --- Dynamic size members --- */
/* uint32_t slot_offsets[1 << slot_bits]; */
/* uint16_t heads[1 << bucket_bits]; */
/* uint32_t items[variable]; */
/* uint8_t source[source_size] */
} PreparedDictionary;
BROTLI_INTERNAL PreparedDictionary* CreatePreparedDictionary(MemoryManager* m,
const uint8_t* source, size_t source_size);
BROTLI_INTERNAL void DestroyPreparedDictionary(MemoryManager* m,
PreparedDictionary* dictionary);
typedef struct CompoundDictionary {
/* LZ77 prefix, compound dictionary */
size_t num_chunks;
size_t total_size;
/* Client instances. */
const PreparedDictionary* chunks[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
const uint8_t* chunk_source[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
size_t chunk_offsets[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
size_t num_prepared_instances_;
/* Owned instances. */
PreparedDictionary* prepared_instances_[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
} CompoundDictionary;
BROTLI_INTERNAL BROTLI_BOOL AttachPreparedDictionary(
CompoundDictionary* compound, const PreparedDictionary* dictionary);
#endif /* BROTLI_ENC_PREPARED_DICTIONARY */

View File

@ -21,6 +21,7 @@
#include "./brotli_bit_stream.h"
#include "./compress_fragment.h"
#include "./compress_fragment_two_pass.h"
#include "./dictionary_hash.h"
#include "./encoder_dict.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
@ -746,7 +747,7 @@ static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
params->stream_offset = 0;
params->size_hint = 0;
params->disable_literal_context_modeling = BROTLI_FALSE;
BrotliInitEncoderDictionary(&params->dictionary);
BrotliInitSharedEncoderDictionary(&params->dictionary);
params->dist.distance_postfix_bits = 0;
params->dist.num_direct_distance_codes = 0;
params->dist.alphabet_size_max =
@ -755,6 +756,11 @@ static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
params->dist.max_distance = BROTLI_MAX_DISTANCE;
}
static void BrotliEncoderCleanupParams(MemoryManager* m,
BrotliEncoderParams* params) {
BrotliCleanupSharedEncoderDictionary(m, &params->dictionary);
}
static void BrotliEncoderInitState(BrotliEncoderState* s) {
BrotliEncoderInitParams(&s->params);
s->input_pos_ = 0;
@ -825,6 +831,7 @@ static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
BROTLI_FREE(m, s->two_pass_arena_);
BROTLI_FREE(m, s->command_buf_);
BROTLI_FREE(m, s->literal_buf_);
BrotliEncoderCleanupParams(m, &s->params);
}
/* Deinitializes and frees BrotliEncoderState instance. */
@ -922,6 +929,8 @@ static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];
uint32_t distance_code = CommandRestoreDistanceCode(last_command,
&s->params.dist);
const CompoundDictionary* dict = &s->params.dictionary.compound;
size_t compound_dictionary_size = dict->total_size;
if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES ||
distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) {
if (cmd_dist <= max_distance) {
@ -932,6 +941,38 @@ static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
(*wrapped_last_processed_pos)++;
}
} else {
if ((cmd_dist - max_distance - 1) < compound_dictionary_size &&
last_copy_len < cmd_dist - max_distance) {
size_t address =
compound_dictionary_size - (size_t)(cmd_dist - max_distance) +
(size_t)last_copy_len;
size_t br_index = 0;
size_t br_offset;
const uint8_t* chunk;
size_t chunk_length;
while (address >= dict->chunk_offsets[br_index + 1]) br_index++;
br_offset = address - dict->chunk_offsets[br_index];
chunk = dict->chunk_source[br_index];
chunk_length =
dict->chunk_offsets[br_index + 1] - dict->chunk_offsets[br_index];
while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
chunk[br_offset]) {
last_command->copy_len_++;
(*bytes)--;
(*wrapped_last_processed_pos)++;
if (++br_offset == chunk_length) {
br_index++;
br_offset = 0;
if (br_index != dict->num_chunks) {
chunk = dict->chunk_source[br_index];
chunk_length = dict->chunk_offsets[br_index + 1] -
dict->chunk_offsets[br_index];
} else {
break;
}
}
}
}
}
/* The copy length is at most the metablock size, and thus expressible. */
GetLengthCode(last_command->insert_len_,
@ -969,6 +1010,7 @@ static BROTLI_BOOL EncodeData(
data = s->ringbuffer_.buffer_;
mask = s->ringbuffer_.mask_;
if (s->params.quality > s->params.dictionary.max_quality) return BROTLI_FALSE;
/* Adding more blocks after "last" block is forbidden. */
if (s->is_last_block_emitted_) return BROTLI_FALSE;
if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE;
@ -1421,6 +1463,7 @@ static BROTLI_BOOL BrotliCompressBufferQuality10(
*encoded_size = total_out_size;
DestroyHasher(m, hasher);
BROTLI_FREE(m, hasher);
BrotliEncoderCleanupParams(m, params);
BROTLI_FREE(m, params);
BrotliBootstrapFree(m, m);
return ok;
@ -1930,6 +1973,124 @@ uint32_t BrotliEncoderVersion(void) {
return BROTLI_VERSION;
}
BrotliEncoderPreparedDictionary* BrotliEncoderPrepareDictionary(
BrotliSharedDictionaryType type, size_t size, const uint8_t* data,
int quality,
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
ManagedDictionary* managed_dictionary = NULL;
if (type != BROTLI_SHARED_DICTIONARY_RAW &&
type != BROTLI_SHARED_DICTIONARY_SERIALIZED) {
return NULL;
}
managed_dictionary =
BrotliCreateManagedDictionary(alloc_func, free_func, opaque);
if (managed_dictionary == NULL) {
return NULL;
}
if (type == BROTLI_SHARED_DICTIONARY_RAW) {
managed_dictionary->dictionary = (uint32_t*)CreatePreparedDictionary(
&managed_dictionary->memory_manager_, data, size);
} else {
SharedEncoderDictionary* dict = (SharedEncoderDictionary*)BrotliAllocate(
&managed_dictionary->memory_manager_, sizeof(SharedEncoderDictionary));
managed_dictionary->dictionary = (uint32_t*)dict;
if (dict != NULL) {
BROTLI_BOOL ok = BrotliInitCustomSharedEncoderDictionary(
&managed_dictionary->memory_manager_, data, size, quality, dict);
if (!ok) {
BrotliFree(&managed_dictionary->memory_manager_, dict);
managed_dictionary->dictionary = NULL;
}
}
}
if (managed_dictionary->dictionary == NULL) {
BrotliDestroyManagedDictionary(managed_dictionary);
return NULL;
}
return (BrotliEncoderPreparedDictionary*)managed_dictionary;
}
void BrotliEncoderDestroyPreparedDictionary(
BrotliEncoderPreparedDictionary* dictionary) {
ManagedDictionary* dict = (ManagedDictionary*)dictionary;
if (!dictionary) return;
/* First field of dictionary structs. */
/* Only managed dictionaries are eligible for destruction by this method. */
if (dict->magic != kManagedDictionaryMagic) {
return;
}
if (dict->dictionary == NULL) {
/* This should never ever happen. */
} else if (*dict->dictionary == kPreparedDictionaryMagic) {
DestroyPreparedDictionary(
&dict->memory_manager_, (PreparedDictionary*)dict->dictionary);
} else if (*dict->dictionary == kSharedDictionaryMagic) {
BrotliCleanupSharedEncoderDictionary(&dict->memory_manager_,
(SharedEncoderDictionary*)dict->dictionary);
BrotliFree(&dict->memory_manager_, dict->dictionary);
} else {
/* This should never ever happen. */
}
dict->dictionary = NULL;
BrotliDestroyManagedDictionary(dict);
}
BROTLI_BOOL BrotliEncoderAttachPreparedDictionary(BrotliEncoderState* state,
const BrotliEncoderPreparedDictionary* dictionary) {
/* First field of dictionary structs */
const BrotliEncoderPreparedDictionary* dict = dictionary;
uint32_t magic = *((const uint32_t*)dict);
SharedEncoderDictionary* current = NULL;
if (magic == kManagedDictionaryMagic) {
/* Unwrap managed dictionary. */
ManagedDictionary* managed_dictionary = (ManagedDictionary*)dict;
magic = *managed_dictionary->dictionary;
dict = (BrotliEncoderPreparedDictionary*)managed_dictionary->dictionary;
}
current = &state->params.dictionary;
if (magic == kPreparedDictionaryMagic) {
const PreparedDictionary* prepared = (const PreparedDictionary*)dict;
if (!AttachPreparedDictionary(&current->compound, prepared)) {
return BROTLI_FALSE;
}
} else if (magic == kSharedDictionaryMagic) {
const SharedEncoderDictionary* attached =
(const SharedEncoderDictionary*)dict;
BROTLI_BOOL was_default = !current->contextual.context_based &&
current->contextual.num_dictionaries == 1 &&
current->contextual.dict[0]->hash_table_words ==
kStaticDictionaryHashWords &&
current->contextual.dict[0]->hash_table_lengths ==
kStaticDictionaryHashLengths;
BROTLI_BOOL new_default = !attached->contextual.context_based &&
attached->contextual.num_dictionaries == 1 &&
attached->contextual.dict[0]->hash_table_words ==
kStaticDictionaryHashWords &&
attached->contextual.dict[0]->hash_table_lengths ==
kStaticDictionaryHashLengths;
size_t i;
if (state->is_initialized_) return BROTLI_FALSE;
current->max_quality =
BROTLI_MIN(int, current->max_quality, attached->max_quality);
for (i = 0; i < attached->compound.num_chunks; i++) {
if (!AttachPreparedDictionary(&current->compound,
attached->compound.chunks[i])) {
return BROTLI_FALSE;
}
}
if (!new_default) {
if (!was_default) return BROTLI_FALSE;
/* Copy by value, but then set num_instances_ to 0 because their memory
is managed by attached, not by current */
current->contextual = attached->contextual;
current->contextual.num_instances_ = 0;
}
} else {
return BROTLI_FALSE;
}
return BROTLI_TRUE;
}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@ -6,16 +6,43 @@
#include "./encoder_dict.h"
#include <stdlib.h> /* malloc, free */
#include "../common/dictionary.h"
#include "../common/platform.h"
#include "../common/shared_dictionary_internal.h"
#include "../common/transform.h"
#include "./compound_dictionary.h"
#include "./dictionary_hash.h"
#include "./memory.h"
#include "./quality.h"
#include "./hash.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict) {
#define NUM_HASH_BITS 15u
#define NUM_HASH_BUCKETS (1u << NUM_HASH_BITS)
static void BrotliTrieInit(BrotliTrie* trie) {
trie->pool_capacity = 0;
trie->pool_size = 0;
trie->pool = 0;
/* Set up the root node */
trie->root.single = 0;
trie->root.len_ = 0;
trie->root.idx_ = 0;
trie->root.sub = 0;
}
static void BrotliTrieFree(MemoryManager* m, BrotliTrie* trie) {
BrotliFree(m, trie->pool);
}
/* Initializes to RFC 7932 static dictionary / transforms. */
static void InitEncoderDictionary(BrotliEncoderDictionary* dict) {
dict->words = BrotliGetDictionary();
dict->num_transforms = (uint32_t)BrotliGetTransforms()->num_transforms;
@ -26,6 +53,574 @@ void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict) {
dict->cutoffTransformsCount = kCutoffTransformsCount;
dict->cutoffTransforms = kCutoffTransforms;
dict->parent = 0;
dict->hash_table_data_words_ = 0;
dict->hash_table_data_lengths_ = 0;
dict->buckets_alloc_size_ = 0;
dict->buckets_data_ = 0;
dict->dict_words_alloc_size_ = 0;
dict->dict_words_data_ = 0;
dict->words_instance_ = 0;
dict->has_words_heavy = BROTLI_FALSE;
BrotliTrieInit(&dict->trie);
}
static void BrotliDestroyEncoderDictionary(MemoryManager* m,
BrotliEncoderDictionary* dict) {
BrotliFree(m, dict->hash_table_data_words_);
BrotliFree(m, dict->hash_table_data_lengths_);
BrotliFree(m, dict->buckets_data_);
BrotliFree(m, dict->dict_words_data_);
BrotliFree(m, dict->words_instance_);
BrotliTrieFree(m, &dict->trie);
}
/* Word length must be at least 4 bytes */
static uint32_t Hash(const uint8_t* data, int bits) {
uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
return h >> (32 - bits);
}
/* Theoretical max possible word size after transform */
#define kTransformedBufferSize \
(256 + 256 + SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH)
/* To be safe buffer must have at least kTransformedBufferSize */
static void TransformedDictionaryWord(uint32_t word_idx, int len, int transform,
const BrotliTransforms* transforms,
const BrotliEncoderDictionary* dict,
uint8_t* buffer, size_t* size) {
const uint8_t* dict_word = &dict->words->data[
dict->words->offsets_by_length[len] + (uint32_t)len * word_idx];
*size = (size_t)BrotliTransformDictionaryWord(buffer, dict_word, len,
transforms, transform);
}
static DictWord MakeDictWord(uint8_t len, uint8_t transform, uint16_t idx) {
DictWord result;
result.len = len;
result.transform = transform;
result.idx = idx;
return result;
}
static uint32_t BrotliTrieAlloc(MemoryManager* m, size_t num, BrotliTrie* trie,
BrotliTrieNode** keep) {
uint32_t result;
uint32_t keep_index = 0;
if (keep && *keep != &trie->root) {
/* Optional node to keep, since address may change after re-allocating */
keep_index = (uint32_t)(*keep - trie->pool);
}
if (trie->pool_size == 0) {
/* Have a dummy node in the front. We do not want the result to be 0, it
must be at least 1, 0 represents "null pointer" */
trie->pool_size = 1;
}
BROTLI_ENSURE_CAPACITY(m, BrotliTrieNode, trie->pool, trie->pool_capacity,
trie->pool_size + num);
if (BROTLI_IS_OOM(m)) return 0;
/* Init the new nodes to empty */
memset(trie->pool + trie->pool_size, 0, sizeof(*trie->pool) * num);
result = (uint32_t)trie->pool_size;
trie->pool_size += num;
if (keep && *keep != &trie->root) {
*keep = trie->pool + keep_index;
}
return result;
}
/**
* len and idx: payload for last node
* word, size: the string
* index: position in the string
*/
static BROTLI_BOOL BrotliTrieNodeAdd(MemoryManager* m, uint8_t len,
uint32_t idx, const uint8_t* word, size_t size, int index,
BrotliTrieNode* node, BrotliTrie* trie) {
BrotliTrieNode* child = 0;
uint8_t c;
if ((size_t)index == size) {
if (!node->len_ || idx < node->idx_) {
node->len_ = len;
node->idx_ = idx;
}
return BROTLI_TRUE;
}
c = word[index];
if (node->single && c != node->c) {
BrotliTrieNode old = trie->pool[node->sub];
uint32_t new_nodes = BrotliTrieAlloc(m, 32, trie, &node);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
node->single = 0;
node->sub = new_nodes;
trie->pool[node->sub + (node->c >> 4)].sub = new_nodes + 16;
trie->pool[trie->pool[node->sub + (node->c >> 4)].sub + (node->c & 15)] =
old;
}
if (!node->sub) {
uint32_t new_node = BrotliTrieAlloc(m, 1, trie, &node);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
node->single = 1;
node->c = c;
node->sub = new_node;
}
if (node->single) {
child = &trie->pool[node->sub];
} else {
if (!trie->pool[node->sub + (c >> 4)].sub) {
uint32_t new_nodes = BrotliTrieAlloc(m, 16, trie, &node);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
trie->pool[node->sub + (c >> 4)].sub = new_nodes;
}
child = &trie->pool[trie->pool[node->sub + (c >> 4)].sub + (c & 15)];
}
return BrotliTrieNodeAdd(m, len, idx, word, size, index + 1, child, trie);
}
static BROTLI_BOOL BrotliTrieAdd(MemoryManager* m, uint8_t len, uint32_t idx,
const uint8_t* word, size_t size, BrotliTrie* trie) {
return BrotliTrieNodeAdd(m, len, idx, word, size, 0, &trie->root, trie);
}
const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie,
const BrotliTrieNode* node, uint8_t c) {
BrotliTrieNode* temp_node;
if (node->single) {
if (node->c == c) return &trie->pool[node->sub];
return 0;
}
if (!node->sub) return 0;
temp_node = &trie->pool[node->sub + (c >> 4)];
if (!temp_node->sub) return 0;
return &trie->pool[temp_node->sub + (c & 15)];
}
static const BrotliTrieNode* BrotliTrieFind(const BrotliTrie* trie,
const uint8_t* word, size_t size) {
const BrotliTrieNode* node = &trie->root;
size_t i;
for (i = 0; i < size; i++) {
node = BrotliTrieSub(trie, node, word[i]);
if (!node) return 0;
}
return node;
}
static BROTLI_BOOL BuildDictionaryLut(MemoryManager* m,
const BrotliTransforms* transforms,
BrotliEncoderDictionary* dict) {
uint32_t i;
DictWord* dict_words;
uint16_t* buckets;
DictWord** words_by_hash;
size_t* words_by_hash_size;
size_t* words_by_hash_capacity;
BrotliTrie dedup;
uint8_t word[kTransformedBufferSize];
size_t word_size;
size_t total = 0;
uint8_t l;
uint16_t idx;
BrotliTrieInit(&dedup);
words_by_hash = (DictWord**)BrotliAllocate(m,
sizeof(*words_by_hash) * NUM_HASH_BUCKETS);
words_by_hash_size = (size_t*)BrotliAllocate(m,
sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS);
words_by_hash_capacity = (size_t*)BrotliAllocate(m,
sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
memset(words_by_hash, 0, sizeof(*words_by_hash) * NUM_HASH_BUCKETS);
memset(words_by_hash_size, 0, sizeof(*words_by_hash_size) * NUM_HASH_BUCKETS);
memset(words_by_hash_capacity, 0,
sizeof(*words_by_hash_capacity) * NUM_HASH_BUCKETS);
if (transforms->num_transforms > 0) {
for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) {
uint16_t n = dict->words->size_bits_by_length[l] ?
(uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u;
for (idx = 0; idx < n; ++idx) {
uint32_t key;
/* First transform (usually identity) */
TransformedDictionaryWord(idx, l, 0, transforms, dict, word,
&word_size);
/* Cannot hash words smaller than 4 bytes */
if (word_size < 4) {
/* Break instead of continue, all next words of this length will have
same length after transform */
break;
}
if (!BrotliTrieAdd(m, 0, idx, word, word_size, &dedup)) {
return BROTLI_FALSE;
}
key = Hash(word, NUM_HASH_BITS);
BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
words_by_hash_capacity[key], words_by_hash_size[key],
MakeDictWord(l, 0, idx));
++total;
}
}
}
/* These LUT transforms only supported if no custom transforms. This is
ok, we will use the heavy trie instead. */
if (transforms == BrotliGetTransforms()) {
for (l = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
l <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; ++l) {
uint16_t n = dict->words->size_bits_by_length[l] ?
(uint16_t)(1 << dict->words->size_bits_by_length[l]) : 0u;
for (idx = 0; idx < n; ++idx) {
int k;
BROTLI_BOOL is_ascii = BROTLI_TRUE;
size_t offset = dict->words->offsets_by_length[l] + (size_t)l * idx;
const uint8_t* data = &dict->words->data[offset];
for (k = 0; k < l; ++k) {
if (data[k] >= 128) is_ascii = BROTLI_FALSE;
}
if (data[0] < 128) {
int transform = 9; /* {empty, uppercase first, empty} */
uint32_t ix = idx + (uint32_t)transform * n;
const BrotliTrieNode* it;
TransformedDictionaryWord(idx, l, transform, transforms,
dict, word, &word_size);
it = BrotliTrieFind(&dedup, word, word_size);
if (!it || it->idx_ > ix) {
uint32_t key = Hash(word, NUM_HASH_BITS);
if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) {
return BROTLI_FALSE;
}
BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
words_by_hash_capacity[key], words_by_hash_size[key],
MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_FIRST, idx));
++total;
}
}
if (is_ascii) {
int transform = 44; /* {empty, uppercase all, empty} */
uint32_t ix = idx + (uint32_t)transform * n;
const BrotliTrieNode* it;
TransformedDictionaryWord(idx, l, transform, transforms,
dict, word, &word_size);
it = BrotliTrieFind(&dedup, word, word_size);
if (!it || it->idx_ > ix) {
uint32_t key = Hash(word, NUM_HASH_BITS);
if (!BrotliTrieAdd(m, 0, ix, word, word_size, &dedup)) {
return BROTLI_FALSE;
}
BROTLI_ENSURE_CAPACITY_APPEND(m, DictWord, words_by_hash[key],
words_by_hash_capacity[key], words_by_hash_size[key],
MakeDictWord(l, BROTLI_TRANSFORM_UPPERCASE_ALL, idx));
++total;
}
}
}
}
}
dict_words = (DictWord*)BrotliAllocate(m,
sizeof(*dict->dict_words) * (total + 1));
buckets = (uint16_t*)BrotliAllocate(m,
sizeof(*dict->buckets) * NUM_HASH_BUCKETS);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
dict->dict_words_alloc_size_ = total + 1;
dict->dict_words = dict->dict_words_data_ = dict_words;
dict->buckets_alloc_size_ = NUM_HASH_BUCKETS;
dict->buckets = dict->buckets_data_ = buckets;
/* Unused; makes offsets start from 1. */
dict_words[0] = MakeDictWord(0, 0, 0);
total = 1;
for (i = 0; i < NUM_HASH_BUCKETS; ++i) {
size_t num_words = words_by_hash_size[i];
if (num_words > 0) {
buckets[i] = (uint16_t)(total);
memcpy(&dict_words[total], &words_by_hash[i][0],
sizeof(dict_words[0]) * num_words);
total += num_words;
dict_words[total - 1].len |= 0x80;
} else {
buckets[i] = 0;
}
}
for (i = 0; i < NUM_HASH_BUCKETS; ++i) {
BrotliFree(m, words_by_hash[i]);
}
BrotliFree(m, words_by_hash);
BrotliFree(m, words_by_hash_size);
BrotliFree(m, words_by_hash_capacity);
BrotliTrieFree(m, &dedup);
return BROTLI_TRUE;
}
static void BuildDictionaryHashTable(uint16_t* hash_table_words,
uint8_t* hash_table_lengths, const BrotliDictionary* dict) {
int j, len;
/* The order of the loops is such that in case of collision, words with
shorter length are preferred, and in case of same length, words with
smaller index. There is only a single word per bucket. */
/* TODO: consider adding optional user-supplied frequency_map to use
for preferred words instead, this can make the encoder better for
quality 9 and below without affecting the decoder */
memset(hash_table_words, 0, sizeof(kStaticDictionaryHashWords));
memset(hash_table_lengths, 0, sizeof(kStaticDictionaryHashLengths));
for (len = SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH;
len >= SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH; --len) {
const size_t num_words = dict->size_bits_by_length[len] ?
(1u << dict->size_bits_by_length[len]) : 0;
for (j = (int)num_words - 1; j >= 0; --j) {
size_t offset = dict->offsets_by_length[len] +
(size_t)len * (size_t)j;
const uint8_t* word = &dict->data[offset];
const uint32_t key = Hash(word, 14);
int idx = (int)(key << 1) + (len < 8 ? 1 : 0);
BROTLI_DCHECK(idx < (int)NUM_HASH_BUCKETS);
hash_table_words[idx] = (uint16_t)j;
hash_table_lengths[idx] = (uint8_t)len;
}
}
}
static BROTLI_BOOL GenerateWordsHeavy(MemoryManager* m,
const BrotliTransforms* transforms,
BrotliEncoderDictionary* dict) {
int i, j, l;
for (j = (int)transforms->num_transforms - 1; j >= 0 ; --j) {
for (l = 0; l < 32; l++) {
int num = (int)((1u << dict->words->size_bits_by_length[l]) & ~1u);
for (i = 0; i < num; i++) {
uint8_t transformed[kTransformedBufferSize];
size_t size;
TransformedDictionaryWord(
(uint32_t)i, l, j, transforms, dict, transformed, &size);
if (size < 4) continue;
if (!BrotliTrieAdd(m, (uint8_t)l, (uint32_t)(i + num * j),
transformed, size, &dict->trie)) {
return BROTLI_FALSE;
}
}
}
}
return BROTLI_TRUE;
}
/* Computes cutoffTransformsCount (in count) and cutoffTransforms (in data) for
the custom transforms, where possible within the limits of the
cutoffTransforms encoding. The fast encoder uses this to do fast lookup for
transforms that remove the N last characters (OmitLast). */
static void ComputeCutoffTransforms(
const BrotliTransforms* transforms,
uint32_t* count, uint64_t* data) {
int i;
/* The encoding in a 64-bit integer of transform N in the data is: (N << 2) +
((cutoffTransforms >> (N * 6)) & 0x3F), so for example the identity
transform code must be 0-63, for N=1 the transform code must be 4-67, ...,
for N=9 it must be 36-99.
TODO: consider a simple flexible uint8_t[10] instead of the uint64_t
for the cutoff transforms, so that shared dictionaries can have the
OmitLast transforms anywhere without loss. */
*count = 0;
*data = 0;
for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) {
int idx = transforms->cutOffTransforms[i];
if (idx == -1) break; /* Not found */
if (idx < (i << 2)) break; /* Too small for the encoding */
if (idx >= (i << 2) + 64) break; /* Too large for the encoding */
(*count)++;
*data |= (uint64_t)(((uint64_t)idx -
((uint64_t)i << 2u)) << ((uint64_t)i * 6u));
}
}
static BROTLI_BOOL ComputeDictionary(MemoryManager* m, int quality,
const BrotliTransforms* transforms,
BrotliEncoderDictionary* current) {
int default_words = current->words == BrotliGetDictionary();
int default_transforms = transforms == BrotliGetTransforms();
if (default_words && default_transforms) {
/* hashes are already set to Brotli defaults */
return BROTLI_TRUE;
}
current->hash_table_data_words_ = (uint16_t*)BrotliAllocate(
m, sizeof(kStaticDictionaryHashWords));
current->hash_table_data_lengths_ = (uint8_t*)BrotliAllocate(
m, sizeof(kStaticDictionaryHashLengths));
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
current->hash_table_words = current->hash_table_data_words_;
current->hash_table_lengths = current->hash_table_data_lengths_;
BuildDictionaryHashTable(current->hash_table_data_words_,
current->hash_table_data_lengths_, current->words);
ComputeCutoffTransforms(transforms,
&current->cutoffTransformsCount, &current->cutoffTransforms);
/* Only compute the data for slow encoder if the requested quality is high
enough to need it */
if (quality >= ZOPFLIFICATION_QUALITY) {
if (!BuildDictionaryLut(m, transforms, current)) return BROTLI_FALSE;
/* For the built-in Brotli transforms, there is a hard-coded function to
handle all transforms, but for custom transforms, we use the following
large hammer instead */
current->has_words_heavy = !default_transforms;
if (current->has_words_heavy) {
if (!GenerateWordsHeavy(m, transforms, current)) return BROTLI_FALSE;
}
}
return BROTLI_TRUE;
}
void BrotliInitSharedEncoderDictionary(SharedEncoderDictionary* dict) {
dict->magic = kSharedDictionaryMagic;
dict->compound.num_chunks = 0;
dict->compound.total_size = 0;
dict->compound.chunk_offsets[0] = 0;
dict->compound.num_prepared_instances_ = 0;
dict->contextual.context_based = 0;
dict->contextual.num_dictionaries = 1;
dict->contextual.instances_ = 0;
dict->contextual.num_instances_ = 1; /* The instance_ field */
dict->contextual.dict[0] = &dict->contextual.instance_;
InitEncoderDictionary(&dict->contextual.instance_);
dict->contextual.instance_.parent = &dict->contextual;
dict->max_quality = BROTLI_MAX_QUALITY;
}
/* TODO: make sure that tooling will warn user if not all the cutoff
transforms are available (for low-quality encoder). */
static BROTLI_BOOL InitCustomSharedEncoderDictionary(
MemoryManager* m, const BrotliSharedDictionary* decoded_dict,
int quality, SharedEncoderDictionary* dict) {
ContextualEncoderDictionary* contextual;
CompoundDictionary* compound;
BrotliEncoderDictionary* instances;
int i;
BrotliInitSharedEncoderDictionary(dict);
contextual = &dict->contextual;
compound = &dict->compound;
for (i = 0; i < (int)decoded_dict->num_prefix; i++) {
PreparedDictionary* prepared = CreatePreparedDictionary(m,
decoded_dict->prefix[i], decoded_dict->prefix_size[i]);
AttachPreparedDictionary(compound, prepared);
/* remember for cleanup */
compound->prepared_instances_[
compound->num_prepared_instances_++] = prepared;
}
dict->max_quality = quality;
contextual->context_based = decoded_dict->context_based;
if (decoded_dict->context_based) {
memcpy(contextual->context_map, decoded_dict->context_map,
SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS);
}
contextual->num_dictionaries = decoded_dict->num_dictionaries;
contextual->num_instances_ = decoded_dict->num_dictionaries;
if (contextual->num_instances_ == 1) {
instances = &contextual->instance_;
} else {
contextual->instances_ = (BrotliEncoderDictionary*)
BrotliAllocate(m, sizeof(*contextual->instances_) *
contextual->num_instances_);
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
instances = contextual->instances_;
}
for (i = 0; i < (int)contextual->num_instances_; i++) {
BrotliEncoderDictionary* current = &instances[i];
InitEncoderDictionary(current);
current->parent = &dict->contextual;
if (decoded_dict->words[i] == BrotliGetDictionary()) {
current->words = BrotliGetDictionary();
} else {
current->words_instance_ = (BrotliDictionary*)BrotliAllocate(
m, sizeof(BrotliDictionary));
if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
*current->words_instance_ = *decoded_dict->words[i];
current->words = current->words_instance_;
}
current->num_transforms =
(uint32_t)decoded_dict->transforms[i]->num_transforms;
if (!ComputeDictionary(
m, quality, decoded_dict->transforms[i], current)) {
return BROTLI_FALSE;
}
contextual->dict[i] = current;
}
return BROTLI_TRUE; /* success */
}
BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary(
MemoryManager* m, const uint8_t* encoded_dict, size_t size,
int quality, SharedEncoderDictionary* dict) {
BROTLI_BOOL success = BROTLI_FALSE;
BrotliSharedDictionary* decoded_dict = BrotliSharedDictionaryCreateInstance(
m->alloc_func, m->free_func, m->opaque);
if (!decoded_dict) { /* OOM */
return BROTLI_FALSE;
}
success = BrotliSharedDictionaryAttach(
decoded_dict, BROTLI_SHARED_DICTIONARY_SERIALIZED, size, encoded_dict);
if (success) {
success = InitCustomSharedEncoderDictionary(m,
decoded_dict, quality, dict);
}
BrotliSharedDictionaryDestroyInstance(decoded_dict);
return success;
}
void BrotliCleanupSharedEncoderDictionary(MemoryManager* m,
SharedEncoderDictionary* dict) {
size_t i;
for (i = 0; i < dict->compound.num_prepared_instances_; i++) {
DestroyPreparedDictionary(m,
(PreparedDictionary*)dict->compound.prepared_instances_[i]);
}
if (dict->contextual.num_instances_ == 1) {
BrotliDestroyEncoderDictionary(m, &dict->contextual.instance_);
} else if (dict->contextual.num_instances_ > 1) {
for (i = 0; i < dict->contextual.num_instances_; i++) {
BrotliDestroyEncoderDictionary(m, &dict->contextual.instances_[i]);
}
BrotliFree(m, dict->contextual.instances_);
}
}
ManagedDictionary* BrotliCreateManagedDictionary(
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
ManagedDictionary* result = (ManagedDictionary*)BrotliBootstrapAlloc(
sizeof(ManagedDictionary), alloc_func, free_func, opaque);
if (result == NULL) return NULL;
result->magic = kManagedDictionaryMagic;
BrotliInitMemoryManager(
&result->memory_manager_, alloc_func, free_func, opaque);
result->dictionary = NULL;
return result;
}
void BrotliDestroyManagedDictionary(ManagedDictionary* dictionary) {
if (!dictionary) return;
BrotliBootstrapFree(dictionary, &dictionary->memory_manager_);
}
#if defined(__cplusplus) || defined(c_plusplus)

View File

@ -9,13 +9,50 @@
#include "../common/dictionary.h"
#include "../common/platform.h"
#include <brotli/shared_dictionary.h>
#include <brotli/types.h>
#include "./compound_dictionary.h"
#include "./memory.h"
#include "./static_dict_lut.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
/*
Dictionary hierarchy for Encoder:
-SharedEncoderDictionary
--CompoundDictionary
---PreparedDictionary [up to 15x]
= prefix dictionary with precomputed hashes
--ContextualEncoderDictionary
---BrotliEncoderDictionary [up to 64x]
= for each context, precomputed static dictionary with words + transforms
Dictionary hiearchy from common: similar, but without precomputed hashes
-BrotliSharedDictionary
--BrotliDictionary [up to 64x]
--BrotliTransforms [up to 64x]
--const uint8_t* prefix [up to 15x]: compound dictionaries
*/
typedef struct BrotliTrieNode {
uint8_t single; /* if 1, sub is a single node for c instead of 256 */
uint8_t c;
uint8_t len_; /* untransformed length */
uint32_t idx_; /* word index + num words * transform index */
uint32_t sub; /* index of sub node(s) in the pool */
} BrotliTrieNode;
typedef struct BrotliTrie {
BrotliTrieNode* pool;
size_t pool_capacity;
size_t pool_size;
BrotliTrieNode root;
} BrotliTrie;
BROTLI_INTERNAL const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie,
const BrotliTrieNode* node, uint8_t c);
/* Dictionary data (words and transforms) for 1 possible context */
typedef struct BrotliEncoderDictionary {
const BrotliDictionary* words;
@ -32,9 +69,83 @@ typedef struct BrotliEncoderDictionary {
/* from static_dict_lut.h, for slow encoder */
const uint16_t* buckets;
const DictWord* dict_words;
/* Heavy version, for use by slow encoder when there are custom transforms.
Contains every possible transformed dictionary word in a trie. It encodes
about as fast as the non-heavy encoder but consumes a lot of memory and
takes time to build. */
BrotliTrie trie;
BROTLI_BOOL has_words_heavy;
/* Reference to other dictionaries. */
const struct ContextualEncoderDictionary* parent;
/* Allocated memory, used only when not using the Brotli defaults */
uint16_t* hash_table_data_words_;
uint8_t* hash_table_data_lengths_;
size_t buckets_alloc_size_;
uint16_t* buckets_data_;
size_t dict_words_alloc_size_;
DictWord* dict_words_data_;
BrotliDictionary* words_instance_;
} BrotliEncoderDictionary;
BROTLI_INTERNAL void BrotliInitEncoderDictionary(BrotliEncoderDictionary* dict);
/* Dictionary data for all 64 contexts */
typedef struct ContextualEncoderDictionary {
BROTLI_BOOL context_based;
uint8_t num_dictionaries;
uint8_t context_map[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
const BrotliEncoderDictionary* dict[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
/* If num_instances_ is 1, instance_ is used, else dynamic allocation with
instances_ is used. */
size_t num_instances_;
BrotliEncoderDictionary instance_;
BrotliEncoderDictionary* instances_;
} ContextualEncoderDictionary;
static const uint32_t kSharedDictionaryMagic = 0xDEBCEDE1;
static const uint32_t kManagedDictionaryMagic = 0xDEBCEDE2;
typedef struct SharedEncoderDictionary {
/* Magic value to distinguish this struct from PreparedDictionary for
certain external usages. */
uint32_t magic;
/* LZ77 prefix, compound dictionary */
CompoundDictionary compound;
/* Custom static dictionary (optionally context-based) */
ContextualEncoderDictionary contextual;
/* The maximum quality the dictionary was computed for */
int max_quality;
} SharedEncoderDictionary;
typedef struct ManagedDictionary {
uint32_t magic;
MemoryManager memory_manager_;
uint32_t* dictionary;
} ManagedDictionary;
/* Initializes to the brotli built-in dictionary */
BROTLI_INTERNAL void BrotliInitSharedEncoderDictionary(
SharedEncoderDictionary* dict);
/* Initializes to shared dictionary that will be parsed from
encoded_dict. Requires that you keep the encoded_dict buffer
around, parts of data will point to it. */
BROTLI_INTERNAL BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary(
MemoryManager* m, const uint8_t* encoded_dict, size_t size,
int quality, SharedEncoderDictionary* dict);
BROTLI_INTERNAL void BrotliCleanupSharedEncoderDictionary(
MemoryManager* m, SharedEncoderDictionary* dict);
BROTLI_INTERNAL ManagedDictionary* BrotliCreateManagedDictionary(
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
BROTLI_INTERNAL void BrotliDestroyManagedDictionary(
ManagedDictionary* dictionary);
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */

View File

@ -504,6 +504,206 @@ static BROTLI_INLINE void InitOrStitchToPreviousBlock(
}
}
/* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
static BROTLI_INLINE void FindCompoundDictionaryMatch(
const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
const size_t cur_ix, const size_t max_length, const size_t distance_offset,
const size_t max_distance, HasherSearchResult* BROTLI_RESTRICT out) {
const uint32_t source_offset = self->source_offset;
const uint32_t source_size = self->source_size;
const size_t boundary = distance_offset - source_size;
const uint32_t hash_bits = self->hash_bits;
const uint32_t bucket_bits = self->bucket_bits;
const uint32_t slot_bits = self->slot_bits;
const uint32_t hash_shift = 64u - bucket_bits;
const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
const uint8_t* source = (uint8_t*)(&items[source_offset]);
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
score_t best_score = out->score;
size_t best_len = out->len;
size_t i;
const uint64_t h =
(BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
kPreparedDictionaryHashMul64Long;
const uint32_t key = (uint32_t)(h >> hash_shift);
const uint32_t slot = key & slot_mask;
const uint32_t head = heads[key];
const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
uint32_t item = (head == 0xFFFF) ? 1 : 0;
for (i = 0; i < 4; ++i) {
const size_t distance = (size_t)distance_cache[i];
size_t offset;
size_t limit;
size_t len;
if (distance <= boundary || distance > distance_offset) continue;
offset = distance_offset - distance;
limit = source_size - offset;
limit = limit > max_length ? max_length : limit;
len = FindMatchLengthWithLimit(&source[offset], &data[cur_ix_masked],
limit);
if (len >= 2) {
score_t score = BackwardReferenceScoreUsingLastDistance(len);
if (best_score < score) {
if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
if (best_score < score) {
best_score = score;
if (len > best_len) best_len = len;
out->len = len;
out->len_code_delta = 0;
out->distance = distance;
out->score = best_score;
}
}
}
}
while (item == 0) {
size_t offset;
size_t distance;
size_t limit;
item = *chain;
chain++;
offset = item & 0x7FFFFFFF;
item &= 0x80000000;
distance = distance_offset - offset;
limit = source_size - offset;
limit = (limit > max_length) ? max_length : limit;
if (distance > max_distance) continue;
if (cur_ix_masked + best_len > ring_buffer_mask ||
best_len >= limit ||
data[cur_ix_masked + best_len] != source[offset + best_len]) {
continue;
}
{
const size_t len = FindMatchLengthWithLimit(&source[offset],
&data[cur_ix_masked],
limit);
if (len >= 4) {
score_t score = BackwardReferenceScore(len, distance);
if (best_score < score) {
best_score = score;
best_len = len;
out->len = best_len;
out->len_code_delta = 0;
out->distance = distance;
out->score = best_score;
}
}
}
}
}
/* NB: when seamless dictionary-ring-buffer copies are implemented, don't forget
to add proper guards for non-zero-BROTLI_PARAM_STREAM_OFFSET. */
static BROTLI_INLINE size_t FindAllCompoundDictionaryMatches(
const PreparedDictionary* self, const uint8_t* BROTLI_RESTRICT data,
const size_t ring_buffer_mask, const size_t cur_ix, const size_t min_length,
const size_t max_length, const size_t distance_offset,
const size_t max_distance, BackwardMatch* matches, size_t match_limit) {
const uint32_t source_offset = self->source_offset;
const uint32_t source_size = self->source_size;
const uint32_t hash_bits = self->hash_bits;
const uint32_t bucket_bits = self->bucket_bits;
const uint32_t slot_bits = self->slot_bits;
const uint32_t hash_shift = 64u - bucket_bits;
const uint32_t slot_mask = (~((uint32_t)0U)) >> (32 - slot_bits);
const uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
const uint32_t* slot_offsets = (uint32_t*)(&self[1]);
const uint16_t* heads = (uint16_t*)(&slot_offsets[1u << slot_bits]);
const uint32_t* items = (uint32_t*)(&heads[1u << bucket_bits]);
const uint8_t* source = (uint8_t*)(&items[source_offset]);
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
size_t best_len = min_length;
const uint64_t h =
(BROTLI_UNALIGNED_LOAD64LE(&data[cur_ix_masked]) & hash_mask) *
kPreparedDictionaryHashMul64Long;
const uint32_t key = (uint32_t)(h >> hash_shift);
const uint32_t slot = key & slot_mask;
const uint32_t head = heads[key];
const uint32_t* BROTLI_RESTRICT chain = &items[slot_offsets[slot] + head];
uint32_t item = (head == 0xFFFF) ? 1 : 0;
size_t found = 0;
while (item == 0) {
size_t offset;
size_t distance;
size_t limit;
size_t len;
item = *chain;
chain++;
offset = item & 0x7FFFFFFF;
item &= 0x80000000;
distance = distance_offset - offset;
limit = source_size - offset;
limit = (limit > max_length) ? max_length : limit;
if (distance > max_distance) continue;
if (cur_ix_masked + best_len > ring_buffer_mask ||
best_len >= limit ||
data[cur_ix_masked + best_len] != source[offset + best_len]) {
continue;
}
len = FindMatchLengthWithLimit(
&source[offset], &data[cur_ix_masked], limit);
if (len > best_len) {
best_len = len;
InitBackwardMatch(matches++, distance, len);
found++;
if (found == match_limit) break;
}
}
return found;
}
static BROTLI_INLINE void LookupCompoundDictionaryMatch(
const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
const size_t ring_buffer_mask, const int* BROTLI_RESTRICT distance_cache,
const size_t cur_ix, const size_t max_length,
const size_t max_ring_buffer_distance, const size_t max_distance,
HasherSearchResult* sr) {
size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
size_t d;
for (d = 0; d < addon->num_chunks; ++d) {
/* Only one prepared dictionary type is currently supported. */
FindCompoundDictionaryMatch(
(const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
distance_cache, cur_ix, max_length,
base_offset - addon->chunk_offsets[d], max_distance, sr);
}
}
static BROTLI_INLINE size_t LookupAllCompoundDictionaryMatches(
const CompoundDictionary* addon, const uint8_t* BROTLI_RESTRICT data,
const size_t ring_buffer_mask, const size_t cur_ix, size_t min_length,
const size_t max_length, const size_t max_ring_buffer_distance,
const size_t max_distance, BackwardMatch* matches,
size_t match_limit) {
size_t base_offset = max_ring_buffer_distance + 1 + addon->total_size - 1;
size_t d;
size_t total_found = 0;
for (d = 0; d < addon->num_chunks; ++d) {
/* Only one prepared dictionary type is currently supported. */
total_found += FindAllCompoundDictionaryMatches(
(const PreparedDictionary*)addon->chunks[d], data, ring_buffer_mask,
cur_ix, min_length, max_length, base_offset - addon->chunk_offsets[d],
max_distance, matches + total_found, match_limit - total_found);
if (total_found == match_limit) break;
if (total_found > 0) {
min_length = BackwardMatchLength(&matches[total_found - 1]);
}
}
return total_found;
}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@ -40,7 +40,8 @@ typedef struct BrotliEncoderParams {
BROTLI_BOOL large_window;
BrotliHasherParams hasher;
BrotliDistanceParams dist;
BrotliEncoderDictionary dictionary;
/* TODO(eustas): rename to BrotliShared... */
SharedEncoderDictionary dictionary;
} BrotliEncoderParams;
#endif /* BROTLI_ENC_PARAMS_H_ */

View File

@ -74,10 +74,25 @@ static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
}
}
BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
/* Finds matches for a single static dictionary */
static BROTLI_BOOL BrotliFindAllStaticDictionaryMatchesFor(
const BrotliEncoderDictionary* dictionary, const uint8_t* data,
size_t min_length, size_t max_length, uint32_t* matches) {
BROTLI_BOOL has_found_match = BROTLI_FALSE;
if (dictionary->has_words_heavy) {
const BrotliTrieNode* node = &dictionary->trie.root;
size_t l = 0;
while (node && l < max_length) {
uint8_t c;
if (l >= min_length && node->len_) {
AddMatch(node->idx_, l, node->len_, matches);
has_found_match = BROTLI_TRUE;
}
c = data[l++];
node = BrotliTrieSub(&dictionary->trie, node, c);
}
return has_found_match;
}
{
size_t offset = dictionary->buckets[Hash(data)];
BROTLI_BOOL end = !offset;
@ -481,6 +496,45 @@ BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
return has_found_match;
}
/* Finds matches for one or more dictionaries, if multiple are present
in the contextual dictionary */
BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
const BrotliEncoderDictionary* dictionary, const uint8_t* data,
size_t min_length, size_t max_length, uint32_t* matches) {
BROTLI_BOOL has_found_match =
BrotliFindAllStaticDictionaryMatchesFor(
dictionary, data, min_length, max_length, matches);
if (!!dictionary->parent && dictionary->parent->num_dictionaries > 1) {
uint32_t matches2[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1];
int l;
const BrotliEncoderDictionary* dictionary2 = dictionary->parent->dict[0];
if (dictionary2 == dictionary) {
dictionary2 = dictionary->parent->dict[1];
}
for (l = 0; l < BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1; l++) {
matches2[l] = kInvalidMatch;
}
has_found_match |= BrotliFindAllStaticDictionaryMatchesFor(
dictionary2, data, min_length, max_length, matches2);
for (l = 0; l < BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1; l++) {
if (matches2[l] != kInvalidMatch) {
uint32_t dist = (uint32_t)(matches2[l] >> 5);
uint32_t len_code = matches2[l] & 31;
uint32_t skipdist = (uint32_t)((uint32_t)(1 << dictionary->words->
size_bits_by_length[len_code]) & ~1u) *
(uint32_t)dictionary->num_transforms;
/* TODO: check for dist overflow */
dist += skipdist;
AddMatch(dist, (size_t)l, len_code, matches);
}
}
}
return has_found_match;
}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@ -13,6 +13,7 @@
#define BROTLI_DEC_DECODE_H_
#include <brotli/port.h>
#include <brotli/shared_dictionary.h>
#include <brotli/types.h>
#if defined(__cplusplus) || defined(c_plusplus)
@ -85,8 +86,9 @@ typedef enum {
BROTLI_ERROR_CODE(_ERROR_FORMAT_, PADDING_2, -15) SEPARATOR \
BROTLI_ERROR_CODE(_ERROR_FORMAT_, DISTANCE, -16) SEPARATOR \
\
/* -17..-18 codes are reserved */ \
/* -17 code is reserved */ \
\
BROTLI_ERROR_CODE(_ERROR_, COMPOUND_DICTIONARY, -18) SEPARATOR \
BROTLI_ERROR_CODE(_ERROR_, DICTIONARY_NOT_SET, -19) SEPARATOR \
BROTLI_ERROR_CODE(_ERROR_, INVALID_ARGUMENTS, -20) SEPARATOR \
\
@ -154,6 +156,28 @@ typedef enum BrotliDecoderParameter {
BROTLI_DEC_API BROTLI_BOOL BrotliDecoderSetParameter(
BrotliDecoderState* state, BrotliDecoderParameter param, uint32_t value);
/**
* Adds LZ77 prefix dictionary, adds or replaces built-in static dictionary and
* transforms.
*
* Attached dictionary ownership is not transferred.
* Data provided to this method should be kept accessible until
* decoding is finished and decoder instance is destroyed.
*
* @note Dictionaries could NOT be attached after actual decoding is started.
*
* @param state decoder instance
* @param type dictionary data format
* @param data_size length of memory region pointed by @p data
* @param data dictionary data in format corresponding to @p type
* @returns ::BROTLI_FALSE if dictionary is corrupted,
* or dictionary count limit is reached
* @returns ::BROTLI_TRUE if dictionary is accepted / attached
*/
BROTLI_DEC_API BROTLI_BOOL BrotliDecoderAttachDictionary(
BrotliDecoderState* state, BrotliSharedDictionaryType type,
size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]);
/**
* Creates an instance of ::BrotliDecoderState and initializes it.
*

View File

@ -13,6 +13,7 @@
#define BROTLI_ENC_ENCODE_H_
#include <brotli/port.h>
#include <brotli/shared_dictionary.h>
#include <brotli/types.h>
#if defined(__cplusplus) || defined(c_plusplus)
@ -269,6 +270,51 @@ BROTLI_ENC_API BrotliEncoderState* BrotliEncoderCreateInstance(
*/
BROTLI_ENC_API void BrotliEncoderDestroyInstance(BrotliEncoderState* state);
/* Opaque type for pointer to different possible internal structures containing
dictionary prepared for the encoder */
typedef struct BrotliEncoderPreparedDictionaryStruct
BrotliEncoderPreparedDictionary;
/**
* Prepares a shared dictionary from the given file format for the encoder.
*
* @p alloc_func and @p free_func @b MUST be both zero or both non-zero. In the
* case they are both zero, default memory allocators are used. @p opaque is
* passed to @p alloc_func and @p free_func when they are called. @p free_func
* has to return without doing anything when asked to free a NULL pointer.
*
* @param type type of dictionary stored in data
* @param data_size size of @p data buffer
* @param data pointer to the dictionary data
* @param quality the maximum Brotli quality to prepare the dictionary for,
* use BROTLI_MAX_QUALITY by default
* @param alloc_func custom memory allocation function
* @param free_func custom memory free function
* @param opaque custom memory manager handle
*/
BROTLI_ENC_API BrotliEncoderPreparedDictionary*
BrotliEncoderPrepareDictionary(BrotliSharedDictionaryType type,
size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)],
int quality,
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
BROTLI_ENC_API void BrotliEncoderDestroyPreparedDictionary(
BrotliEncoderPreparedDictionary* dictionary);
/**
* Attaches a prepared dictionary of any type to the encoder. Can be used
* multiple times to attach multiple dictionaries. The dictionary type was
* determined by BrotliEncoderPrepareDictionary. Multiple raw prefix
* dictionaries and/or max 1 serialized dictionary with custom words can be
* attached.
*
* @returns ::BROTLI_FALSE in case of error
* @returns ::BROTLI_TRUE otherwise
*/
BROTLI_ENC_API BROTLI_BOOL BrotliEncoderAttachPreparedDictionary(
BrotliEncoderState* state,
const BrotliEncoderPreparedDictionary* dictionary);
/**
* Calculates the output size bound for the given @p input_size.
*

View File

@ -0,0 +1,97 @@
/* Copyright 2017 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* (Opaque) Shared Dictionary definition and utilities. */
#ifndef BROTLI_COMMON_SHARED_DICTIONARY_H_
#define BROTLI_COMMON_SHARED_DICTIONARY_H_
#include <brotli/port.h>
#include <brotli/types.h>
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#define SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH 4
#define SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH 31
#define SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS 64
#define SHARED_BROTLI_MAX_COMPOUND_DICTS 15
/**
* Opaque structure that holds shared dictionary data.
*
* Allocated and initialized with ::BrotliSharedDictionaryCreateInstance.
* Cleaned up and deallocated with ::BrotliSharedDictionaryDestroyInstance.
*/
typedef struct BrotliSharedDictionaryStruct BrotliSharedDictionary;
/**
* Input data type for ::BrotliSharedDictionaryAttach.
*/
typedef enum BrotliSharedDictionaryType {
/** Raw LZ77 prefix dictionary. */
BROTLI_SHARED_DICTIONARY_RAW = 0,
/** Serialized shared dictionary. */
BROTLI_SHARED_DICTIONARY_SERIALIZED = 1
} BrotliSharedDictionaryType;
/**
* Creates an instance of ::BrotliSharedDictionary.
*
* Fresh instance has default word dictionary and transforms
* and no LZ77 prefix dictionary.
*
* @p alloc_func and @p free_func @b MUST be both zero or both non-zero. In the
* case they are both zero, default memory allocators are used. @p opaque is
* passed to @p alloc_func and @p free_func when they are called. @p free_func
* has to return without doing anything when asked to free a NULL pointer.
*
* @param alloc_func custom memory allocation function
* @param free_func custom memory free function
* @param opaque custom memory manager handle
* @returns @c 0 if instance can not be allocated or initialized
* @returns pointer to initialized ::BrotliSharedDictionary otherwise
*/
BROTLI_COMMON_API BrotliSharedDictionary* BrotliSharedDictionaryCreateInstance(
brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
/**
* Deinitializes and frees ::BrotliSharedDictionary instance.
*
* @param dict shared dictionary instance to be cleaned up and deallocated
*/
BROTLI_COMMON_API void BrotliSharedDictionaryDestroyInstance(
BrotliSharedDictionary* dict);
/**
* Attaches dictionary to a given instance of ::BrotliSharedDictionary.
*
* Dictionary to be attached is represented in a serialized format as a region
* of memory.
*
* Provided data it partially referenced by a resulting (compound) dictionary,
* and should be kept untouched, while at least one compound dictionary uses it.
* This way memory overhead is kept minimal by the cost of additional resource
* management.
*
* @param dict dictionary to extend
* @param type type of dictionary to attach
* @param data_size size of @p data
* @param data serialized dictionary of type @p type, with at least @p data_size
* addressable bytes
* @returns ::BROTLI_TRUE if provided dictionary is successfully attached
* @returns ::BROTLI_FALSE otherwise
*/
BROTLI_COMMON_API BROTLI_BOOL BrotliSharedDictionaryAttach(
BrotliSharedDictionary* dict, BrotliSharedDictionaryType type,
size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]);
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif
#endif /* BROTLI_COMMON_SHARED_DICTIONARY_H_ */

View File

@ -100,6 +100,7 @@ typedef struct {
BROTLI_BOOL decompress;
BROTLI_BOOL large_window;
const char* output_path;
const char* dictionary_path;
const char* suffix;
int not_input_indices[MAX_OPTIONS];
size_t longest_path_len;
@ -108,6 +109,9 @@ typedef struct {
/* Inner state */
int argc;
char** argv;
uint8_t* dictionary;
size_t dictionary_size;
BrotliEncoderPreparedDictionary* prepared_dictionary;
char* modified_path; /* Storage for path with appended / cut suffix */
int iterator;
int ignore;
@ -359,6 +363,12 @@ static Command ParseParams(Context* params) {
params->lgwin, BROTLI_MIN_WINDOW_BITS);
return COMMAND_INVALID;
}
} else if (c == 'D') {
if (params->dictionary_path) {
fprintf(stderr, "dictionary path already set\n");
return COMMAND_INVALID;
}
params->dictionary_path = argv[i];
} else if (c == 'S') {
if (suffix_set) {
fprintf(stderr, "suffix already set\n");
@ -446,7 +456,13 @@ static Command ParseParams(Context* params) {
}
key_len = (size_t)(value - arg);
value++;
if (strncmp("lgwin", arg, key_len) == 0) {
if (strncmp("dictionary", arg, key_len) == 0) {
if (params->dictionary_path) {
fprintf(stderr, "dictionary path already set\n");
return COMMAND_INVALID;
}
params->dictionary_path = value;
} else if (strncmp("lgwin", arg, key_len) == 0) {
if (lgwin_set) {
fprintf(stderr, "lgwin parameter already set\n");
return COMMAND_INVALID;
@ -575,6 +591,8 @@ static void PrintHelp(const char* name, BROTLI_BOOL error) {
" decodable with regular brotli decoders\n",
BROTLI_MIN_WINDOW_BITS, BROTLI_LARGE_MAX_WINDOW_BITS);
fprintf(media,
" -D FILE, --dictionary=FILE use FILE as raw (LZ77) dictionary\n");
fprintf(media,
" -S SUF, --suffix=SUF output file suffix (default:'%s')\n",
DEFAULT_SUFFIX);
fprintf(media,
@ -678,6 +696,69 @@ static void CopyStat(const char* input_path, const char* output_path) {
}
}
/* Result ownership is passed to caller.
|*dictionary_size| is set to resulting buffer size. */
static BROTLI_BOOL ReadDictionary(Context* context, Command command) {
static const int kMaxDictionarySize =
BROTLI_MAX_DISTANCE - BROTLI_MAX_BACKWARD_LIMIT(24);
FILE* f;
int64_t file_size_64;
uint8_t* buffer;
size_t bytes_read;
if (context->dictionary_path == NULL) return BROTLI_TRUE;
f = fopen(context->dictionary_path, "rb");
if (f == NULL) {
fprintf(stderr, "failed to open dictionary file [%s]: %s\n",
PrintablePath(context->dictionary_path), strerror(errno));
return BROTLI_FALSE;
}
file_size_64 = FileSize(context->dictionary_path);
if (file_size_64 == -1) {
fprintf(stderr, "could not get size of dictionary file [%s]",
PrintablePath(context->dictionary_path));
fclose(f);
return BROTLI_FALSE;
}
if (file_size_64 > kMaxDictionarySize) {
fprintf(stderr, "dictionary [%s] is larger than maximum allowed: %d\n",
PrintablePath(context->dictionary_path), kMaxDictionarySize);
fclose(f);
return BROTLI_FALSE;
}
context->dictionary_size = (size_t)file_size_64;
buffer = (uint8_t*)malloc(context->dictionary_size);
if (!buffer) {
fprintf(stderr, "could not read dictionary: out of memory\n");
fclose(f);
return BROTLI_FALSE;
}
bytes_read = fread(buffer, sizeof(uint8_t), context->dictionary_size, f);
if (bytes_read != context->dictionary_size) {
free(buffer);
fprintf(stderr, "failed to read dictionary [%s]: %s\n",
PrintablePath(context->dictionary_path), strerror(errno));
fclose(f);
return BROTLI_FALSE;
}
fclose(f);
context->dictionary = buffer;
if (command == COMMAND_COMPRESS) {
context->prepared_dictionary = BrotliEncoderPrepareDictionary(
BROTLI_SHARED_DICTIONARY_RAW, context->dictionary_size,
context->dictionary, BROTLI_MAX_QUALITY, NULL, NULL, NULL);
if (context->prepared_dictionary == NULL) {
fprintf(stderr, "failed to prepare dictionary [%s]\n",
PrintablePath(context->dictionary_path));
return BROTLI_FALSE;
}
}
return BROTLI_TRUE;
}
static BROTLI_BOOL NextFile(Context* context) {
const char* arg;
size_t arg_len;
@ -933,6 +1014,10 @@ static BROTLI_BOOL DecompressFiles(Context* context) {
fragmentation (new builds decode streams that old builds don't),
it is better from used experience perspective. */
BrotliDecoderSetParameter(s, BROTLI_DECODER_PARAM_LARGE_WINDOW, 1u);
if (context->dictionary) {
BrotliDecoderAttachDictionary(s, BROTLI_SHARED_DICTIONARY_RAW,
context->dictionary_size, context->dictionary);
}
is_ok = OpenFiles(context);
if (is_ok && !context->current_input_path &&
!context->force_overwrite && isatty(STDIN_FILENO)) {
@ -1020,6 +1105,9 @@ static BROTLI_BOOL CompressFiles(Context* context) {
(uint32_t)context->input_file_length : (1u << 30);
BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, size_hint);
}
if (context->dictionary) {
BrotliEncoderAttachPreparedDictionary(s, context->prepared_dictionary);
}
is_ok = OpenFiles(context);
if (is_ok && !context->current_output_path &&
!context->force_overwrite && isatty(STDOUT_FILENO)) {
@ -1051,6 +1139,7 @@ int main(int argc, char** argv) {
context.decompress = BROTLI_FALSE;
context.large_window = BROTLI_FALSE;
context.output_path = NULL;
context.dictionary_path = NULL;
context.suffix = DEFAULT_SUFFIX;
for (i = 0; i < MAX_OPTIONS; ++i) context.not_input_indices[i] = 0;
context.longest_path_len = 1;
@ -1058,6 +1147,9 @@ int main(int argc, char** argv) {
context.argc = argc;
context.argv = argv;
context.dictionary = NULL;
context.dictionary_size = 0;
context.prepared_dictionary = NULL;
context.modified_path = NULL;
context.iterator = 0;
context.ignore = 0;
@ -1072,6 +1164,7 @@ int main(int argc, char** argv) {
if (command == COMMAND_COMPRESS || command == COMMAND_DECOMPRESS ||
command == COMMAND_TEST_INTEGRITY) {
if (!ReadDictionary(&context, command)) is_ok = BROTLI_FALSE;
if (is_ok) {
size_t modified_path_len =
context.longest_path_len + strlen(context.suffix) + 1;
@ -1116,6 +1209,8 @@ int main(int argc, char** argv) {
if (context.iterator_error) is_ok = BROTLI_FALSE;
BrotliEncoderDestroyPreparedDictionary(context.prepared_dictionary);
free(context.dictionary);
free(context.modified_path);
free(context.buffer);

View File

@ -84,6 +84,9 @@ OPTIONS
`(2**NUM - 16)`; 0 lets compressor decide over the optimal value; bigger
windows size improve density; decoder might require up to window size
memory to operate
* `-D FILE`, `--dictionary=FILE`:
use FILE as raw (LZ77) dictionary; same dictionary MUST be used both for
compression and decompression
* `-S SUF`, `--suffix=SUF`:
output file suffix (default: `.br`)
* `-V`, `--version`:

View File

@ -24,6 +24,7 @@ java_library(
":dec",
"//org/brotli/integration:brotli_jni_test_base",
"//org/brotli/integration:bundle_helper",
"//org/brotli/wrapper/enc",
"@maven//:junit_junit",
],
)
@ -82,3 +83,16 @@ java_test(
test_class = "org.brotli.wrapper.dec.DecoderTest",
runtime_deps = [":test_lib"],
)
java_test(
name = "CornerCasesTest",
size = "large",
data = [
":brotli_jni", # Bazel JNI workaround
],
jvm_flags = [
"-DBROTLI_JNI_LIBRARY=$(location :brotli_jni)",
],
test_class = "org.brotli.wrapper.dec.CornerCasesTest",
runtime_deps = [":test_lib"],
)

View File

@ -35,6 +35,11 @@ public class BrotliDecoderChannel extends Decoder implements ReadableByteChannel
this(source, DEFAULT_BUFFER_SIZE);
}
@Override
public void attachDictionary(ByteBuffer dictionary) throws IOException {
super.attachDictionary(dictionary);
}
@Override
public boolean isOpen() {
synchronized (mutex) {

View File

@ -8,6 +8,7 @@ package org.brotli.wrapper.dec;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
/**
@ -34,6 +35,10 @@ public class BrotliInputStream extends InputStream {
this(source, DEFAULT_BUFFER_SIZE);
}
public void attachDictionary(ByteBuffer dictionary) throws IOException {
decoder.attachDictionary(dictionary);
}
public void enableEagerOutput() {
decoder.enableEagerOutput();
}

View File

@ -0,0 +1,33 @@
/* Copyright 2021 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
package org.brotli.wrapper.dec;
import static org.junit.Assert.assertEquals;
import org.brotli.integration.BrotliJniTestBase;
import org.brotli.wrapper.enc.Encoder;
import java.io.IOException;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/** Tests for {@link org.brotli.wrapper.enc.Encoder}. */
@RunWith(JUnit4.class)
public class CornerCasesTest extends BrotliJniTestBase {
@Test
public void testPowerOfTwoInput() throws IOException {
// 24 == max window bits to ensure ring-buffer size is not greater than input.
int len = 1 << 24;
byte[] data = new byte[len];
for (int i = 0; i < len; ++i) {
data[i] = (byte) Integer.bitCount(i);
}
byte[] encoded = Encoder.compress(data);
byte[] decoded = Decoder.decompress(encoded);
assertEquals(len, decoded.length);
}
}

View File

@ -50,6 +50,12 @@ public class Decoder {
throw new IOException(message);
}
void attachDictionary(ByteBuffer dictionary) throws IOException {
if (!decoder.attachDictionary(dictionary)) {
fail("failed to attach dictionary");
}
}
public void enableEagerOutput() {
this.eager = true;
}

View File

@ -17,6 +17,7 @@ public class DecoderJNI {
private static native void nativePush(long[] context, int length);
private static native ByteBuffer nativePull(long[] context);
private static native void nativeDestroy(long[] context);
private static native boolean nativeAttachDictionary(long[] context, ByteBuffer dictionary);
public enum Status {
ERROR,
@ -40,6 +41,19 @@ public class DecoderJNI {
}
}
public boolean attachDictionary(ByteBuffer dictionary) {
if (!dictionary.isDirect()) {
throw new IllegalArgumentException("only direct buffers allowed");
}
if (context[0] == 0) {
throw new IllegalStateException("brotli decoder is already destroyed");
}
if (!fresh) {
throw new IllegalStateException("decoding is already started");
}
return nativeAttachDictionary(context, dictionary);
}
public void push(int length) {
if (length < 0) {
throw new IllegalArgumentException("negative block length");

View File

@ -15,6 +15,9 @@ namespace {
typedef struct DecoderHandle {
BrotliDecoderState* state;
jobject dictionary_refs[15];
size_t dictionary_count;
uint8_t* input_start;
size_t input_offset;
size_t input_length;
@ -54,6 +57,10 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativeCreate(
ok = !!handle;
if (ok) {
for (int i = 0; i < 15; ++i) {
handle->dictionary_refs[i] = nullptr;
}
handle->dictionary_count = 0;
handle->input_offset = 0;
handle->input_length = 0;
handle->input_start = nullptr;
@ -193,10 +200,53 @@ Java_org_brotli_wrapper_dec_DecoderJNI_nativeDestroy(
env->GetLongArrayRegion(ctx, 0, 3, context);
DecoderHandle* handle = getHandle(reinterpret_cast<void*>(context[0]));
BrotliDecoderDestroyInstance(handle->state);
for (size_t i = 0; i < handle->dictionary_count; ++i) {
env->DeleteGlobalRef(handle->dictionary_refs[i]);
}
delete[] handle->input_start;
delete handle;
}
JNIEXPORT jboolean JNICALL
Java_org_brotli_wrapper_dec_DecoderJNI_nativeAttachDictionary(
JNIEnv* env, jobject /*jobj*/, jlongArray ctx, jobject dictionary) {
jlong context[3];
env->GetLongArrayRegion(ctx, 0, 3, context);
DecoderHandle* handle = getHandle(reinterpret_cast<void*>(context[0]));
jobject ref = nullptr;
uint8_t* address = nullptr;
jlong capacity = 0;
bool ok = true;
if (ok && !dictionary) {
ok = false;
}
if (ok && handle->dictionary_count >= 15) {
ok = false;
}
if (ok) {
ref = env->NewGlobalRef(dictionary);
ok = !!ref;
}
if (ok) {
handle->dictionary_refs[handle->dictionary_count] = ref;
handle->dictionary_count++;
address = static_cast<uint8_t*>(env->GetDirectBufferAddress(ref));
ok = !!address;
}
if (ok) {
capacity = env->GetDirectBufferCapacity(ref);
ok = (capacity > 0) && (capacity < (1 << 30));
}
if (ok) {
size_t size = static_cast<size_t>(capacity);
ok = !!BrotliDecoderAttachDictionary(handle->state,
BROTLI_SHARED_DICTIONARY_RAW, size, address);
}
return static_cast<jboolean>(ok);
}
#ifdef __cplusplus
}
#endif

View File

@ -18,6 +18,10 @@ java_library(
["*.java"],
exclude = ["*Test*.java"],
),
deps = [
"//org/brotli/common:shared_dictionary",
"//org/brotli/enc:prepared_dictionary",
],
resource_jars = ["//:license"],
)
@ -27,8 +31,11 @@ java_library(
srcs = glob(["*Test*.java"]),
deps = [
":enc",
"//org/brotli/common:shared_dictionary",
"//org/brotli/enc:prepared_dictionary",
"//org/brotli/integration:brotli_jni_test_base",
"//org/brotli/integration:bundle_helper",
"//org/brotli/wrapper/common",
"//org/brotli/wrapper/dec",
"@maven//:junit_junit",
],

View File

@ -6,6 +6,7 @@
package org.brotli.wrapper.enc;
import org.brotli.enc.PreparedDictionary;
import java.io.IOException;
import java.nio.Buffer;
import java.nio.ByteBuffer;
@ -42,6 +43,11 @@ public class BrotliEncoderChannel extends Encoder implements WritableByteChannel
this(destination, new Encoder.Parameters());
}
@Override
public void attachDictionary(PreparedDictionary dictionary) throws IOException {
super.attachDictionary(dictionary);
}
@Override
public boolean isOpen() {
synchronized (mutex) {

View File

@ -6,6 +6,7 @@
package org.brotli.wrapper.enc;
import org.brotli.enc.PreparedDictionary;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.channels.Channels;
@ -40,6 +41,10 @@ public class BrotliOutputStream extends OutputStream {
this(destination, new Encoder.Parameters());
}
public void attachDictionary(PreparedDictionary dictionary) throws IOException {
encoder.attachDictionary(dictionary);
}
@Override
public void close() throws IOException {
encoder.close();

View File

@ -6,17 +6,20 @@
package org.brotli.wrapper.enc;
import org.brotli.enc.PreparedDictionary;
import java.io.IOException;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.channels.WritableByteChannel;
import java.util.ArrayList;
import java.util.List;
/**
* Base class for OutputStream / Channel implementations.
*/
public class Encoder {
private final WritableByteChannel destination;
private final List<PreparedDictionary> dictionaries;
private final EncoderJNI.Wrapper encoder;
private ByteBuffer buffer;
final ByteBuffer inputBuffer;
@ -111,6 +114,7 @@ public class Encoder {
if (destination == null) {
throw new NullPointerException("destination can not be null");
}
this.dictionaries = new ArrayList<PreparedDictionary>();
this.destination = destination;
this.encoder = new EncoderJNI.Wrapper(inputBufferSize, params.quality, params.lgwin, params.mode);
this.inputBuffer = this.encoder.getInputBuffer();
@ -125,6 +129,14 @@ public class Encoder {
throw new IOException(message);
}
public void attachDictionary(PreparedDictionary dictionary) throws IOException {
if (!encoder.attachDictionary(dictionary.getData())) {
fail("failed to attach dictionary");
}
// Reference to native prepared dictionary wrapper should be held till the end of encoding.
dictionaries.add(dictionary);
}
/**
* @param force repeat pushing until all output is consumed
* @return true if all encoder output is consumed
@ -239,4 +251,15 @@ public class Encoder {
public static byte[] compress(byte[] data) throws IOException {
return compress(data, new Parameters());
}
/**
* Prepares raw or serialized dictionary for being used by encoder.
*
* @param dictionary raw / serialized dictionary data; MUST be direct
* @param sharedDictionaryType dictionary data type
*/
public static PreparedDictionary prepareDictionary(ByteBuffer dictionary,
int sharedDictionaryType) {
return EncoderJNI.prepareDictionary(dictionary, sharedDictionaryType);
}
}

View File

@ -6,6 +6,7 @@
package org.brotli.wrapper.enc;
import org.brotli.enc.PreparedDictionary;
import java.io.IOException;
import java.nio.ByteBuffer;
@ -17,6 +18,9 @@ class EncoderJNI {
private static native void nativePush(long[] context, int length);
private static native ByteBuffer nativePull(long[] context);
private static native void nativeDestroy(long[] context);
private static native boolean nativeAttachDictionary(long[] context, ByteBuffer dictionary);
private static native ByteBuffer nativePrepareDictionary(ByteBuffer dictionary, long type);
private static native void nativeDestroyDictionary(ByteBuffer dictionary);
enum Operation {
PROCESS,
@ -24,6 +28,47 @@ class EncoderJNI {
FINISH
}
private static class PreparedDictionaryImpl implements PreparedDictionary {
private ByteBuffer data;
private PreparedDictionaryImpl(ByteBuffer data) {
this.data = data;
}
@Override
public ByteBuffer getData() {
return data;
}
@Override
protected void finalize() throws Throwable {
try {
ByteBuffer data = this.data;
this.data = null;
nativeDestroyDictionary(data);
} finally {
super.finalize();
}
}
}
/**
* Prepares raw or serialized dictionary for being used by encoder.
*
* @param dictionary raw / serialized dictionary data; MUST be direct
* @param sharedDictionaryType dictionary data type
*/
static PreparedDictionary prepareDictionary(ByteBuffer dictionary, int sharedDictionaryType) {
if (!dictionary.isDirect()) {
throw new IllegalArgumentException("only direct buffers allowed");
}
ByteBuffer dictionaryData = nativePrepareDictionary(dictionary, sharedDictionaryType);
if (dictionaryData == null) {
throw new IllegalStateException("OOM");
}
return new PreparedDictionaryImpl(dictionaryData);
}
static class Wrapper {
protected final long[] context = new long[5];
private final ByteBuffer inputBuffer;
@ -48,6 +93,19 @@ class EncoderJNI {
this.context[4] = 0;
}
boolean attachDictionary(ByteBuffer dictionary) {
if (!dictionary.isDirect()) {
throw new IllegalArgumentException("only direct buffers allowed");
}
if (context[0] == 0) {
throw new IllegalStateException("brotli decoder is already destroyed");
}
if (!fresh) {
throw new IllegalStateException("decoding is already started");
}
return nativeAttachDictionary(context, dictionary);
}
void push(Operation op, int length) {
if (length < 0) {
throw new IllegalArgumentException("negative block length");

View File

@ -0,0 +1,114 @@
package org.brotli.wrapper.enc;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.brotli.common.SharedDictionaryType;
import org.brotli.enc.PreparedDictionary;
import org.brotli.enc.PreparedDictionaryGenerator;
import org.brotli.integration.BrotliJniTestBase;
import org.brotli.integration.BundleHelper;
import org.brotli.wrapper.common.BrotliCommon;
import org.brotli.wrapper.dec.BrotliInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.List;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import org.junit.runner.RunWith;
import org.junit.runners.AllTests;
/** Tests for compression / decompression aided with LZ77 dictionary. */
@RunWith(AllTests.class)
public class UseCompoundDictionaryTest extends BrotliJniTestBase {
static InputStream getBundle() throws IOException {
return new FileInputStream(System.getProperty("TEST_BUNDLE"));
}
/** Creates a test suite. */
public static TestSuite suite() throws IOException {
TestSuite suite = new TestSuite();
InputStream bundle = getBundle();
try {
List<String> entries = BundleHelper.listEntries(bundle);
for (String entry : entries) {
suite.addTest(new UseCompoundDictionaryTestCase(entry));
}
} finally {
bundle.close();
}
return suite;
}
/** Test case with a unique name. */
static class UseCompoundDictionaryTestCase extends TestCase {
final String entryName;
UseCompoundDictionaryTestCase(String entryName) {
super("UseCompoundDictionaryTest." + entryName);
this.entryName = entryName;
}
@Override
protected void runTest() throws Throwable {
UseCompoundDictionaryTest.run(entryName);
}
}
private static PreparedDictionary prepareRawDictionary(String entryName, ByteBuffer data) {
if (entryName.endsWith("E.coli.txt")) {
// Default prepared dictionary parameters doesn't work well for DNA data.
// Should work well with 8-byte hash.
return PreparedDictionaryGenerator.generate(data, 17, 3, 64, 5);
} else {
return Encoder.prepareDictionary(data, SharedDictionaryType.RAW);
}
}
private static void run(String entryName) throws Throwable {
InputStream bundle = getBundle();
byte[] original;
try {
original = BundleHelper.readEntry(bundle, entryName);
} finally {
bundle.close();
}
if (original == null) {
throw new RuntimeException("Can't read bundle entry: " + entryName);
}
ByteBuffer dictionary = BrotliCommon.makeNative(original);
PreparedDictionary preparedDictionary = prepareRawDictionary(entryName, dictionary);
ByteArrayOutputStream dst = new ByteArrayOutputStream();
BrotliOutputStream encoder =
new BrotliOutputStream(dst, new Encoder.Parameters().setQuality(9).setWindow(23), 1 << 23);
encoder.attachDictionary(preparedDictionary);
try {
encoder.write(original);
} finally {
encoder.close();
}
byte[] compressed = dst.toByteArray();
// Just copy self from LZ77 dictionary -> ultimate compression ratio.
assertTrue(compressed.length < 80 + original.length / 65536);
BrotliInputStream decoder =
new BrotliInputStream(new ByteArrayInputStream(compressed), 1 << 23);
decoder.attachDictionary(dictionary);
try {
long originalCrc = BundleHelper.fingerprintStream(new ByteArrayInputStream(original));
long crc = BundleHelper.fingerprintStream(decoder);
assertEquals(originalCrc, crc);
} finally {
decoder.close();
}
}
}

View File

@ -15,6 +15,9 @@ namespace {
typedef struct EncoderHandle {
BrotliEncoderState* state;
jobject dictionary_refs[15];
size_t dictionary_count;
uint8_t* input_start;
size_t input_offset;
size_t input_last;
@ -53,6 +56,10 @@ Java_org_brotli_wrapper_enc_EncoderJNI_nativeCreate(
ok = !!handle;
if (ok) {
for (int i = 0; i < 15; ++i) {
handle->dictionary_refs[i] = nullptr;
}
handle->dictionary_count = 0;
handle->input_offset = 0;
handle->input_last = 0;
handle->input_start = nullptr;
@ -190,10 +197,90 @@ Java_org_brotli_wrapper_enc_EncoderJNI_nativeDestroy(
env->GetLongArrayRegion(ctx, 0, 2, context);
EncoderHandle* handle = getHandle(reinterpret_cast<void*>(context[0]));
BrotliEncoderDestroyInstance(handle->state);
for (size_t i = 0; i < handle->dictionary_count; ++i) {
env->DeleteGlobalRef(handle->dictionary_refs[i]);
}
delete[] handle->input_start;
delete handle;
}
JNIEXPORT jboolean JNICALL
Java_org_brotli_wrapper_enc_EncoderJNI_nativeAttachDictionary(
JNIEnv* env, jobject /*jobj*/, jlongArray ctx, jobject dictionary) {
jlong context[2];
env->GetLongArrayRegion(ctx, 0, 2, context);
EncoderHandle* handle = getHandle(reinterpret_cast<void*>(context[0]));
jobject ref = nullptr;
uint8_t* address = nullptr;
bool ok = true;
if (ok && !dictionary) {
ok = false;
}
if (ok && handle->dictionary_count >= 15) {
ok = false;
}
if (ok) {
ref = env->NewGlobalRef(dictionary);
ok = !!ref;
}
if (ok) {
handle->dictionary_refs[handle->dictionary_count] = ref;
handle->dictionary_count++;
address = static_cast<uint8_t*>(env->GetDirectBufferAddress(ref));
ok = !!address;
}
if (ok) {
ok = !!BrotliEncoderAttachPreparedDictionary(handle->state,
reinterpret_cast<BrotliEncoderPreparedDictionary*>(address));
}
return static_cast<jboolean>(ok);
}
JNIEXPORT void JNICALL
Java_org_brotli_wrapper_enc_EncoderJNI_nativeDestroyDictionary(
JNIEnv* env, jobject /*jobj*/, jobject dictionary) {
if (!dictionary) {
return;
}
uint8_t* address =
static_cast<uint8_t*>(env->GetDirectBufferAddress(dictionary));
if (!address) {
return;
}
BrotliEncoderDestroyPreparedDictionary(
reinterpret_cast<BrotliEncoderPreparedDictionary*>(address));
}
JNIEXPORT jobject JNICALL
Java_org_brotli_wrapper_enc_EncoderJNI_nativePrepareDictionary(
JNIEnv* env, jobject /*jobj*/, jobject dictionary, jlong type) {
if (!dictionary) {
return nullptr;
}
uint8_t* address =
static_cast<uint8_t*>(env->GetDirectBufferAddress(dictionary));
if (!address) {
return nullptr;
}
jlong capacity = env->GetDirectBufferCapacity(dictionary);
if ((capacity <= 0) || (capacity >= (1 << 30))) {
return nullptr;
}
BrotliSharedDictionaryType dictionary_type =
static_cast<BrotliSharedDictionaryType>(type);
size_t size = static_cast<size_t>(capacity);
BrotliEncoderPreparedDictionary* prepared_dictionary =
BrotliEncoderPrepareDictionary(dictionary_type, size, address,
BROTLI_MAX_QUALITY, nullptr, nullptr, nullptr);
if (!prepared_dictionary) {
return nullptr;
}
/* Size is 4 - just enough to check magic bytes. */
return env->NewDirectByteBuffer(prepared_dictionary, 4);
}
#ifdef __cplusplus
}
#endif

View File

@ -9,6 +9,7 @@ BROTLI_COMMON_C = \
c/common/context.c \
c/common/dictionary.c \
c/common/platform.c \
c/common/shared_dictionary.c \
c/common/transform.c
BROTLI_COMMON_H = \
@ -16,6 +17,7 @@ BROTLI_COMMON_H = \
c/common/context.h \
c/common/dictionary.h \
c/common/platform.h \
c/common/shared_dictionary_internal.h \
c/common/transform.h \
c/common/version.h
@ -39,6 +41,7 @@ BROTLI_ENC_C = \
c/enc/brotli_bit_stream.c \
c/enc/cluster.c \
c/enc/command.c \
c/enc/compound_dictionary.c \
c/enc/compress_fragment.c \
c/enc/compress_fragment_two_pass.c \
c/enc/dictionary_hash.c \
@ -66,6 +69,7 @@ BROTLI_ENC_H = \
c/enc/cluster.h \
c/enc/cluster_inc.h \
c/enc/command.h \
c/enc/compound_dictionary.h \
c/enc/compress_fragment.h \
c/enc/compress_fragment_two_pass.h \
c/enc/dictionary_hash.h \
@ -101,4 +105,5 @@ BROTLI_INCLUDE = \
c/include/brotli/decode.h \
c/include/brotli/encode.h \
c/include/brotli/port.h \
c/include/brotli/shared_dictionary.h \
c/include/brotli/types.h

View File

@ -185,6 +185,7 @@ EXT_MODULES = [
'c/common/context.c',
'c/common/dictionary.c',
'c/common/platform.c',
'c/common/shared_dictionary.c',
'c/common/transform.c',
'c/dec/bit_reader.c',
'c/dec/decode.c',
@ -197,6 +198,7 @@ EXT_MODULES = [
'c/enc/brotli_bit_stream.c',
'c/enc/cluster.c',
'c/enc/command.c',
'c/enc/compound_dictionary.c',
'c/enc/compress_fragment.c',
'c/enc/compress_fragment_two_pass.c',
'c/enc/dictionary_hash.c',
@ -216,6 +218,7 @@ EXT_MODULES = [
'c/common/context.h',
'c/common/dictionary.h',
'c/common/platform.h',
'c/common/shared_dictionary_internal.h',
'c/common/transform.h',
'c/common/version.h',
'c/dec/bit_reader.h',
@ -234,6 +237,7 @@ EXT_MODULES = [
'c/enc/cluster.h',
'c/enc/cluster_inc.h',
'c/enc/command.h',
'c/enc/compound_dictionary.h',
'c/enc/compress_fragment.h',
'c/enc/compress_fragment_two_pass.h',
'c/enc/dictionary_hash.h',