Merge pull request #114 from szabadka/master

Brotli custom LZ77 dictionary support.
This commit is contained in:
szabadka 2015-06-12 15:45:41 +02:00
commit a13ea018f5
8 changed files with 101 additions and 16 deletions

View File

@ -97,19 +97,6 @@ static BROTLI_INLINE uint32_t BrotliPrefetchBits(BrotliBitReader* const br) {
return (uint32_t)(br->val_ >> br->bit_pos_);
}
/* Sets the bit position of the reader to `val`. Used to jump over a number */
/* of bits that were inspected through BrotliPrefetchBits and */
/* BrotliFillBitWindow. */
static BROTLI_INLINE void BrotliSetBitPos(BrotliBitReader* const br,
                                          uint32_t val) {
#ifdef BROTLI_DECODE_DEBUG
  /* Debug trace only: print the number of bits being jumped over and their */
  /* value, taken from the window before the position is moved. */
  const uint32_t skipped_count = val - br->bit_pos_;
  const uint32_t skipped_value =
      (uint32_t)(br->val_ >> br->bit_pos_) & BitMask(skipped_count);
  printf("[BrotliReadBits] %010d %2d val: %6x\n",
         (br->pos_ << 3) + br->bit_pos_ - 64, skipped_count, skipped_value);
#endif
  br->bit_pos_ = val;
}
/*
* Reload up to 32 bits byte-by-byte.
* This function works on both little and big endian.

View File

@ -1101,11 +1101,16 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
if (BrotliDecompressedSize(BROTLI_READ_SIZE, br->buf_, &known_size)
== BROTLI_RESULT_SUCCESS) {
while (s->ringbuffer_size >= known_size * 2
&& s->ringbuffer_size > 0) {
&& s->ringbuffer_size > 1) {
s->ringbuffer_size /= 2;
}
}
/* But make it fit the custom dictionary if there is one. */
while (s->ringbuffer_size < s->custom_dict_size) {
s->ringbuffer_size *= 2;
}
s->ringbuffer_mask = s->ringbuffer_size - 1;
s->ringbuffer = (uint8_t*)malloc((size_t)(s->ringbuffer_size +
kRingBufferWriteAheadSlack +
@ -1115,6 +1120,17 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
break;
}
s->ringbuffer_end = s->ringbuffer + s->ringbuffer_size;
if (s->custom_dict) {
memcpy(&s->ringbuffer[(-s->custom_dict_size) & s->ringbuffer_mask],
s->custom_dict, (size_t)s->custom_dict_size);
if (s->custom_dict_size > 0) {
s->prev_byte1 = s->custom_dict[s->custom_dict_size - 1];
}
if (s->custom_dict_size > 1) {
s->prev_byte2 = s->custom_dict[s->custom_dict_size - 2];
}
}
}
if (s->is_metadata) {
@ -1455,9 +1471,9 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
}
BROTLI_LOG_UINT(s->distance);
if (pos < s->max_backward_distance &&
if (pos + s->custom_dict_size < s->max_backward_distance &&
s->max_distance != s->max_backward_distance) {
s->max_distance = pos;
s->max_distance = pos + s->custom_dict_size;
} else {
s->max_distance = s->max_backward_distance;
}
@ -1702,6 +1718,12 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
return result;
}
/* Registers a caller-owned custom LZ77 dictionary on the decoder state.

   size: number of bytes in the dictionary.
   dict: the dictionary bytes. Only the pointer is stored; nothing is copied
         here, so the memory must stay valid until decoding is done.
   s:    decoder state to attach the dictionary to.

   The state keeps the size in an int. A request that cannot be represented
   consistently -- a NULL dict, a zero size, or a size that does not survive
   the conversion to int -- clears the dictionary instead of storing a
   pointer/size pair that disagree with each other or a negative size. */
void BrotliSetCustomDictionary(
    size_t size, const uint8_t* dict, BrotliState* s) {
  const int dict_size = (int)size;
  /* The round-trip comparison catches sizes that were truncated or turned
     negative by the narrowing conversion above. */
  if (dict == NULL || dict_size <= 0 || (size_t)dict_size != size) {
    s->custom_dict = NULL;
    s->custom_dict_size = 0;
    return;
  }
  s->custom_dict = dict;
  s->custom_dict_size = dict_size;
}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@ -123,6 +123,19 @@ BrotliResult BrotliDecompressBufferStreaming(size_t* available_in,
size_t* total_out,
BrotliState* s);
/* Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
e.g. for custom static dictionaries for data formats.
Not to be confused with the built-in transformable dictionary of Brotli.

Parameters:
  size - number of bytes in the dictionary. The state stores it in an int
         field, so it must be representable as an int.
  dict - the dictionary bytes. Only the pointer is stored by this call; the
         data is not copied here.
  s    - the decoder state that will use the dictionary.

The dictionary must exist in memory until decoding is done and is owned by
the caller. The same dictionary must have been given to the encoder.
To use:
-initialize state with BrotliStateInit
-use BrotliSetCustomDictionary
-use BrotliDecompressBufferStreaming
-clean up with BrotliStateCleanup
*/
void BrotliSetCustomDictionary(
size_t size, const uint8_t* dict, BrotliState* s);
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@ -46,6 +46,9 @@ void BrotliStateInit(BrotliState* s) {
s->code_lengths = NULL;
s->context_map_table = NULL;
s->custom_dict = NULL;
s->custom_dict_size = 0;
}
void BrotliStateCleanup(BrotliState* s) {

View File

@ -167,6 +167,10 @@ typedef struct {
int context_index;
int max_run_length_prefix;
HuffmanCode* context_map_table;
/* For custom dictionaries */
const uint8_t* custom_dict;
int custom_dict_size;
} BrotliState;
void BrotliStateInit(BrotliState* s);

View File

@ -282,6 +282,17 @@ void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
}
}
// Primes the compressor with a custom LZ77 dictionary of `size` bytes read
// from `dict`: the bytes go through CopyInputToRingBuffer, the flush and
// processed positions are set to `size`, the previous-byte context is taken
// from the end of the dictionary, and the hashers are warmed up with it.
void BrotliCompressor::BrotliSetCustomDictionary(
    const size_t size, const uint8_t* dict) {
  CopyInputToRingBuffer(size, dict);

  // Presumably this marks the dictionary bytes as already flushed and
  // processed so they are not emitted as output -- confirm against
  // WriteBrotliData.
  last_flush_pos_ = size;
  last_processed_pos_ = size;

  // Seed the previous-byte context from the last one or two dictionary bytes.
  if (size >= 1) {
    prev_byte_ = dict[size - 1];
    if (size >= 2) {
      prev_byte2_ = dict[size - 2];
    }
  }

  hashers_->PrependCustomDictionary(hash_type_, size, dict);
}
bool BrotliCompressor::WriteBrotliData(const bool is_last,
const bool force_flush,
size_t* out_size,
@ -641,11 +652,18 @@ bool BrotliInIsFinished(BrotliIn* r) {
}
// Compresses `in` to `out` with the given params and no custom dictionary,
// by forwarding to BrotliCompressWithCustomDictionary with an empty one.
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out) {
  const size_t no_dict_size = 0;
  const uint8_t* const no_dict = nullptr;
  return BrotliCompressWithCustomDictionary(no_dict_size, no_dict,
                                            params, in, out);
}
int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
BrotliParams params,
BrotliIn* in, BrotliOut* out) {
size_t in_bytes = 0;
size_t out_bytes = 0;
uint8_t* output;
bool final_block = false;
BrotliCompressor compressor(params);
if (dictsize != 0) compressor.BrotliSetCustomDictionary(dictsize, dict);
while (!final_block) {
in_bytes = CopyOneBlockToRingBuffer(in, &compressor);
final_block = in_bytes == 0 || BrotliInIsFinished(in);

View File

@ -128,6 +128,13 @@ class BrotliCompressor {
bool WriteBrotliData(const bool is_last, const bool force_flush,
size_t* out_size, uint8_t** output);
// Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
// e.g. for custom static dictionaries for data formats.
// Not to be confused with the built-in transformable dictionary of Brotli.
//   size: number of bytes in dict.
//   dict: the dictionary bytes; they are passed through
//         CopyInputToRingBuffer and used to warm up the hashers.
// To decode, use BrotliSetCustomDictionary of the decoder with the same
// dictionary.
// NOTE(review): presumably this has to be called before any input is
// compressed (BrotliCompressWithCustomDictionary calls it right after
// constructing the compressor) -- confirm.
void BrotliSetCustomDictionary(size_t size, const uint8_t* dict);
// Intentionally empty: calling it does nothing. It is kept here only for
// API backward-compatibility.
void WriteStreamHeader() {}
@ -180,6 +187,12 @@ int BrotliCompressBuffer(BrotliParams params,
// of reading from and writing to pre-allocated memory buffers.
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out);
// Before compressing the data, sets a custom LZ77 dictionary with
// BrotliCompressor::BrotliSetCustomDictionary.
//   dictsize: number of bytes in dict. When it is 0 no dictionary is set;
//             BrotliCompress forwards here with dictsize 0 and a null dict.
//   dict:     the dictionary bytes.
//   params, in, out: same meaning as for BrotliCompress.
// The decoder has to be given the same dictionary to decode the output.
int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
BrotliParams params,
BrotliIn* in, BrotliOut* out);
} // namespace brotli
#endif // BROTLI_ENC_ENCODE_H_

View File

@ -612,6 +612,31 @@ struct Hashers {
if (hash_h10.get() != NULL) hash_h10->SetStaticDictionary(dict);
}
// Calls hasher->Store(dict, pos) once for every position pos in [0, size),
// in increasing order.
// NOTE(review): Store always receives the base pointer `dict` together with
// the position. Confirm that Store indexes into the buffer by position
// instead of hashing the bytes at the pointer it is given, and that it does
// not read past dict + size for the final positions.
template <typename Hasher>
void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
  size_t pos = 0;
  while (pos < size) {
    hasher->Store(dict, pos);
    ++pos;
  }
}
// Custom LZ77 window: warms up the hasher selected by `type` (1..10) with the
// `size` bytes of `dict`. Any other `type` value does nothing.
void PrependCustomDictionary(
    int type, const size_t size, const uint8_t* dict) {
  switch (type) {
    case 1:
      WarmupHash(size, dict, hash_h1.get());
      return;
    case 2:
      WarmupHash(size, dict, hash_h2.get());
      return;
    case 3:
      WarmupHash(size, dict, hash_h3.get());
      return;
    case 4:
      WarmupHash(size, dict, hash_h4.get());
      return;
    case 5:
      WarmupHash(size, dict, hash_h5.get());
      return;
    case 6:
      WarmupHash(size, dict, hash_h6.get());
      return;
    case 7:
      WarmupHash(size, dict, hash_h7.get());
      return;
    case 8:
      WarmupHash(size, dict, hash_h8.get());
      return;
    case 9:
      WarmupHash(size, dict, hash_h9.get());
      return;
    case 10:
      WarmupHash(size, dict, hash_h10.get());
      return;
    default:
      return;
  }
}
std::unique_ptr<H1> hash_h1;
std::unique_ptr<H2> hash_h2;
std::unique_ptr<H3> hash_h3;