mirror of
https://github.com/google/brotli.git
synced 2024-11-22 19:50:06 +00:00
Merge pull request #114 from szabadka/master
Brotli custom LZ77 dictionary support.
This commit is contained in:
commit
a13ea018f5
@ -97,19 +97,6 @@ static BROTLI_INLINE uint32_t BrotliPrefetchBits(BrotliBitReader* const br) {
|
||||
return (uint32_t)(br->val_ >> br->bit_pos_);
|
||||
}
|
||||
|
||||
/* For jumping over a number of bits in the bit stream when accessed with */
|
||||
/* BrotliPrefetchBits and BrotliFillBitWindow. */
|
||||
static BROTLI_INLINE void BrotliSetBitPos(BrotliBitReader* const br,
|
||||
uint32_t val) {
|
||||
#ifdef BROTLI_DECODE_DEBUG
|
||||
uint32_t n_bits = val - br->bit_pos_;
|
||||
const uint32_t bval = (uint32_t)(br->val_ >> br->bit_pos_) & BitMask(n_bits);
|
||||
printf("[BrotliReadBits] %010d %2d val: %6x\n",
|
||||
(br->pos_ << 3) + br->bit_pos_ - 64, n_bits, bval);
|
||||
#endif
|
||||
br->bit_pos_ = val;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reload up to 32 bits byte-by-byte.
|
||||
* This function works on both little and big endian.
|
||||
|
28
dec/decode.c
28
dec/decode.c
@ -1101,11 +1101,16 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
|
||||
if (BrotliDecompressedSize(BROTLI_READ_SIZE, br->buf_, &known_size)
|
||||
== BROTLI_RESULT_SUCCESS) {
|
||||
while (s->ringbuffer_size >= known_size * 2
|
||||
&& s->ringbuffer_size > 0) {
|
||||
&& s->ringbuffer_size > 1) {
|
||||
s->ringbuffer_size /= 2;
|
||||
}
|
||||
}
|
||||
|
||||
/* But make it fit the custom dictionary if there is one. */
|
||||
while (s->ringbuffer_size < s->custom_dict_size) {
|
||||
s->ringbuffer_size *= 2;
|
||||
}
|
||||
|
||||
s->ringbuffer_mask = s->ringbuffer_size - 1;
|
||||
s->ringbuffer = (uint8_t*)malloc((size_t)(s->ringbuffer_size +
|
||||
kRingBufferWriteAheadSlack +
|
||||
@ -1115,6 +1120,17 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
|
||||
break;
|
||||
}
|
||||
s->ringbuffer_end = s->ringbuffer + s->ringbuffer_size;
|
||||
|
||||
if (s->custom_dict) {
|
||||
memcpy(&s->ringbuffer[(-s->custom_dict_size) & s->ringbuffer_mask],
|
||||
s->custom_dict, (size_t)s->custom_dict_size);
|
||||
if (s->custom_dict_size > 0) {
|
||||
s->prev_byte1 = s->custom_dict[s->custom_dict_size - 1];
|
||||
}
|
||||
if (s->custom_dict_size > 1) {
|
||||
s->prev_byte2 = s->custom_dict[s->custom_dict_size - 2];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (s->is_metadata) {
|
||||
@ -1455,9 +1471,9 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
|
||||
}
|
||||
BROTLI_LOG_UINT(s->distance);
|
||||
|
||||
if (pos < s->max_backward_distance &&
|
||||
if (pos + s->custom_dict_size < s->max_backward_distance &&
|
||||
s->max_distance != s->max_backward_distance) {
|
||||
s->max_distance = pos;
|
||||
s->max_distance = pos + s->custom_dict_size;
|
||||
} else {
|
||||
s->max_distance = s->max_backward_distance;
|
||||
}
|
||||
@ -1702,6 +1718,12 @@ BrotliResult BrotliDecompressStreaming(BrotliInput input, BrotliOutput output,
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Registers a caller-owned custom LZ77 dictionary on the decoder state |s|.
   Only the pointer is stored, so |dict| must stay valid for as long as |s|
   is used for decoding.

   The size is kept in an int field. A request that cannot be represented
   there, or that has no bytes to back it (NULL |dict| or zero |size|), is
   recorded as "no dictionary" rather than as a wrapped size or a size
   without data. */
void BrotliSetCustomDictionary(
    size_t size, const uint8_t* dict, BrotliState* s) {
  /* Largest value the int-typed custom_dict_size can hold (INT_MAX, spelled
     without relying on <limits.h> being included). */
  const size_t max_size = (size_t)(~0u >> 1);
  if (dict == NULL || size == 0 || size > max_size) {
    s->custom_dict = NULL;
    s->custom_dict_size = 0;
    return;
  }
  s->custom_dict = dict;
  s->custom_dict_size = (int)size;
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
13
dec/decode.h
13
dec/decode.h
@ -123,6 +123,19 @@ BrotliResult BrotliDecompressBufferStreaming(size_t* available_in,
|
||||
size_t* total_out,
|
||||
BrotliState* s);
|
||||
|
||||
/* Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
|
||||
e.g. for custom static dictionaries for data formats.
|
||||
Not to be confused with the built-in transformable dictionary of Brotli.
|
||||
The dictionary must exist in memory until decoding is done and is owned by
|
||||
the caller. To use:
|
||||
-initialize state with BrotliStateInit
|
||||
-use BrotliSetCustomDictionary
|
||||
-use BrotliDecompressBufferStreaming
|
||||
-clean up with BrotliStateCleanup
|
||||
*/
|
||||
void BrotliSetCustomDictionary(
|
||||
size_t size, const uint8_t* dict, BrotliState* s);
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
@ -46,6 +46,9 @@ void BrotliStateInit(BrotliState* s) {
|
||||
|
||||
s->code_lengths = NULL;
|
||||
s->context_map_table = NULL;
|
||||
|
||||
s->custom_dict = NULL;
|
||||
s->custom_dict_size = 0;
|
||||
}
|
||||
|
||||
void BrotliStateCleanup(BrotliState* s) {
|
||||
|
@ -167,6 +167,10 @@ typedef struct {
|
||||
int context_index;
|
||||
int max_run_length_prefix;
|
||||
HuffmanCode* context_map_table;
|
||||
|
||||
/* For custom dictionaries */
|
||||
const uint8_t* custom_dict;
|
||||
int custom_dict_size;
|
||||
} BrotliState;
|
||||
|
||||
void BrotliStateInit(BrotliState* s);
|
||||
|
@ -282,6 +282,17 @@ void BrotliCompressor::CopyInputToRingBuffer(const size_t input_size,
|
||||
}
|
||||
}
|
||||
|
||||
void BrotliCompressor::BrotliSetCustomDictionary(
|
||||
const size_t size, const uint8_t* dict) {
|
||||
CopyInputToRingBuffer(size, dict);
|
||||
last_flush_pos_ = size;
|
||||
last_processed_pos_ = size;
|
||||
if (size > 0) prev_byte_ = dict[size - 1];
|
||||
if (size > 1) prev_byte2_ = dict[size - 2];
|
||||
|
||||
hashers_->PrependCustomDictionary(hash_type_, size, dict);
|
||||
}
|
||||
|
||||
bool BrotliCompressor::WriteBrotliData(const bool is_last,
|
||||
const bool force_flush,
|
||||
size_t* out_size,
|
||||
@ -641,11 +652,18 @@ bool BrotliInIsFinished(BrotliIn* r) {
|
||||
}
|
||||
|
||||
// Compresses |in| to |out| without a custom dictionary: forwards to
// BrotliCompressWithCustomDictionary with a zero-sized, null dictionary.
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out) {
  const size_t no_dict_size = 0;
  const uint8_t* const no_dict = nullptr;
  return BrotliCompressWithCustomDictionary(no_dict_size, no_dict, params, in,
                                            out);
}
|
||||
|
||||
int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
|
||||
BrotliParams params,
|
||||
BrotliIn* in, BrotliOut* out) {
|
||||
size_t in_bytes = 0;
|
||||
size_t out_bytes = 0;
|
||||
uint8_t* output;
|
||||
bool final_block = false;
|
||||
BrotliCompressor compressor(params);
|
||||
if (dictsize != 0) compressor.BrotliSetCustomDictionary(dictsize, dict);
|
||||
while (!final_block) {
|
||||
in_bytes = CopyOneBlockToRingBuffer(in, &compressor);
|
||||
final_block = in_bytes == 0 || BrotliInIsFinished(in);
|
||||
|
13
enc/encode.h
13
enc/encode.h
@ -128,6 +128,13 @@ class BrotliCompressor {
|
||||
bool WriteBrotliData(const bool is_last, const bool force_flush,
|
||||
size_t* out_size, uint8_t** output);
|
||||
|
||||
// Fills the new state with a dictionary for LZ77, warming up the ringbuffer,
|
||||
// e.g. for custom static dictionaries for data formats.
|
||||
// Not to be confused with the built-in transformable dictionary of Brotli.
|
||||
// To decode, use BrotliSetCustomDictionary of the decoder with the same
|
||||
// dictionary.
|
||||
void BrotliSetCustomDictionary(size_t size, const uint8_t* dict);
|
||||
|
||||
// No-op, but we keep it here for API backward-compatibility.
|
||||
void WriteStreamHeader() {}
|
||||
|
||||
@ -180,6 +187,12 @@ int BrotliCompressBuffer(BrotliParams params,
|
||||
// of reading from and writing to pre-allocated memory buffers.
|
||||
int BrotliCompress(BrotliParams params, BrotliIn* in, BrotliOut* out);
|
||||
|
||||
// Before compressing the data, sets a custom LZ77 dictionary with
|
||||
// BrotliCompressor::BrotliSetCustomDictionary.
|
||||
int BrotliCompressWithCustomDictionary(size_t dictsize, const uint8_t* dict,
|
||||
BrotliParams params,
|
||||
BrotliIn* in, BrotliOut* out);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_ENCODE_H_
|
||||
|
25
enc/hash.h
25
enc/hash.h
@ -612,6 +612,31 @@ struct Hashers {
|
||||
if (hash_h10.get() != NULL) hash_h10->SetStaticDictionary(dict);
|
||||
}
|
||||
|
||||
// Feeds every position of |dict| to |hasher| through Store(), in increasing
// index order.
// Does nothing when |hasher| or |dict| is NULL: the hasher pointers passed
// here come from unique_ptr::get() and may be unset, in which case they would
// otherwise be dereferenced.
// NOTE(review): Store() is invoked for every index up to size - 1. If a
// hasher reads several bytes starting at dict + i, confirm that the tail of
// the dictionary cannot be over-read.
template<typename Hasher>
void WarmupHash(const size_t size, const uint8_t* dict, Hasher* hasher) {
  if (hasher == NULL || dict == NULL) return;
  for (size_t i = 0; i < size; ++i) {
    hasher->Store(dict, i);
  }
}
|
||||
|
||||
// Custom LZ77 window.
|
||||
void PrependCustomDictionary(
|
||||
int type, const size_t size, const uint8_t* dict) {
|
||||
switch (type) {
|
||||
case 1: WarmupHash(size, dict, hash_h1.get()); break;
|
||||
case 2: WarmupHash(size, dict, hash_h2.get()); break;
|
||||
case 3: WarmupHash(size, dict, hash_h3.get()); break;
|
||||
case 4: WarmupHash(size, dict, hash_h4.get()); break;
|
||||
case 5: WarmupHash(size, dict, hash_h5.get()); break;
|
||||
case 6: WarmupHash(size, dict, hash_h6.get()); break;
|
||||
case 7: WarmupHash(size, dict, hash_h7.get()); break;
|
||||
case 8: WarmupHash(size, dict, hash_h8.get()); break;
|
||||
case 9: WarmupHash(size, dict, hash_h9.get()); break;
|
||||
case 10: WarmupHash(size, dict, hash_h10.get()); break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<H1> hash_h1;
|
||||
std::unique_ptr<H2> hash_h2;
|
||||
std::unique_ptr<H3> hash_h3;
|
||||
|
Loading…
Reference in New Issue
Block a user