Added Brotli compress/decompress utilities and makefiles

This commit is contained in:
Roderick Sheeter 2013-11-19 14:32:56 -08:00
parent c6b9c7c5c8
commit 1cdcbd851f
10 changed files with 169 additions and 135 deletions

10
dec/Makefile Normal file
View File

@ -0,0 +1,10 @@
#brotli/dec
# Makefile for the Brotli decoder object files.
# Pulls in shared.mk from two directories up.
# NOTE(review): presumably shared.mk supplies the compiler and flag settings; it is not shown here.
include ../../shared.mk
# Object files built from the decoder sources in this directory.
OBJS = bit_reader.o decode.o huffman.o safe_malloc.o streams.o
# Default target: depends on every object in OBJS. No compile recipe is given in
# this file, so the objects are presumably produced by make's implicit rules or
# by rules in shared.mk.
all : $(OBJS)
# Delete the object files listed in OBJS.
clean :
rm -f $(OBJS)

View File

@ -14,6 +14,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "./bit_reader.h"
#include "./context.h"
#include "./decode.h"
@ -372,8 +373,8 @@ static void ReadInsertAndCopy(const HuffmanTree* tree,
} else {
*copy_dist = 0;
}
insert_code = (kInsertRangeLut[range_idx] << 3) + ((code >> 3) & 7);
copy_code = (kCopyRangeLut[range_idx] << 3) + (code & 7);
insert_code = kInsertRangeLut[range_idx] + ((code >> 3) & 7);
copy_code = kCopyRangeLut[range_idx] + (code & 7);
*insert_len = kInsertLengthPrefixCode[insert_code].offset;
insert_extra_bits = kInsertLengthPrefixCode[insert_code].nbits;
if (insert_extra_bits > 0) {
@ -471,17 +472,11 @@ static int DecodeContextMap(int context_map_size,
return 1;
}
if (*num_htrees == context_map_size) {
int i;
for (i = 0; i < context_map_size; ++i) {
(*context_map)[i] = i;
}
return 1;
}
{
HuffmanTree tree_index_htree;
int use_rle_for_zeros = BrotliReadBits(br, 1);
int max_run_length_prefix = 0;
int i;
if (use_rle_for_zeros) {
max_run_length_prefix = BrotliReadBits(br, 4) + 1;
}
@ -489,8 +484,6 @@ static int DecodeContextMap(int context_map_size,
&tree_index_htree, br)) {
return 0;
}
if (use_rle_for_zeros) {
int i;
for (i = 0; i < context_map_size;) {
int code;
if (!BrotliReadMoreInput(br)) {
@ -513,17 +506,6 @@ static int DecodeContextMap(int context_map_size,
++i;
}
}
} else {
int i;
for (i = 0; i < context_map_size; ++i) {
if (!BrotliReadMoreInput(br)) {
printf("[DecodeContextMap] Unexpected end of input.\n");
ok = 0;
goto End;
}
(*context_map)[i] = ReadSymbol(&tree_index_htree, br);
}
}
End:
BrotliHuffmanTreeRelease(&tree_index_htree);
}
@ -640,6 +622,7 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
int input_size_bits = 0;
int input_end = 0;
int window_bits = 0;
size_t max_backward_distance;
size_t ringbuffer_size;
size_t ringbuffer_mask;
uint8_t* ringbuffer;
@ -678,6 +661,7 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
} else {
window_bits = 16;
}
max_backward_distance = (1 << window_bits) - 16;
ringbuffer_size = 1 << window_bits;
ringbuffer_mask = ringbuffer_size - 1;
@ -812,6 +796,7 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
int copy_length;
int distance_code;
int distance;
size_t max_distance;
uint8_t context;
int j;
const uint8_t* copy_src;
@ -899,15 +884,26 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
dist_rb[dist_rb_idx & 3] = distance;
++dist_rb_idx;
}
BROTLI_LOG_UINT(distance);
if (pos < (size_t)distance || pos + copy_length > meta_block_end_pos) {
max_distance = max_backward_distance;
if (pos < max_distance) {
max_distance = pos;
}
if ((size_t)distance > max_distance) {
printf("Invalid backward reference. pos: %ld distance: %d "
"len: %d end: %lu\n", pos, distance, copy_length,
(unsigned long)meta_block_end_pos);
ok = 0;
goto End;
} else {
if (pos + copy_length > meta_block_end_pos) {
printf("Invalid backward reference. pos: %zu distance: %d "
"len: %d end: %zu\n", pos, distance, copy_length,
meta_block_end_pos);
ok = 0;
goto End;
}
copy_src = &ringbuffer[(pos - distance) & ringbuffer_mask];
@ -938,6 +934,7 @@ int BrotliDecompress(BrotliInput input, BrotliOutput output) {
}
++pos;
}
}
// When we get here, we must have inserted at least one literal and made
// a copy of at least length two, therefore accessing the last 2 bytes is

View File

@ -53,16 +53,12 @@ static const struct PrefixCodeRange kCopyLengthPrefixCode[] = {
{326, 8}, { 582, 9}, {1094, 10}, {2118, 24},
};
static const int kInsertAndCopyRangeLut[9] = {
0, 1, 4, 2, 3, 6, 5, 7, 8,
};
static const int kInsertRangeLut[9] = {
0, 0, 1, 1, 0, 2, 1, 2, 2,
0, 0, 8, 8, 0, 16, 8, 16, 16,
};
static const int kCopyRangeLut[9] = {
0, 1, 0, 1, 2, 0, 2, 1, 2,
0, 8, 0, 8, 16, 0, 16, 8, 16,
};
#endif // BROTLI_DEC_PREFIX_H_

11
enc/Makefile Normal file
View File

@ -0,0 +1,11 @@
#brotli/enc
# Makefile for the Brotli encoder object files.
# Pulls in shared.mk from two directories up.
# NOTE(review): presumably shared.mk supplies the compiler and flag settings; it is not shown here.
include ../../shared.mk
# Object files built from the encoder sources in this directory.
OBJS = backward_references.o block_splitter.o encode.o entropy_encode.o histogram.o literal_cost.o prefix.o
# Default target: depends on every object in OBJS. No compile recipe is given in
# this file, so the objects are presumably produced by make's implicit rules or
# by rules in shared.mk.
all : $(OBJS)
# Delete the object files listed in OBJS, plus whatever $(SO) expands to.
# NOTE(review): SO is not defined in this file; presumably it comes from
# shared.mk. If it is undefined it expands to nothing and the command still works.
clean :
rm -f $(OBJS) $(SO)

View File

@ -47,27 +47,30 @@ void CreateBackwardReferences(size_t num_bytes,
while (i + 2 < i_end) {
size_t best_len = 0;
size_t best_len_code = 0;
size_t best_dist = 0;
double best_score = 0;
const size_t max_distance = std::min(i + i_diff, max_backward_limit);
size_t max_distance = std::min(i + i_diff, max_backward_limit);
hasher->set_insert_length(insert_length);
bool match_found = hasher->FindLongestMatch(
ringbuffer, literal_cost, ringbuffer_mask,
i + i_diff, i_end - i, max_distance,
&best_len, &best_dist, &best_score);
&best_len, &best_len_code, &best_dist, &best_score);
if (match_found) {
// Found a match. Let's look for something even better ahead.
int delayed_backward_references_in_row = 0;
while (i + 4 < i_end &&
delayed_backward_references_in_row < 4) {
size_t best_len_2 = 0;
size_t best_len_code_2 = 0;
size_t best_dist_2 = 0;
double best_score_2 = 0;
max_distance = std::min(i + i_diff + 1, max_backward_limit);
hasher->Store(ringbuffer + i, i + i_diff);
match_found = hasher->FindLongestMatch(
ringbuffer, literal_cost, ringbuffer_mask,
i + i_diff + 1, i_end - i - 1, max_distance,
&best_len_2, &best_dist_2, &best_score_2);
&best_len_2, &best_len_code_2, &best_dist_2, &best_score_2);
double cost_diff_lazy = 0;
if (best_len >= 4) {
cost_diff_lazy +=
@ -96,6 +99,7 @@ void CreateBackwardReferences(size_t num_bytes,
++insert_length;
++delayed_backward_references_in_row;
best_len = best_len_2;
best_len_code = best_len_code_2;
best_dist = best_dist_2;
best_score = best_score_2;
i++;
@ -106,6 +110,7 @@ void CreateBackwardReferences(size_t num_bytes,
Command cmd;
cmd.insert_length_ = insert_length;
cmd.copy_length_ = best_len;
cmd.copy_length_code_ = best_len_code;
cmd.copy_distance_ = best_dist;
commands->push_back(cmd);
hasher->set_last_distance(best_dist);

View File

@ -24,13 +24,14 @@ namespace brotli {
// Command holds a sequence of literals and a backward reference copy.
class Command {
public:
Command() : insert_length_(0), copy_length_(0),
Command() : insert_length_(0), copy_length_(0), copy_length_code_(0),
copy_distance_(0), distance_code_(0),
distance_prefix_(0), command_prefix_(0),
distance_extra_bits_(0), distance_extra_bits_value_(0) {}
uint32_t insert_length_;
uint32_t copy_length_;
uint32_t copy_length_code_;
uint32_t copy_distance_;
// Values <= 16 are short codes, values > 16 are distances shifted by 16.
uint32_t distance_code_;

View File

@ -34,6 +34,18 @@
namespace brotli {
// Base-2 logarithm of the window size; the window is (1 << kWindowBits) bytes.
static const int kWindowBits = 22;
// To make decoding faster, we allow the decoder to write 16 bytes ahead in
// its ringbuffer, therefore the encoder has to decrease max distance by this
// amount.
static const int kDecoderRingBufferWriteAheadSlack = 16;
// Largest backward distance the encoder may use: the window size minus the
// decoder's write-ahead slack.
static const int kMaxBackwardDistance =
(1 << kWindowBits) - kDecoderRingBufferWriteAheadSlack;
// NOTE(review): presumably the base-2 logarithm of the maximum meta-block
// size; confirm against the code that consumes it.
static const int kMetaBlockSizeBits = 21;
// NOTE(review): presumably the base-2 logarithm of the encoder's input
// ring-buffer size; confirm against the ring-buffer implementation.
static const int kRingBufferBits = 23;
// Bit mask with the low kRingBufferBits bits set, i.e. (1 << kRingBufferBits) - 1.
static const int kRingBufferMask = (1 << kRingBufferBits) - 1;
template<int kSize>
double Entropy(const std::vector<Histogram<kSize> >& histograms) {
double retval = 0;
@ -264,7 +276,7 @@ void EncodeCommand(const Command& cmd,
uint64_t insert_extra_bits_val =
cmd.insert_length_ - InsertLengthOffset(code);
int copy_extra_bits = CopyLengthExtraBits(code);
uint64_t copy_extra_bits_val = cmd.copy_length_ - CopyLengthOffset(code);
uint64_t copy_extra_bits_val = cmd.copy_length_code_ - CopyLengthOffset(code);
if (insert_extra_bits > 0) {
WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage);
}
@ -325,8 +337,8 @@ void ComputeCommandPrefixes(std::vector<Command>* cmds,
for (int i = 0; i < cmds->size(); ++i) {
Command* cmd = &(*cmds)[i];
cmd->command_prefix_ = CommandPrefix(cmd->insert_length_,
cmd->copy_length_);
if (cmd->copy_length_ > 0) {
cmd->copy_length_code_);
if (cmd->copy_length_code_ > 0) {
PrefixEncodeCopyDistance(cmd->distance_code_,
num_direct_distance_codes,
distance_postfix_bits,
@ -454,7 +466,7 @@ void EncodeContextMap(const std::vector<int>& context_map,
int* storage_ix, uint8_t* storage) {
WriteBits(8, num_clusters - 1, storage_ix, storage);
if (num_clusters == 1 || num_clusters == context_map.size()) {
if (num_clusters == 1) {
return;
}
@ -737,10 +749,10 @@ void StoreMetaBlock(const MetaBlock& mb,
}
if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
int histogram_index = distance_it.type_;
int context = (distance_it.type_ << 2) +
((cmd.copy_length_ > 4) ? 3 : cmd.copy_length_ - 2);
histogram_index = mb.distance_context_map[context];
((cmd.copy_length_code_ > 4) ? 3 : cmd.copy_length_code_ - 2);
int histogram_index = mb.distance_context_map[context];
size_t max_distance = std::min(*pos, (size_t)kMaxBackwardDistance);
EncodeCopyDistance(cmd, distance_codes[histogram_index],
storage_ix, storage);
}
@ -748,20 +760,9 @@ void StoreMetaBlock(const MetaBlock& mb,
}
}
static const int kWindowBits = 22;
// To make decoding faster, we allow the decoder to write 16 bytes ahead in
// its ringbuffer, therefore the encoder has to decrease max distance by this
// amount.
static const int kDecoderRingBufferWriteAheadSlack = 16;
static const int kMaxBackwardDistance =
(1 << kWindowBits) - kDecoderRingBufferWriteAheadSlack;
static const int kMetaBlockSizeBits = 21;
static const int kRingBufferBits = 23;
static const int kRingBufferMask = (1 << kRingBufferBits) - 1;
BrotliCompressor::BrotliCompressor()
: hasher_(new Hasher),
: window_bits_(kWindowBits),
hasher_(new Hasher),
dist_ringbuffer_idx_(0),
input_pos_(0),
ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
@ -784,8 +785,12 @@ void BrotliCompressor::WriteStreamHeader() {
// Don't encode input size.
WriteBits(3, 0, &storage_ix_, storage_);
// Encode window size.
if (window_bits_ == 16) {
WriteBits(1, 0, &storage_ix_, storage_);
} else {
WriteBits(1, 1, &storage_ix_, storage_);
WriteBits(3, kWindowBits - 17, &storage_ix_, storage_);
WriteBits(3, window_bits_ - 17, &storage_ix_, storage_);
}
}
void BrotliCompressor::WriteMetaBlock(const size_t input_size,

View File

@ -49,6 +49,7 @@ class BrotliCompressor {
private:
int window_bits_;
Hasher* hasher_;
int dist_ringbuffer_[4];
size_t dist_ringbuffer_idx_;

View File

@ -147,6 +147,7 @@ class HashLongestMatch {
uint32_t max_length,
const uint32_t max_backward,
size_t * __restrict best_len_out,
size_t * __restrict best_len_code_out,
size_t * __restrict best_distance_out,
double * __restrict best_score_out) {
const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
@ -227,6 +228,7 @@ class HashLongestMatch {
best_len = len;
best_ix = backward;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = best_ix;
*best_score_out = best_score;
match_found = true;
@ -234,7 +236,7 @@ class HashLongestMatch {
}
}
const uint32_t key = Hash3Bytes(&data[cur_ix_masked], kBucketBits);
const uint32_t * __restrict const bucket = &buckets_[key][0];
const int * __restrict const bucket = &buckets_[key][0];
const int down = (num_[key] > kBlockSize) ? (num_[key] - kBlockSize) : 0;
int stop = int(cur_ix) - 64;
if (stop < 0) { stop = 0; }
@ -259,12 +261,16 @@ class HashLongestMatch {
best_len = len;
best_ix = backward;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = best_ix;
match_found = true;
}
}
for (int i = num_[key] - 1; i >= down; --i) {
size_t prev_ix = bucket[i & kBlockMask];
int prev_ix = bucket[i & kBlockMask];
if (prev_ix < 0) {
continue;
} else {
const size_t backward = cur_ix - prev_ix;
if (PREDICT_FALSE(backward > max_backward)) {
break;
@ -277,9 +283,9 @@ class HashLongestMatch {
FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked],
max_length);
if (len >= 3) {
// Comparing for >= 3 does not change the semantics, but just saves for
// a few unnecessary binary logarithms in backward reference score,
// since we are not interested in such short matches.
// Comparing for >= 3 does not change the semantics, but just saves
// for a few unnecessary binary logarithms in backward reference
// score, since we are not interested in such short matches.
const double score = BackwardReferenceScore(average_cost_,
start_cost4,
start_cost3,
@ -294,12 +300,14 @@ class HashLongestMatch {
best_len = len;
best_ix = backward;
*best_len_out = best_len;
*best_len_code_out = best_len;
*best_distance_out = best_ix;
*best_score_out = best_score;
match_found = true;
}
}
}
}
return match_found;
}
@ -333,7 +341,7 @@ class HashLongestMatch {
uint16_t num_[kBucketSize];
// Buckets containing kBlockSize of backward references.
uint32_t buckets_[kBucketSize][kBlockSize];
int buckets_[kBucketSize][kBlockSize];
int last_distance1_;
int last_distance2_;

View File

@ -59,7 +59,7 @@ void BuildHistograms(
if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
dist_it.Next();
int context = (dist_it.type_ << kDistanceContextBits) +
((cmd.copy_length_ > 4) ? 3 : cmd.copy_length_ - 2);
((cmd.copy_length_code_ > 4) ? 3 : cmd.copy_length_code_ - 2);
(*copy_dist_histograms)[context].Add(cmd.distance_prefix_);
}
}