diff --git a/dec/decode.c b/dec/decode.c index ccfbeab..d3b2fbe 100644 --- a/dec/decode.c +++ b/dec/decode.c @@ -50,8 +50,9 @@ static const int kDistanceContextBits = 2; #define HUFFMAN_TABLE_BITS 8 #define HUFFMAN_TABLE_MASK 0xff -/* This is a rough estimate, not an exact bound. */ -#define HUFFMAN_MAX_TABLE_SIZE 2048 +/* Maximum possible Huffman table size for an alphabet size of 704, max code + * length 15 and root table bits 8. */ +#define HUFFMAN_MAX_TABLE_SIZE 1080 #define CODE_LENGTH_CODES 18 static const uint8_t kCodeLengthCodeOrder[CODE_LENGTH_CODES] = { @@ -633,22 +634,62 @@ int CopyUncompressedBlockToOutput(BrotliOutput output, int len, int pos, int BrotliDecompressedSize(size_t encoded_size, const uint8_t* encoded_buffer, size_t* decoded_size) { - BrotliMemInput memin; - BrotliInput input = BrotliInitMemInput(encoded_buffer, encoded_size, &memin); - BrotliBitReader br; - int meta_block_len; - int input_end; - int is_uncompressed; - if (!BrotliInitBitReader(&br, input)) { + int i; + uint64_t val = 0; + int bit_pos = 0; + int is_last; + int is_uncompressed = 0; + int size_nibbles; + int meta_block_len = 0; + if (encoded_size == 0) { return 0; } - DecodeWindowBits(&br); - DecodeMetaBlockLength(&br, &meta_block_len, &input_end, &is_uncompressed); - if (!input_end) { - return 0; + /* Look at the first 8 bytes, it is enough to decode the length of the first + meta-block. */ + for (i = 0; i < encoded_size && i < 8; ++i) { + val |= (uint64_t)encoded_buffer[i] << (8 * i); } - *decoded_size = (size_t)meta_block_len; - return 1; + /* Skip the window bits. */ + bit_pos += (val & 1) ? 4 : 1; + /* Decode the ISLAST bit. */ + is_last = (val >> bit_pos) & 1; + ++bit_pos; + if (is_last) { + /* Decode the ISEMPTY bit, if it is set to 1, we are done. */ + if ((val >> bit_pos) & 1) { + *decoded_size = 0; + return 1; + } + ++bit_pos; + } + /* Decode the length of the first meta-block. */ + size_nibbles = (int)((val >> bit_pos) & 3) + 4; + bit_pos += 2; + for (i = 0; i < size_nibbles; ++i) { + meta_block_len |= (int)((val >> bit_pos) & 0xf) << (4 * i); + bit_pos += 4; + } + ++meta_block_len; + if (is_last) { + /* If this meta-block is the only one, we are done. */ + *decoded_size = (size_t)meta_block_len; + return 1; + } + is_uncompressed = (val >> bit_pos) & 1; + ++bit_pos; + if (is_uncompressed) { + /* If the first meta-block is uncompressed, we skip it and look at the + first two bits (ISLAST and ISEMPTY) of the next meta-block, and if + both are set to 1, we have a stream with an uncompressed meta-block + followed by an empty one, so the decompressed size is the size of the + first meta-block. */ + int offset = ((bit_pos + 7) >> 3) + meta_block_len; + if (offset < encoded_size && ((encoded_buffer[offset] & 3) == 3)) { + *decoded_size = (size_t)meta_block_len; + return 1; + } + } + return 0; } int BrotliDecompressBuffer(size_t encoded_size, diff --git a/dec/decode.h b/dec/decode.h index ec6d65e..d0490a2 100644 --- a/dec/decode.h +++ b/dec/decode.h @@ -27,7 +27,8 @@ extern "C" { /* Sets *decoded_size to the decompressed size of the given encoded stream. */ /* This function only works if the encoded buffer has a single meta block, */ -/* and this meta block must have the "is last" bit set. */ +/* or if it has two meta-blocks, where the first is uncompressed and the */ +/* second is empty. */ /* Returns 1 on success, 0 on failure. 
*/ int BrotliDecompressedSize(size_t encoded_size, const uint8_t* encoded_buffer, diff --git a/dec/safe_malloc.h b/dec/safe_malloc.h index 9a73b0e..065f930 100644 --- a/dec/safe_malloc.h +++ b/dec/safe_malloc.h @@ -15,8 +15,8 @@ Size-checked memory allocation. */ -#ifndef BROTLI_UTILS_UTILS_H_ -#define BROTLI_UTILS_UTILS_H_ +#ifndef BROTLI_DEC_SAFE_MALLOC_H_ +#define BROTLI_DEC_SAFE_MALLOC_H_ #include @@ -42,4 +42,4 @@ void* BrotliSafeMalloc(uint64_t nmemb, size_t size); } /* extern "C" */ #endif -#endif /* BROTLI_UTILS_UTILS_H_ */ +#endif /* BROTLI_DEC_SAFE_MALLOC_H_ */ diff --git a/enc/backward_references.cc b/enc/backward_references.cc index 837dbf3..8c8acbb 100644 --- a/enc/backward_references.cc +++ b/enc/backward_references.cc @@ -23,173 +23,230 @@ namespace brotli { -template +template void CreateBackwardReferences(size_t num_bytes, size_t position, const uint8_t* ringbuffer, - const float* literal_cost, size_t ringbuffer_mask, + const float* literal_cost, + size_t literal_cost_mask, const size_t max_backward_limit, + const double base_min_score, + const int quality, Hasher* hasher, - std::vector* commands) { - // Length heuristic that seems to help probably by better selection - // of lazy matches of similar lengths. - int insert_length = 0; + int* dist_cache, + int* last_insert_len, + Command* commands, + int* num_commands) { + if (num_bytes >= 3 && position >= 3) { + // Prepare the hashes for three last bytes of the last write. + // These could not be calculated before, since they require knowledge + // of both the previous and the current block. + hasher->Store(&ringbuffer[(position - 3) & ringbuffer_mask], + position - 3); + hasher->Store(&ringbuffer[(position - 2) & ringbuffer_mask], + position - 2); + hasher->Store(&ringbuffer[(position - 1) & ringbuffer_mask], + position - 1); + } + const Command * const orig_commands = commands; + int insert_length = *last_insert_len; size_t i = position & ringbuffer_mask; const int i_diff = position - i; const size_t i_end = i + num_bytes; - const int random_heuristics_window_size = 512; + // For speed up heuristics for random data. + const int random_heuristics_window_size = quality < 9 ? 64 : 512; int apply_random_heuristics = i + random_heuristics_window_size; - double average_cost = 0.0; - for (int k = position; k < position + num_bytes; ++k) { - average_cost += literal_cost[k & ringbuffer_mask]; + double average_cost = 5.4; + if (kUseCostModel) { + average_cost = 0.0; + for (int k = position; k < position + num_bytes; ++k) { + average_cost += literal_cost[k & literal_cost_mask]; + } + average_cost /= num_bytes; } - average_cost /= num_bytes; - hasher->set_average_cost(average_cost); // M1 match is for considering for two repeated copies, if moving // one literal form the previous copy to the current one allows the // current copy to be more efficient (because the way static dictionary // codes words). M1 matching improves text compression density by ~0.15 %. 
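  // (Illustrative example, not taken from the patch: many static dictionary
  // entries carry their leading space, e.g. " the". If the previous copy
  // happens to end with that space, shortening it by one byte and starting
  // the current copy one byte earlier lets the current copy match the
  // dictionary word together with its space, which is often the cheaper
  // code. The match_found_M1 / best_*_M1 state below carries exactly that
  // one-byte-shifted candidate from one loop iteration to the next.)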
bool match_found_M1 = false; - size_t best_len_M1 = 0; - size_t best_len_code_M1 = 0; - size_t best_dist_M1 = 0; + int best_len_M1 = 0; + int best_len_code_M1 = 0; + int best_dist_M1 = 0; double best_score_M1 = 0; - while (i + 2 < i_end) { - size_t best_len = 0; - size_t best_len_code = 0; - size_t best_dist = 0; - double best_score = 0; + while (i + 3 < i_end) { + int max_length = i_end - i; size_t max_distance = std::min(i + i_diff, max_backward_limit); - bool in_dictionary; - hasher->set_insert_length(insert_length); + double min_score = base_min_score; + if (kUseCostModel && insert_length < 8) { + double cost_diff[8] = + { 0.1, 0.038, 0.019, 0.013, 0.001, 0.001, 0.001, 0.001 }; + min_score += cost_diff[insert_length]; + } + int best_len = 0; + int best_len_code = 0; + int best_dist = 0; + double best_score = min_score; bool match_found = hasher->FindLongestMatch( - ringbuffer, literal_cost, ringbuffer_mask, - i + i_diff, i_end - i, max_distance, - &best_len, &best_len_code, &best_dist, &best_score, - &in_dictionary); - bool best_in_dictionary = in_dictionary; + ringbuffer, ringbuffer_mask, + literal_cost, literal_cost_mask, average_cost, + dist_cache, i + i_diff, max_length, max_distance, + &best_len, &best_len_code, &best_dist, &best_score); if (match_found) { - if (match_found_M1 && best_score_M1 > best_score) { + if (kUseDictionary && match_found_M1 && best_score_M1 > best_score) { // Two copies after each other. Take the last literal from the // last copy, and use it as the first of this one. - (commands->rbegin())->copy_length_ -= 1; - (commands->rbegin())->copy_length_code_ -= 1; + Command prev_cmd = commands[-1]; + commands[-1] = Command(prev_cmd.insert_len_, + prev_cmd.copy_len_ - 1, + prev_cmd.copy_len_ - 1, + prev_cmd.DistanceCode()); hasher->Store(ringbuffer + i, i + i_diff); --i; best_len = best_len_M1; best_len_code = best_len_code_M1; best_dist = best_dist_M1; best_score = best_score_M1; - // in_dictionary doesn't need to be correct, but it is the only - // reason why M1 matching should be beneficial here. Setting it here - // will only disable further M1 matching against this copy. - best_in_dictionary = true; - in_dictionary = true; } else { // Found a match. Let's look for something even better ahead. int delayed_backward_references_in_row = 0; - while (i + 4 < i_end && - delayed_backward_references_in_row < 4) { - size_t best_len_2 = 0; - size_t best_len_code_2 = 0; - size_t best_dist_2 = 0; - double best_score_2 = 0; + for (;;) { + --max_length; + int best_len_2 = quality < 4 ? 
std::min(best_len - 1, max_length) : 0; + int best_len_code_2 = 0; + int best_dist_2 = 0; + double best_score_2 = min_score; max_distance = std::min(i + i_diff + 1, max_backward_limit); hasher->Store(ringbuffer + i, i + i_diff); match_found = hasher->FindLongestMatch( - ringbuffer, literal_cost, ringbuffer_mask, - i + i_diff + 1, i_end - i - 1, max_distance, - &best_len_2, &best_len_code_2, &best_dist_2, &best_score_2, - &in_dictionary); - double cost_diff_lazy = 0; - if (best_len >= 4) { - cost_diff_lazy += - literal_cost[(i + 4) & ringbuffer_mask] - average_cost; - } - { - const int tail_length = best_len_2 - best_len + 1; - for (int k = 0; k < tail_length; ++k) { - cost_diff_lazy -= - literal_cost[(i + best_len + k) & ringbuffer_mask] - - average_cost; + ringbuffer, ringbuffer_mask, + literal_cost, literal_cost_mask, average_cost, + dist_cache, i + i_diff + 1, max_length, max_distance, + &best_len_2, &best_len_code_2, &best_dist_2, &best_score_2); + double cost_diff_lazy = 7.0; + if (kUseCostModel) { + cost_diff_lazy = 0.0; + if (best_len >= 4) { + cost_diff_lazy += + literal_cost[(i + 4) & literal_cost_mask] - average_cost; } + { + const int tail_length = best_len_2 - best_len + 1; + for (int k = 0; k < tail_length; ++k) { + cost_diff_lazy -= + literal_cost[(i + best_len + k) & literal_cost_mask] - + average_cost; + } + } + // If we are not inserting any symbols, inserting one is more + // expensive than if we were inserting symbols anyways. + if (insert_length < 1) { + cost_diff_lazy += 0.97; + } + // Add bias to slightly avoid lazy matching. + cost_diff_lazy += 2.0 + delayed_backward_references_in_row * 0.2; + cost_diff_lazy += 0.04 * literal_cost[i & literal_cost_mask]; } - // If we are not inserting any symbols, inserting one is more - // expensive than if we were inserting symbols anyways. - if (insert_length < 1) { - cost_diff_lazy += 0.97; - } - // Add bias to slightly avoid lazy matching. - cost_diff_lazy += 2.0 + delayed_backward_references_in_row * 0.2; - cost_diff_lazy += 0.04 * literal_cost[i & ringbuffer_mask]; - if (match_found && best_score_2 >= best_score + cost_diff_lazy) { // Ok, let's just write one byte for now and start a match from the // next byte. 
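          // (Sketch of the rule applied right below: the match starting at
          // i + 1 is taken only if
          //     best_score_2 >= best_score + cost_diff_lazy,
          // i.e. it must beat the match at i by more than the modelled cost
          // of emitting ringbuffer[i] as one extra literal; when the cost
          // model is off, cost_diff_lazy stays at the flat 7.0 bias set
          // above.)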
+ ++i; ++insert_length; - ++delayed_backward_references_in_row; best_len = best_len_2; best_len_code = best_len_code_2; best_dist = best_dist_2; best_score = best_score_2; - best_in_dictionary = in_dictionary; - i++; - } else { - break; + if (++delayed_backward_references_in_row < 4) { + continue; + } } + break; } } apply_random_heuristics = i + 2 * best_len + random_heuristics_window_size; - Command cmd; - cmd.insert_length_ = insert_length; - cmd.copy_length_ = best_len; - cmd.copy_length_code_ = best_len_code; - cmd.copy_distance_ = best_dist; - commands->push_back(cmd); - insert_length = 0; - ++i; - if (best_dist <= std::min(i + i_diff, max_backward_limit)) { - hasher->set_last_distance(best_dist); + max_distance = std::min(i + i_diff, max_backward_limit); + int distance_code = best_dist + 16; + if (best_dist <= max_distance) { + if (best_dist == dist_cache[0]) { + distance_code = 1; + } else if (best_dist == dist_cache[1]) { + distance_code = 2; + } else if (best_dist == dist_cache[2]) { + distance_code = 3; + } else if (best_dist == dist_cache[3]) { + distance_code = 4; + } else if (quality > 1 && best_dist >= 6) { + for (int k = 4; k < kNumDistanceShortCodes; ++k) { + int idx = kDistanceCacheIndex[k]; + int candidate = dist_cache[idx] + kDistanceCacheOffset[k]; + static const int kLimits[16] = { 0, 0, 0, 0, + 6, 6, 11, 11, + 11, 11, 11, 11, + 12, 12, 12, 12 }; + if (best_dist == candidate && best_dist >= kLimits[k]) { + distance_code = k + 1; + break; + } + } + } + if (distance_code > 1) { + dist_cache[3] = dist_cache[2]; + dist_cache[2] = dist_cache[1]; + dist_cache[1] = dist_cache[0]; + dist_cache[0] = best_dist; + } } - - // Copy all copied literals to the hasher, except the last one. - // We cannot store the last one yet, otherwise we couldn't find - // the possible M1 match. - for (int j = 1; j < best_len - 1; ++j) { - if (i + 2 < i_end) { + Command cmd(insert_length, best_len, best_len_code, distance_code); + *commands++ = cmd; + insert_length = 0; + if (kUseDictionary) { + ++i; + // Copy all copied literals to the hasher, except the last one. + // We cannot store the last one yet, otherwise we couldn't find + // the possible M1 match. + for (int j = 1; j < best_len - 1; ++j) { + if (i + 3 < i_end) { + hasher->Store(ringbuffer + i, i + i_diff); + } + ++i; + } + // Prepare M1 match. + if (hasher->HasStaticDictionary() && + best_len >= 4 && i + 20 < i_end && best_dist <= max_distance) { + max_distance = std::min(i + i_diff, max_backward_limit); + best_score_M1 = min_score; + match_found_M1 = hasher->FindLongestMatch( + ringbuffer, ringbuffer_mask, + literal_cost, literal_cost_mask, average_cost, + dist_cache, i + i_diff, i_end - i, max_distance, + &best_len_M1, &best_len_code_M1, &best_dist_M1, &best_score_M1); + } else { + match_found_M1 = false; + } + if (kUseCostModel) { + // This byte is just moved from the previous copy to the current, + // that is no gain. + best_score_M1 -= literal_cost[i & literal_cost_mask]; + // Adjust for losing the opportunity for lazy matching. + best_score_M1 -= 3.75; + } + // Store the last one of the match. + if (i + 3 < i_end) { hasher->Store(ringbuffer + i, i + i_diff); } ++i; - } - // Prepare M1 match. 
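      // (Aside on the short distance codes assumed by the dist_cache logic
      // above, following the kDistanceCacheIndex / kDistanceCacheOffset
      // tables defined elsewhere in the encoder: codes 1-4 address the last
      // four distances directly, and codes 5-16 encode small deltas, e.g.
      // code 5 = last distance - 1, code 6 = last + 1, through
      // code 16 = second-last + 3; the kLimits table only admits a delta
      // form once the distance is large enough that the direct encoding
      // would not be cheaper.)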
- if (hasher->HasStaticDictionary() && - best_len >= 4 && i + 20 < i_end && !best_in_dictionary) { - max_distance = std::min(i + i_diff, max_backward_limit); - match_found_M1 = hasher->FindLongestMatch( - ringbuffer, literal_cost, ringbuffer_mask, - i + i_diff, i_end - i, max_distance, - &best_len_M1, &best_len_code_M1, &best_dist_M1, &best_score_M1, - &in_dictionary); } else { - match_found_M1 = false; - in_dictionary = false; + // Put the hash keys into the table, if there are enough + // bytes left. + for (int j = 1; j < best_len; ++j) { + hasher->Store(&ringbuffer[i + j], i + i_diff + j); + } + i += best_len; } - // This byte is just moved from the previous copy to the current, - // that is no gain. - best_score_M1 -= literal_cost[i & ringbuffer_mask]; - // Adjust for losing the opportunity for lazy matching. - best_score_M1 -= 3.75; - - // Store the last one of the match. - if (i + 2 < i_end) { - hasher->Store(ringbuffer + i, i + i_diff); - } - ++i; } else { match_found_M1 = false; ++insert_length; @@ -214,7 +271,7 @@ void CreateBackwardReferences(size_t num_bytes, insert_length += 4; } } else { - int i_jump = std::min(i + 8, i_end - 2); + int i_jump = std::min(i + 8, i_end - 3); for (; i < i_jump; i += 2) { hasher->Store(ringbuffer + i, i + i_diff); insert_length += 2; @@ -224,44 +281,92 @@ void CreateBackwardReferences(size_t num_bytes, } } insert_length += (i_end - i); - - if (insert_length > 0) { - Command cmd; - cmd.insert_length_ = insert_length; - cmd.copy_length_ = 0; - cmd.copy_distance_ = 0; - commands->push_back(cmd); - } + *last_insert_len = insert_length; + *num_commands += (commands - orig_commands); } void CreateBackwardReferences(size_t num_bytes, size_t position, const uint8_t* ringbuffer, - const float* literal_cost, size_t ringbuffer_mask, + const float* literal_cost, + size_t literal_cost_mask, const size_t max_backward_limit, + const double base_min_score, + const int quality, Hashers* hashers, - Hashers::Type hash_type, - std::vector* commands) { + int hash_type, + int* dist_cache, + int* last_insert_len, + Command* commands, + int* num_commands) { switch (hash_type) { - case Hashers::HASH_15_8_4: - CreateBackwardReferences( - num_bytes, position, ringbuffer, literal_cost, - ringbuffer_mask, max_backward_limit, - hashers->hash_15_8_4.get(), - commands); + case 1: + CreateBackwardReferences( + num_bytes, position, ringbuffer, ringbuffer_mask, + literal_cost, literal_cost_mask, max_backward_limit, base_min_score, + quality, hashers->hash_h1.get(), dist_cache, last_insert_len, + commands, num_commands); break; - case Hashers::HASH_15_8_2: - CreateBackwardReferences( - num_bytes, position, ringbuffer, literal_cost, - ringbuffer_mask, max_backward_limit, - hashers->hash_15_8_2.get(), - commands); + case 2: + CreateBackwardReferences( + num_bytes, position, ringbuffer, ringbuffer_mask, + literal_cost, literal_cost_mask, max_backward_limit, base_min_score, + quality, hashers->hash_h2.get(), dist_cache, last_insert_len, + commands, num_commands); + break; + case 3: + CreateBackwardReferences( + num_bytes, position, ringbuffer, ringbuffer_mask, + literal_cost, literal_cost_mask, max_backward_limit, base_min_score, + quality, hashers->hash_h3.get(), dist_cache, last_insert_len, + commands, num_commands); + break; + case 4: + CreateBackwardReferences( + num_bytes, position, ringbuffer, ringbuffer_mask, + literal_cost, literal_cost_mask, max_backward_limit, base_min_score, + quality, hashers->hash_h4.get(), dist_cache, last_insert_len, + commands, num_commands); + break; + 
case 5: + CreateBackwardReferences( + num_bytes, position, ringbuffer, ringbuffer_mask, + literal_cost, literal_cost_mask, max_backward_limit, base_min_score, + quality, hashers->hash_h5.get(), dist_cache, last_insert_len, + commands, num_commands); + break; + case 6: + CreateBackwardReferences( + num_bytes, position, ringbuffer, ringbuffer_mask, + literal_cost, literal_cost_mask, max_backward_limit, base_min_score, + quality, hashers->hash_h6.get(), dist_cache, last_insert_len, + commands, num_commands); + break; + case 7: + CreateBackwardReferences( + num_bytes, position, ringbuffer, ringbuffer_mask, + literal_cost, literal_cost_mask, max_backward_limit, base_min_score, + quality, hashers->hash_h7.get(), dist_cache, last_insert_len, + commands, num_commands); + break; + case 8: + CreateBackwardReferences( + num_bytes, position, ringbuffer, ringbuffer_mask, + literal_cost, literal_cost_mask, max_backward_limit, base_min_score, + quality, hashers->hash_h8.get(), dist_cache, last_insert_len, + commands, num_commands); + break; + case 9: + CreateBackwardReferences( + num_bytes, position, ringbuffer, ringbuffer_mask, + literal_cost, literal_cost_mask, max_backward_limit, base_min_score, + quality, hashers->hash_h9.get(), dist_cache, last_insert_len, + commands, num_commands); break; default: break; } } - } // namespace brotli diff --git a/enc/backward_references.h b/enc/backward_references.h index bf90c7e..691a7b1 100644 --- a/enc/backward_references.h +++ b/enc/backward_references.h @@ -28,12 +28,18 @@ namespace brotli { void CreateBackwardReferences(size_t num_bytes, size_t position, const uint8_t* ringbuffer, - const float* literal_cost, size_t ringbuffer_mask, + const float* literal_cost, + size_t literal_cost_mask, const size_t max_backward_limit, + const double base_min_score, + const int quality, Hashers* hashers, - Hashers::Type hash_type, - std::vector* commands); + int hash_type, + int* dist_cache, + int* last_insert_len, + Command* commands, + int* num_commands); } // namespace brotli diff --git a/enc/bit_cost.h b/enc/bit_cost.h index 46e6229..ab05d54 100644 --- a/enc/bit_cost.h +++ b/enc/bit_cost.h @@ -91,7 +91,7 @@ static inline int HuffmanBitCost(const uint8_t* depth, int length) { // create huffman tree of huffman tree uint8_t cost[kCodeLengthCodes] = { 0 }; - CreateHuffmanTree(histogram, kCodeLengthCodes, 7, cost); + CreateHuffmanTree(histogram, kCodeLengthCodes, 7, 9, cost); // account for rle extra bits cost[16] += 2; cost[17] += 3; @@ -123,7 +123,7 @@ double PopulationCost(const Histogram& histogram) { return 20 + histogram.total_count_; } uint8_t depth[kSize] = { 0 }; - CreateHuffmanTree(&histogram.data_[0], kSize, 15, depth); + CreateHuffmanTree(&histogram.data_[0], kSize, 15, 9, depth); int bits = 0; for (int i = 0; i < kSize; ++i) { bits += histogram.data_[i] * depth[i]; diff --git a/enc/block_splitter.cc b/enc/block_splitter.cc index a24d0fb..01b32ce 100644 --- a/enc/block_splitter.cc +++ b/enc/block_splitter.cc @@ -33,7 +33,7 @@ namespace brotli { static const int kMaxLiteralHistograms = 100; static const int kMaxCommandHistograms = 50; -static const double kLiteralBlockSwitchCost = 26; +static const double kLiteralBlockSwitchCost = 28.1; static const double kCommandBlockSwitchCost = 13.5; static const double kDistanceBlockSwitchCost = 14.6; static const int kLiteralStrideLength = 70; @@ -51,7 +51,7 @@ void CopyLiteralsToByteArray(const std::vector& cmds, // Count how many we have. 
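  // (Illustrative example: for the two commands {insert_len_ = 3,
  // copy_len_ = 5} and {insert_len_ = 2, copy_len_ = 4}, the literal
  // stream handed to the block splitter is 3 + 2 = 5 bytes long; bytes
  // produced by the copies come from back-references and never enter
  // this array.)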
size_t total_length = 0; for (int i = 0; i < cmds.size(); ++i) { - total_length += cmds[i].insert_length_; + total_length += cmds[i].insert_len_; } if (total_length == 0) { return; @@ -64,9 +64,9 @@ void CopyLiteralsToByteArray(const std::vector& cmds, size_t pos = 0; size_t from_pos = 0; for (int i = 0; i < cmds.size() && pos < total_length; ++i) { - memcpy(&(*literals)[pos], data + from_pos, cmds[i].insert_length_); - pos += cmds[i].insert_length_; - from_pos += cmds[i].insert_length_ + cmds[i].copy_length_; + memcpy(&(*literals)[pos], data + from_pos, cmds[i].insert_len_); + pos += cmds[i].insert_len_; + from_pos += cmds[i].insert_len_ + cmds[i].copy_len_; } } @@ -75,9 +75,9 @@ void CopyCommandsToByteArray(const std::vector& cmds, std::vector* distance_prefixes) { for (int i = 0; i < cmds.size(); ++i) { const Command& cmd = cmds[i]; - insert_and_copy_codes->push_back(cmd.command_prefix_); - if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) { - distance_prefixes->push_back(cmd.distance_prefix_); + insert_and_copy_codes->push_back(cmd.cmd_prefix_); + if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) { + distance_prefixes->push_back(cmd.dist_prefix_); } } } @@ -301,7 +301,7 @@ void SplitByteVector(const std::vector& data, const double block_switch_cost, BlockSplit* split) { if (data.empty()) { - split->num_types_ = 0; + split->num_types_ = 1; return; } else if (data.size() < kMinLengthForBlockSplitting) { split->num_types_ = 1; @@ -376,7 +376,7 @@ void SplitBlockByTotalLength(const std::vector& all_commands, std::vector cur_block; for (int i = 0; i < all_commands.size(); ++i) { const Command& cmd = all_commands[i]; - int cmd_length = cmd.insert_length_ + cmd.copy_length_; + int cmd_length = cmd.insert_len_ + cmd.copy_len_; if (total_length > length_limit) { blocks->push_back(cur_block); cur_block.clear(); diff --git a/enc/block_splitter.h b/enc/block_splitter.h index 2a491e3..6e4682e 100644 --- a/enc/block_splitter.h +++ b/enc/block_splitter.h @@ -29,8 +29,7 @@ namespace brotli { struct BlockSplit { int num_types_; - std::vector types_; - std::vector type_codes_; + std::vector types_; std::vector lengths_; }; diff --git a/enc/brotli_bit_stream.cc b/enc/brotli_bit_stream.cc new file mode 100644 index 0000000..278ea50 --- /dev/null +++ b/enc/brotli_bit_stream.cc @@ -0,0 +1,575 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Brotli bit stream functions to support the low level format. There are no +// compression algorithms here, just the right ordering of bits to match the +// specs. 
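// (A minimal model of the bit addressing used throughout this file; a
// sketch only, the real implementation lives in ./write_bits.h. storage_ix
// is a bit index into storage, bits are appended least-significant-bit
// first within each byte, and every call advances storage_ix:
//
//   void ToyWriteBits(int n_bits, uint64_t bits,
//                     int* storage_ix, uint8_t* storage) {
//     for (int i = 0; i < n_bits; ++i, ++*storage_ix) {
//       if ((bits >> i) & 1) {
//         storage[*storage_ix >> 3] |= (uint8_t)(1 << (*storage_ix & 7));
//       }
//     }
//   }
//
// Like the real WriteBits, this assumes the bytes at and past the write
// position start out as zero.)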
+ +#include "./brotli_bit_stream.h" + +#include +#include +#include + +#include "./bit_cost.h" +#include "./entropy_encode.h" +#include "./fast_log.h" +#include "./prefix.h" +#include "./write_bits.h" + +namespace brotli { + +// returns false if fail +// nibblesbits represents the 2 bits to encode MNIBBLES (0-3) +bool EncodeMlen(size_t length, int* bits, int* numbits, int* nibblesbits) { + length--; // MLEN - 1 is encoded + int lg = length == 0 ? 1 : Log2Floor(length) + 1; + if (lg > 28) return false; + int mnibbles = (lg < 16 ? 16 : (lg + 3)) / 4; + *nibblesbits = mnibbles - 4; + *numbits = mnibbles * 4; + *bits = length; + return true; +} + +void StoreVarLenUint8(int n, int* storage_ix, uint8_t* storage) { + if (n == 0) { + WriteBits(1, 0, storage_ix, storage); + } else { + WriteBits(1, 1, storage_ix, storage); + int nbits = Log2Floor(n); + WriteBits(3, nbits, storage_ix, storage); + WriteBits(nbits, n - (1 << nbits), storage_ix, storage); + } +} + +bool StoreCompressedMetaBlockHeader(bool final_block, + int length, + int* storage_ix, + uint8_t* storage) { + // Write ISLAST bit. + WriteBits(1, final_block, storage_ix, storage); + // Write ISEMPTY bit. + if (final_block) { + WriteBits(1, length == 0, storage_ix, storage); + if (length == 0) { + return true; + } + } + + if (length == 0) { + // Only the last meta-block can be empty. + return false; + } + + int lenbits; + int nlenbits; + int nibblesbits; + if (!EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits)) { + return false; + } + + WriteBits(2, nibblesbits, storage_ix, storage); + WriteBits(nlenbits, lenbits, storage_ix, storage); + + if (!final_block) { + // Write ISUNCOMPRESSED bit. + WriteBits(1, 0, storage_ix, storage); + } + return true; +} + +bool StoreUncompressedMetaBlockHeader(int length, + int* storage_ix, + uint8_t* storage) { + // Write ISLAST bit. Uncompressed block cannot be the last one, so set to 0. + WriteBits(1, 0, storage_ix, storage); + int lenbits; + int nlenbits; + int nibblesbits; + if (!EncodeMlen(length, &lenbits, &nlenbits, &nibblesbits)) { + return false; + } + WriteBits(2, nibblesbits, storage_ix, storage); + WriteBits(nlenbits, lenbits, storage_ix, storage); + // Write ISUNCOMPRESSED bit. + WriteBits(1, 1, storage_ix, storage); + return true; +} + +void StoreHuffmanTreeOfHuffmanTreeToBitMask( + const int num_codes, + const uint8_t *code_length_bitdepth, + int *storage_ix, + uint8_t *storage) { + static const uint8_t kStorageOrder[kCodeLengthCodes] = { + 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15 + }; + // The bit lengths of the Huffman code over the code length alphabet + // are compressed with the following static Huffman code: + // Symbol Code + // ------ ---- + // 0 00 + // 1 1110 + // 2 110 + // 3 01 + // 4 10 + // 5 1111 + static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = { + 0, 7, 3, 2, 1, 15 + }; + static const uint8_t kHuffmanBitLengthHuffmanCodeBitLengths[6] = { + 2, 4, 3, 2, 2, 4 + }; + + // Throw away trailing zeros: + int codes_to_store = kCodeLengthCodes; + if (num_codes > 1) { + for (; codes_to_store > 0; --codes_to_store) { + if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) { + break; + } + } + } + int skip_some = 0; // skips none. + if (code_length_bitdepth[kStorageOrder[0]] == 0 && + code_length_bitdepth[kStorageOrder[1]] == 0) { + skip_some = 2; // skips two. + if (code_length_bitdepth[kStorageOrder[2]] == 0) { + skip_some = 3; // skips three. 
+ } + } + WriteBits(2, skip_some, storage_ix, storage); + for (int i = skip_some; i < codes_to_store; ++i) { + uint8_t l = code_length_bitdepth[kStorageOrder[i]]; + WriteBits(kHuffmanBitLengthHuffmanCodeBitLengths[l], + kHuffmanBitLengthHuffmanCodeSymbols[l], storage_ix, storage); + } +} + +void StoreHuffmanTreeToBitMask( + const std::vector &huffman_tree, + const std::vector &huffman_tree_extra_bits, + const uint8_t *code_length_bitdepth, + const std::vector &code_length_bitdepth_symbols, + int * __restrict storage_ix, + uint8_t * __restrict storage) { + for (int i = 0; i < huffman_tree.size(); ++i) { + int ix = huffman_tree[i]; + WriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix], + storage_ix, storage); + // Extra bits + switch (ix) { + case 16: + WriteBits(2, huffman_tree_extra_bits[i], storage_ix, storage); + break; + case 17: + WriteBits(3, huffman_tree_extra_bits[i], storage_ix, storage); + break; + } + } +} + +void StoreSimpleHuffmanTree(const uint8_t* depths, + int symbols[4], + int num_symbols, + int max_bits, + int *storage_ix, uint8_t *storage) { + // value of 1 indicates a simple Huffman code + WriteBits(2, 1, storage_ix, storage); + WriteBits(2, num_symbols - 1, storage_ix, storage); // NSYM - 1 + + // Sort + for (int i = 0; i < num_symbols; i++) { + for (int j = i + 1; j < num_symbols; j++) { + if (depths[symbols[j]] < depths[symbols[i]]) { + std::swap(symbols[j], symbols[i]); + } + } + } + + if (num_symbols == 2) { + WriteBits(max_bits, symbols[0], storage_ix, storage); + WriteBits(max_bits, symbols[1], storage_ix, storage); + } else if (num_symbols == 3) { + WriteBits(max_bits, symbols[0], storage_ix, storage); + WriteBits(max_bits, symbols[1], storage_ix, storage); + WriteBits(max_bits, symbols[2], storage_ix, storage); + } else { + WriteBits(max_bits, symbols[0], storage_ix, storage); + WriteBits(max_bits, symbols[1], storage_ix, storage); + WriteBits(max_bits, symbols[2], storage_ix, storage); + WriteBits(max_bits, symbols[3], storage_ix, storage); + // tree-select + WriteBits(1, depths[symbols[0]] == 1 ? 1 : 0, storage_ix, storage); + } +} + +// num = alphabet size +// depths = symbol depths +void StoreHuffmanTree(const uint8_t* depths, size_t num, + int quality, + int *storage_ix, uint8_t *storage) { + // Write the Huffman tree into the brotli-representation. + std::vector huffman_tree; + std::vector huffman_tree_extra_bits; + // TODO(user): Consider allocating these from stack. + huffman_tree.reserve(256); + huffman_tree_extra_bits.reserve(256); + WriteHuffmanTree(depths, num, &huffman_tree, &huffman_tree_extra_bits); + + // Calculate the statistics of the Huffman tree in brotli-representation. + int huffman_tree_histogram[kCodeLengthCodes] = { 0 }; + for (int i = 0; i < huffman_tree.size(); ++i) { + ++huffman_tree_histogram[huffman_tree[i]]; + } + + int num_codes = 0; + int code = 0; + for (int i = 0; i < kCodeLengthCodes; ++i) { + if (huffman_tree_histogram[i]) { + if (num_codes == 0) { + code = i; + num_codes = 1; + } else if (num_codes == 1) { + num_codes = 2; + break; + } + } + } + + // Calculate another Huffman tree to use for compressing both the + // earlier Huffman tree with. + // TODO(user): Consider allocating these from stack. 
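  // (Context for the 18-symbol alphabet built here: WriteHuffmanTree emits
  // the depth sequence run-length coded, with symbols 0-15 standing for
  // literal code lengths, 16 repeating the previous non-zero length with
  // 2 extra bits, and 17 repeating a zero run with 3 extra bits, matching
  // the case 16 / case 17 branches of StoreHuffmanTreeToBitMask above.
  // A depth sequence 3,3,3,3,3, for instance, becomes one literal 3 plus a
  // single 16-code instead of five literal 3s, which is what makes the
  // second-level Huffman code over these 18 symbols pay off.)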
+ uint8_t code_length_bitdepth[kCodeLengthCodes] = { 0 }; + std::vector code_length_bitdepth_symbols(kCodeLengthCodes); + CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes, + 5, quality, &code_length_bitdepth[0]); + ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes, + code_length_bitdepth_symbols.data()); + + // Now, we have all the data, let's start storing it + StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth, + storage_ix, storage); + + if (num_codes == 1) { + code_length_bitdepth[code] = 0; + } + + // Store the real huffman tree now. + StoreHuffmanTreeToBitMask(huffman_tree, + huffman_tree_extra_bits, + &code_length_bitdepth[0], + code_length_bitdepth_symbols, + storage_ix, storage); +} + +void BuildAndStoreHuffmanTree(const int *histogram, + const int length, + const int quality, + uint8_t* depth, + uint16_t* bits, + int* storage_ix, + uint8_t* storage) { + int count = 0; + int s4[4] = { 0 }; + for (size_t i = 0; i < length; i++) { + if (histogram[i]) { + if (count < 4) { + s4[count] = i; + } else if (quality < 3 && count > 4) { + break; + } + count++; + } + } + + int max_bits_counter = length - 1; + int max_bits = 0; + while (max_bits_counter) { + max_bits_counter >>= 1; + ++max_bits; + } + + if (count <= 1) { + WriteBits(4, 1, storage_ix, storage); + WriteBits(max_bits, s4[0], storage_ix, storage); + return; + } + + if (length >= 50 && count >= 16 && quality >= 3) { + std::vector counts(length); + memcpy(&counts[0], histogram, sizeof(counts[0]) * length); + OptimizeHuffmanCountsForRle(length, &counts[0]); + CreateHuffmanTree(&counts[0], length, 15, quality, depth); + } else { + CreateHuffmanTree(histogram, length, 15, quality, depth); + } + ConvertBitDepthsToSymbols(depth, length, bits); + + if (count <= 4) { + StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage); + } else { + StoreHuffmanTree(depth, length, quality, storage_ix, storage); + } +} + +int IndexOf(const std::vector& v, int value) { + for (int i = 0; i < v.size(); ++i) { + if (v[i] == value) return i; + } + return -1; +} + +void MoveToFront(std::vector* v, int index) { + int value = (*v)[index]; + for (int i = index; i > 0; --i) { + (*v)[i] = (*v)[i - 1]; + } + (*v)[0] = value; +} + +std::vector MoveToFrontTransform(const std::vector& v) { + if (v.empty()) return v; + std::vector mtf(*max_element(v.begin(), v.end()) + 1); + for (int i = 0; i < mtf.size(); ++i) mtf[i] = i; + std::vector result(v.size()); + for (int i = 0; i < v.size(); ++i) { + int index = IndexOf(mtf, v[i]); + result[i] = index; + MoveToFront(&mtf, index); + } + return result; +} + +// Finds runs of zeros in v_in and replaces them with a prefix code of the run +// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are +// shifted by *max_length_prefix. Will not create prefix codes bigger than the +// initial value of *max_run_length_prefix. The prefix code of run length L is +// simply Log2Floor(L) and the number of extra bits is the same as the prefix +// code. +void RunLengthCodeZeros(const std::vector& v_in, + int* max_run_length_prefix, + std::vector* v_out, + std::vector* extra_bits) { + int max_reps = 0; + for (int i = 0; i < v_in.size();) { + for (; i < v_in.size() && v_in[i] != 0; ++i) ; + int reps = 0; + for (; i < v_in.size() && v_in[i] == 0; ++i) { + ++reps; + } + max_reps = std::max(reps, max_reps); + } + int max_prefix = max_reps > 0 ? 
Log2Floor(max_reps) : 0;
+  *max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
+  for (int i = 0; i < v_in.size();) {
+    if (v_in[i] != 0) {
+      v_out->push_back(v_in[i] + *max_run_length_prefix);
+      extra_bits->push_back(0);
+      ++i;
+    } else {
+      int reps = 1;
+      for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
+        ++reps;
+      }
+      i += reps;
+      while (reps) {
+        if (reps < (2 << *max_run_length_prefix)) {
+          int run_length_prefix = Log2Floor(reps);
+          v_out->push_back(run_length_prefix);
+          extra_bits->push_back(reps - (1 << run_length_prefix));
+          break;
+        } else {
+          v_out->push_back(*max_run_length_prefix);
+          extra_bits->push_back((1 << *max_run_length_prefix) - 1);
+          reps -= (2 << *max_run_length_prefix) - 1;
+        }
+      }
+    }
+  }
+}
+
+// Returns a maximum zero-run-length-prefix value such that run-length coding
+// zeros in v with this maximum prefix value and then encoding the resulting
+// histogram and entropy-coding v produces the least amount of bits.
+int BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
+  int min_cost = std::numeric_limits<int>::max();
+  int best_max_prefix = 0;
+  for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) {
+    std::vector<int> rle_symbols;
+    std::vector<int> extra_bits;
+    int max_run_length_prefix = max_prefix;
+    RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits);
+    if (max_run_length_prefix < max_prefix) break;
+    HistogramContextMap histogram;
+    for (int i = 0; i < rle_symbols.size(); ++i) {
+      histogram.Add(rle_symbols[i]);
+    }
+    int bit_cost = PopulationCost(histogram);
+    if (max_prefix > 0) {
+      bit_cost += 4;
+    }
+    for (int i = 1; i <= max_prefix; ++i) {
+      bit_cost += histogram.data_[i] * i;  // extra bits
+    }
+    if (bit_cost < min_cost) {
+      min_cost = bit_cost;
+      best_max_prefix = max_prefix;
+    }
+  }
+  return best_max_prefix;
+}
+
+void EncodeContextMap(const std::vector<int>& context_map,
+                      int num_clusters,
+                      int* storage_ix, uint8_t* storage) {
+  StoreVarLenUint8(num_clusters - 1, storage_ix, storage);
+
+  if (num_clusters == 1) {
+    return;
+  }
+
+  std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
+  std::vector<int> rle_symbols;
+  std::vector<int> extra_bits;
+  int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
+  RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
+                     &rle_symbols, &extra_bits);
+  HistogramContextMap symbol_histogram;
+  for (int i = 0; i < rle_symbols.size(); ++i) {
+    symbol_histogram.Add(rle_symbols[i]);
+  }
+  bool use_rle = max_run_length_prefix > 0;
+  WriteBits(1, use_rle, storage_ix, storage);
+  if (use_rle) {
+    WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
+  }
+  EntropyCodeContextMap symbol_code;
+  memset(symbol_code.depth_, 0, sizeof(symbol_code.depth_));
+  memset(symbol_code.bits_, 0, sizeof(symbol_code.bits_));
+  BuildAndStoreHuffmanTree(symbol_histogram.data_,
+                           num_clusters + max_run_length_prefix,
+                           9,  // quality
+                           symbol_code.depth_, symbol_code.bits_,
+                           storage_ix, storage);
+  for (int i = 0; i < rle_symbols.size(); ++i) {
+    WriteBits(symbol_code.depth_[rle_symbols[i]],
+              symbol_code.bits_[rle_symbols[i]],
+              storage_ix, storage);
+    if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
+      WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
+    }
+  }
+  WriteBits(1, 1, storage_ix, storage);  // use move-to-front
+}
+
+void StoreBlockSwitch(const BlockSplitCode& code,
+                      const int block_ix,
+                      int* storage_ix,
+                      uint8_t* storage) {
+  if (block_ix > 0) {
+    int typecode = code.type_code[block_ix];
+
WriteBits(code.type_depths[typecode], code.type_bits[typecode], + storage_ix, storage); + } + int lencode = code.length_prefix[block_ix]; + WriteBits(code.length_depths[lencode], code.length_bits[lencode], + storage_ix, storage); + WriteBits(code.length_nextra[block_ix], code.length_extra[block_ix], + storage_ix, storage); +} + +void BuildAndStoreBlockSplitCode(const std::vector& types, + const std::vector& lengths, + const int num_types, + const int quality, + BlockSplitCode* code, + int* storage_ix, + uint8_t* storage) { + const int num_blocks = types.size(); + std::vector type_histo(num_types + 2); + std::vector length_histo(26); + int last_type = 1; + int second_last_type = 0; + code->type_code.resize(num_blocks); + code->length_prefix.resize(num_blocks); + code->length_nextra.resize(num_blocks); + code->length_extra.resize(num_blocks); + code->type_depths.resize(num_types + 2); + code->type_bits.resize(num_types + 2); + code->length_depths.resize(26); + code->length_bits.resize(26); + for (int i = 0; i < num_blocks; ++i) { + int type = types[i]; + int type_code = (type == last_type + 1 ? 1 : + type == second_last_type ? 0 : + type + 2); + second_last_type = last_type; + last_type = type; + code->type_code[i] = type_code; + if (i > 0) ++type_histo[type_code]; + GetBlockLengthPrefixCode(lengths[i], + &code->length_prefix[i], + &code->length_nextra[i], + &code->length_extra[i]); + ++length_histo[code->length_prefix[i]]; + } + StoreVarLenUint8(num_types - 1, storage_ix, storage); + if (num_types > 1) { + BuildAndStoreHuffmanTree(&type_histo[0], num_types + 2, quality, + &code->type_depths[0], &code->type_bits[0], + storage_ix, storage); + BuildAndStoreHuffmanTree(&length_histo[0], 26, quality, + &code->length_depths[0], &code->length_bits[0], + storage_ix, storage); + StoreBlockSwitch(*code, 0, storage_ix, storage); + } +} + +void StoreTrivialContextMap(int num_types, + int context_bits, + int* storage_ix, + uint8_t* storage) { + StoreVarLenUint8(num_types - 1, storage_ix, storage); + if (num_types > 1) { + int repeat_code = context_bits - 1; + int repeat_bits = (1 << repeat_code) - 1; + int alphabet_size = num_types + repeat_code; + std::vector histogram(alphabet_size); + std::vector depths(alphabet_size); + std::vector bits(alphabet_size); + // Write RLEMAX. + WriteBits(1, 1, storage_ix, storage); + WriteBits(4, repeat_code - 1, storage_ix, storage); + histogram[repeat_code] = num_types; + histogram[0] = 1; + for (int i = context_bits; i < alphabet_size; ++i) { + histogram[i] = 1; + } + BuildAndStoreHuffmanTree(&histogram[0], alphabet_size, 1, + &depths[0], &bits[0], + storage_ix, storage); + for (int i = 0; i < num_types; ++i) { + int code = (i == 0 ? 0 : i + context_bits - 1); + WriteBits(depths[code], bits[code], storage_ix, storage); + WriteBits(depths[repeat_code], bits[repeat_code], storage_ix, storage); + WriteBits(repeat_code, repeat_bits, storage_ix, storage); + } + // Write IMTF (inverse-move-to-front) bit. + WriteBits(1, 1, storage_ix, storage); + } +} + +} // namespace brotli diff --git a/enc/brotli_bit_stream.h b/enc/brotli_bit_stream.h new file mode 100644 index 0000000..bb488be --- /dev/null +++ b/enc/brotli_bit_stream.h @@ -0,0 +1,109 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Functions to convert brotli-related data structures into the +// brotli bit stream. The functions here operate under +// assumption that there is enough space in the storage, i.e., there are +// no out-of-range checks anywhere. +// +// These functions do bit addressing into a byte array. The byte array +// is called "storage" and the index to the bit is called storage_ix +// in function arguments. + +#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_ +#define BROTLI_ENC_BROTLI_BIT_STREAM_H_ + +#include +#include +#include + +namespace brotli { + +// All Store functions here will use a storage_ix, which is always the bit +// position for the current storage. + +// Stores a number between 0 and 255. +void StoreVarLenUint8(int n, int* storage_ix, uint8_t* storage); + +// Stores the compressed meta-block header. +bool StoreCompressedMetaBlockHeader(bool final_block, + int length, + int* storage_ix, + uint8_t* storage); + +// Stores the uncompressed meta-block header. +bool StoreUncompressedMetaBlockHeader(int length, + int* storage_ix, + uint8_t* storage); + +// Stores a context map where the histogram type is always the block type. +void StoreTrivialContextMap(int num_types, + int context_bits, + int* storage_ix, + uint8_t* storage); + +void StoreHuffmanTreeOfHuffmanTreeToBitMask( + const int num_codes, + const uint8_t *code_length_bitdepth, + int *storage_ix, + uint8_t *storage); + +// Builds a Huffman tree from histogram[0:length] into depth[0:length] and +// bits[0:length] and stores the encoded tree to the bit stream. +void BuildAndStoreHuffmanTree(const int *histogram, + const int length, + const int quality, + uint8_t* depth, + uint16_t* bits, + int* storage_ix, + uint8_t* storage); + +// Encodes the given context map to the bit stream. The number of different +// histogram ids is given by num_clusters. +void EncodeContextMap(const std::vector& context_map, + int num_clusters, + int* storage_ix, uint8_t* storage); + +// Data structure that stores everything that is needed to encode each block +// block switch command. +struct BlockSplitCode { + std::vector type_code; + std::vector length_prefix; + std::vector length_nextra; + std::vector length_extra; + std::vector type_depths; + std::vector type_bits; + std::vector length_depths; + std::vector length_bits; +}; + +// Builds a BlockSplitCode data structure from the block split given by the +// vector of block types and block lengths and stores it to the bit stream. +void BuildAndStoreBlockSplitCode(const std::vector& types, + const std::vector& lengths, + const int num_types, + const int quality, + BlockSplitCode* code, + int* storage_ix, + uint8_t* storage); + +// Stores the block switch command with index block_ix to the bit stream. 
+void StoreBlockSwitch(const BlockSplitCode& code,
+                      const int block_ix,
+                      int* storage_ix,
+                      uint8_t* storage);
+
+}  // namespace brotli
+
+#endif  // BROTLI_ENC_BROTLI_BIT_STREAM_H_
diff --git a/enc/cluster.h b/enc/cluster.h
index 855a88d..315093c 100644
--- a/enc/cluster.h
+++ b/enc/cluster.h
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -42,7 +43,7 @@ struct HistogramPair {
 };
 
 struct HistogramPairComparator {
-  bool operator()(const HistogramPair& p1, const HistogramPair& p2) {
+  bool operator()(const HistogramPair& p1, const HistogramPair& p2) const {
     if (p1.cost_diff != p2.cost_diff) {
       return p1.cost_diff > p2.cost_diff;
     }
@@ -59,8 +60,8 @@ inline double ClusterCostDiff(int size_a, int size_b) {
 
 // Computes the bit cost reduction by combining out[idx1] and out[idx2] and if
 // it is below a threshold, stores the pair (idx1, idx2) in the *pairs heap.
-template<int kSize>
-void CompareAndPushToHeap(const Histogram<kSize>* out,
+template<typename HistogramType>
+void CompareAndPushToHeap(const HistogramType* out,
                           const int* cluster_size,
                           int idx1, int idx2,
                           std::vector<HistogramPair>* pairs) {
@@ -90,7 +91,7 @@ void CompareAndPushToHeap(const Histogram<kSize>* out,
   } else {
     double threshold = pairs->empty() ? 1e99 :
         std::max(0.0, (*pairs)[0].cost_diff);
-    Histogram<kSize> combo = out[idx1];
+    HistogramType combo = out[idx1];
     combo.AddHistogram(out[idx2]);
     double cost_combo = PopulationCost(combo);
     if (cost_combo < threshold - p.cost_diff) {
@@ -105,8 +106,8 @@ void CompareAndPushToHeap(const Histogram<kSize>* out,
   }
 }
 
-template<int kSize>
-void HistogramCombine(Histogram<kSize>* out,
+template<typename HistogramType>
+void HistogramCombine(HistogramType* out,
                       int* cluster_size,
                       int* symbols,
                       int symbols_size,
@@ -178,22 +179,22 @@ void HistogramCombine(Histogram<kSize>* out,
 // Histogram refinement
 
 // What is the bit cost of moving histogram from cur_symbol to candidate.
-template<int kSize>
-double HistogramBitCostDistance(const Histogram<kSize>& histogram,
-                                const Histogram<kSize>& candidate) {
+template<typename HistogramType>
+double HistogramBitCostDistance(const HistogramType& histogram,
+                                const HistogramType& candidate) {
   if (histogram.total_count_ == 0) {
     return 0.0;
   }
-  Histogram<kSize> tmp = histogram;
+  HistogramType tmp = histogram;
   tmp.AddHistogram(candidate);
   return PopulationCost(tmp) - candidate.bit_cost_;
 }
 
 // Find the best 'out' histogram for each of the 'in' histograms.
 // Note: we assume that out[]->bit_cost_ is already up-to-date.
-template<int kSize>
-void HistogramRemap(const Histogram<kSize>* in, int in_size,
-                    Histogram<kSize>* out, int* symbols) {
+template<typename HistogramType>
+void HistogramRemap(const HistogramType* in, int in_size,
+                    HistogramType* out, int* symbols) {
   std::set<int> all_symbols;
   for (int i = 0; i < in_size; ++i) {
     all_symbols.insert(symbols[i]);
@@ -224,10 +225,10 @@ void HistogramRemap(const Histogram<kSize>* in, int in_size,
 
 // Reorder histograms in *out so that the new symbols in *symbols come in
 // increasing order.
-template<int kSize>
-void HistogramReindex(std::vector<Histogram<kSize> >* out,
+template<typename HistogramType>
+void HistogramReindex(std::vector<HistogramType>* out,
                       std::vector<int>* symbols) {
-  std::vector<Histogram<kSize> > tmp(*out);
+  std::vector<HistogramType> tmp(*out);
   std::map<int, int> new_index;
   int next_index = 0;
   for (int i = 0; i < symbols->size(); ++i) {
@@ -246,11 +247,11 @@ void HistogramReindex(std::vector<Histogram<kSize> >* out,
 
 // Clusters similar histograms in 'in' together, the selected histograms are
 // placed in 'out', and for each index in 'in', *histogram_symbols will
 // indicate which of the 'out' histograms is the best approximation.
-template -void ClusterHistograms(const std::vector >& in, +template +void ClusterHistograms(const std::vector& in, int num_contexts, int num_blocks, int max_histograms, - std::vector >* out, + std::vector* out, std::vector* histogram_symbols) { const int in_size = num_contexts * num_blocks; std::vector cluster_size(in_size, 1); diff --git a/enc/command.h b/enc/command.h index f979973..5600455 100644 --- a/enc/command.h +++ b/enc/command.h @@ -18,31 +18,131 @@ #define BROTLI_ENC_COMMAND_H_ #include +#include "./fast_log.h" namespace brotli { -// Command holds a sequence of literals and a backward reference copy. -class Command { - public: - // distance_code_ is initialized to 17 because it refers to the distance - // code of a backward distance of 1, this way the last insert-only command - // won't use the last-distance short code, and accordingly distance_prefix_ is - // set to 16 - Command() : insert_length_(0), copy_length_(0), copy_length_code_(0), - copy_distance_(0), distance_code_(17), - distance_prefix_(16), command_prefix_(0), - distance_extra_bits_(0), distance_extra_bits_value_(0) {} +static inline void GetDistCode(int distance_code, + uint16_t* code, uint32_t* extra) { + distance_code -= 1; + if (distance_code < 16) { + *code = distance_code; + *extra = 0; + } else { + distance_code -= 12; + int numextra = Log2FloorNonZero(distance_code) - 1; + int prefix = distance_code >> numextra; + *code = 12 + 2 * numextra + prefix; + *extra = (numextra << 24) | (distance_code - (prefix << numextra)); + } +} - uint32_t insert_length_; - uint32_t copy_length_; - uint32_t copy_length_code_; - uint32_t copy_distance_; - // Values <= 16 are short codes, values > 16 are distances shifted by 16. - uint32_t distance_code_; - uint16_t distance_prefix_; - uint16_t command_prefix_; - int distance_extra_bits_; - uint32_t distance_extra_bits_value_; +static int insbase[] = { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, + 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 }; +static int insextra[] = { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, + 5, 6, 7, 8, 9, 10, 12, 14, 24 }; +static int copybase[] = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38, + 54, 70, 102, 134, 198, 326, 582, 1094, 2118 }; +static int copyextra[] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, + 4, 5, 5, 6, 7, 8, 9, 10, 24 }; + +static inline int GetInsertLengthCode(int insertlen) { + if (insertlen < 6) { + return insertlen; + } else if (insertlen < 130) { + insertlen -= 2; + int nbits = Log2FloorNonZero(insertlen) - 1; + return (nbits << 1) + (insertlen >> nbits) + 2; + } else if (insertlen < 2114) { + return Log2FloorNonZero(insertlen - 66) + 10; + } else if (insertlen < 6210) { + return 21; + } else if (insertlen < 22594) { + return 22; + } else { + return 23; + } +} + +static inline int GetCopyLengthCode(int copylen) { + if (copylen < 10) { + return copylen - 2; + } else if (copylen < 134) { + copylen -= 6; + int nbits = Log2FloorNonZero(copylen) - 1; + return (nbits << 1) + (copylen >> nbits) + 4; + } else if (copylen < 2118) { + return Log2FloorNonZero(copylen - 70) + 12; + } else { + return 23; + } +} + +static inline int CombineLengthCodes( + int inscode, int copycode, int distancecode) { + int bits64 = (copycode & 0x7u) | ((inscode & 0x7u) << 3); + if (distancecode == 0 && inscode < 8 && copycode < 16) { + return (copycode < 8) ? 
bits64 : (bits64 | 64); + } else { + // "To convert an insert-and-copy length code to an insert length code and + // a copy length code, the following table can be used" + static const int cells[9] = { 2, 3, 6, 4, 5, 8, 7, 9, 10 }; + return (cells[(copycode >> 3) + 3 * (inscode >> 3)] << 6) | bits64; + } +} + +static inline void GetLengthCode(int insertlen, int copylen, int distancecode, + uint16_t* code, uint64_t* extra) { + int inscode = GetInsertLengthCode(insertlen); + int copycode = GetCopyLengthCode(copylen); + uint64_t insnumextra = insextra[inscode]; + uint64_t numextra = insnumextra + copyextra[copycode]; + uint64_t insextraval = insertlen - insbase[inscode]; + uint64_t copyextraval = copylen - copybase[copycode]; + *code = CombineLengthCodes(inscode, copycode, distancecode); + *extra = (numextra << 48) | (copyextraval << insnumextra) | insextraval; +} + +struct Command { + Command() {} + + Command(int insertlen, int copylen, int copylen_code, int distance_code) + : insert_len_(insertlen), copy_len_(copylen) { + GetDistCode(distance_code, &dist_prefix_, &dist_extra_); + GetLengthCode(insertlen, copylen_code, dist_prefix_, + &cmd_prefix_, &cmd_extra_); + } + + Command(int insertlen) + : insert_len_(insertlen), copy_len_(0), dist_prefix_(16), dist_extra_(0) { + GetLengthCode(insertlen, 4, dist_prefix_, &cmd_prefix_, &cmd_extra_); + } + + int DistanceCode() const { + if (dist_prefix_ < 16) { + return dist_prefix_ + 1; + } + int nbits = dist_extra_ >> 24; + int extra = dist_extra_ & 0xffffff; + int prefix = dist_prefix_ - 12 - 2 * nbits; + return (prefix << nbits) + extra + 13; + } + + int DistanceContext() const { + int r = cmd_prefix_ >> 6; + int c = cmd_prefix_ & 7; + if ((r == 0 || r == 2 || r == 4 || r == 7) && (c <= 2)) { + return c; + } + return 3; + } + + int insert_len_; + int copy_len_; + uint16_t cmd_prefix_; + uint16_t dist_prefix_; + uint64_t cmd_extra_; + uint32_t dist_extra_; }; } // namespace brotli diff --git a/enc/dictionary.h b/enc/dictionary.h index 66dbf62..2c0624e 100644 --- a/enc/dictionary.h +++ b/enc/dictionary.h @@ -17,6 +17,8 @@ #ifndef BROTLI_ENC_DICTIONARY_H_ #define BROTLI_ENC_DICTIONARY_H_ +#include + static const uint8_t kBrotliDictionary[] = { 0x74, 0x69, 0x6d, 0x65, 0x64, 0x6f, 0x77, 0x6e, 0x6c, 0x69, 0x66, 0x65, 0x6c, 0x65, 0x66, 0x74, 0x62, 0x61, 0x63, 0x6b, 0x63, 0x6f, 0x64, 0x65, 0x64, 0x61, diff --git a/enc/encode.cc b/enc/encode.cc index 6e18845..c819389 100644 --- a/enc/encode.cc +++ b/enc/encode.cc @@ -22,6 +22,7 @@ #include "./backward_references.h" #include "./bit_cost.h" #include "./block_splitter.h" +#include "./brotli_bit_stream.h" #include "./cluster.h" #include "./context.h" #include "./transform.h" @@ -65,19 +66,6 @@ double TotalBitCost(const std::vector >& histograms) { return retval; } -void EncodeVarLenUint8(int n, int* storage_ix, uint8_t* storage) { - if (n == 0) { - WriteBits(1, 0, storage_ix, storage); - } else { - WriteBits(1, 1, storage_ix, storage); - int nbits = Log2Floor(n); - WriteBits(3, nbits, storage_ix, storage); - if (nbits > 0) { - WriteBits(nbits, n - (1 << nbits), storage_ix, storage); - } - } -} - int ParseAsUTF8(int* symbol, const uint8_t* input, int size) { // ASCII if ((input[0] & 0x80) == 0) { @@ -168,134 +156,6 @@ void EncodeMetaBlockLength(size_t meta_block_size, } } -void StoreHuffmanTreeOfHuffmanTreeToBitMask( - const uint8_t* code_length_bitdepth, - int* storage_ix, uint8_t* storage) { - static const uint8_t kStorageOrder[kCodeLengthCodes] = { - 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 
15, - }; - // Throw away trailing zeros: - int codes_to_store = kCodeLengthCodes; - for (; codes_to_store > 0; --codes_to_store) { - if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) { - break; - } - } - int num_codes = 0; - for (int i = 0; i < codes_to_store; ++i) { - if (code_length_bitdepth[kStorageOrder[i]] != 0) { - ++num_codes; - } - } - if (num_codes == 1) { - codes_to_store = kCodeLengthCodes; - } - int skip_some = 0; // skips none. - if (code_length_bitdepth[kStorageOrder[0]] == 0 && - code_length_bitdepth[kStorageOrder[1]] == 0) { - skip_some = 2; // skips two. - if (code_length_bitdepth[kStorageOrder[2]] == 0) { - skip_some = 3; // skips three. - } - } - WriteBits(2, skip_some, storage_ix, storage); - for (int i = skip_some; i < codes_to_store; ++i) { - uint8_t len[] = { 2, 4, 3, 2, 2, 4 }; - uint8_t bits[] = { 0, 7, 3, 2, 1, 15 }; - int v = code_length_bitdepth[kStorageOrder[i]]; - WriteBits(len[v], bits[v], storage_ix, storage); - } -} - -void StoreHuffmanTreeToBitMask( - const uint8_t* huffman_tree, - const uint8_t* huffman_tree_extra_bits, - const int huffman_tree_size, - const EntropyCode& entropy, - int* storage_ix, uint8_t* storage) { - for (int i = 0; i < huffman_tree_size; ++i) { - const int ix = huffman_tree[i]; - const int extra_bits = huffman_tree_extra_bits[i]; - if (entropy.count_ > 1) { - WriteBits(entropy.depth_[ix], entropy.bits_[ix], storage_ix, storage); - } - switch (ix) { - case 16: - WriteBits(2, extra_bits, storage_ix, storage); - break; - case 17: - WriteBits(3, extra_bits, storage_ix, storage); - break; - } - } -} - -template -void StoreHuffmanCodeSimple( - const EntropyCode& code, int alphabet_size, - int max_bits, int* storage_ix, uint8_t* storage) { - const uint8_t *depth = &code.depth_[0]; - int symbols[4]; - // Quadratic sort. - int k, j; - for (k = 0; k < code.count_; ++k) { - symbols[k] = code.symbols_[k]; - } - for (k = 0; k < code.count_; ++k) { - for (j = k + 1; j < code.count_; ++j) { - if (depth[symbols[j]] < depth[symbols[k]]) { - int t = symbols[k]; - symbols[k] = symbols[j]; - symbols[j] = t; - } - } - } - // Small tree marker to encode 1-4 symbols. 
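  // (Worked example for the simple-code layout, illustrative only: with an
  // alphabet of 256 symbols, max_bits is 8, so a code over the two symbols
  // 'A' (65) and 'B' (66) is stored as the 2-bit marker value 1, NSYM - 1 =
  // 1 in 2 bits, and each symbol index in 8 bits; the decoder then gives
  // both symbols depth 1. The extra tree-select bit exists only for
  // NSYM == 4, where it distinguishes depths {2, 2, 2, 2} from
  // {1, 2, 3, 3}.)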
- WriteBits(2, 1, storage_ix, storage); - WriteBits(2, code.count_ - 1, storage_ix, storage); - for (int i = 0; i < code.count_; ++i) { - WriteBits(max_bits, symbols[i], storage_ix, storage); - } - if (code.count_ == 4) { - if (depth[symbols[0]] == 2 && - depth[symbols[1]] == 2 && - depth[symbols[2]] == 2 && - depth[symbols[3]] == 2) { - WriteBits(1, 0, storage_ix, storage); - } else { - WriteBits(1, 1, storage_ix, storage); - } - } -} - -template -void StoreHuffmanCodeComplex( - const EntropyCode& code, int alphabet_size, - int* storage_ix, uint8_t* storage) { - const uint8_t *depth = &code.depth_[0]; - uint8_t huffman_tree[kSize]; - uint8_t huffman_tree_extra_bits[kSize]; - int huffman_tree_size = 0; - WriteHuffmanTree(depth, - alphabet_size, - &huffman_tree[0], - &huffman_tree_extra_bits[0], - &huffman_tree_size); - Histogram huffman_tree_histogram; - memset(huffman_tree_histogram.data_, 0, sizeof(huffman_tree_histogram.data_)); - for (int i = 0; i < huffman_tree_size; ++i) { - huffman_tree_histogram.Add(huffman_tree[i]); - } - EntropyCode huffman_tree_entropy; - BuildEntropyCode(huffman_tree_histogram, 5, kCodeLengthCodes, - &huffman_tree_entropy); - StoreHuffmanTreeOfHuffmanTreeToBitMask( - &huffman_tree_entropy.depth_[0], storage_ix, storage); - StoreHuffmanTreeToBitMask(&huffman_tree[0], &huffman_tree_extra_bits[0], - huffman_tree_size, huffman_tree_entropy, - storage_ix, storage); -} - template void BuildAndStoreEntropyCode(const Histogram& histogram, const int tree_limit, @@ -304,45 +164,8 @@ void BuildAndStoreEntropyCode(const Histogram& histogram, int* storage_ix, uint8_t* storage) { memset(code->depth_, 0, sizeof(code->depth_)); memset(code->bits_, 0, sizeof(code->bits_)); - memset(code->symbols_, 0, sizeof(code->symbols_)); - code->count_ = 0; - - int max_bits_counter = alphabet_size - 1; - int max_bits = 0; - while (max_bits_counter) { - max_bits_counter >>= 1; - ++max_bits; - } - - for (size_t i = 0; i < alphabet_size; i++) { - if (histogram.data_[i] > 0) { - if (code->count_ < 4) code->symbols_[code->count_] = i; - ++code->count_; - } - } - - if (code->count_ <= 1) { - WriteBits(2, 1, storage_ix, storage); - WriteBits(2, 0, storage_ix, storage); - WriteBits(max_bits, code->symbols_[0], storage_ix, storage); - return; - } - - if (alphabet_size >= 50 && code->count_ >= 16) { - std::vector counts(alphabet_size); - memcpy(&counts[0], histogram.data_, sizeof(counts[0]) * alphabet_size); - OptimizeHuffmanCountsForRle(alphabet_size, &counts[0]); - CreateHuffmanTree(&counts[0], alphabet_size, tree_limit, code->depth_); - } else { - CreateHuffmanTree(histogram.data_, alphabet_size, tree_limit, code->depth_); - } - ConvertBitDepthsToSymbols(code->depth_, alphabet_size, code->bits_); - - if (code->count_ <= 4) { - StoreHuffmanCodeSimple(*code, alphabet_size, max_bits, storage_ix, storage); - } else { - StoreHuffmanCodeComplex(*code, alphabet_size, storage_ix, storage); - } + BuildAndStoreHuffmanTree(histogram.data_, alphabet_size, 9, + code->depth_, code->bits_, storage_ix, storage); } template @@ -362,324 +185,45 @@ void BuildAndStoreEntropyCodes( void EncodeCommand(const Command& cmd, const EntropyCodeCommand& entropy, int* storage_ix, uint8_t* storage) { - int code = cmd.command_prefix_; + int code = cmd.cmd_prefix_; WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage); - if (code >= 128) { - code -= 128; - } - int insert_extra_bits = InsertLengthExtraBits(code); - uint64_t insert_extra_bits_val = - cmd.insert_length_ - InsertLengthOffset(code); - int 
copy_extra_bits = CopyLengthExtraBits(code); - uint64_t copy_extra_bits_val = cmd.copy_length_code_ - CopyLengthOffset(code); - if (insert_extra_bits > 0) { - WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage); - } - if (copy_extra_bits > 0) { - WriteBits(copy_extra_bits, copy_extra_bits_val, storage_ix, storage); + int nextra = cmd.cmd_extra_ >> 48; + uint64_t extra = cmd.cmd_extra_ & 0xffffffffffffULL; + if (nextra > 0) { + WriteBits(nextra, extra, storage_ix, storage); } } void EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy, int* storage_ix, uint8_t* storage) { - int code = cmd.distance_prefix_; - int extra_bits = cmd.distance_extra_bits_; - uint64_t extra_bits_val = cmd.distance_extra_bits_value_; + int code = cmd.dist_prefix_; + int extra_bits = cmd.dist_extra_ >> 24; + uint64_t extra_bits_val = cmd.dist_extra_ & 0xffffff; WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage); if (extra_bits > 0) { WriteBits(extra_bits, extra_bits_val, storage_ix, storage); } } -void ComputeDistanceShortCodes(std::vector* cmds, - size_t pos, - const size_t max_backward, - int* dist_ringbuffer, - size_t* ringbuffer_idx) { - static const int kIndexOffset[16] = { - 3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2 - }; - static const int kValueOffset[16] = { - 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3 - }; - for (int i = 0; i < cmds->size(); ++i) { - pos += (*cmds)[i].insert_length_; - size_t max_distance = std::min(pos, max_backward); - int cur_dist = (*cmds)[i].copy_distance_; - int dist_code = cur_dist + 16; - if (cur_dist <= max_distance) { - if (cur_dist == 0) break; - int limits[16] = { 0, 0, 0, 0, - 6, 6, 11, 11, - 11, 11, 11, 11, - 12, 12, 12, 12 }; - for (int k = 0; k < 16; ++k) { - // Only accept more popular choices. - if (cur_dist < limits[k]) { - // Typically unpopular ranges, don't replace a short distance - // with them. 
- continue; - } - int comp = (dist_ringbuffer[(*ringbuffer_idx + kIndexOffset[k]) & 3] + - kValueOffset[k]); - if (cur_dist == comp) { - dist_code = k + 1; - break; - } - } - if (dist_code > 1) { - dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist; - ++(*ringbuffer_idx); - } - pos += (*cmds)[i].copy_length_; - } else { - int word_idx = cur_dist - max_distance - 1; - const std::string word = - GetTransformedDictionaryWord((*cmds)[i].copy_length_code_, word_idx); - pos += word.size(); - } - (*cmds)[i].distance_code_ = dist_code; +void RecomputeDistancePrefixes(std::vector* cmds, + int num_direct_distance_codes, + int distance_postfix_bits) { + if (num_direct_distance_codes == 0 && + distance_postfix_bits == 0) { + return; } -} - -void ComputeCommandPrefixes(std::vector* cmds, - int num_direct_distance_codes, - int distance_postfix_bits) { for (int i = 0; i < cmds->size(); ++i) { Command* cmd = &(*cmds)[i]; - cmd->command_prefix_ = CommandPrefix(cmd->insert_length_, - cmd->copy_length_code_); - if (cmd->copy_length_code_ > 0) { - PrefixEncodeCopyDistance(cmd->distance_code_, + if (cmd->copy_len_ > 0 && cmd->cmd_prefix_ >= 128) { + PrefixEncodeCopyDistance(cmd->DistanceCode(), num_direct_distance_codes, distance_postfix_bits, - &cmd->distance_prefix_, - &cmd->distance_extra_bits_, - &cmd->distance_extra_bits_value_); - } - if (cmd->command_prefix_ < 128 && cmd->distance_prefix_ == 0) { - cmd->distance_prefix_ = 0xffff; - } else { - cmd->command_prefix_ += 128; + &cmd->dist_prefix_, + &cmd->dist_extra_); } } } -int IndexOf(const std::vector& v, int value) { - for (int i = 0; i < v.size(); ++i) { - if (v[i] == value) return i; - } - return -1; -} - -void MoveToFront(std::vector* v, int index) { - int value = (*v)[index]; - for (int i = index; i > 0; --i) { - (*v)[i] = (*v)[i - 1]; - } - (*v)[0] = value; -} - -std::vector MoveToFrontTransform(const std::vector& v) { - if (v.empty()) return v; - std::vector mtf(*max_element(v.begin(), v.end()) + 1); - for (int i = 0; i < mtf.size(); ++i) mtf[i] = i; - std::vector result(v.size()); - for (int i = 0; i < v.size(); ++i) { - int index = IndexOf(mtf, v[i]); - result[i] = index; - MoveToFront(&mtf, index); - } - return result; -} - -// Finds runs of zeros in v_in and replaces them with a prefix code of the run -// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are -// shifted by *max_length_prefix. Will not create prefix codes bigger than the -// initial value of *max_run_length_prefix. The prefix code of run length L is -// simply Log2Floor(L) and the number of extra bits is the same as the prefix -// code. -void RunLengthCodeZeros(const std::vector& v_in, - int* max_run_length_prefix, - std::vector* v_out, - std::vector* extra_bits) { - int max_reps = 0; - for (int i = 0; i < v_in.size();) { - for (; i < v_in.size() && v_in[i] != 0; ++i) ; - int reps = 0; - for (; i < v_in.size() && v_in[i] == 0; ++i) { - ++reps; - } - max_reps = std::max(reps, max_reps); - } - int max_prefix = max_reps > 0 ? 
Log2Floor(max_reps) : 0; - *max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix); - for (int i = 0; i < v_in.size();) { - if (v_in[i] != 0) { - v_out->push_back(v_in[i] + *max_run_length_prefix); - extra_bits->push_back(0); - ++i; - } else { - int reps = 1; - for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) { - ++reps; - } - i += reps; - while (reps) { - if (reps < (2 << *max_run_length_prefix)) { - int run_length_prefix = Log2Floor(reps); - v_out->push_back(run_length_prefix); - extra_bits->push_back(reps - (1 << run_length_prefix)); - break; - } else { - v_out->push_back(*max_run_length_prefix); - extra_bits->push_back((1 << *max_run_length_prefix) - 1); - reps -= (2 << *max_run_length_prefix) - 1; - } - } - } - } -} - -// Returns a maximum zero-run-length-prefix value such that run-length coding -// zeros in v with this maximum prefix value and then encoding the resulting -// histogram and entropy-coding v produces the least amount of bits. -int BestMaxZeroRunLengthPrefix(const std::vector& v) { - int min_cost = std::numeric_limits::max(); - int best_max_prefix = 0; - for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) { - std::vector rle_symbols; - std::vector extra_bits; - int max_run_length_prefix = max_prefix; - RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits); - if (max_run_length_prefix < max_prefix) break; - HistogramContextMap histogram; - for (int i = 0; i < rle_symbols.size(); ++i) { - histogram.Add(rle_symbols[i]); - } - int bit_cost = PopulationCost(histogram); - if (max_prefix > 0) { - bit_cost += 4; - } - for (int i = 1; i <= max_prefix; ++i) { - bit_cost += histogram.data_[i] * i; // extra bits - } - if (bit_cost < min_cost) { - min_cost = bit_cost; - best_max_prefix = max_prefix; - } - } - return best_max_prefix; -} - -void EncodeContextMap(const std::vector& context_map, - int num_clusters, - int* storage_ix, uint8_t* storage) { - EncodeVarLenUint8(num_clusters - 1, storage_ix, storage); - - if (num_clusters == 1) { - return; - } - - std::vector transformed_symbols = MoveToFrontTransform(context_map); - std::vector rle_symbols; - std::vector extra_bits; - int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols); - RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix, - &rle_symbols, &extra_bits); - HistogramContextMap symbol_histogram; - for (int i = 0; i < rle_symbols.size(); ++i) { - symbol_histogram.Add(rle_symbols[i]); - } - bool use_rle = max_run_length_prefix > 0; - WriteBits(1, use_rle, storage_ix, storage); - if (use_rle) { - WriteBits(4, max_run_length_prefix - 1, storage_ix, storage); - } - EntropyCodeContextMap symbol_code; - BuildAndStoreEntropyCode(symbol_histogram, 15, - num_clusters + max_run_length_prefix, - &symbol_code, - storage_ix, storage); - for (int i = 0; i < rle_symbols.size(); ++i) { - WriteBits(symbol_code.depth_[rle_symbols[i]], - symbol_code.bits_[rle_symbols[i]], - storage_ix, storage); - if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) { - WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage); - } - } - WriteBits(1, 1, storage_ix, storage); // use move-to-front -} - -struct BlockSplitCode { - EntropyCodeBlockType block_type_code; - EntropyCodeBlockLength block_len_code; -}; - -void EncodeBlockLength(const EntropyCodeBlockLength& entropy, - int length, - int* storage_ix, uint8_t* storage) { - int len_code = BlockLengthPrefix(length); - int extra_bits = BlockLengthExtraBits(len_code); - int extra_bits_value = length - 
BlockLengthOffset(len_code); - WriteBits(entropy.depth_[len_code], entropy.bits_[len_code], - storage_ix, storage); - if (extra_bits > 0) { - WriteBits(extra_bits, extra_bits_value, storage_ix, storage); - } -} - -void ComputeBlockTypeShortCodes(BlockSplit* split) { - if (split->num_types_ <= 1) { - split->num_types_ = 1; - return; - } - int ringbuffer[2] = { 0, 1 }; - size_t index = 0; - for (int i = 0; i < split->types_.size(); ++i) { - int type = split->types_[i]; - int type_code; - if (type == ringbuffer[index & 1]) { - type_code = 0; - } else if (type == ringbuffer[(index - 1) & 1] + 1) { - type_code = 1; - } else { - type_code = type + 2; - } - ringbuffer[index & 1] = type; - ++index; - split->type_codes_.push_back(type_code); - } -} - -void BuildAndEncodeBlockSplitCode(const BlockSplit& split, - BlockSplitCode* code, - int* storage_ix, uint8_t* storage) { - EncodeVarLenUint8(split.num_types_ - 1, storage_ix, storage); - - if (split.num_types_ == 1) { - return; - } - - HistogramBlockType type_histo; - for (int i = 1; i < split.type_codes_.size(); ++i) { - type_histo.Add(split.type_codes_[i]); - } - HistogramBlockLength length_histo; - for (int i = 0; i < split.lengths_.size(); ++i) { - length_histo.Add(BlockLengthPrefix(split.lengths_[i])); - } - BuildAndStoreEntropyCode(type_histo, 15, split.num_types_ + 2, - &code->block_type_code, - storage_ix, storage); - BuildAndStoreEntropyCode(length_histo, 15, kNumBlockLenPrefixes, - &code->block_len_code, - storage_ix, storage); - EncodeBlockLength(code->block_len_code, split.lengths_[0], - storage_ix, storage); -} - void MoveAndEncode(const BlockSplitCode& code, BlockSplitIterator* it, int* storage_ix, uint8_t* storage) { @@ -687,11 +231,7 @@ void MoveAndEncode(const BlockSplitCode& code, ++it->idx_; it->type_ = it->split_.types_[it->idx_]; it->length_ = it->split_.lengths_[it->idx_]; - int type_code = it->split_.type_codes_[it->idx_]; - WriteBits(code.block_type_code.depth_[type_code], - code.block_type_code.bits_[type_code], - storage_ix, storage); - EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage); + StoreBlockSwitch(code, it->idx_, storage_ix, storage); } --it->length_; } @@ -727,17 +267,14 @@ void BuildMetaBlock(const EncodingParams& params, if (cmds.empty()) { return; } - ComputeCommandPrefixes(&mb->cmds, - mb->params.num_direct_distance_codes, - mb->params.distance_postfix_bits); + RecomputeDistancePrefixes(&mb->cmds, + mb->params.num_direct_distance_codes, + mb->params.distance_postfix_bits); SplitBlock(mb->cmds, &ringbuffer[pos & mask], &mb->literal_split, &mb->command_split, &mb->distance_split); - ComputeBlockTypeShortCodes(&mb->literal_split); - ComputeBlockTypeShortCodes(&mb->command_split); - ComputeBlockTypeShortCodes(&mb->distance_split); mb->literal_context_modes.resize(mb->literal_split.num_types_, mb->params.literal_context_mode); @@ -786,7 +323,7 @@ size_t MetaBlockLength(const std::vector& cmds) { size_t length = 0; for (int i = 0; i < cmds.size(); ++i) { const Command& cmd = cmds[i]; - length += cmd.insert_length_ + cmd.copy_length_; + length += cmd.insert_len_ + cmd.copy_len_; } return length; } @@ -807,12 +344,24 @@ void StoreMetaBlock(const MetaBlock& mb, BlockSplitCode literal_split_code; BlockSplitCode command_split_code; BlockSplitCode distance_split_code; - BuildAndEncodeBlockSplitCode(mb.literal_split, &literal_split_code, - storage_ix, storage); - BuildAndEncodeBlockSplitCode(mb.command_split, &command_split_code, - storage_ix, storage); - BuildAndEncodeBlockSplitCode(mb.distance_split, 
&distance_split_code, - storage_ix, storage); + BuildAndStoreBlockSplitCode(mb.literal_split.types_, + mb.literal_split.lengths_, + mb.literal_split.num_types_, + 9, // quality + &literal_split_code, + storage_ix, storage); + BuildAndStoreBlockSplitCode(mb.command_split.types_, + mb.command_split.lengths_, + mb.command_split.num_types_, + 9, // quality + &command_split_code, + storage_ix, storage); + BuildAndStoreBlockSplitCode(mb.distance_split.types_, + mb.distance_split.lengths_, + mb.distance_split.num_types_, + 9, // quality + &distance_split_code, + storage_ix, storage); WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage); WriteBits(4, mb.params.num_direct_distance_codes >> @@ -844,7 +393,7 @@ void StoreMetaBlock(const MetaBlock& mb, const Command& cmd = mb.cmds[i]; MoveAndEncode(command_split_code, &command_it, storage_ix, storage); EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage); - for (int j = 0; j < cmd.insert_length_; ++j) { + for (int j = 0; j < cmd.insert_len_; ++j) { MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage); int histogram_idx = literal_it.type_; uint8_t prev_byte = *pos > 0 ? ringbuffer[(*pos - 1) & mask] : 0; @@ -859,16 +408,14 @@ void StoreMetaBlock(const MetaBlock& mb, storage_ix, storage); ++(*pos); } - if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) { + if (*pos < end_pos && cmd.cmd_prefix_ >= 128) { MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage); - int context = (distance_it.type_ << 2) + - ((cmd.copy_length_code_ > 4) ? 3 : cmd.copy_length_code_ - 2); + int context = (distance_it.type_ << 2) + cmd.DistanceContext(); int histogram_index = mb.distance_context_map[context]; - size_t max_distance = std::min(*pos, (size_t)kMaxBackwardDistance); EncodeCopyDistance(cmd, distance_codes[histogram_index], storage_ix, storage); } - *pos += cmd.copy_length_; + *pos += cmd.copy_len_; } } @@ -876,20 +423,19 @@ BrotliCompressor::BrotliCompressor(BrotliParams params) : params_(params), window_bits_(kWindowBits), hashers_(new Hashers()), - dist_ringbuffer_idx_(0), input_pos_(0), ringbuffer_(kRingBufferBits, kMetaBlockSizeBits), literal_cost_(1 << kRingBufferBits), storage_ix_(0), storage_(new uint8_t[2 << kMetaBlockSizeBits]) { - dist_ringbuffer_[0] = 16; - dist_ringbuffer_[1] = 15; - dist_ringbuffer_[2] = 11; - dist_ringbuffer_[3] = 4; + dist_cache_[0] = 4; + dist_cache_[1] = 11; + dist_cache_[2] = 15; + dist_cache_[3] = 16; storage_[0] = 0; switch (params.mode) { - case BrotliParams::MODE_TEXT: hash_type_ = Hashers::HASH_15_8_4; break; - case BrotliParams::MODE_FONT: hash_type_ = Hashers::HASH_15_8_2; break; + case BrotliParams::MODE_TEXT: hash_type_ = 8; break; + case BrotliParams::MODE_FONT: hash_type_ = 9; break; default: break; } hashers_->Init(hash_type_); @@ -942,7 +488,7 @@ void BrotliCompressor::WriteMetaBlock(const size_t input_size, uint8_t* encoded_buffer) { static const double kMinUTF8Ratio = 0.75; bool utf8_mode = false; - std::vector commands; + std::vector commands((input_size + 1) >> 1); if (input_size > 0) { ringbuffer_.Write(input_buffer, input_size); utf8_mode = IsMostlyUTF8( @@ -957,17 +503,26 @@ void BrotliCompressor::WriteMetaBlock(const size_t input_size, kRingBufferMask, kRingBufferMask, ringbuffer_.start(), &literal_cost_[0]); } + int last_insert_len = 0; + int num_commands = 0; + double base_min_score = 8.115; CreateBackwardReferences( input_size, input_pos_, - ringbuffer_.start(), - &literal_cost_[0], - kRingBufferMask, kMaxBackwardDistance, + 
ringbuffer_.start(), kRingBufferMask, + &literal_cost_[0], kRingBufferMask, + kMaxBackwardDistance, + base_min_score, + 9, // quality hashers_.get(), hash_type_, - &commands); - ComputeDistanceShortCodes(&commands, input_pos_, kMaxBackwardDistance, - dist_ringbuffer_, - &dist_ringbuffer_idx_); + dist_cache_, + &last_insert_len, + &commands[0], + &num_commands); + commands.resize(num_commands); + if (last_insert_len > 0) { + commands.push_back(Command(last_insert_len)); + } } EncodingParams params; params.num_direct_distance_codes = @@ -1015,7 +570,6 @@ void BrotliCompressor::FinishStream( WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer); } - int BrotliCompressBuffer(BrotliParams params, size_t input_size, const uint8_t* input_buffer, @@ -1049,7 +603,6 @@ int BrotliCompressBuffer(BrotliParams params, *encoded_size += output_size; max_output_size -= output_size; } - return 1; } diff --git a/enc/encode.h b/enc/encode.h index a128f7e..34285e0 100644 --- a/enc/encode.h +++ b/enc/encode.h @@ -59,7 +59,6 @@ class BrotliCompressor { // sets *encoded_size to the number of bytes written. void FinishStream(size_t* encoded_size, uint8_t* encoded_buffer); - private: // Initializes the hasher with the hashes of dictionary words. void StoreDictionaryWordHashes(); @@ -67,12 +66,11 @@ class BrotliCompressor { BrotliParams params_; int window_bits_; std::unique_ptr hashers_; - Hashers::Type hash_type_; - int dist_ringbuffer_[4]; - size_t dist_ringbuffer_idx_; + int hash_type_; size_t input_pos_; RingBuffer ringbuffer_; std::vector literal_cost_; + int dist_cache_[4]; int storage_ix_; uint8_t* storage_; static StaticDictionary *static_dictionary_; @@ -87,7 +85,6 @@ int BrotliCompressBuffer(BrotliParams params, size_t* encoded_size, uint8_t* encoded_buffer); - } // namespace brotli #endif // BROTLI_ENC_ENCODE_H_ diff --git a/enc/entropy_encode.cc b/enc/entropy_encode.cc index 1ec50f1..787f988 100644 --- a/enc/entropy_encode.cc +++ b/enc/entropy_encode.cc @@ -42,7 +42,7 @@ struct HuffmanTree { HuffmanTree::HuffmanTree() {} -// Sort the root nodes, least popular first. +// Sort the root nodes, least popular first, break ties by value. bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) { if (v0.total_count_ == v1.total_count_) { return v0.index_right_or_value_ > v1.index_right_or_value_; @@ -50,6 +50,11 @@ bool SortHuffmanTree(const HuffmanTree &v0, const HuffmanTree &v1) { return v0.total_count_ < v1.total_count_; } +// Sort the root nodes, least popular first. +bool SortHuffmanTreeFast(const HuffmanTree &v0, const HuffmanTree &v1) { + return v0.total_count_ < v1.total_count_; +} + void SetDepth(const HuffmanTree &p, HuffmanTree *pool, uint8_t *depth, @@ -83,6 +88,7 @@ void SetDepth(const HuffmanTree &p, void CreateHuffmanTree(const int *data, const int length, const int tree_limit, + const int quality, uint8_t *depth) { // For block sizes below 64 kB, we never need to do a second iteration // of this loop. Probably all of our block sizes will be smaller than @@ -105,8 +111,11 @@ void CreateHuffmanTree(const int *data, break; } - std::sort(tree.begin(), tree.end(), SortHuffmanTree); - + if (quality > 1) { + std::sort(tree.begin(), tree.end(), SortHuffmanTree); + } else { + std::sort(tree.begin(), tree.end(), SortHuffmanTreeFast); + } // The nodes are: // [0, n): the sorted leaf nodes that we start with. // [n]: we add a sentinel here. 
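A sketch of how a caller drives the new quality parameter (illustrative, not
part of the patch: the counts and alphabet size are made up; the signature,
tree_limit and quality values mirror those used elsewhere in this diff):

    #include <stdint.h>
    #include "./entropy_encode.h"

    void ExampleBuildDepths() {
      int counts[8] = { 10, 0, 3, 3, 1, 0, 0, 7 };  // hypothetical histogram
      uint8_t depth[8] = { 0 };
      // quality > 1 sorts with SortHuffmanTree, whose tie-break on symbol
      // value keeps the resulting code lengths independent of the sort
      // implementation; quality <= 1 uses SortHuffmanTreeFast and gives
      // that determinism up for sort speed.
      brotli::CreateHuffmanTree(counts, 8, /*tree_limit=*/15, /*quality=*/9,
                                depth);
    }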
@@ -158,12 +167,12 @@ void CreateHuffmanTree(const int *data, } } -void Reverse(uint8_t* v, int start, int end) { +void Reverse(std::vector* v, int start, int end) { --end; while (start < end) { - int tmp = v[start]; - v[start] = v[end]; - v[end] = tmp; + int tmp = (*v)[start]; + (*v)[start] = (*v)[end]; + (*v)[end] = tmp; ++start; --end; } @@ -173,75 +182,65 @@ void WriteHuffmanTreeRepetitions( const int previous_value, const int value, int repetitions, - uint8_t* tree, - uint8_t* extra_bits, - int* tree_size) { + std::vector *tree, + std::vector *extra_bits_data) { if (previous_value != value) { - tree[*tree_size] = value; - extra_bits[*tree_size] = 0; - ++(*tree_size); + tree->push_back(value); + extra_bits_data->push_back(0); --repetitions; } if (repetitions == 7) { - tree[*tree_size] = value; - extra_bits[*tree_size] = 0; - ++(*tree_size); + tree->push_back(value); + extra_bits_data->push_back(0); --repetitions; } if (repetitions < 3) { for (int i = 0; i < repetitions; ++i) { - tree[*tree_size] = value; - extra_bits[*tree_size] = 0; - ++(*tree_size); + tree->push_back(value); + extra_bits_data->push_back(0); } } else { repetitions -= 3; - int start = *tree_size; + int start = tree->size(); while (repetitions >= 0) { - tree[*tree_size] = 16; - extra_bits[*tree_size] = repetitions & 0x3; - ++(*tree_size); + tree->push_back(16); + extra_bits_data->push_back(repetitions & 0x3); repetitions >>= 2; --repetitions; } - Reverse(tree, start, *tree_size); - Reverse(extra_bits, start, *tree_size); + Reverse(tree, start, tree->size()); + Reverse(extra_bits_data, start, tree->size()); } } void WriteHuffmanTreeRepetitionsZeros( int repetitions, - uint8_t* tree, - uint8_t* extra_bits, - int* tree_size) { + std::vector *tree, + std::vector *extra_bits_data) { if (repetitions == 11) { - tree[*tree_size] = 0; - extra_bits[*tree_size] = 0; - ++(*tree_size); + tree->push_back(0); + extra_bits_data->push_back(0); --repetitions; } if (repetitions < 3) { for (int i = 0; i < repetitions; ++i) { - tree[*tree_size] = 0; - extra_bits[*tree_size] = 0; - ++(*tree_size); + tree->push_back(0); + extra_bits_data->push_back(0); } } else { repetitions -= 3; - int start = *tree_size; + int start = tree->size(); while (repetitions >= 0) { - tree[*tree_size] = 17; - extra_bits[*tree_size] = repetitions & 0x7; - ++(*tree_size); + tree->push_back(17); + extra_bits_data->push_back(repetitions & 0x7); repetitions >>= 3; --repetitions; } - Reverse(tree, start, *tree_size); - Reverse(extra_bits, start, *tree_size); + Reverse(tree, start, tree->size()); + Reverse(extra_bits_data, start, tree->size()); } } - int OptimizeHuffmanCountsForRle(int length, int* counts) { int stride; int limit; @@ -371,7 +370,6 @@ int OptimizeHuffmanCountsForRle(int length, int* counts) { return 1; } - static void DecideOverRleUse(const uint8_t* depth, const int length, bool *use_rle_for_non_zero, bool *use_rle_for_zero) { @@ -379,20 +377,10 @@ static void DecideOverRleUse(const uint8_t* depth, const int length, int total_reps_non_zero = 0; int count_reps_zero = 0; int count_reps_non_zero = 0; - int new_length = length; - for (int i = 0; i < length; ++i) { - if (depth[length - i - 1] == 0) { - --new_length; - } else { - break; - } - } - for (uint32_t i = 0; i < new_length;) { + for (uint32_t i = 0; i < length;) { const int value = depth[i]; int reps = 1; - // Find rle coding for longer codes. - // Shorter codes seem not to benefit from rle. 
- for (uint32_t k = i + 1; k < new_length && depth[k] == value; ++k) { + for (uint32_t k = i + 1; k < length && depth[k] == value; ++k) { ++reps; } if (reps >= 3 && value == 0) { @@ -411,48 +399,51 @@ static void DecideOverRleUse(const uint8_t* depth, const int length, *use_rle_for_zero = total_reps_zero > 2; } - -void WriteHuffmanTree(const uint8_t* depth, const int length, - uint8_t* tree, - uint8_t* extra_bits_data, - int* huffman_tree_size) { +void WriteHuffmanTree(const uint8_t* depth, + uint32_t length, + std::vector *tree, + std::vector *extra_bits_data) { int previous_value = 8; + // Throw away trailing zeros. + int new_length = length; + for (int i = 0; i < length; ++i) { + if (depth[length - i - 1] == 0) { + --new_length; + } else { + break; + } + } + // First gather statistics on if it is a good idea to do rle. - bool use_rle_for_non_zero; - bool use_rle_for_zero; - DecideOverRleUse(depth, length, &use_rle_for_non_zero, &use_rle_for_zero); + bool use_rle_for_non_zero = false; + bool use_rle_for_zero = false; + if (length > 50) { + // Find rle coding for longer codes. + // Shorter codes seem not to benefit from rle. + DecideOverRleUse(depth, new_length, + &use_rle_for_non_zero, &use_rle_for_zero); + } // Actual rle coding. - for (uint32_t i = 0; i < length;) { + for (uint32_t i = 0; i < new_length;) { const int value = depth[i]; int reps = 1; - if (length > 50) { - // Find rle coding for longer codes. - // Shorter codes seem not to benefit from rle. - if ((value != 0 && use_rle_for_non_zero) || - (value == 0 && use_rle_for_zero)) { - for (uint32_t k = i + 1; k < length && depth[k] == value; ++k) { - ++reps; - } + if ((value != 0 && use_rle_for_non_zero) || + (value == 0 && use_rle_for_zero)) { + for (uint32_t k = i + 1; k < new_length && depth[k] == value; ++k) { + ++reps; } } if (value == 0) { - WriteHuffmanTreeRepetitionsZeros(reps, tree, extra_bits_data, - huffman_tree_size); + WriteHuffmanTreeRepetitionsZeros(reps, tree, extra_bits_data); } else { - WriteHuffmanTreeRepetitions(previous_value, value, reps, tree, - extra_bits_data, huffman_tree_size); + WriteHuffmanTreeRepetitions(previous_value, + value, reps, tree, extra_bits_data); previous_value = value; } i += reps; } - // Throw away trailing zeros. - for (; *huffman_tree_size > 0; --(*huffman_tree_size)) { - if (tree[*huffman_tree_size - 1] > 0 && tree[*huffman_tree_size - 1] < 17) { - break; - } - } } namespace { diff --git a/enc/entropy_encode.h b/enc/entropy_encode.h index aabb9a5..d275d15 100644 --- a/enc/entropy_encode.h +++ b/enc/entropy_encode.h @@ -19,6 +19,7 @@ #include #include +#include #include "./histogram.h" #include "./prefix.h" @@ -36,6 +37,7 @@ namespace brotli { void CreateHuffmanTree(const int *data, const int length, const int tree_limit, + const int quality, uint8_t *depth); // Change the population counts in a way that the consequent @@ -46,14 +48,13 @@ void CreateHuffmanTree(const int *data, // counts contains the population counts. int OptimizeHuffmanCountsForRle(int length, int* counts); - // Write a huffman tree from bit depths into the bitstream representation // of a Huffman tree. The generated Huffman tree is to be compressed once // more using a Huffman tree -void WriteHuffmanTree(const uint8_t* depth, const int length, - uint8_t* tree, - uint8_t* extra_bits_data, - int* huffman_tree_size); +void WriteHuffmanTree(const uint8_t* depth, + uint32_t num, + std::vector *tree, + std::vector *extra_bits_data); // Get the actual bit values for a tree of bit depths. 
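// Illustrative example (not from this patch): for depths {1, 2, 3, 3} the
// canonical code words are 0, 10, 110 and 111; since WriteBits emits bits
// LSB-first, the stored values are these code words in reversed bit order.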
void ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits); @@ -70,34 +71,6 @@ struct EntropyCode { int symbols_[4]; }; -template -void BuildEntropyCode(const Histogram& histogram, - const int tree_limit, - const int alphabet_size, - EntropyCode* code) { - memset(code->depth_, 0, sizeof(code->depth_)); - memset(code->bits_, 0, sizeof(code->bits_)); - memset(code->symbols_, 0, sizeof(code->symbols_)); - code->count_ = 0; - if (histogram.total_count_ == 0) return; - for (int i = 0; i < kSize; ++i) { - if (histogram.data_[i] > 0) { - if (code->count_ < 4) code->symbols_[code->count_] = i; - ++code->count_; - } - } - if (alphabet_size >= 50 && code->count_ >= 16) { - int counts[kSize]; - memcpy(counts, &histogram.data_[0], sizeof(counts[0]) * kSize); - OptimizeHuffmanCountsForRle(alphabet_size, counts); - CreateHuffmanTree(counts, alphabet_size, tree_limit, &code->depth_[0]); - } else { - CreateHuffmanTree(&histogram.data_[0], alphabet_size, tree_limit, - &code->depth_[0]); - } - ConvertBitDepthsToSymbols(&code->depth_[0], alphabet_size, &code->bits_[0]); -} - static const int kCodeLengthCodes = 18; // Literal entropy code. diff --git a/enc/fast_log.h b/enc/fast_log.h index 0b09ea6..b202aed 100644 --- a/enc/fast_log.h +++ b/enc/fast_log.h @@ -46,6 +46,16 @@ inline int Log2Floor(uint32_t n) { #endif } +static inline int Log2FloorNonZero(uint32_t n) { +#ifdef __GNUC__ + return 31 ^ __builtin_clz(n); +#else + unsigned int result = 0; + while (n >>= 1) result++; + return result; +#endif +} + // Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0. inline int Log2Ceiling(uint32_t n) { int floor = Log2Floor(n); diff --git a/enc/find_match_length.h b/enc/find_match_length.h index 0994ac2..a62d5ae 100644 --- a/enc/find_match_length.h +++ b/enc/find_match_length.h @@ -19,6 +19,8 @@ #include +#include + #include "./port.h" namespace brotli { diff --git a/enc/hash.h b/enc/hash.h index bc3e1c4..c89426c 100644 --- a/enc/hash.h +++ b/enc/hash.h @@ -31,10 +31,18 @@ #include "./fast_log.h" #include "./find_match_length.h" #include "./port.h" +#include "./prefix.h" #include "./static_dict.h" namespace brotli { +static const int kDistanceCacheIndex[] = { + 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, +}; +static const int kDistanceCacheOffset[] = { + 0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3 +}; + // kHashMul32 multiplier has these properties: // * The multiplier must be odd. Otherwise we may lose the highest bit. // * No long streaks of 1s or 0s. @@ -75,59 +83,194 @@ inline uint32_t Hash(const uint8_t *data) { // when it is not much longer and the bit cost for encoding it is more // than the saved literals. 
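// For intuition, an illustrative calculation (the 5.4 default average cost
// and the 8.115 base_min_score are taken from elsewhere in this patch):
// a length-4 match at distance 1024 scores
//   4 * 5.4 - 1.20 * Log2Floor(1024) = 21.6 - 12.0 = 9.6,
// while the same match at distance 16 scores 21.6 - 4.8 = 16.8, so for
// equal lengths the nearer match always wins, and both examples clear the
// 8.115 acceptance threshold.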
inline double BackwardReferenceScore(double average_cost, - double start_cost4, - double start_cost3, - double start_cost2, int copy_length, int backward_reference_offset) { - double retval = 0; - switch (copy_length) { - case 2: retval = start_cost2; break; - case 3: retval = start_cost3; break; - default: retval = start_cost4 + (copy_length - 4) * average_cost; break; - } - retval -= 1.20 * Log2Floor(backward_reference_offset); - return retval; + return (copy_length * average_cost - + 1.20 * Log2Floor(backward_reference_offset)); } inline double BackwardReferenceScoreUsingLastDistance(double average_cost, - double start_cost4, - double start_cost3, - double start_cost2, int copy_length, int distance_short_code) { - double retval = 0; - switch (copy_length) { - case 2: retval = start_cost2; break; - case 3: retval = start_cost3; break; - default: retval = start_cost4 + (copy_length - 4) * average_cost; break; - } static const double kDistanceShortCodeBitCost[16] = { -0.6, 0.95, 1.17, 1.27, 0.93, 0.93, 0.96, 0.96, 0.99, 0.99, 1.05, 1.05, 1.15, 1.15, 1.25, 1.25 }; - retval -= kDistanceShortCodeBitCost[distance_short_code]; - return retval; + return (average_cost * copy_length + - kDistanceShortCodeBitCost[distance_short_code]); } +// A (forgetful) hash table to the data seen by the compressor, to +// help create backward references to previous data. +// +// This is a hash map of fixed size (kBucketSize). Starting from the +// given index, kBucketSweep buckets are used to store values of a key. +template +class HashLongestMatchQuickly { + public: + HashLongestMatchQuickly() { + Reset(); + } + void Reset() { + // It is not strictly necessary to fill this buffer here, but + // not filling will make the results of the compression stochastic + // (but correct). This is because random data would cause the + // system to find accidentally good backward references here and there. + std::fill(&buckets_[0], + &buckets_[sizeof(buckets_) / sizeof(buckets_[0])], + 0); + } + // Look at 4 bytes at data. + // Compute a hash from these, and store the value somewhere within + // [ix .. ix+3]. + inline void Store(const uint8_t *data, const int ix) { + const uint32_t key = Hash(data); + // Wiggle the value with the bucket sweep range. + const uint32_t off = (static_cast(ix) >> 3) % kBucketSweep; + buckets_[key + off] = ix; + } + + // Store hashes for a range of data. + void StoreHashes(const uint8_t *data, size_t len, int startix, int mask) { + for (int p = 0; p < len; ++p) { + Store(&data[p & mask], startix + p); + } + } + + bool HasStaticDictionary() const { return false; } + + // Find a longest backward match of &ring_buffer[cur_ix & ring_buffer_mask] + // up to the length of max_length. + // + // Does not look for matches longer than max_length. + // Does not look for matches further away than max_backward. + // Writes the best found match length into best_len_out. + // Writes the index (&data[index]) of the start of the best match into + // best_distance_out. 
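+  // Illustration (H1/H2 refer to the Hashers typedefs later in this diff):
+  // with kBucketSweep == 1 (H1, <16, 1>) the single probe is decisive and a
+  // usable hit returns immediately; with kBucketSweep > 1 (e.g. H2, <17, 4>)
+  // all sweep slots are scanned and the highest-scoring candidate wins.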
+ inline bool FindLongestMatch(const uint8_t * __restrict ring_buffer, + const size_t ring_buffer_mask, + const float* __restrict literal_cost, + const size_t literal_cost_mask, + const double average_cost, + const int* __restrict distance_cache, + const uint32_t cur_ix, + const uint32_t max_length, + const uint32_t max_backward, + int * __restrict best_len_out, + int * __restrict best_len_code_out, + int * __restrict best_distance_out, + double* __restrict best_score_out) { + const int best_len_in = *best_len_out; + const int cur_ix_masked = cur_ix & ring_buffer_mask; + int compare_char = ring_buffer[cur_ix_masked + best_len_in]; + double best_score = *best_score_out; + int best_len = best_len_in; + int backward = distance_cache[0]; + size_t prev_ix = cur_ix - backward; + bool match_found = false; + if (prev_ix < cur_ix) { + prev_ix &= ring_buffer_mask; + if (compare_char == ring_buffer[prev_ix + best_len]) { + int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix], + &ring_buffer[cur_ix_masked], + max_length); + if (len >= 4) { + best_score = BackwardReferenceScoreUsingLastDistance(average_cost, + len, 0); + best_len = len; + *best_len_out = len; + *best_len_code_out = len; + *best_distance_out = backward; + *best_score_out = best_score; + compare_char = ring_buffer[cur_ix_masked + best_len]; + if (kBucketSweep == 1) { + return true; + } else { + match_found = true; + } + } + } + } + const uint32_t key = Hash(&ring_buffer[cur_ix_masked]); + if (kBucketSweep == 1) { + // Only one to look for, don't bother to prepare for a loop. + prev_ix = buckets_[key]; + backward = cur_ix - prev_ix; + prev_ix &= ring_buffer_mask; + if (compare_char != ring_buffer[prev_ix + best_len_in]) { + return false; + } + if (PREDICT_FALSE(backward == 0 || backward > max_backward)) { + return false; + } + const int len = FindMatchLengthWithLimit(&ring_buffer[prev_ix], + &ring_buffer[cur_ix_masked], + max_length); + if (len >= 4) { + *best_len_out = len; + *best_len_code_out = len; + *best_distance_out = backward; + *best_score_out = BackwardReferenceScore(average_cost, len, backward); + return true; + } else { + return false; + } + } else { + uint32_t *bucket = buckets_ + key; + prev_ix = *bucket++; + for (int i = 0; i < kBucketSweep; ++i, prev_ix = *bucket++) { + const int backward = cur_ix - prev_ix; + prev_ix &= ring_buffer_mask; + if (compare_char != ring_buffer[prev_ix + best_len]) { + continue; + } + if (PREDICT_FALSE(backward == 0 || backward > max_backward)) { + continue; + } + const int len = + FindMatchLengthWithLimit(&ring_buffer[prev_ix], + &ring_buffer[cur_ix_masked], + max_length); + if (len >= 4) { + const double score = BackwardReferenceScore(average_cost, + len, backward); + if (best_score < score) { + best_score = score; + best_len = len; + *best_len_out = best_len; + *best_len_code_out = best_len; + *best_distance_out = backward; + *best_score_out = score; + compare_char = ring_buffer[cur_ix_masked + best_len]; + match_found = true; + } + } + } + return match_found; + } + } + + private: + static const uint32_t kBucketSize = 1 << kBucketBits; + uint32_t buckets_[kBucketSize + kBucketSweep]; +}; + // A (forgetful) hash table to the data seen by the compressor, to // help create backward references to previous data. // // This is a hash map of fixed size (kBucketSize) to a ring buffer of // fixed size (kBlockSize). The ring buffer contains the last kBlockSize // index positions of the given hash key in the compressed data. 
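// Judging from the Hashers typedefs later in this diff, the template
// parameters are, in order, kBucketBits, kBlockBits, kMinLength,
// kNumLastDistancesToCheck, kUseCostModel and kUseDictionary; e.g. H8,
// HashLongestMatch<15, 8, 4, 16, true, true>, keeps 2^15 buckets of 2^8
// positions, accepts matches of length >= 4, probes 16 distance-cache
// variants, and enables both the literal cost model and the static
// dictionary.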
-template +template class HashLongestMatch { public: - HashLongestMatch() - : last_distance1_(4), - last_distance2_(11), - last_distance3_(15), - last_distance4_(16), - insert_length_(0), - average_cost_(5.4), - static_dict_(NULL) { + HashLongestMatch() : static_dict_(NULL) { Reset(); } void Reset() { @@ -166,72 +309,58 @@ class HashLongestMatch { // into best_distance_out. // Write the score of the best match into best_score_out. bool FindLongestMatch(const uint8_t * __restrict data, - const float * __restrict literal_cost, const size_t ring_buffer_mask, + const float * __restrict literal_cost, + const size_t literal_cost_mask, + const double average_cost, + const int* __restrict distance_cache, const uint32_t cur_ix, uint32_t max_length, const uint32_t max_backward, - size_t * __restrict best_len_out, - size_t * __restrict best_len_code_out, - size_t * __restrict best_distance_out, - double * __restrict best_score_out, - bool * __restrict in_dictionary) { - *in_dictionary = true; + int * __restrict best_len_out, + int * __restrict best_len_code_out, + int * __restrict best_distance_out, + double * __restrict best_score_out) { *best_len_code_out = 0; const size_t cur_ix_masked = cur_ix & ring_buffer_mask; - const double start_cost4 = literal_cost == NULL ? 20 : - literal_cost[cur_ix_masked] + - literal_cost[(cur_ix + 1) & ring_buffer_mask] + - literal_cost[(cur_ix + 2) & ring_buffer_mask] + - literal_cost[(cur_ix + 3) & ring_buffer_mask]; - const double start_cost3 = literal_cost == NULL ? 15 : - literal_cost[cur_ix_masked] + - literal_cost[(cur_ix + 1) & ring_buffer_mask] + - literal_cost[(cur_ix + 2) & ring_buffer_mask] + 0.3; - double start_cost2 = literal_cost == NULL ? 10 : - literal_cost[cur_ix_masked] + - literal_cost[(cur_ix + 1) & ring_buffer_mask] + 1.2; + double start_cost_diff4 = 0.0; + double start_cost_diff3 = 0.0; + double start_cost_diff2 = 0.0; + if (kUseCostModel) { + start_cost_diff4 = literal_cost == NULL ? 0 : + literal_cost[cur_ix & literal_cost_mask] + + literal_cost[(cur_ix + 1) & literal_cost_mask] + + literal_cost[(cur_ix + 2) & literal_cost_mask] + + literal_cost[(cur_ix + 3) & literal_cost_mask] - + 4 * average_cost; + start_cost_diff3 = literal_cost == NULL ? 0 : + literal_cost[cur_ix & literal_cost_mask] + + literal_cost[(cur_ix + 1) & literal_cost_mask] + + literal_cost[(cur_ix + 2) & literal_cost_mask] - + 3 * average_cost + 0.3; + start_cost_diff2 = literal_cost == NULL ? 0 : + literal_cost[cur_ix & literal_cost_mask] + + literal_cost[(cur_ix + 1) & literal_cost_mask] - + 2 * average_cost + 1.2; + } bool match_found = false; // Don't accept a short copy from far away. - double best_score = 8.115; - if (insert_length_ < 4) { - double cost_diff[4] = { 0.10, 0.04, 0.02, 0.01 }; - best_score += cost_diff[insert_length_]; - } - size_t best_len = *best_len_out; + double best_score = *best_score_out; + int best_len = *best_len_out; *best_len_out = 0; - size_t best_ix = 1; // Try last distance first. 
- for (int i = 0; i < 16; ++i) { - size_t prev_ix = cur_ix; - switch(i) { - case 0: prev_ix -= last_distance1_; break; - case 1: prev_ix -= last_distance2_; break; - case 2: prev_ix -= last_distance3_; break; - case 3: prev_ix -= last_distance4_; break; - - case 4: prev_ix -= last_distance1_ - 1; break; - case 5: prev_ix -= last_distance1_ + 1; break; - case 6: prev_ix -= last_distance1_ - 2; break; - case 7: prev_ix -= last_distance1_ + 2; break; - case 8: prev_ix -= last_distance1_ - 3; break; - case 9: prev_ix -= last_distance1_ + 3; break; - - case 10: prev_ix -= last_distance2_ - 1; break; - case 11: prev_ix -= last_distance2_ + 1; break; - case 12: prev_ix -= last_distance2_ - 2; break; - case 13: prev_ix -= last_distance2_ + 2; break; - case 14: prev_ix -= last_distance2_ - 3; break; - case 15: prev_ix -= last_distance2_ + 3; break; - } + for (int i = 0; i < kNumLastDistancesToCheck; ++i) { + const int idx = kDistanceCacheIndex[i]; + const int backward = distance_cache[idx] + kDistanceCacheOffset[i]; + size_t prev_ix = cur_ix - backward; if (prev_ix >= cur_ix) { continue; } - const size_t backward = cur_ix - prev_ix; if (PREDICT_FALSE(backward > max_backward)) { continue; } prev_ix &= ring_buffer_mask; + if (cur_ix_masked + best_len > ring_buffer_mask || prev_ix + best_len > ring_buffer_mask || data[cur_ix_masked + best_len] != data[prev_ix + best_len]) { @@ -245,29 +374,30 @@ class HashLongestMatch { // Comparing for >= 2 does not change the semantics, but just saves for // a few unnecessary binary logarithms in backward reference score, // since we are not interested in such short matches. - const double score = BackwardReferenceScoreUsingLastDistance( - average_cost_, - start_cost4, - start_cost3, - start_cost2, - len, i); + double score = BackwardReferenceScoreUsingLastDistance( + average_cost, len, i); + if (kUseCostModel) { + switch (len) { + case 2: score += start_cost_diff2; break; + case 3: score += start_cost_diff3; break; + default: score += start_cost_diff4; + } + } if (best_score < score) { best_score = score; best_len = len; - best_ix = backward; *best_len_out = best_len; *best_len_code_out = best_len; - *best_distance_out = best_ix; + *best_distance_out = backward; *best_score_out = best_score; match_found = true; - *in_dictionary = backward > max_backward; } } } if (kMinLength == 2) { int stop = int(cur_ix) - 64; if (stop < 0) { stop = 0; } - start_cost2 -= 1.0; + start_cost_diff2 -= 1.0; for (int i = cur_ix - 1; i > stop; --i) { size_t prev_ix = i; const size_t backward = cur_ix - prev_ix; @@ -280,15 +410,15 @@ class HashLongestMatch { continue; } int len = 2; - const double score = start_cost2 - 2.3 * Log2Floor(backward); + const double score = + average_cost * 2 - 2.3 * Log2Floor(backward) + start_cost_diff2; if (best_score < score) { best_score = score; best_len = len; - best_ix = backward; *best_len_out = best_len; *best_len_code_out = best_len; - *best_distance_out = best_ix; + *best_distance_out = backward; match_found = true; } } @@ -316,26 +446,24 @@ class HashLongestMatch { // Comparing for >= 3 does not change the semantics, but just saves // for a few unnecessary binary logarithms in backward reference // score, since we are not interested in such short matches. - const double score = BackwardReferenceScore(average_cost_, - start_cost4, - start_cost3, - start_cost2, - len, backward); + double score = BackwardReferenceScore(average_cost, + len, backward); + if (kUseCostModel) { + score += (len >= 4) ? 
start_cost_diff4 : start_cost_diff3; + } if (best_score < score) { best_score = score; best_len = len; - best_ix = backward; *best_len_out = best_len; *best_len_code_out = best_len; - *best_distance_out = best_ix; + *best_distance_out = backward; *best_score_out = best_score; match_found = true; - *in_dictionary = false; } } } } - if (static_dict_ != NULL) { + if (kUseDictionary && static_dict_ != NULL) { // We decide based on first 4 bytes how many bytes to test for. int prefix = BROTLI_UNALIGNED_LOAD32(&data[cur_ix_masked]); int maxlen = static_dict_->GetLength(prefix); @@ -346,21 +474,17 @@ class HashLongestMatch { int word_id; if (static_dict_->Get(snippet, ©_len_code, &word_id)) { const size_t backward = max_backward + word_id + 1; - const double score = BackwardReferenceScore(average_cost_, - start_cost4, - start_cost3, - start_cost2, - len, backward); + const double score = (BackwardReferenceScore(average_cost, + len, backward) + + start_cost_diff4); if (best_score < score) { best_score = score; best_len = len; - best_ix = backward; *best_len_out = best_len; *best_len_code_out = copy_len_code; - *best_distance_out = best_ix; + *best_distance_out = backward; *best_score_out = best_score; match_found = true; - *in_dictionary = true; } } } @@ -368,21 +492,6 @@ class HashLongestMatch { return match_found; } - void set_last_distance(int v) { - if (last_distance1_ != v) { - last_distance4_ = last_distance3_; - last_distance3_ = last_distance2_; - last_distance2_ = last_distance1_; - last_distance1_ = v; - } - } - - int last_distance() const { return last_distance1_; } - - void set_insert_length(int v) { insert_length_ = v; } - - void set_average_cost(double v) { average_cost_ = v; } - private: // Number of hash buckets. static const uint32_t kBucketSize = 1 << kBucketBits; @@ -400,46 +509,48 @@ class HashLongestMatch { // Buckets containing kBlockSize of backward references. int buckets_[kBucketSize][kBlockSize]; - int last_distance1_; - int last_distance2_; - int last_distance3_; - int last_distance4_; - - // Cost adjustment for how many literals we are planning to insert - // anyway. 
-  int insert_length_;
-
-  double average_cost_;
-
   const StaticDictionary *static_dict_;
 };
 
 struct Hashers {
-  enum Type {
-    HASH_15_8_4 = 0,
-    HASH_15_8_2 = 1,
-  };
+  typedef HashLongestMatchQuickly<16, 1> H1;
+  typedef HashLongestMatchQuickly<17, 4> H2;
+  typedef HashLongestMatch<14, 4, 4, 4, false, false> H3;
+  typedef HashLongestMatch<14, 5, 4, 4, false, false> H4;
+  typedef HashLongestMatch<15, 6, 4, 10, false, false> H5;
+  typedef HashLongestMatch<15, 7, 4, 10, false, false> H6;
+  typedef HashLongestMatch<15, 8, 4, 16, true, false> H7;
+  typedef HashLongestMatch<15, 8, 4, 16, true, true> H8;
+  typedef HashLongestMatch<15, 8, 2, 16, true, false> H9;
 
-  void Init(Type type) {
+  void Init(int type) {
     switch (type) {
-      case HASH_15_8_4:
-        hash_15_8_4.reset(new HashLongestMatch<15, 8, 4>());
-        break;
-      case HASH_15_8_2:
-        hash_15_8_2.reset(new HashLongestMatch<15, 8, 2>());
-        break;
-      default:
-        break;
+      case 1: hash_h1.reset(new H1); break;
+      case 2: hash_h2.reset(new H2); break;
+      case 3: hash_h3.reset(new H3); break;
+      case 4: hash_h4.reset(new H4); break;
+      case 5: hash_h5.reset(new H5); break;
+      case 6: hash_h6.reset(new H6); break;
+      case 7: hash_h7.reset(new H7); break;
+      case 8: hash_h8.reset(new H8); break;
+      case 9: hash_h9.reset(new H9); break;
+      default: break;
     }
   }
 
   void SetStaticDictionary(const StaticDictionary *dict) {
-    if (hash_15_8_4.get() != NULL) hash_15_8_4->SetStaticDictionary(dict);
-    if (hash_15_8_2.get() != NULL) hash_15_8_2->SetStaticDictionary(dict);
+    if (hash_h8.get() != NULL) hash_h8->SetStaticDictionary(dict);
   }
 
-  std::unique_ptr<HashLongestMatch<15, 8, 4> > hash_15_8_4;
-  std::unique_ptr<HashLongestMatch<15, 8, 2> > hash_15_8_2;
+  std::unique_ptr<H1> hash_h1;
+  std::unique_ptr<H2> hash_h2;
+  std::unique_ptr<H3> hash_h3;
+  std::unique_ptr<H4> hash_h4;
+  std::unique_ptr<H5> hash_h5;
+  std::unique_ptr<H6> hash_h6;
+  std::unique_ptr<H7> hash_h7;
+  std::unique_ptr<H8> hash_h8;
+  std::unique_ptr<H9> hash_h9;
 };
 
 }  // namespace brotli
diff --git a/enc/histogram.cc b/enc/histogram.cc
index 910b987..68d8e7e 100644
--- a/enc/histogram.cc
+++ b/enc/histogram.cc
@@ -45,8 +45,8 @@ void BuildHistograms(
     const Command &cmd = cmds[i];
     insert_and_copy_it.Next();
     (*insert_and_copy_histograms)[insert_and_copy_it.type_].Add(
-        cmd.command_prefix_);
-    for (int j = 0; j < cmd.insert_length_; ++j) {
+        cmd.cmd_prefix_);
+    for (int j = 0; j < cmd.insert_len_; ++j) {
       literal_it.Next();
       uint8_t prev_byte = pos > 0 ? ringbuffer[(pos - 1) & mask] : 0;
       uint8_t prev_byte2 = pos > 1 ? ringbuffer[(pos - 2) & mask] : 0;
@@ -55,12 +55,12 @@
       (*literal_histograms)[context].Add(ringbuffer[pos & mask]);
       ++pos;
     }
-    pos += cmd.copy_length_;
-    if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) {
+    pos += cmd.copy_len_;
+    if (cmd.copy_len_ > 0 && cmd.cmd_prefix_ >= 128) {
       dist_it.Next();
       int context = (dist_it.type_ << kDistanceContextBits) +
-          ((cmd.copy_length_code_ > 4) ? 3 : cmd.copy_length_code_ - 2);
-      (*copy_dist_histograms)[context].Add(cmd.distance_prefix_);
+          cmd.DistanceContext();
+      (*copy_dist_histograms)[context].Add(cmd.dist_prefix_);
     }
   }
 }
@@ -77,7 +77,7 @@ void BuildLiteralHistogramsForBlockType(
   BlockSplitIterator literal_it(literal_split);
   for (int i = 0; i < cmds.size(); ++i) {
     const Command &cmd = cmds[i];
-    for (int j = 0; j < cmd.insert_length_; ++j) {
+    for (int j = 0; j < cmd.insert_len_; ++j) {
       literal_it.Next();
       if (literal_it.type_ == block_type) {
         uint8_t prev_byte = pos > 0 ? ringbuffer[(pos - 1) & mask] : 0;
@@ -87,7 +87,7 @@
       }
       ++pos;
     }
-    pos += cmd.copy_length_;
+    pos += cmd.copy_len_;
   }
 }
diff --git a/enc/prefix.h b/enc/prefix.h
index 47974f8..693a26f 100644
--- a/enc/prefix.h
+++ b/enc/prefix.h
@@ -19,6 +19,7 @@
 #define BROTLI_ENC_PREFIX_H_
 
 #include
+#include "./fast_log.h"
 
 namespace brotli {
@@ -29,22 +30,56 @@ static const int kNumBlockLenPrefixes = 26;
 static const int kNumDistanceShortCodes = 16;
 static const int kNumDistancePrefixes = 520;
 
-int CommandPrefix(int insert_length, int copy_length);
-int InsertLengthExtraBits(int prefix);
-int InsertLengthOffset(int prefix);
-int CopyLengthExtraBits(int prefix);
-int CopyLengthOffset(int prefix);
+// Represents the range of values belonging to a prefix code:
+// [offset, offset + 2^nbits)
+struct PrefixCodeRange {
+  int offset;
+  int nbits;
+};
 
-void PrefixEncodeCopyDistance(int distance_code,
-                              int num_direct_codes,
-                              int shift_bits,
-                              uint16_t* prefix,
-                              int* nbits,
-                              uint32_t* extra_bits);
+static const PrefixCodeRange kBlockLengthPrefixCode[kNumBlockLenPrefixes] = {
+  {   1,  2}, {    5,  2}, {   9,  2}, {  13,  2},
+  {  17,  3}, {   25,  3}, {  33,  3}, {  41,  3},
+  {  49,  4}, {   65,  4}, {  81,  4}, {  97,  4},
+  { 113,  5}, {  145,  5}, { 177,  5}, { 209,  5},
+  { 241,  6}, {  305,  6}, { 369,  7}, { 497,  8},
+  { 753,  9}, { 1265, 10}, {2289, 11}, {4337, 12},
+  {8433, 13}, {16625, 24}
+};
 
-int BlockLengthPrefix(int length);
-int BlockLengthExtraBits(int prefix);
-int BlockLengthOffset(int prefix);
+inline void GetBlockLengthPrefixCode(int len,
+                                     int* code, int* n_extra, int* extra) {
+  *code = 0;
+  while (*code < 25 && len >= kBlockLengthPrefixCode[*code + 1].offset) {
+    ++(*code);
+  }
+  *n_extra = kBlockLengthPrefixCode[*code].nbits;
+  *extra = len - kBlockLengthPrefixCode[*code].offset;
+}
+
+inline void PrefixEncodeCopyDistance(int distance_code,
+                                     int num_direct_codes,
+                                     int postfix_bits,
+                                     uint16_t* code,
+
uint32_t* extra_bits) { + distance_code -= 1; + if (distance_code < kNumDistanceShortCodes + num_direct_codes) { + *code = distance_code; + *extra_bits = 0; + return; + } + distance_code -= kNumDistanceShortCodes + num_direct_codes; + distance_code += (1 << (postfix_bits + 2)); + int bucket = Log2Floor(distance_code) - 1; + int postfix_mask = (1 << postfix_bits) - 1; + int postfix = distance_code & postfix_mask; + int prefix = (distance_code >> bucket) & 1; + int offset = (2 + prefix) << bucket; + int nbits = bucket - postfix_bits; + *code = kNumDistanceShortCodes + num_direct_codes + + ((2 * (nbits - 1) + prefix) << postfix_bits) + postfix; + *extra_bits = (nbits << 24) | ((distance_code - offset) >> postfix_bits); +} } // namespace brotli diff --git a/enc/ringbuffer.h b/enc/ringbuffer.h index d88f2ca..e67c5f2 100644 --- a/enc/ringbuffer.h +++ b/enc/ringbuffer.h @@ -17,6 +17,9 @@ #ifndef BROTLI_ENC_RINGBUFFER_H_ #define BROTLI_ENC_RINGBUFFER_H_ +#include +#include + // A RingBuffer(window_bits, tail_bits) contains `1 << window_bits' bytes of // data in a circular manner: writing a byte writes it to // `position() % (1 << window_bits)'. For convenience, the RingBuffer array @@ -26,10 +29,10 @@ class RingBuffer { public: RingBuffer(int window_bits, int tail_bits) : window_bits_(window_bits), tail_bits_(tail_bits), pos_(0) { - static const int kSlackForThreeByteHashingEverywhere = 2; + static const int kSlackForFourByteHashingEverywhere = 3; const int buflen = (1 << window_bits_) + (1 << tail_bits_); - buffer_ = new uint8_t[buflen + kSlackForThreeByteHashingEverywhere]; - for (int i = 0; i < kSlackForThreeByteHashingEverywhere; ++i) { + buffer_ = new uint8_t[buflen + kSlackForFourByteHashingEverywhere]; + for (int i = 0; i < kSlackForFourByteHashingEverywhere; ++i) { buffer_[buflen + i] = 0; } } diff --git a/enc/write_bits.h b/enc/write_bits.h index cf6f53e..c3a72ba 100644 --- a/enc/write_bits.h +++ b/enc/write_bits.h @@ -54,6 +54,7 @@ inline void WriteBits(int n_bits, #ifdef BIT_WRITER_DEBUG printf("WriteBits %2d 0x%016llx %10d\n", n_bits, bits, *pos); #endif + assert(bits < 1ULL << n_bits); #ifdef IS_LITTLE_ENDIAN // This branch of the code can write up to 56 bits at a time, // 7 bits are lost by being perhaps already in *p and at least