mirror of
https://github.com/google/brotli.git
synced 2024-11-09 13:40:06 +00:00
Move literal cost computation to where it's used.
Move utf8 heuristics functions to their own file.
This commit is contained in:
parent
dc416abcb7
commit
4c37566f4b
@ -2,7 +2,7 @@
|
||||
|
||||
include ../shared.mk
|
||||
|
||||
OBJS_NODICT = backward_references.o block_splitter.o brotli_bit_stream.o encode.o encode_parallel.o entropy_encode.o histogram.o literal_cost.o metablock.o static_dict.o streams.o
|
||||
OBJS_NODICT = backward_references.o block_splitter.o brotli_bit_stream.o encode.o encode_parallel.o entropy_encode.o histogram.o literal_cost.o metablock.o static_dict.o streams.o utf8_util.o
|
||||
OBJS = $(OBJS_NODICT) dictionary.o
|
||||
|
||||
nodict : $(OBJS_NODICT)
|
||||
|
@ -22,6 +22,7 @@
|
||||
|
||||
#include "./command.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./literal_cost.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
@ -78,19 +79,15 @@ class ZopfliCostModel {
|
||||
|
||||
void SetFromLiteralCosts(size_t num_bytes,
|
||||
size_t position,
|
||||
const float* literal_cost,
|
||||
size_t literal_cost_mask) {
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask) {
|
||||
std::vector<float> literal_cost(num_bytes);
|
||||
EstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
|
||||
ringbuffer, &literal_cost[0]);
|
||||
literal_costs_.resize(num_bytes + 1);
|
||||
literal_costs_[0] = 0.0;
|
||||
if (literal_cost) {
|
||||
for (int i = 0; i < num_bytes; ++i) {
|
||||
literal_costs_[i + 1] = literal_costs_[i] +
|
||||
literal_cost[(position + i) & literal_cost_mask];
|
||||
}
|
||||
} else {
|
||||
for (int i = 1; i <= num_bytes; ++i) {
|
||||
literal_costs_[i] = i * 5.4;
|
||||
}
|
||||
for (int i = 0; i < num_bytes; ++i) {
|
||||
literal_costs_[i + 1] = literal_costs_[i] + literal_cost[i];
|
||||
}
|
||||
cost_cmd_.resize(kNumCommandPrefixes);
|
||||
cost_dist_.resize(kNumDistancePrefixes);
|
||||
@ -623,8 +620,6 @@ void CreateBackwardReferences(size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const float* literal_cost,
|
||||
size_t literal_cost_mask,
|
||||
const size_t max_backward_limit,
|
||||
const int quality,
|
||||
Hashers* hashers,
|
||||
@ -688,7 +683,7 @@ void CreateBackwardReferences(size_t num_bytes,
|
||||
ZopfliCostModel model;
|
||||
if (i == 0) {
|
||||
model.SetFromLiteralCosts(num_bytes, position,
|
||||
literal_cost, literal_cost_mask);
|
||||
ringbuffer, ringbuffer_mask);
|
||||
} else {
|
||||
model.SetFromCommands(num_bytes, position,
|
||||
ringbuffer, ringbuffer_mask,
|
||||
|
@ -33,8 +33,6 @@ void CreateBackwardReferences(size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const float* literal_cost,
|
||||
size_t literal_cost_mask,
|
||||
const size_t max_backward_limit,
|
||||
const int quality,
|
||||
Hashers* hashers,
|
||||
|
@ -31,79 +31,16 @@
|
||||
#include "./fast_log.h"
|
||||
#include "./hash.h"
|
||||
#include "./histogram.h"
|
||||
#include "./literal_cost.h"
|
||||
#include "./prefix.h"
|
||||
#include "./utf8_util.h"
|
||||
#include "./write_bits.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
static const double kMinUTF8Ratio = 0.75;
|
||||
static const int kMinQualityForBlockSplit = 4;
|
||||
static const int kMinQualityForContextModeling = 5;
|
||||
static const int kMinQualityForOptimizeHistograms = 4;
|
||||
|
||||
// Decodes a single UTF8-encoded code point from the front of input.
//
//   symbol - receives the decoded code point; when the leading bytes are not
//            an accepted UTF8 sequence it receives 0x110000 | input[0], a
//            value above the Unicode code space.
//   input  - bytes to decode; input[0] is always read, so at least one byte
//            must be readable.
//   size   - number of readable bytes at input. It is a size_t so that callers
//            can pass a size_t byte count without it being narrowed to int.
//
// Returns the number of bytes consumed: 1 to 4 for an accepted sequence, and
// 1 for a byte that could not be decoded.
int ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
  // ASCII. A zero byte is not accepted here and ends up in the fallback below.
  if ((input[0] & 0x80) == 0) {
    *symbol = input[0];
    if (*symbol > 0) {
      return 1;
    }
  }
  // 2-byte UTF8
  if (size > 1 &&
      (input[0] & 0xe0) == 0xc0 &&
      (input[1] & 0xc0) == 0x80) {
    *symbol = (((input[0] & 0x1f) << 6) |
               (input[1] & 0x3f));
    // A value that fits in 7 bits is an overlong encoding; reject it.
    if (*symbol > 0x7f) {
      return 2;
    }
  }
  // 3-byte UTF8
  if (size > 2 &&
      (input[0] & 0xf0) == 0xe0 &&
      (input[1] & 0xc0) == 0x80 &&
      (input[2] & 0xc0) == 0x80) {
    *symbol = (((input[0] & 0x0f) << 12) |
               ((input[1] & 0x3f) << 6) |
               (input[2] & 0x3f));
    // A value that fits in 11 bits is an overlong encoding; reject it.
    if (*symbol > 0x7ff) {
      return 3;
    }
  }
  // 4-byte UTF8
  if (size > 3 &&
      (input[0] & 0xf8) == 0xf0 &&
      (input[1] & 0xc0) == 0x80 &&
      (input[2] & 0xc0) == 0x80 &&
      (input[3] & 0xc0) == 0x80) {
    *symbol = (((input[0] & 0x07) << 18) |
               ((input[1] & 0x3f) << 12) |
               ((input[2] & 0x3f) << 6) |
               (input[3] & 0x3f));
    // Reject overlong encodings and values beyond the last code point.
    if (*symbol > 0xffff && *symbol <= 0x10ffff) {
      return 4;
    }
  }
  // Not UTF8, emit a special symbol above the UTF8-code space
  *symbol = 0x110000 | input[0];
  return 1;
}
|
||||
|
||||
// Returns true if at least min_fraction of the data is UTF8-encoded.
|
||||
bool IsMostlyUTF8(const uint8_t* data, size_t length, double min_fraction) {
|
||||
size_t size_utf8 = 0;
|
||||
size_t pos = 0;
|
||||
while (pos < length) {
|
||||
int symbol;
|
||||
int bytes_read = ParseAsUTF8(&symbol, data + pos, length - pos);
|
||||
pos += bytes_read;
|
||||
if (symbol < 0x110000) size_utf8 += bytes_read;
|
||||
}
|
||||
return size_utf8 > min_fraction * length;
|
||||
}
|
||||
|
||||
void RecomputeDistancePrefixes(Command* cmds,
|
||||
size_t num_commands,
|
||||
int num_direct_distance_codes,
|
||||
@ -136,7 +73,6 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
|
||||
: params_(params),
|
||||
hashers_(new Hashers()),
|
||||
input_pos_(0),
|
||||
literal_cost_(0),
|
||||
num_commands_(0),
|
||||
num_literals_(0),
|
||||
last_insert_len_(0),
|
||||
@ -173,10 +109,6 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
|
||||
// smaller than ringbuffer size.
|
||||
int ringbuffer_bits = std::max(params_.lgwin + 1, params_.lgblock + 1);
|
||||
ringbuffer_ = new RingBuffer(ringbuffer_bits, params_.lgblock);
|
||||
if (params_.quality > 9) {
|
||||
literal_cost_mask_ = (1 << params_.lgblock) - 1;
|
||||
literal_cost_ = new float[literal_cost_mask_ + 1];
|
||||
}
|
||||
|
||||
// Allocate command buffer.
|
||||
cmd_buffer_size_ = std::max(1 << 18, 1 << params_.lgblock);
|
||||
@ -213,7 +145,6 @@ BrotliCompressor::BrotliCompressor(BrotliParams params)
|
||||
|
||||
BrotliCompressor::~BrotliCompressor() {
|
||||
delete[] storage_;
|
||||
delete[] literal_cost_;
|
||||
delete[] commands_;
|
||||
delete ringbuffer_;
|
||||
delete hashers_;
|
||||
@ -296,24 +227,7 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool utf8_mode =
|
||||
params_.quality >= 9 &&
|
||||
IsMostlyUTF8(&data[last_processed_pos_ & mask], bytes, kMinUTF8Ratio);
|
||||
|
||||
if (literal_cost_) {
|
||||
if (utf8_mode) {
|
||||
EstimateBitCostsForLiteralsUTF8(last_processed_pos_, bytes, mask,
|
||||
literal_cost_mask_, data,
|
||||
literal_cost_);
|
||||
} else {
|
||||
EstimateBitCostsForLiterals(last_processed_pos_, bytes, mask,
|
||||
literal_cost_mask_,
|
||||
data, literal_cost_);
|
||||
}
|
||||
}
|
||||
CreateBackwardReferences(bytes, last_processed_pos_, data, mask,
|
||||
literal_cost_,
|
||||
literal_cost_mask_,
|
||||
max_backward_distance_,
|
||||
params_.quality,
|
||||
hashers_,
|
||||
@ -347,7 +261,7 @@ bool BrotliCompressor::WriteBrotliData(const bool is_last,
|
||||
last_insert_len_ = 0;
|
||||
}
|
||||
|
||||
return WriteMetaBlockInternal(is_last, utf8_mode, out_size, output);
|
||||
return WriteMetaBlockInternal(is_last, out_size, output);
|
||||
}
|
||||
|
||||
// Decide about the context map based on the ability of the prediction
|
||||
@ -449,7 +363,6 @@ void DecideOverLiteralContextModeling(const uint8_t* input,
|
||||
}
|
||||
|
||||
bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
||||
const bool utf8_mode,
|
||||
size_t* out_size,
|
||||
uint8_t** output) {
|
||||
const size_t bytes = input_pos_ - last_flush_pos_;
|
||||
@ -511,7 +424,7 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
||||
}
|
||||
} else {
|
||||
MetaBlockSplit mb;
|
||||
int literal_context_mode = utf8_mode ? CONTEXT_UTF8 : CONTEXT_SIGNED;
|
||||
int literal_context_mode = CONTEXT_UTF8;
|
||||
if (params_.quality <= 9) {
|
||||
int num_literal_contexts = 1;
|
||||
const int* literal_context_map = NULL;
|
||||
@ -534,6 +447,9 @@ bool BrotliCompressor::WriteMetaBlockInternal(const bool is_last,
|
||||
&mb);
|
||||
}
|
||||
} else {
|
||||
if (!IsMostlyUTF8(data, last_flush_pos_, mask, bytes, kMinUTF8Ratio)) {
|
||||
literal_context_mode = CONTEXT_SIGNED;
|
||||
}
|
||||
BuildMetaBlock(data, last_flush_pos_, mask,
|
||||
prev_byte_, prev_byte2_,
|
||||
commands_, num_commands_,
|
||||
|
@ -142,7 +142,6 @@ class BrotliCompressor {
|
||||
uint8_t* GetBrotliStorage(size_t size);
|
||||
|
||||
bool WriteMetaBlockInternal(const bool is_last,
|
||||
const bool utf8_mode,
|
||||
size_t* out_size,
|
||||
uint8_t** output);
|
||||
|
||||
@ -152,8 +151,6 @@ class BrotliCompressor {
|
||||
int hash_type_;
|
||||
size_t input_pos_;
|
||||
RingBuffer* ringbuffer_;
|
||||
float* literal_cost_;
|
||||
size_t literal_cost_mask_;
|
||||
size_t cmd_buffer_size_;
|
||||
Command* commands_;
|
||||
int num_commands_;
|
||||
|
@ -31,75 +31,14 @@
|
||||
#include "./fast_log.h"
|
||||
#include "./hash.h"
|
||||
#include "./histogram.h"
|
||||
#include "./literal_cost.h"
|
||||
#include "./prefix.h"
|
||||
#include "./utf8_util.h"
|
||||
#include "./write_bits.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
namespace {
|
||||
|
||||
// Decodes a single UTF8-encoded code point from the front of input.
//
//   symbol - receives the decoded code point; when the leading bytes are not
//            an accepted UTF8 sequence it receives 0x110000 | input[0], a
//            value above the Unicode code space.
//   input  - bytes to decode; input[0] is always read, so at least one byte
//            must be readable.
//   size   - number of readable bytes at input. It is a size_t so that callers
//            can pass a size_t byte count without it being narrowed to int.
//
// Returns the number of bytes consumed: 1 to 4 for an accepted sequence, and
// 1 for a byte that could not be decoded.
int ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
  // ASCII. A zero byte is not accepted here and ends up in the fallback below.
  if ((input[0] & 0x80) == 0) {
    *symbol = input[0];
    if (*symbol > 0) {
      return 1;
    }
  }
  // 2-byte UTF8
  if (size > 1 &&
      (input[0] & 0xe0) == 0xc0 &&
      (input[1] & 0xc0) == 0x80) {
    *symbol = (((input[0] & 0x1f) << 6) |
               (input[1] & 0x3f));
    // A value that fits in 7 bits is an overlong encoding; reject it.
    if (*symbol > 0x7f) {
      return 2;
    }
  }
  // 3-byte UTF8
  if (size > 2 &&
      (input[0] & 0xf0) == 0xe0 &&
      (input[1] & 0xc0) == 0x80 &&
      (input[2] & 0xc0) == 0x80) {
    *symbol = (((input[0] & 0x0f) << 12) |
               ((input[1] & 0x3f) << 6) |
               (input[2] & 0x3f));
    // A value that fits in 11 bits is an overlong encoding; reject it.
    if (*symbol > 0x7ff) {
      return 3;
    }
  }
  // 4-byte UTF8
  if (size > 3 &&
      (input[0] & 0xf8) == 0xf0 &&
      (input[1] & 0xc0) == 0x80 &&
      (input[2] & 0xc0) == 0x80 &&
      (input[3] & 0xc0) == 0x80) {
    *symbol = (((input[0] & 0x07) << 18) |
               ((input[1] & 0x3f) << 12) |
               ((input[2] & 0x3f) << 6) |
               (input[3] & 0x3f));
    // Reject overlong encodings and values beyond the last code point.
    if (*symbol > 0xffff && *symbol <= 0x10ffff) {
      return 4;
    }
  }
  // Not UTF8, emit a special symbol above the UTF8-code space
  *symbol = 0x110000 | input[0];
  return 1;
}
|
||||
|
||||
// Returns true if at least min_fraction of the data is UTF8-encoded.
|
||||
bool IsMostlyUTF8(const uint8_t* data, size_t length, double min_fraction) {
|
||||
size_t size_utf8 = 0;
|
||||
for (size_t pos = 0; pos < length; ) {
|
||||
int symbol;
|
||||
int bytes_read = ParseAsUTF8(&symbol, data + pos, length - pos);
|
||||
pos += bytes_read;
|
||||
if (symbol < 0x110000) size_utf8 += bytes_read;
|
||||
}
|
||||
return size_utf8 > min_fraction * length;
|
||||
}
|
||||
|
||||
void RecomputeDistancePrefixes(std::vector<Command>* cmds,
|
||||
int num_direct_distance_codes,
|
||||
int distance_postfix_bits) {
|
||||
@ -151,19 +90,8 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
|
||||
|
||||
// Decide about UTF8 mode.
|
||||
static const double kMinUTF8Ratio = 0.75;
|
||||
bool utf8_mode = IsMostlyUTF8(&input[input_pos], input_size, kMinUTF8Ratio);
|
||||
|
||||
// Compute literal costs. The 4 bytes at the end are there to cover for an
|
||||
// over-read past the end of input, but not past the mask, in
|
||||
// CreateBackwardReferences.
|
||||
std::vector<float> literal_cost(prefix_size + input_size + 4);
|
||||
if (utf8_mode) {
|
||||
EstimateBitCostsForLiteralsUTF8(input_pos, input_size, mask, mask,
|
||||
&input[0], &literal_cost[0]);
|
||||
} else {
|
||||
EstimateBitCostsForLiterals(input_pos, input_size, mask, mask,
|
||||
&input[0], &literal_cost[0]);
|
||||
}
|
||||
bool utf8_mode = IsMostlyUTF8(&input[0], input_pos, mask, input_size,
|
||||
kMinUTF8Ratio);
|
||||
|
||||
// Initialize hashers.
|
||||
int hash_type = std::min(9, params.quality);
|
||||
@ -180,7 +108,6 @@ bool WriteMetaBlockParallel(const BrotliParams& params,
|
||||
CreateBackwardReferences(
|
||||
input_size, input_pos,
|
||||
&input[0], mask,
|
||||
&literal_cost[0], mask,
|
||||
max_backward_distance,
|
||||
params.quality,
|
||||
hashers,
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
#include "./fast_log.h"
|
||||
#include "./types.h"
|
||||
#include "./utf8_util.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
@ -61,8 +62,7 @@ static int DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
|
||||
}
|
||||
|
||||
void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
||||
size_t cost_mask, const uint8_t *data,
|
||||
float *cost) {
|
||||
const uint8_t *data, float *cost) {
|
||||
|
||||
// max_utf8 is 0 (normal ascii single byte modeling),
|
||||
// 1 (for 2-byte utf-8 modeling), or 2 (for 3-byte utf-8 modeling).
|
||||
@ -126,13 +126,16 @@ void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
||||
if (i < 2000) {
|
||||
lit_cost += 0.7 - ((2000 - i) / 2000.0 * 0.35);
|
||||
}
|
||||
cost[(pos + i) & cost_mask] = lit_cost;
|
||||
cost[i] = lit_cost;
|
||||
}
|
||||
}
|
||||
|
||||
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
||||
size_t cost_mask, const uint8_t *data,
|
||||
float *cost) {
|
||||
const uint8_t *data, float *cost) {
|
||||
if (IsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
|
||||
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
|
||||
return;
|
||||
}
|
||||
int histogram[256] = { 0 };
|
||||
int window_half = 2000;
|
||||
int in_window = std::min(static_cast<size_t>(window_half), len);
|
||||
@ -164,7 +167,7 @@ void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
||||
lit_cost *= 0.5;
|
||||
lit_cost += 0.5;
|
||||
}
|
||||
cost[(pos + i) & cost_mask] = lit_cost;
|
||||
cost[i] = lit_cost;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -23,14 +23,9 @@ namespace brotli {
|
||||
|
||||
// Estimates how many bits the literals in the interval [pos, pos + len) in the
|
||||
// ringbuffer (data, mask) will take entropy coded and writes these estimates
|
||||
// to the ringbuffer (cost, mask).
|
||||
// to the cost[0..len) array.
|
||||
void EstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
||||
size_t cost_mask, const uint8_t *data,
|
||||
float *cost);
|
||||
|
||||
void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
||||
size_t cost_mask, const uint8_t *data,
|
||||
float *cost);
|
||||
const uint8_t *data, float *cost);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
|
90
enc/utf8_util.cc
Normal file
90
enc/utf8_util.cc
Normal file
@ -0,0 +1,90 @@
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Heuristics for deciding about the UTF8-ness of strings.
|
||||
|
||||
#include "./utf8_util.h"
|
||||
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
namespace {
|
||||
|
||||
// Decodes a single UTF8-encoded code point from the front of input.
//
//   symbol - receives the decoded code point; when the leading bytes are not
//            an accepted UTF8 sequence it receives 0x110000 | input[0], a
//            value above the Unicode code space.
//   input  - bytes to decode; input[0] is always read, so at least one byte
//            must be readable.
//   size   - number of readable bytes at input. It is a size_t so that callers
//            can pass a size_t byte count without it being narrowed to int.
//
// Returns the number of bytes consumed: 1 to 4 for an accepted sequence, and
// 1 for a byte that could not be decoded.
int ParseAsUTF8(int* symbol, const uint8_t* input, size_t size) {
  // ASCII. A zero byte is not accepted here and ends up in the fallback below.
  if ((input[0] & 0x80) == 0) {
    *symbol = input[0];
    if (*symbol > 0) {
      return 1;
    }
  }
  // 2-byte UTF8
  if (size > 1 &&
      (input[0] & 0xe0) == 0xc0 &&
      (input[1] & 0xc0) == 0x80) {
    *symbol = (((input[0] & 0x1f) << 6) |
               (input[1] & 0x3f));
    // A value that fits in 7 bits is an overlong encoding; reject it.
    if (*symbol > 0x7f) {
      return 2;
    }
  }
  // 3-byte UTF8
  if (size > 2 &&
      (input[0] & 0xf0) == 0xe0 &&
      (input[1] & 0xc0) == 0x80 &&
      (input[2] & 0xc0) == 0x80) {
    *symbol = (((input[0] & 0x0f) << 12) |
               ((input[1] & 0x3f) << 6) |
               (input[2] & 0x3f));
    // A value that fits in 11 bits is an overlong encoding; reject it.
    if (*symbol > 0x7ff) {
      return 3;
    }
  }
  // 4-byte UTF8
  if (size > 3 &&
      (input[0] & 0xf8) == 0xf0 &&
      (input[1] & 0xc0) == 0x80 &&
      (input[2] & 0xc0) == 0x80 &&
      (input[3] & 0xc0) == 0x80) {
    *symbol = (((input[0] & 0x07) << 18) |
               ((input[1] & 0x3f) << 12) |
               ((input[2] & 0x3f) << 6) |
               (input[3] & 0x3f));
    // Reject overlong encodings and values beyond the last code point.
    if (*symbol > 0xffff && *symbol <= 0x10ffff) {
      return 4;
    }
  }
  // Not UTF8, emit a special symbol above the UTF8-code space
  *symbol = 0x110000 | input[0];
  return 1;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Returns true if at least min_fraction of the data is UTF8-encoded.
|
||||
bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
|
||||
const size_t length, const double min_fraction) {
|
||||
size_t size_utf8 = 0;
|
||||
size_t i = 0;
|
||||
while (i < length) {
|
||||
int symbol;
|
||||
int bytes_read = ParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);
|
||||
i += bytes_read;
|
||||
if (symbol < 0x110000) size_utf8 += bytes_read;
|
||||
}
|
||||
return size_utf8 > min_fraction * length;
|
||||
}
|
||||
|
||||
} // namespace brotli
|
33
enc/utf8_util.h
Normal file
33
enc/utf8_util.h
Normal file
@ -0,0 +1,33 @@
|
||||
#ifndef BROTLI_ENC_UTF8_UTIL_H_
|
||||
#define BROTLI_ENC_UTF8_UTIL_H_
|
||||
|
||||
// Copyright 2013 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Heuristics for deciding about the UTF8-ness of strings.
|
||||
|
||||
#include "./types.h"
|
||||
|
||||
namespace brotli {
|
||||
|
||||
static const double kMinUTF8Ratio = 0.75;
|
||||
|
||||
// Returns true if more than min_fraction of the bytes between pos and
// pos + length in the (data, mask) ringbuffer is UTF8-encoded.
|
||||
bool IsMostlyUTF8(const uint8_t* data, const size_t pos, const size_t mask,
|
||||
const size_t length, const double min_fraction);
|
||||
|
||||
} // namespace brotli
|
||||
|
||||
#endif // BROTLI_ENC_UTF8_UTIL_H_
|
2
setup.py
2
setup.py
@ -144,6 +144,7 @@ brotli = Extension("brotli",
|
||||
"enc/metablock.cc",
|
||||
"enc/static_dict.cc",
|
||||
"enc/streams.cc",
|
||||
"enc/utf8_util.cc",
|
||||
"dec/bit_reader.c",
|
||||
"dec/decode.c",
|
||||
"dec/dictionary.c",
|
||||
@ -177,6 +178,7 @@ brotli = Extension("brotli",
|
||||
"enc/streams.h",
|
||||
"enc/transform.h",
|
||||
"enc/types.h",
|
||||
"enc/utf8_util.h",
|
||||
"enc/write_bits.h",
|
||||
"dec/bit_reader.h",
|
||||
"dec/context.h",
|
||||
|
Loading…
Reference in New Issue
Block a user