Merge pull request #1204 from heshpdx:master

PiperOrigin-RevId: 697922880
This commit is contained in:
Copybara-Service 2024-11-19 01:42:21 -08:00
commit 2b6efcbdcc
3 changed files with 10 additions and 5 deletions

View File

@@ -118,6 +118,8 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
size_t insert_cost_ix = symbol * num_histograms; size_t insert_cost_ix = symbol * num_histograms;
double min_cost = 1e99; double min_cost = 1e99;
double block_switch_cost = block_switch_bitcost; double block_switch_cost = block_switch_bitcost;
static const size_t prologue_length = 2000;
static const double multiplier = 0.07 / 2000;
size_t k; size_t k;
for (k = 0; k < num_histograms; ++k) { for (k = 0; k < num_histograms; ++k) {
/* We are coding the symbol with entropy code k. */ /* We are coding the symbol with entropy code k. */
@@ -128,8 +130,8 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
} }
} }
/* More blocks for the beginning. */ /* More blocks for the beginning. */
if (byte_ix < 2000) { if (byte_ix < prologue_length) {
block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000; block_switch_cost *= 0.77 + multiplier * (double)byte_ix;
} }
for (k = 0; k < num_histograms; ++k) { for (k = 0; k < num_histograms; ++k) {
cost[k] -= min_cost; cost[k] -= min_cost;

View File

@@ -437,9 +437,10 @@ static BROTLI_BOOL ShouldCompress(
if ((double)num_literals > 0.99 * (double)bytes) { if ((double)num_literals > 0.99 * (double)bytes) {
uint32_t literal_histo[256] = { 0 }; uint32_t literal_histo[256] = { 0 };
static const uint32_t kSampleRate = 13; static const uint32_t kSampleRate = 13;
static const double kInvSampleRate = 1.0 / 13.0;
static const double kMinEntropy = 7.92; static const double kMinEntropy = 7.92;
const double bit_cost_threshold = const double bit_cost_threshold =
(double)bytes * kMinEntropy / kSampleRate; (double)bytes * kMinEntropy * kInvSampleRate;
size_t t = (bytes + kSampleRate - 1) / kSampleRate; size_t t = (bytes + kSampleRate - 1) / kSampleRate;
uint32_t pos = (uint32_t)last_flush_pos; uint32_t pos = (uint32_t)last_flush_pos;
size_t i; size_t i;

View File

@@ -106,6 +106,8 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
size_t utf8_pos = UTF8Position(last_c, c, max_utf8); size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
size_t masked_pos = (pos + i) & mask; size_t masked_pos = (pos + i) & mask;
size_t histo = histogram[256 * utf8_pos + data[masked_pos]]; size_t histo = histogram[256 * utf8_pos + data[masked_pos]];
static const size_t prologue_length = 2000;
static const double multiplier = 0.35 / 2000;
double lit_cost; double lit_cost;
if (histo == 0) { if (histo == 0) {
histo = 1; histo = 1;
@@ -120,8 +122,8 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
Perhaps because the entropy source is changing its properties Perhaps because the entropy source is changing its properties
rapidly in the beginning of the file, perhaps because the beginning rapidly in the beginning of the file, perhaps because the beginning
of the data is a statistical "anomaly". */ of the data is a statistical "anomaly". */
if (i < 2000) { if (i < prologue_length) {
lit_cost += 0.7 - ((double)(2000 - i) / 2000.0 * 0.35); lit_cost += 0.35 + multiplier * (double)i;
} }
cost[i] = (float)lit_cost; cost[i] = (float)lit_cost;
} }