Apply suggestions from code review

Co-authored-by: Eugene Kliuchnikov <eustas@google.com>
This commit is contained in:
Mahesh Madhav 2024-11-12 14:45:06 -08:00 committed by Eugene Kliuchnikov
parent 1054ecc262
commit 782aadd0ff
2 changed files with 8 additions and 6 deletions

View File

@ -118,7 +118,8 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
size_t insert_cost_ix = symbol * num_histograms; size_t insert_cost_ix = symbol * num_histograms;
double min_cost = 1e99; double min_cost = 1e99;
double block_switch_cost = block_switch_bitcost; double block_switch_cost = block_switch_bitcost;
static const double threshold = 0.07 / 2000.0; static const size_t prologue_length = 2000;
static const double multiplier = 0.07 / 2000;
size_t k; size_t k;
for (k = 0; k < num_histograms; ++k) { for (k = 0; k < num_histograms; ++k) {
/* We are coding the symbol with entropy code k. */ /* We are coding the symbol with entropy code k. */
@ -129,8 +130,8 @@ static size_t FN(FindBlocks)(const DataType* data, const size_t length,
} }
} }
/* More blocks for the beginning. */ /* More blocks for the beginning. */
if (byte_ix < 2000) { if (byte_ix < prologue_length) {
block_switch_cost *= 0.77 + threshold * (double)byte_ix; block_switch_cost *= 0.77 + multiplier * (double)byte_ix;
} }
for (k = 0; k < num_histograms; ++k) { for (k = 0; k < num_histograms; ++k) {
cost[k] -= min_cost; cost[k] -= min_cost;

View File

@ -106,7 +106,8 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
size_t utf8_pos = UTF8Position(last_c, c, max_utf8); size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
size_t masked_pos = (pos + i) & mask; size_t masked_pos = (pos + i) & mask;
size_t histo = histogram[256 * utf8_pos + data[masked_pos]]; size_t histo = histogram[256 * utf8_pos + data[masked_pos]];
static const double threshold = 0.35 / 2000.0; static const size_t prologue_length = 2000;
static const double multiplier = 0.35 / 2000;
double lit_cost; double lit_cost;
if (histo == 0) { if (histo == 0) {
histo = 1; histo = 1;
@ -121,8 +122,8 @@ static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
Perhaps because the entropy source is changing its properties Perhaps because the entropy source is changing its properties
rapidly in the beginning of the file, perhaps because the beginning rapidly in the beginning of the file, perhaps because the beginning
of the data is a statistical "anomaly". */ of the data is a statistical "anomaly". */
if (i < 2000) { if (i < prologue_length) {
lit_cost += 0.7 - ((double)(2000 - i) * threshold); lit_cost += 0.35 + multiplier * (double)i;
} }
cost[i] = (float)lit_cost; cost[i] = (float)lit_cost;
} }