[dictBuilder] Fix displayLevel for corpus warning

Pass the displaylevel into the corpus warning, because it is used in
fast cover and cover, so it needs to respect the local level.
This commit is contained in:
Nick Terrell 2019-04-08 20:00:18 -07:00
parent bfcd5b81d7
commit e649fad7aa
3 changed files with 14 additions and 13 deletions

View File

@ -627,19 +627,20 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
return 1;
}
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers)
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
{
const double ratio = (double)nbDmers / maxDictSize;
if (ratio >= 10) {
return;
}
DISPLAYLEVEL(1, "WARNING: The maximum dictionary size %u is too large "
"compared to the source size %u! "
"size(source)/size(dictionary) = %f, but it should be >= "
"10! This may lead to a subpar dictionary! We recommend "
"training on sources at least 10x, and up to 100x the "
"size of the dictionary!\n", (U32)maxDictSize,
(U32)nbDmers, ratio);
LOCALDISPLAYLEVEL(displayLevel, 1,
"WARNING: The maximum dictionary size %u is too large "
"compared to the source size %u! "
"size(source)/size(dictionary) = %f, but it should be >= "
"10! This may lead to a subpar dictionary! We recommend "
"training on sources at least 10x, and up to 100x the "
"size of the dictionary!\n", (U32)maxDictSize,
(U32)nbDmers, ratio);
}
COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize,
@ -744,7 +745,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
parameters.d, parameters.splitPoint)) {
return ERROR(GENERIC);
}
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize);
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
COVER_ctx_destroy(&ctx);
@ -1060,7 +1061,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
return ERROR(GENERIC);
}
if (!warned) {
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize);
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
warned = 1;
}
/* Loop through k reusing the same context */

View File

@ -65,7 +65,7 @@ COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers,
/**
* Warns the user when their corpus is too small.
*/
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers);
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel);
/**
* Checks total compressed size of a dictionary

View File

@ -570,7 +570,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
DISPLAYLEVEL(1, "Failed to initialize context\n");
return ERROR(GENERIC);
}
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers);
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
/* Build the dictionary */
DISPLAYLEVEL(2, "Building dictionary\n");
{
@ -673,7 +673,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
return ERROR(GENERIC);
}
if (!warned) {
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers);
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
warned = 1;
}
/* Loop through k reusing the same context */