added tutorial warning messages for dictBuilder
This commit is contained in:
parent
04cdd8660d
commit
dd25a27702
1
.gitignore
vendored
1
.gitignore
vendored
@ -40,6 +40,7 @@ projects/cmake/
|
|||||||
|
|
||||||
# Test artefacts
|
# Test artefacts
|
||||||
tmp*
|
tmp*
|
||||||
|
dictionary
|
||||||
|
|
||||||
# tmp files
|
# tmp files
|
||||||
*.swp
|
*.swp
|
||||||
|
1
NEWS
1
NEWS
@ -1,5 +1,6 @@
|
|||||||
v0.8.0
|
v0.8.0
|
||||||
New : updated compression format
|
New : updated compression format
|
||||||
|
Improved : better speed on clang and gcc -O2, thanks to Eric Biggers
|
||||||
Fixed : legacy mode with ZSTD_HEAPMODE=0, by Christopher Bergqvist
|
Fixed : legacy mode with ZSTD_HEAPMODE=0, by Christopher Bergqvist
|
||||||
Fixed : premature end of frame when zero-sized raw block, reported by Eric Biggers
|
Fixed : premature end of frame when zero-sized raw block, reported by Eric Biggers
|
||||||
Fixed : checksum correctly checked in single-pass mode
|
Fixed : checksum correctly checked in single-pass mode
|
||||||
|
@ -957,17 +957,25 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|||||||
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
|
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
|
||||||
DISPLAYLEVEL(3, "list %u best segments \n", nb);
|
DISPLAYLEVEL(3, "list %u best segments \n", nb);
|
||||||
for (u=1; u<=nb; u++) {
|
for (u=1; u<=nb; u++) {
|
||||||
U32 p = dictList[u].pos;
|
U32 pos = dictList[u].pos;
|
||||||
U32 l = dictList[u].length;
|
U32 length = dictList[u].length;
|
||||||
U32 d = MIN(40, l);
|
U32 printedLength = MIN(40, length);
|
||||||
DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
|
DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
|
||||||
u, l, p, dictList[u].savings);
|
u, length, pos, dictList[u].savings);
|
||||||
ZDICT_printHex(3, (const char*)samplesBuffer+p, d);
|
ZDICT_printHex(3, (const char*)samplesBuffer+pos, printedLength);
|
||||||
DISPLAYLEVEL(3, "| \n");
|
DISPLAYLEVEL(3, "| \n");
|
||||||
} } }
|
} } }
|
||||||
|
|
||||||
/* create dictionary */
|
/* create dictionary */
|
||||||
{ U32 dictContentSize = ZDICT_dictSize(dictList);
|
{ U32 dictContentSize = ZDICT_dictSize(dictList);
|
||||||
|
U64 const totalSamplesSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
|
||||||
|
if (dictContentSize < targetDictSize/2) {
|
||||||
|
DISPLAYLEVEL(2, "! warning : created dictionary significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
|
||||||
|
DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
|
||||||
|
DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
|
||||||
|
if (totalSamplesSize < 10 * targetDictSize)
|
||||||
|
DISPLAYLEVEL(2, "! consider also increasing the number of samples (total size : %u MB)\n", (U32)(totalSamplesSize>>20));
|
||||||
|
}
|
||||||
|
|
||||||
/* build dict content */
|
/* build dict content */
|
||||||
{ U32 u;
|
{ U32 u;
|
||||||
|
@ -202,9 +202,16 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
|
|||||||
|
|
||||||
/* Checks */
|
/* Checks */
|
||||||
if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */
|
if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */
|
||||||
|
g_displayLevel = params.notificationLevel;
|
||||||
|
if (nbFiles < 5) {
|
||||||
|
DISPLAYLEVEL(2, "! Warning : nb of samples too low for proper processing \n");
|
||||||
|
DISPLAYLEVEL(2, "! Please provide one file per sample \n");
|
||||||
|
DISPLAYLEVEL(2, "! Avoid concatenating multiple samples into a single file \n");
|
||||||
|
DISPLAYLEVEL(2, "! otherwise, dictBuilder will be unable to find the beginning of each sample \n");
|
||||||
|
DISPLAYLEVEL(2, "! resulting in distorted statistics \n");
|
||||||
|
}
|
||||||
|
|
||||||
/* init */
|
/* init */
|
||||||
g_displayLevel = params.notificationLevel;
|
|
||||||
if (benchedSize < totalSizeToLoad)
|
if (benchedSize < totalSizeToLoad)
|
||||||
DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(benchedSize >> 20));
|
DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(benchedSize >> 20));
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user