better warning and error messages in case of dictionary training failure (#292)
This commit is contained in:
parent
79d9cdd258
commit
49d105cfcf
@ -692,7 +692,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
esr.workPlace = malloc(ZSTD_BLOCKSIZE_ABSOLUTEMAX);
|
||||
if (!esr.ref || !esr.zc || !esr.workPlace) {
|
||||
eSize = ERROR(memory_allocation);
|
||||
DISPLAYLEVEL(1, "Not enough memory");
|
||||
DISPLAYLEVEL(1, "Not enough memory \n");
|
||||
goto _cleanup;
|
||||
}
|
||||
if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; } /* too large dictionary */
|
||||
@ -708,7 +708,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
{ size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
|
||||
if (ZSTD_isError(beginResult)) {
|
||||
eSize = ERROR(GENERIC);
|
||||
DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed ");
|
||||
DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed \n");
|
||||
goto _cleanup;
|
||||
} }
|
||||
|
||||
@ -724,7 +724,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
errorCode = HUF_buildCTable (hufTable, countLit, 255, huffLog);
|
||||
if (HUF_isError(errorCode)) {
|
||||
eSize = ERROR(GENERIC);
|
||||
DISPLAYLEVEL(1, "HUF_buildCTable error");
|
||||
DISPLAYLEVEL(1, "HUF_buildCTable error \n");
|
||||
goto _cleanup;
|
||||
}
|
||||
huffLog = (U32)errorCode;
|
||||
@ -740,7 +740,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
|
||||
if (FSE_isError(errorCode)) {
|
||||
eSize = ERROR(GENERIC);
|
||||
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount");
|
||||
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
|
||||
goto _cleanup;
|
||||
}
|
||||
Offlog = (U32)errorCode;
|
||||
@ -749,7 +749,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
|
||||
if (FSE_isError(errorCode)) {
|
||||
eSize = ERROR(GENERIC);
|
||||
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount");
|
||||
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
|
||||
goto _cleanup;
|
||||
}
|
||||
mlLog = (U32)errorCode;
|
||||
@ -758,7 +758,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
|
||||
if (FSE_isError(errorCode)) {
|
||||
eSize = ERROR(GENERIC);
|
||||
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount");
|
||||
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
|
||||
goto _cleanup;
|
||||
}
|
||||
llLog = (U32)errorCode;
|
||||
@ -768,7 +768,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
{ size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
|
||||
if (HUF_isError(hhSize)) {
|
||||
eSize = ERROR(GENERIC);
|
||||
DISPLAYLEVEL(1, "HUF_writeCTable error");
|
||||
DISPLAYLEVEL(1, "HUF_writeCTable error \n");
|
||||
goto _cleanup;
|
||||
}
|
||||
dstPtr += hhSize;
|
||||
@ -779,7 +779,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
{ size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
|
||||
if (FSE_isError(ohSize)) {
|
||||
eSize = ERROR(GENERIC);
|
||||
DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount");
|
||||
DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
|
||||
goto _cleanup;
|
||||
}
|
||||
dstPtr += ohSize;
|
||||
@ -790,7 +790,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
{ size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
|
||||
if (FSE_isError(mhSize)) {
|
||||
eSize = ERROR(GENERIC);
|
||||
DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount");
|
||||
DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
|
||||
goto _cleanup;
|
||||
}
|
||||
dstPtr += mhSize;
|
||||
@ -801,7 +801,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
{ size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
|
||||
if (FSE_isError(lhSize)) {
|
||||
eSize = ERROR(GENERIC);
|
||||
DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
|
||||
DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
|
||||
goto _cleanup;
|
||||
}
|
||||
dstPtr += lhSize;
|
||||
@ -811,7 +811,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
||||
|
||||
if (maxDstSize<12) {
|
||||
eSize = ERROR(GENERIC);
|
||||
DISPLAYLEVEL(1, "not enough space to write RepOffsets");
|
||||
DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
|
||||
goto _cleanup;
|
||||
}
|
||||
# if 0
|
||||
@ -856,10 +856,14 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
|
||||
/* entropy tables */
|
||||
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
|
||||
DISPLAYLEVEL(2, "statistics ... \n");
|
||||
hSize += ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
|
||||
{ size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
|
||||
compressionLevel,
|
||||
samplesBuffer, samplesSizes, nbSamples,
|
||||
(char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
|
||||
if (ZDICT_isError(eSize)) return eSize;
|
||||
hSize += eSize;
|
||||
}
|
||||
|
||||
|
||||
if (hSize + dictContentSize < dictBufferCapacity)
|
||||
memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
|
||||
@ -902,7 +906,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
||||
|
||||
/* display best matches */
|
||||
if (g_displayLevel>= 3) {
|
||||
U32 const nb = 25;
|
||||
U32 const nb = MIN(25, dictList[0].pos);
|
||||
U32 const dictContentSize = ZDICT_dictSize(dictList);
|
||||
U32 u;
|
||||
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
|
||||
|
@ -204,11 +204,11 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
|
||||
if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */
|
||||
g_displayLevel = params.notificationLevel;
|
||||
if (nbFiles < 5) {
|
||||
DISPLAYLEVEL(2, "! Warning : nb of samples too low for proper processing \n");
|
||||
DISPLAYLEVEL(2, "! Please provide one file per sample \n");
|
||||
DISPLAYLEVEL(2, "! Avoid concatenating multiple samples into a single file \n");
|
||||
DISPLAYLEVEL(2, "! otherwise, dictBuilder will be unable to find the beginning of each sample \n");
|
||||
DISPLAYLEVEL(2, "! resulting in distorted statistics \n");
|
||||
DISPLAYLEVEL(2, "! Warning : nb of samples too low for proper processing ! \n");
|
||||
DISPLAYLEVEL(2, "! Please provide _one file per sample_. \n");
|
||||
DISPLAYLEVEL(2, "! Do not concatenate samples together into a single file, \n");
|
||||
DISPLAYLEVEL(2, "! as dictBuilder will be unable to find the beginning of each sample, \n");
|
||||
DISPLAYLEVEL(2, "! resulting in poor dictionary quality. \n");
|
||||
}
|
||||
|
||||
/* init */
|
||||
|
Loading…
Reference in New Issue
Block a user