removed fastscan mode

This commit is contained in:
Yann Collet 2016-07-27 12:53:54 +02:00
parent dd25a27702
commit f796f7ab45

View File

@ -489,7 +489,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
const size_t* fileSizes, unsigned nbFiles,
U32 shiftRatio, unsigned maxDictSize)
U32 shiftRatio)
{
int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
int* const suffix = suffix0+1;
@ -542,16 +542,6 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
} }
/* limit dictionary size */
{ U32 const max = dictList->pos; /* convention : nb of useful elts within dictList */
U32 currentSize = 0;
U32 n; for (n=1; n<max; n++) {
currentSize += dictList[n].length;
if (currentSize > maxDictSize) break;
}
dictList->pos = n;
}
_cleanup:
free(suffix0);
free(reverseSuffix);
@ -845,45 +835,6 @@ _cleanup:
}
#define DIB_FASTSEGMENTSIZE 64
/*! ZDICT_fastSampling() (based on an idea proposed by Giuseppe Ottaviano) :
Fill `dictBuffer` with stripes of size DIB_FASTSEGMENTSIZE from `samplesBuffer`,
up to `dictSize`.
Filling starts from the end of `dictBuffer`, down to maximum possible.
if `dictSize` is not a multiply of DIB_FASTSEGMENTSIZE, some bytes at beginning of `dictBuffer` won't be used.
@return : amount of data written into `dictBuffer`,
or an error code
*/
static size_t ZDICT_fastSampling(void* dictBuffer, size_t dictSize,
const void* samplesBuffer, size_t samplesSize)
{
char* dstPtr = (char*)dictBuffer + dictSize;
const char* srcPtr = (const char*)samplesBuffer;
size_t const nbSegments = dictSize / DIB_FASTSEGMENTSIZE;
size_t segNb, interSize;
if (nbSegments <= 2) return ERROR(srcSize_wrong);
if (samplesSize < dictSize) return ERROR(srcSize_wrong);
/* first and last segments are part of dictionary, in case they contain interesting header/footer */
dstPtr -= DIB_FASTSEGMENTSIZE;
memcpy(dstPtr, srcPtr, DIB_FASTSEGMENTSIZE);
dstPtr -= DIB_FASTSEGMENTSIZE;
memcpy(dstPtr, srcPtr+samplesSize-DIB_FASTSEGMENTSIZE, DIB_FASTSEGMENTSIZE);
/* regularly copy a segment */
interSize = (samplesSize - nbSegments*DIB_FASTSEGMENTSIZE) / (nbSegments-1);
srcPtr += DIB_FASTSEGMENTSIZE;
for (segNb=2; segNb < nbSegments; segNb++) {
srcPtr += interSize;
dstPtr -= DIB_FASTSEGMENTSIZE;
memcpy(dstPtr, srcPtr, DIB_FASTSEGMENTSIZE);
srcPtr += DIB_FASTSEGMENTSIZE;
}
return nbSegments * DIB_FASTSEGMENTSIZE;
}
size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t params)
@ -914,7 +865,7 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
}
#define DIB_MINSAMPLESSIZE (DIB_FASTSEGMENTSIZE*3)
#define DIB_MINSAMPLESSIZE 512
/*! ZDICT_trainFromBuffer_unsafe() :
* `samplesBuffer` must be followed by noisy guard band.
* @return : size of dictionary.
@ -928,53 +879,67 @@ size_t ZDICT_trainFromBuffer_unsafe(
dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
unsigned selectivity = params.selectivityLevel;
size_t const targetDictSize = maxDictSize;
size_t sBuffSize;
size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
size_t dictSize = 0;
/* checks */
if (!dictList) return ERROR(memory_allocation);
if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) { free(dictList); return ERROR(dstSize_tooSmall); }
if (samplesBuffSize < DIB_MINSAMPLESSIZE) { free(dictList); return 0; } /* not enough source to create dictionary */
/* init */
{ unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
if (sBuffSize < DIB_MINSAMPLESSIZE) { free(dictList); return 0; } /* not enough source to create dictionary */
ZDICT_initDictItem(dictList);
g_displayLevel = params.notificationLevel;
if (selectivity==0) selectivity = g_selectivity_default;
/* build dictionary */
if (selectivity>1) { /* selectivity == 1 => fast mode */
ZDICT_trainBuffer(dictList, dictListSize,
samplesBuffer, sBuffSize,
samplesSizes, nbSamples,
selectivity, (U32)targetDictSize);
ZDICT_trainBuffer(dictList, dictListSize,
samplesBuffer, samplesBuffSize,
samplesSizes, nbSamples,
selectivity);
/* display best matches */
if (g_displayLevel>= 3) {
U32 const nb = 25;
U32 const dictContentSize = ZDICT_dictSize(dictList);
U32 u;
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
DISPLAYLEVEL(3, "list %u best segments \n", nb);
for (u=1; u<=nb; u++) {
U32 pos = dictList[u].pos;
U32 length = dictList[u].length;
U32 printedLength = MIN(40, length);
DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
u, length, pos, dictList[u].savings);
ZDICT_printHex(3, (const char*)samplesBuffer+pos, printedLength);
DISPLAYLEVEL(3, "| \n");
} }
/* display best matches */
if (g_displayLevel>= 3) {
U32 const nb = 25;
U32 const dictContentSize = ZDICT_dictSize(dictList);
U32 u;
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
DISPLAYLEVEL(3, "list %u best segments \n", nb);
for (u=1; u<=nb; u++) {
U32 pos = dictList[u].pos;
U32 length = dictList[u].length;
U32 printedLength = MIN(40, length);
DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
u, length, pos, dictList[u].savings);
ZDICT_printHex(3, (const char*)samplesBuffer+pos, printedLength);
DISPLAYLEVEL(3, "| \n");
} } }
/* create dictionary */
{ U32 dictContentSize = ZDICT_dictSize(dictList);
U64 const totalSamplesSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
if (dictContentSize < targetDictSize/2) {
DISPLAYLEVEL(2, "! warning : created dictionary significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
if (totalSamplesSize < 10 * targetDictSize)
DISPLAYLEVEL(2, "! consider also increasing the number of samples (total size : %u MB)\n", (U32)(totalSamplesSize>>20));
if (samplesBuffSize < 10 * targetDictSize)
DISPLAYLEVEL(2, "! consider also increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
}
if (dictContentSize > targetDictSize*2) {
DISPLAYLEVEL(2, "! warning : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
DISPLAYLEVEL(2, "! consider decreasing selectivity to produce denser dictionary (-s%u) \n", selectivity-1);
DISPLAYLEVEL(2, "! test its efficiency on samples \n");
}
/* limit dictionary size */
{ U32 const max = dictList->pos; /* convention : nb of useful elts within dictList */
U32 currentSize = 0;
U32 n; for (n=1; n<max; n++) {
currentSize += dictList[n].length;
if (currentSize > targetDictSize) break;
}
dictList->pos = n;
}
/* build dict content */
@ -987,14 +952,6 @@ size_t ZDICT_trainFromBuffer_unsafe(
memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l);
} }
/* fast mode dict content */
if (selectivity==1) { /* note could also be used to complete a dictionary, but not necessarily better */
DISPLAYLEVEL(3, "\r%70s\r", ""); /* clean display line */
DISPLAYLEVEL(3, "Adding %u KB with fast sampling \n", (U32)(targetDictSize>>10));
dictContentSize = (U32)ZDICT_fastSampling(dictBuffer, targetDictSize,
samplesBuffer, sBuffSize);
}
dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
samplesBuffer, samplesSizes, nbSamples,
params);