Merge pull request #507 from terrelln/dict-builder-optimize
Add new dictionary builder
This commit is contained in:
commit
4736dffd41
@ -331,6 +331,10 @@
|
||||
RelativePath="..\..\..\programs\datagen.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\cover.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
|
||||
>
|
||||
|
@ -343,6 +343,10 @@
|
||||
RelativePath="..\..\..\programs\dibio.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\cover.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
|
||||
>
|
||||
|
@ -327,6 +327,10 @@
|
||||
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
|
||||
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\cover.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
|
||||
>
|
||||
|
@ -332,6 +332,10 @@
|
||||
RelativePath="..\..\..\programs\datagen.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\cover.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
|
||||
>
|
||||
|
@ -344,6 +344,10 @@
|
||||
RelativePath="..\..\..\programs\dibio.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\cover.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
|
||||
>
|
||||
|
@ -328,6 +328,10 @@
|
||||
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
|
||||
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\cover.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
|
||||
>
|
||||
|
@ -165,6 +165,7 @@
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress.c" />
|
||||
<ClCompile Include="..\..\..\lib\decompress\huf_decompress.c" />
|
||||
<ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
|
||||
<ClCompile Include="..\..\..\programs\datagen.c" />
|
||||
|
@ -32,6 +32,7 @@
|
||||
<ClCompile Include="..\..\..\lib\deprecated\zbuff_common.c" />
|
||||
<ClCompile Include="..\..\..\lib\deprecated\zbuff_compress.c" />
|
||||
<ClCompile Include="..\..\..\lib\deprecated\zbuff_decompress.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
|
||||
<ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />
|
||||
|
@ -32,6 +32,7 @@
|
||||
<ClCompile Include="..\..\..\lib\deprecated\zbuff_common.c" />
|
||||
<ClCompile Include="..\..\..\lib\deprecated\zbuff_compress.c" />
|
||||
<ClCompile Include="..\..\..\lib\deprecated\zbuff_decompress.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
|
||||
<ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />
|
||||
|
@ -29,6 +29,7 @@
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress.c" />
|
||||
<ClCompile Include="..\..\..\lib\decompress\huf_decompress.c" />
|
||||
<ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
|
||||
<ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />
|
||||
|
@ -67,6 +67,7 @@ SET(Sources
|
||||
${LIBRARY_DIR}/compress/zstd_compress.c
|
||||
${LIBRARY_DIR}/decompress/huf_decompress.c
|
||||
${LIBRARY_DIR}/decompress/zstd_decompress.c
|
||||
${LIBRARY_DIR}/dictBuilder/cover.c
|
||||
${LIBRARY_DIR}/dictBuilder/divsufsort.c
|
||||
${LIBRARY_DIR}/dictBuilder/zdict.c
|
||||
${LIBRARY_DIR}/deprecated/zbuff_common.c
|
||||
|
1021
lib/dictBuilder/cover.c
Normal file
1021
lib/dictBuilder/cover.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -86,6 +86,55 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dict
|
||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||
ZDICT_params_t parameters);
|
||||
|
||||
/*! COVER_params_t :
|
||||
For all values 0 means default.
|
||||
kMin and d are the only required parameters.
|
||||
*/
|
||||
typedef struct {
|
||||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
||||
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
||||
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (256) : Higher means more parameters checked */
|
||||
|
||||
unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
||||
unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
|
||||
int compressionLevel; /* 0 means default; target a specific zstd compression level */
|
||||
} COVER_params_t;
|
||||
|
||||
|
||||
/*! COVER_trainFromBuffer() :
|
||||
Train a dictionary from an array of samples using the COVER algorithm.
|
||||
Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||
supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||
The resulting dictionary will be saved into `dictBuffer`.
|
||||
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
or an error code, which can be tested with ZDICT_isError().
|
||||
Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||
It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
||||
In general, it's recommended to provide a few thousands samples, but this can vary a lot.
|
||||
It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
||||
*/
|
||||
ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||
COVER_params_t parameters);
|
||||
|
||||
/*! COVER_optimizeTrainFromBuffer() :
|
||||
The same requirements as above hold for all the parameters except `parameters`.
|
||||
This function tries many parameter combinations and picks the best parameters.
|
||||
`*parameters` is filled with the best parameters found, and the dictionary
|
||||
constructed with those parameters is stored in `dictBuffer`.
|
||||
|
||||
All of the parameters d, k, steps are optional.
|
||||
If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
|
||||
if steps is zero it defaults to its default value.
|
||||
If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
|
||||
|
||||
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
or an error code, which can be tested with ZDICT_isError().
|
||||
On success `*parameters` contains the parameters selected.
|
||||
*/
|
||||
ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
||||
const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
||||
COVER_params_t *parameters);
|
||||
|
||||
/*! ZDICT_finalizeDictionary() :
|
||||
|
||||
|
@ -42,6 +42,7 @@
|
||||
|
||||
#define SAMPLESIZE_MAX (128 KB)
|
||||
#define MEMMULT 11 /* rough estimation : memory cost to analyze 1 byte of sample */
|
||||
#define COVER_MEMMULT 9 /* rough estimation : memory cost to analyze 1 byte of sample */
|
||||
static const size_t maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
|
||||
|
||||
#define NOISELENGTH 32
|
||||
@ -118,10 +119,36 @@ static unsigned DiB_loadFiles(void* buffer, size_t* bufferSizePtr,
|
||||
fileSizes[n] = fileSize;
|
||||
fclose(f);
|
||||
} }
|
||||
DISPLAYLEVEL(2, "\r%79s\r", "");
|
||||
*bufferSizePtr = pos;
|
||||
return n;
|
||||
}
|
||||
|
||||
#define DiB_rotl32(x,r) ((x << r) | (x >> (32 - r)))
|
||||
static U32 DiB_rand(U32* src)
|
||||
{
|
||||
static const U32 prime1 = 2654435761U;
|
||||
static const U32 prime2 = 2246822519U;
|
||||
U32 rand32 = *src;
|
||||
rand32 *= prime1;
|
||||
rand32 ^= prime2;
|
||||
rand32 = DiB_rotl32(rand32, 13);
|
||||
*src = rand32;
|
||||
return rand32 >> 5;
|
||||
}
|
||||
|
||||
static void DiB_shuffle(const char** fileNamesTable, unsigned nbFiles) {
|
||||
/* Initialize the pseudorandom number generator */
|
||||
U32 seed = 0xFD2FB528;
|
||||
unsigned i;
|
||||
for (i = nbFiles - 1; i > 0; --i) {
|
||||
unsigned const j = DiB_rand(&seed) % (i + 1);
|
||||
const char* tmp = fileNamesTable[j];
|
||||
fileNamesTable[j] = fileNamesTable[i];
|
||||
fileNamesTable[i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*-********************************************************
|
||||
* Dictionary training functions
|
||||
@ -202,7 +229,8 @@ size_t ZDICT_trainFromBuffer_unsafe(void* dictBuffer, size_t dictBufferCapacity,
|
||||
|
||||
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
|
||||
const char** fileNamesTable, unsigned nbFiles,
|
||||
ZDICT_params_t params)
|
||||
ZDICT_params_t *params, COVER_params_t *coverParams,
|
||||
int optimizeCover)
|
||||
{
|
||||
void* const dictBuffer = malloc(maxDictSize);
|
||||
size_t* const fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
|
||||
@ -213,8 +241,10 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
|
||||
int result = 0;
|
||||
|
||||
/* Checks */
|
||||
if (params) g_displayLevel = params->notificationLevel;
|
||||
else if (coverParams) g_displayLevel = coverParams->notificationLevel;
|
||||
else EXM_THROW(13, "Neither dictionary algorith selected"); /* should not happen */
|
||||
if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */
|
||||
g_displayLevel = params.notificationLevel;
|
||||
if (g_tooLargeSamples) {
|
||||
DISPLAYLEVEL(2, "! Warning : some samples are very large \n");
|
||||
DISPLAYLEVEL(2, "! Note that dictionary is only useful for small files or beginning of large files. \n");
|
||||
@ -233,12 +263,29 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
|
||||
DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(benchedSize >> 20));
|
||||
|
||||
/* Load input buffer */
|
||||
DISPLAYLEVEL(3, "Shuffling input files\n");
|
||||
DiB_shuffle(fileNamesTable, nbFiles);
|
||||
nbFiles = DiB_loadFiles(srcBuffer, &benchedSize, fileSizes, fileNamesTable, nbFiles);
|
||||
DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH); /* guard band, for end of buffer condition */
|
||||
|
||||
{ size_t const dictSize = ZDICT_trainFromBuffer_unsafe(dictBuffer, maxDictSize,
|
||||
srcBuffer, fileSizes, nbFiles,
|
||||
params);
|
||||
{
|
||||
size_t dictSize;
|
||||
if (params) {
|
||||
DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH); /* guard band, for end of buffer condition */
|
||||
dictSize = ZDICT_trainFromBuffer_unsafe(dictBuffer, maxDictSize,
|
||||
srcBuffer, fileSizes, nbFiles,
|
||||
*params);
|
||||
} else if (optimizeCover) {
|
||||
dictSize = COVER_optimizeTrainFromBuffer(
|
||||
dictBuffer, maxDictSize, srcBuffer, fileSizes, nbFiles,
|
||||
coverParams);
|
||||
if (!ZDICT_isError(dictSize)) {
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\n", coverParams->k, coverParams->d, coverParams->steps);
|
||||
}
|
||||
} else {
|
||||
dictSize = COVER_trainFromBuffer(dictBuffer, maxDictSize,
|
||||
srcBuffer, fileSizes, nbFiles,
|
||||
*coverParams);
|
||||
}
|
||||
if (ZDICT_isError(dictSize)) {
|
||||
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
|
||||
result = 1;
|
||||
|
@ -32,7 +32,7 @@
|
||||
*/
|
||||
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
|
||||
const char** fileNamesTable, unsigned nbFiles,
|
||||
ZDICT_params_t parameters);
|
||||
|
||||
ZDICT_params_t *params, COVER_params_t *coverParams,
|
||||
int optimizeCover);
|
||||
|
||||
#endif
|
||||
|
@ -127,6 +127,8 @@ static int usage_advanced(const char* programName)
|
||||
DISPLAY( "\n");
|
||||
DISPLAY( "Dictionary builder :\n");
|
||||
DISPLAY( "--train ## : create a dictionary from a training set of files \n");
|
||||
DISPLAY( "--cover=k=#,d=# : use the cover algorithm with parameters k and d \n");
|
||||
DISPLAY( "--optimize-cover[=steps=#,k=#,d=#] : optimize cover parameters with optional parameters\n");
|
||||
DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName);
|
||||
DISPLAY( "--maxdict ## : limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize);
|
||||
DISPLAY( " -s# : dictionary selectivity level (default: %u)\n", g_defaultSelectivityLevel);
|
||||
@ -192,6 +194,27 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
|
||||
}
|
||||
|
||||
|
||||
#ifndef ZSTD_NODICT
|
||||
/**
|
||||
* parseCoverParameters() :
|
||||
* reads cover parameters from *stringPtr (e.g. "--cover=smoothing=100,kmin=48,kstep=4,kmax=64,d=8") into *params
|
||||
* @return 1 means that cover parameters were correct
|
||||
* @return 0 in case of malformed parameters
|
||||
*/
|
||||
static unsigned parseCoverParameters(const char* stringPtr, COVER_params_t *params)
|
||||
{
|
||||
memset(params, 0, sizeof(*params));
|
||||
for (; ;) {
|
||||
if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
||||
if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
||||
if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
||||
return 0;
|
||||
}
|
||||
if (stringPtr[0] != 0) return 0;
|
||||
DISPLAYLEVEL(4, "k=%u\nd=%u\nsteps=%u\n", params->k, params->d, params->steps);
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
/** parseCompressionParameters() :
|
||||
* reads compression parameters from *stringPtr (e.g. "--zstd=wlog=23,clog=23,hlog=22,slog=6,slen=3,tlen=48,strat=6") into *params
|
||||
* @return 1 means that compression parameters were correct
|
||||
@ -254,6 +277,10 @@ int main(int argCount, const char* argv[])
|
||||
char* fileNamesBuf = NULL;
|
||||
unsigned fileNamesNb;
|
||||
#endif
|
||||
#ifndef ZSTD_NODICT
|
||||
COVER_params_t coverParams;
|
||||
int cover = 0;
|
||||
#endif
|
||||
|
||||
/* init */
|
||||
(void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */
|
||||
@ -318,6 +345,20 @@ int main(int argCount, const char* argv[])
|
||||
if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; }
|
||||
|
||||
/* long commands with arguments */
|
||||
#ifndef ZSTD_NODICT
|
||||
if (longCommandWArg(&argument, "--cover=")) {
|
||||
cover=1; if (!parseCoverParameters(argument, &coverParams)) CLEAN_RETURN(badusage(programName));
|
||||
continue;
|
||||
}
|
||||
if (longCommandWArg(&argument, "--optimize-cover")) {
|
||||
cover=2;
|
||||
/* Allow optional arguments following an = */
|
||||
if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
|
||||
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
|
||||
else if (!parseCoverParameters(argument, &coverParams)) { CLEAN_RETURN(badusage(programName)); }
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
if (longCommandWArg(&argument, "--memlimit=")) { memLimit = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--memory=")) { memLimit = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--memlimit-decompress=")) { memLimit = readU32FromChar(&argument); continue; }
|
||||
@ -520,13 +561,20 @@ int main(int argCount, const char* argv[])
|
||||
/* Check if dictionary builder is selected */
|
||||
if (operation==zom_train) {
|
||||
#ifndef ZSTD_NODICT
|
||||
ZDICT_params_t dictParams;
|
||||
memset(&dictParams, 0, sizeof(dictParams));
|
||||
dictParams.compressionLevel = dictCLevel;
|
||||
dictParams.selectivityLevel = dictSelect;
|
||||
dictParams.notificationLevel = displayLevel;
|
||||
dictParams.dictID = dictID;
|
||||
DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, dictParams);
|
||||
if (cover) {
|
||||
coverParams.compressionLevel = dictCLevel;
|
||||
coverParams.notificationLevel = displayLevel;
|
||||
coverParams.dictID = dictID;
|
||||
DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, NULL, &coverParams, cover - 1);
|
||||
} else {
|
||||
ZDICT_params_t dictParams;
|
||||
memset(&dictParams, 0, sizeof(dictParams));
|
||||
dictParams.compressionLevel = dictCLevel;
|
||||
dictParams.selectivityLevel = dictSelect;
|
||||
dictParams.notificationLevel = displayLevel;
|
||||
dictParams.dictID = dictID;
|
||||
DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, &dictParams, NULL, 0);
|
||||
}
|
||||
#endif
|
||||
goto _end;
|
||||
}
|
||||
|
@ -28,6 +28,7 @@
|
||||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressContinue, ZSTD_compressBlock */
|
||||
#include "zstd.h" /* ZSTD_VERSION_STRING */
|
||||
#include "zstd_errors.h" /* ZSTD_getErrorCode */
|
||||
#define ZDICT_STATIC_LINKING_ONLY
|
||||
#include "zdict.h" /* ZDICT_trainFromBuffer */
|
||||
#include "datagen.h" /* RDG_genBuffer */
|
||||
#include "mem.h"
|
||||
@ -317,6 +318,61 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
free(samplesSizes);
|
||||
}
|
||||
|
||||
/* COVER dictionary builder tests */
|
||||
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
size_t dictSize = 16 KB;
|
||||
size_t optDictSize = dictSize;
|
||||
void* dictBuffer = malloc(dictSize);
|
||||
size_t const totalSampleSize = 1 MB;
|
||||
size_t const sampleUnitSize = 8 KB;
|
||||
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
|
||||
size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
|
||||
COVER_params_t params;
|
||||
U32 dictID;
|
||||
|
||||
if (dictBuffer==NULL || samplesSizes==NULL) {
|
||||
free(dictBuffer);
|
||||
free(samplesSizes);
|
||||
goto _output_error;
|
||||
}
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : COVER_trainFromBuffer : ", testNb++);
|
||||
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
|
||||
memset(¶ms, 0, sizeof(params));
|
||||
params.d = 1 + (FUZ_rand(&seed) % 16);
|
||||
params.k = params.d + (FUZ_rand(&seed) % 256);
|
||||
dictSize = COVER_trainFromBuffer(dictBuffer, dictSize,
|
||||
CNBuffer, samplesSizes, nbSamples,
|
||||
params);
|
||||
if (ZDICT_isError(dictSize)) goto _output_error;
|
||||
DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)dictSize);
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : check dictID : ", testNb++);
|
||||
dictID = ZDICT_getDictID(dictBuffer, dictSize);
|
||||
if (dictID==0) goto _output_error;
|
||||
DISPLAYLEVEL(4, "OK : %u \n", dictID);
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : COVER_optimizeTrainFromBuffer : ", testNb++);
|
||||
memset(¶ms, 0, sizeof(params));
|
||||
params.steps = 4;
|
||||
optDictSize = COVER_optimizeTrainFromBuffer(dictBuffer, optDictSize,
|
||||
CNBuffer, samplesSizes, nbSamples,
|
||||
¶ms);
|
||||
if (ZDICT_isError(optDictSize)) goto _output_error;
|
||||
DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)optDictSize);
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : check dictID : ", testNb++);
|
||||
dictID = ZDICT_getDictID(dictBuffer, optDictSize);
|
||||
if (dictID==0) goto _output_error;
|
||||
DISPLAYLEVEL(4, "OK : %u \n", dictID);
|
||||
|
||||
ZSTD_freeCCtx(cctx);
|
||||
ZSTD_freeDCtx(dctx);
|
||||
free(dictBuffer);
|
||||
free(samplesSizes);
|
||||
}
|
||||
|
||||
/* Decompression defense tests */
|
||||
DISPLAYLEVEL(4, "test%3i : Check input length for magic number : ", testNb++);
|
||||
{ size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, CNBuffer, 3);
|
||||
|
@ -255,6 +255,27 @@ rm -rf dirTestDict
|
||||
rm tmp*
|
||||
|
||||
|
||||
$ECHO "\n**** cover dictionary tests **** "
|
||||
|
||||
TESTFILE=../programs/zstdcli.c
|
||||
./datagen > tmpDict
|
||||
$ECHO "- Create first dictionary"
|
||||
$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c -o tmpDict
|
||||
cp $TESTFILE tmp
|
||||
$ZSTD -f tmp -D tmpDict
|
||||
$ZSTD -d tmp.zst -D tmpDict -fo result
|
||||
$DIFF $TESTFILE result
|
||||
$ECHO "- Create second (different) dictionary"
|
||||
$ZSTD --train --cover=k=56,d=8 *.c ../programs/*.c ../programs/*.h -o tmpDictC
|
||||
$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
|
||||
$ECHO "- Create dictionary with short dictID"
|
||||
$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c --dictID 1 -o tmpDict1
|
||||
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
|
||||
$ECHO "- Create dictionary with size limit"
|
||||
$ZSTD --train --optimize-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict 4K
|
||||
rm tmp*
|
||||
|
||||
|
||||
$ECHO "\n**** integrity tests **** "
|
||||
|
||||
$ECHO "test one file (tmp1.zst) "
|
||||
|
Loading…
Reference in New Issue
Block a user