Merge remote-tracking branch 'refs/remotes/facebook/dev' into dev11
This commit is contained in:
commit
8a0bc30a2d
2
Makefile
2
Makefile
@ -88,7 +88,7 @@ travis-install:
|
||||
$(MAKE) install PREFIX=~/install_test_dir
|
||||
|
||||
gpptest: clean
|
||||
$(MAKE) -C programs all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
|
||||
CC=g++ $(MAKE) -C programs all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
|
||||
|
||||
gcc5test: clean
|
||||
gcc-5 -v
|
||||
|
@ -331,6 +331,10 @@
|
||||
RelativePath="..\..\..\programs\datagen.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\cover.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
|
||||
>
|
||||
|
@ -343,6 +343,10 @@
|
||||
RelativePath="..\..\..\programs\dibio.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\cover.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
|
||||
>
|
||||
|
@ -327,6 +327,10 @@
|
||||
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
|
||||
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\cover.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
|
||||
>
|
||||
|
@ -332,6 +332,10 @@
|
||||
RelativePath="..\..\..\programs\datagen.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\cover.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
|
||||
>
|
||||
|
@ -344,6 +344,10 @@
|
||||
RelativePath="..\..\..\programs\dibio.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\cover.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
|
||||
>
|
||||
|
@ -328,6 +328,10 @@
|
||||
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
|
||||
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\cover.c"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
|
||||
>
|
||||
|
@ -165,6 +165,7 @@
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress.c" />
|
||||
<ClCompile Include="..\..\..\lib\decompress\huf_decompress.c" />
|
||||
<ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
|
||||
<ClCompile Include="..\..\..\programs\datagen.c" />
|
||||
|
@ -32,6 +32,7 @@
|
||||
<ClCompile Include="..\..\..\lib\deprecated\zbuff_common.c" />
|
||||
<ClCompile Include="..\..\..\lib\deprecated\zbuff_compress.c" />
|
||||
<ClCompile Include="..\..\..\lib\deprecated\zbuff_decompress.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
|
||||
<ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />
|
||||
|
@ -32,6 +32,7 @@
|
||||
<ClCompile Include="..\..\..\lib\deprecated\zbuff_common.c" />
|
||||
<ClCompile Include="..\..\..\lib\deprecated\zbuff_compress.c" />
|
||||
<ClCompile Include="..\..\..\lib\deprecated\zbuff_decompress.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
|
||||
<ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />
|
||||
|
@ -29,6 +29,7 @@
|
||||
<ClCompile Include="..\..\..\lib\compress\zstd_compress.c" />
|
||||
<ClCompile Include="..\..\..\lib\decompress\huf_decompress.c" />
|
||||
<ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
|
||||
<ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
|
||||
<ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />
|
||||
|
@ -67,6 +67,7 @@ SET(Sources
|
||||
${LIBRARY_DIR}/compress/zstd_compress.c
|
||||
${LIBRARY_DIR}/decompress/huf_decompress.c
|
||||
${LIBRARY_DIR}/decompress/zstd_decompress.c
|
||||
${LIBRARY_DIR}/dictBuilder/cover.c
|
||||
${LIBRARY_DIR}/dictBuilder/divsufsort.c
|
||||
${LIBRARY_DIR}/dictBuilder/zdict.c
|
||||
${LIBRARY_DIR}/deprecated/zbuff_common.c
|
||||
|
@ -2970,6 +2970,7 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
|
||||
ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
|
||||
size_t const initError = ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0);
|
||||
zcs->cdict = cdict;
|
||||
zcs->cctx->dictID = params.fParams.noDictIDFlag ? 0 : cdict->refContext->dictID;
|
||||
return initError;
|
||||
}
|
||||
|
||||
|
@ -1444,7 +1444,7 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
|
||||
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
|
||||
if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize);
|
||||
#endif
|
||||
ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
|
||||
CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
|
||||
ZSTD_checkContinuity(dctx, dst);
|
||||
return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
|
||||
}
|
||||
|
1023
lib/dictBuilder/cover.c
Normal file
1023
lib/dictBuilder/cover.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -86,6 +86,57 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dict
|
||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||
ZDICT_params_t parameters);
|
||||
|
||||
/*! COVER_params_t :
|
||||
For all values 0 means default.
|
||||
kMin and d are the only required parameters.
|
||||
*/
|
||||
typedef struct {
|
||||
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
||||
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
|
||||
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (256) : Higher means more parameters checked */
|
||||
|
||||
unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
||||
unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
|
||||
int compressionLevel; /* 0 means default; target a specific zstd compression level */
|
||||
} COVER_params_t;
|
||||
|
||||
|
||||
/*! COVER_trainFromBuffer() :
|
||||
Train a dictionary from an array of samples using the COVER algorithm.
|
||||
Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
||||
supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
||||
The resulting dictionary will be saved into `dictBuffer`.
|
||||
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
or an error code, which can be tested with ZDICT_isError().
|
||||
Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
|
||||
Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
|
||||
It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
|
||||
In general, it's recommended to provide a few thousands samples, but this can vary a lot.
|
||||
It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
||||
*/
|
||||
ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
||||
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
||||
COVER_params_t parameters);
|
||||
|
||||
/*! COVER_optimizeTrainFromBuffer() :
|
||||
The same requirements as above hold for all the parameters except `parameters`.
|
||||
This function tries many parameter combinations and picks the best parameters.
|
||||
`*parameters` is filled with the best parameters found, and the dictionary
|
||||
constructed with those parameters is stored in `dictBuffer`.
|
||||
|
||||
All of the parameters d, k, steps are optional.
|
||||
If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
|
||||
if steps is zero it defaults to its default value.
|
||||
If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
|
||||
|
||||
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
||||
or an error code, which can be tested with ZDICT_isError().
|
||||
On success `*parameters` contains the parameters selected.
|
||||
Note : COVER_optimizeTrainFromBuffer() requires about 9 bytes of memory for each input byte.
|
||||
*/
|
||||
ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
||||
const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
||||
COVER_params_t *parameters);
|
||||
|
||||
/*! ZDICT_finalizeDictionary() :
|
||||
|
||||
|
@ -42,6 +42,7 @@
|
||||
|
||||
#define SAMPLESIZE_MAX (128 KB)
|
||||
#define MEMMULT 11 /* rough estimation : memory cost to analyze 1 byte of sample */
|
||||
#define COVER_MEMMULT 9 /* rough estimation : memory cost to analyze 1 byte of sample */
|
||||
static const size_t maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
|
||||
|
||||
#define NOISELENGTH 32
|
||||
@ -118,10 +119,36 @@ static unsigned DiB_loadFiles(void* buffer, size_t* bufferSizePtr,
|
||||
fileSizes[n] = fileSize;
|
||||
fclose(f);
|
||||
} }
|
||||
DISPLAYLEVEL(2, "\r%79s\r", "");
|
||||
*bufferSizePtr = pos;
|
||||
return n;
|
||||
}
|
||||
|
||||
#define DiB_rotl32(x,r) ((x << r) | (x >> (32 - r)))
|
||||
static U32 DiB_rand(U32* src)
|
||||
{
|
||||
static const U32 prime1 = 2654435761U;
|
||||
static const U32 prime2 = 2246822519U;
|
||||
U32 rand32 = *src;
|
||||
rand32 *= prime1;
|
||||
rand32 ^= prime2;
|
||||
rand32 = DiB_rotl32(rand32, 13);
|
||||
*src = rand32;
|
||||
return rand32 >> 5;
|
||||
}
|
||||
|
||||
static void DiB_shuffle(const char** fileNamesTable, unsigned nbFiles) {
|
||||
/* Initialize the pseudorandom number generator */
|
||||
U32 seed = 0xFD2FB528;
|
||||
unsigned i;
|
||||
for (i = nbFiles - 1; i > 0; --i) {
|
||||
unsigned const j = DiB_rand(&seed) % (i + 1);
|
||||
const char* tmp = fileNamesTable[j];
|
||||
fileNamesTable[j] = fileNamesTable[i];
|
||||
fileNamesTable[i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*-********************************************************
|
||||
* Dictionary training functions
|
||||
@ -202,19 +229,23 @@ size_t ZDICT_trainFromBuffer_unsafe(void* dictBuffer, size_t dictBufferCapacity,
|
||||
|
||||
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
|
||||
const char** fileNamesTable, unsigned nbFiles,
|
||||
ZDICT_params_t params)
|
||||
ZDICT_params_t *params, COVER_params_t *coverParams,
|
||||
int optimizeCover)
|
||||
{
|
||||
void* const dictBuffer = malloc(maxDictSize);
|
||||
size_t* const fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
|
||||
unsigned long long const totalSizeToLoad = DiB_getTotalCappedFileSize(fileNamesTable, nbFiles);
|
||||
size_t const maxMem = DiB_findMaxMem(totalSizeToLoad * MEMMULT) / MEMMULT;
|
||||
size_t const memMult = params ? MEMMULT : COVER_MEMMULT;
|
||||
size_t const maxMem = DiB_findMaxMem(totalSizeToLoad * memMult) / memMult;
|
||||
size_t benchedSize = (size_t) MIN ((unsigned long long)maxMem, totalSizeToLoad);
|
||||
void* const srcBuffer = malloc(benchedSize+NOISELENGTH);
|
||||
int result = 0;
|
||||
|
||||
/* Checks */
|
||||
if (params) g_displayLevel = params->notificationLevel;
|
||||
else if (coverParams) g_displayLevel = coverParams->notificationLevel;
|
||||
else EXM_THROW(13, "Neither dictionary algorith selected"); /* should not happen */
|
||||
if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */
|
||||
g_displayLevel = params.notificationLevel;
|
||||
if (g_tooLargeSamples) {
|
||||
DISPLAYLEVEL(2, "! Warning : some samples are very large \n");
|
||||
DISPLAYLEVEL(2, "! Note that dictionary is only useful for small files or beginning of large files. \n");
|
||||
@ -233,12 +264,29 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
|
||||
DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(benchedSize >> 20));
|
||||
|
||||
/* Load input buffer */
|
||||
DISPLAYLEVEL(3, "Shuffling input files\n");
|
||||
DiB_shuffle(fileNamesTable, nbFiles);
|
||||
nbFiles = DiB_loadFiles(srcBuffer, &benchedSize, fileSizes, fileNamesTable, nbFiles);
|
||||
DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH); /* guard band, for end of buffer condition */
|
||||
|
||||
{ size_t const dictSize = ZDICT_trainFromBuffer_unsafe(dictBuffer, maxDictSize,
|
||||
{
|
||||
size_t dictSize;
|
||||
if (params) {
|
||||
DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH); /* guard band, for end of buffer condition */
|
||||
dictSize = ZDICT_trainFromBuffer_unsafe(dictBuffer, maxDictSize,
|
||||
srcBuffer, fileSizes, nbFiles,
|
||||
params);
|
||||
*params);
|
||||
} else if (optimizeCover) {
|
||||
dictSize = COVER_optimizeTrainFromBuffer(
|
||||
dictBuffer, maxDictSize, srcBuffer, fileSizes, nbFiles,
|
||||
coverParams);
|
||||
if (!ZDICT_isError(dictSize)) {
|
||||
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\n", coverParams->k, coverParams->d, coverParams->steps);
|
||||
}
|
||||
} else {
|
||||
dictSize = COVER_trainFromBuffer(dictBuffer, maxDictSize,
|
||||
srcBuffer, fileSizes, nbFiles,
|
||||
*coverParams);
|
||||
}
|
||||
if (ZDICT_isError(dictSize)) {
|
||||
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
|
||||
result = 1;
|
||||
|
@ -32,7 +32,7 @@
|
||||
*/
|
||||
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
|
||||
const char** fileNamesTable, unsigned nbFiles,
|
||||
ZDICT_params_t parameters);
|
||||
|
||||
ZDICT_params_t *params, COVER_params_t *coverParams,
|
||||
int optimizeCover);
|
||||
|
||||
#endif
|
||||
|
@ -69,6 +69,8 @@ from standard input if it is a terminal.
|
||||
.PP
|
||||
Unless
|
||||
.B \-\-stdout
|
||||
or
|
||||
.B \-o
|
||||
is specified,
|
||||
.I files
|
||||
are written to a new file whose name is derived from the source
|
||||
@ -159,7 +161,8 @@ No files are created or removed.
|
||||
# compression level [1-19] (default:3)
|
||||
.TP
|
||||
.BR \--ultra
|
||||
unlocks high compression levels 20+ (maximum 22), using a lot more memory
|
||||
unlocks high compression levels 20+ (maximum 22), using a lot more memory.
|
||||
Note that decompression will also require more memory when using these levels.
|
||||
.TP
|
||||
.B \-D file
|
||||
use `file` as Dictionary to compress or decompress FILE(s)
|
||||
|
@ -127,6 +127,8 @@ static int usage_advanced(const char* programName)
|
||||
DISPLAY( "\n");
|
||||
DISPLAY( "Dictionary builder :\n");
|
||||
DISPLAY( "--train ## : create a dictionary from a training set of files \n");
|
||||
DISPLAY( "--cover=k=#,d=# : use the cover algorithm with parameters k and d \n");
|
||||
DISPLAY( "--optimize-cover[=steps=#,k=#,d=#] : optimize cover parameters with optional parameters\n");
|
||||
DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName);
|
||||
DISPLAY( "--maxdict ## : limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize);
|
||||
DISPLAY( " -s# : dictionary selectivity level (default: %u)\n", g_defaultSelectivityLevel);
|
||||
@ -192,6 +194,27 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
|
||||
}
|
||||
|
||||
|
||||
#ifndef ZSTD_NODICT
|
||||
/**
|
||||
* parseCoverParameters() :
|
||||
* reads cover parameters from *stringPtr (e.g. "--cover=smoothing=100,kmin=48,kstep=4,kmax=64,d=8") into *params
|
||||
* @return 1 means that cover parameters were correct
|
||||
* @return 0 in case of malformed parameters
|
||||
*/
|
||||
static unsigned parseCoverParameters(const char* stringPtr, COVER_params_t *params)
|
||||
{
|
||||
memset(params, 0, sizeof(*params));
|
||||
for (; ;) {
|
||||
if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
||||
if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
||||
if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
|
||||
return 0;
|
||||
}
|
||||
if (stringPtr[0] != 0) return 0;
|
||||
DISPLAYLEVEL(4, "k=%u\nd=%u\nsteps=%u\n", params->k, params->d, params->steps);
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
/** parseCompressionParameters() :
|
||||
* reads compression parameters from *stringPtr (e.g. "--zstd=wlog=23,clog=23,hlog=22,slog=6,slen=3,tlen=48,strat=6") into *params
|
||||
* @return 1 means that compression parameters were correct
|
||||
@ -254,6 +277,10 @@ int main(int argCount, const char* argv[])
|
||||
char* fileNamesBuf = NULL;
|
||||
unsigned fileNamesNb;
|
||||
#endif
|
||||
#ifndef ZSTD_NODICT
|
||||
COVER_params_t coverParams;
|
||||
int cover = 0;
|
||||
#endif
|
||||
|
||||
/* init */
|
||||
(void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */
|
||||
@ -318,6 +345,20 @@ int main(int argCount, const char* argv[])
|
||||
if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; }
|
||||
|
||||
/* long commands with arguments */
|
||||
#ifndef ZSTD_NODICT
|
||||
if (longCommandWArg(&argument, "--cover=")) {
|
||||
cover=1; if (!parseCoverParameters(argument, &coverParams)) CLEAN_RETURN(badusage(programName));
|
||||
continue;
|
||||
}
|
||||
if (longCommandWArg(&argument, "--optimize-cover")) {
|
||||
cover=2;
|
||||
/* Allow optional arguments following an = */
|
||||
if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
|
||||
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
|
||||
else if (!parseCoverParameters(argument, &coverParams)) { CLEAN_RETURN(badusage(programName)); }
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
if (longCommandWArg(&argument, "--memlimit=")) { memLimit = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--memory=")) { memLimit = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--memlimit-decompress=")) { memLimit = readU32FromChar(&argument); continue; }
|
||||
@ -520,13 +561,20 @@ int main(int argCount, const char* argv[])
|
||||
/* Check if dictionary builder is selected */
|
||||
if (operation==zom_train) {
|
||||
#ifndef ZSTD_NODICT
|
||||
if (cover) {
|
||||
coverParams.compressionLevel = dictCLevel;
|
||||
coverParams.notificationLevel = displayLevel;
|
||||
coverParams.dictID = dictID;
|
||||
DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, NULL, &coverParams, cover - 1);
|
||||
} else {
|
||||
ZDICT_params_t dictParams;
|
||||
memset(&dictParams, 0, sizeof(dictParams));
|
||||
dictParams.compressionLevel = dictCLevel;
|
||||
dictParams.selectivityLevel = dictSelect;
|
||||
dictParams.notificationLevel = displayLevel;
|
||||
dictParams.dictID = dictID;
|
||||
DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, dictParams);
|
||||
DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, &dictParams, NULL, 0);
|
||||
}
|
||||
#endif
|
||||
goto _end;
|
||||
}
|
||||
|
@ -20,10 +20,6 @@
|
||||
# zstreamtest32: Same as zstreamtest, but forced to compile in 32-bits mode
|
||||
# ##########################################################################
|
||||
|
||||
DESTDIR?=
|
||||
PREFIX ?= /usr/local
|
||||
BINDIR = $(PREFIX)/bin
|
||||
MANDIR = $(PREFIX)/share/man/man1
|
||||
ZSTDDIR = ../lib
|
||||
PRGDIR = ../programs
|
||||
PYTHON ?= python3
|
||||
@ -125,10 +121,10 @@ zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c zbufftest.c
|
||||
$(MAKE) -C $(ZSTDDIR) libzstd
|
||||
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@$(EXT)
|
||||
|
||||
zstreamtest : $(ZSTD_FILES) $(PRGDIR)/datagen.c zstreamtest.c
|
||||
zstreamtest : $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c zstreamtest.c
|
||||
$(CC) $(FLAGS) $^ -o $@$(EXT)
|
||||
|
||||
zstreamtest32 : $(ZSTD_FILES) $(PRGDIR)/datagen.c zstreamtest.c
|
||||
zstreamtest32 : $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c zstreamtest.c
|
||||
$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
|
||||
|
||||
zstreamtest-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd
|
||||
|
@ -28,6 +28,7 @@
|
||||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressContinue, ZSTD_compressBlock */
|
||||
#include "zstd.h" /* ZSTD_VERSION_STRING */
|
||||
#include "zstd_errors.h" /* ZSTD_getErrorCode */
|
||||
#define ZDICT_STATIC_LINKING_ONLY
|
||||
#include "zdict.h" /* ZDICT_trainFromBuffer */
|
||||
#include "datagen.h" /* RDG_genBuffer */
|
||||
#include "mem.h"
|
||||
@ -311,6 +312,70 @@ static int basicUnitTests(U32 seed, double compressibility)
|
||||
if (r != CNBuffSize) goto _output_error);
|
||||
DISPLAYLEVEL(4, "OK \n");
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : dictionary containing only header should return error : ", testNb++);
|
||||
{
|
||||
const size_t ret = ZSTD_decompress_usingDict(
|
||||
dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize,
|
||||
"\x37\xa4\x30\xec\x11\x22\x33\x44", 8);
|
||||
if (ZSTD_getErrorCode(ret) != ZSTD_error_dictionary_corrupted) goto _output_error;
|
||||
}
|
||||
DISPLAYLEVEL(4, "OK \n");
|
||||
|
||||
ZSTD_freeCCtx(cctx);
|
||||
ZSTD_freeDCtx(dctx);
|
||||
free(dictBuffer);
|
||||
free(samplesSizes);
|
||||
}
|
||||
|
||||
/* COVER dictionary builder tests */
|
||||
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
|
||||
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
|
||||
size_t dictSize = 16 KB;
|
||||
size_t optDictSize = dictSize;
|
||||
void* dictBuffer = malloc(dictSize);
|
||||
size_t const totalSampleSize = 1 MB;
|
||||
size_t const sampleUnitSize = 8 KB;
|
||||
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
|
||||
size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
|
||||
COVER_params_t params;
|
||||
U32 dictID;
|
||||
|
||||
if (dictBuffer==NULL || samplesSizes==NULL) {
|
||||
free(dictBuffer);
|
||||
free(samplesSizes);
|
||||
goto _output_error;
|
||||
}
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : COVER_trainFromBuffer : ", testNb++);
|
||||
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
|
||||
memset(¶ms, 0, sizeof(params));
|
||||
params.d = 1 + (FUZ_rand(&seed) % 16);
|
||||
params.k = params.d + (FUZ_rand(&seed) % 256);
|
||||
dictSize = COVER_trainFromBuffer(dictBuffer, dictSize,
|
||||
CNBuffer, samplesSizes, nbSamples,
|
||||
params);
|
||||
if (ZDICT_isError(dictSize)) goto _output_error;
|
||||
DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)dictSize);
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : check dictID : ", testNb++);
|
||||
dictID = ZDICT_getDictID(dictBuffer, dictSize);
|
||||
if (dictID==0) goto _output_error;
|
||||
DISPLAYLEVEL(4, "OK : %u \n", dictID);
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : COVER_optimizeTrainFromBuffer : ", testNb++);
|
||||
memset(¶ms, 0, sizeof(params));
|
||||
params.steps = 4;
|
||||
optDictSize = COVER_optimizeTrainFromBuffer(dictBuffer, optDictSize,
|
||||
CNBuffer, samplesSizes, nbSamples,
|
||||
¶ms);
|
||||
if (ZDICT_isError(optDictSize)) goto _output_error;
|
||||
DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)optDictSize);
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : check dictID : ", testNb++);
|
||||
dictID = ZDICT_getDictID(dictBuffer, optDictSize);
|
||||
if (dictID==0) goto _output_error;
|
||||
DISPLAYLEVEL(4, "OK : %u \n", dictID);
|
||||
|
||||
ZSTD_freeCCtx(cctx);
|
||||
ZSTD_freeDCtx(dctx);
|
||||
free(dictBuffer);
|
||||
|
@ -255,6 +255,27 @@ rm -rf dirTestDict
|
||||
rm tmp*
|
||||
|
||||
|
||||
$ECHO "\n**** cover dictionary tests **** "
|
||||
|
||||
TESTFILE=../programs/zstdcli.c
|
||||
./datagen > tmpDict
|
||||
$ECHO "- Create first dictionary"
|
||||
$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c -o tmpDict
|
||||
cp $TESTFILE tmp
|
||||
$ZSTD -f tmp -D tmpDict
|
||||
$ZSTD -d tmp.zst -D tmpDict -fo result
|
||||
$DIFF $TESTFILE result
|
||||
$ECHO "- Create second (different) dictionary"
|
||||
$ZSTD --train --cover=k=56,d=8 *.c ../programs/*.c ../programs/*.h -o tmpDictC
|
||||
$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
|
||||
$ECHO "- Create dictionary with short dictID"
|
||||
$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c --dictID 1 -o tmpDict1
|
||||
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
|
||||
$ECHO "- Create dictionary with size limit"
|
||||
$ZSTD --train --optimize-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict 4K
|
||||
rm tmp*
|
||||
|
||||
|
||||
$ECHO "\n**** integrity tests **** "
|
||||
|
||||
$ECHO "test one file (tmp1.zst) "
|
||||
|
@ -26,9 +26,10 @@
|
||||
#include <time.h> /* clock_t, clock() */
|
||||
#include <string.h> /* strcmp */
|
||||
#include "mem.h"
|
||||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_maxCLevel, ZSTD_customMem */
|
||||
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_maxCLevel, ZSTD_customMem, ZSTD_getDictID_fromFrame */
|
||||
#include "zstd.h" /* ZSTD_compressBound */
|
||||
#include "zstd_errors.h" /* ZSTD_error_srcSize_wrong */
|
||||
#include "zdict.h" /* ZDICT_trainFromBuffer */
|
||||
#include "datagen.h" /* RDG_genBuffer */
|
||||
#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
|
||||
#include "xxhash.h" /* XXH64_* */
|
||||
@ -44,8 +45,7 @@
|
||||
static const U32 nbTestsDefault = 10000;
|
||||
#define COMPRESSIBLE_NOISE_LENGTH (10 MB)
|
||||
#define FUZ_COMPRESSIBILITY_DEFAULT 50
|
||||
static const U32 prime1 = 2654435761U;
|
||||
static const U32 prime2 = 2246822519U;
|
||||
static const U32 prime32 = 2654435761U;
|
||||
|
||||
|
||||
/*-************************************
|
||||
@ -81,8 +81,9 @@ static clock_t FUZ_GetClockSpan(clock_t clockStart)
|
||||
#define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
|
||||
unsigned int FUZ_rand(unsigned int* seedPtr)
|
||||
{
|
||||
static const U32 prime2 = 2246822519U;
|
||||
U32 rand32 = *seedPtr;
|
||||
rand32 *= prime1;
|
||||
rand32 *= prime32;
|
||||
rand32 += prime2;
|
||||
rand32 = FUZ_rotl32(rand32, 13);
|
||||
*seedPtr = rand32;
|
||||
@ -107,6 +108,41 @@ static void freeFunction(void* opaque, void* address)
|
||||
* Basic Unit tests
|
||||
======================================================*/
|
||||
|
||||
typedef struct {
|
||||
void* start;
|
||||
size_t size;
|
||||
size_t filled;
|
||||
} buffer_t;
|
||||
|
||||
static const buffer_t g_nullBuffer = { NULL, 0 , 0 };
|
||||
|
||||
static buffer_t FUZ_createDictionary(const void* src, size_t srcSize, size_t blockSize, size_t requestedDictSize)
|
||||
{
|
||||
buffer_t dict = { NULL, 0, 0 };
|
||||
size_t const nbBlocks = (srcSize + (blockSize-1)) / blockSize;
|
||||
size_t* const blockSizes = (size_t*) malloc(nbBlocks * sizeof(size_t));
|
||||
if (!blockSizes) return dict;
|
||||
dict.start = malloc(requestedDictSize);
|
||||
if (!dict.start) { free(blockSizes); return dict; }
|
||||
{ size_t nb;
|
||||
for (nb=0; nb<nbBlocks-1; nb++) blockSizes[nb] = blockSize;
|
||||
blockSizes[nbBlocks-1] = srcSize - (blockSize * (nbBlocks-1));
|
||||
}
|
||||
{ size_t const dictSize = ZDICT_trainFromBuffer(dict.start, requestedDictSize, src, blockSizes, (unsigned)nbBlocks);
|
||||
free(blockSizes);
|
||||
if (ZDICT_isError(dictSize)) { free(dict.start); return (buffer_t){ NULL, 0, 0 }; }
|
||||
dict.size = requestedDictSize;
|
||||
dict.filled = dictSize;
|
||||
return dict; /* how to return dictSize ? */
|
||||
}
|
||||
}
|
||||
|
||||
static void FUZ_freeDictionary(buffer_t dict)
|
||||
{
|
||||
free(dict.start);
|
||||
}
|
||||
|
||||
|
||||
static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem customMem)
|
||||
{
|
||||
size_t const CNBufferSize = COMPRESSIBLE_NOISE_LENGTH;
|
||||
@ -123,6 +159,8 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
|
||||
ZSTD_DStream* zd = ZSTD_createDStream_advanced(customMem);
|
||||
ZSTD_inBuffer inBuff, inBuff2;
|
||||
ZSTD_outBuffer outBuff;
|
||||
buffer_t dictionary = g_nullBuffer;
|
||||
unsigned dictID = 0;
|
||||
|
||||
/* Create compressible test buffer */
|
||||
if (!CNBuffer || !compressedBuffer || !decodedBuffer || !zc || !zd) {
|
||||
@ -131,6 +169,15 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
|
||||
}
|
||||
RDG_genBuffer(CNBuffer, CNBufferSize, compressibility, 0., seed);
|
||||
|
||||
/* Create dictionary */
|
||||
MEM_STATIC_ASSERT(COMPRESSIBLE_NOISE_LENGTH >= 4 MB);
|
||||
dictionary = FUZ_createDictionary(CNBuffer, 4 MB, 4 KB, 40 KB);
|
||||
if (!dictionary.start) {
|
||||
DISPLAY("Error creating dictionary, aborting \n");
|
||||
goto _output_error;
|
||||
}
|
||||
dictID = ZDICT_getDictID(dictionary.start, dictionary.filled);
|
||||
|
||||
/* generate skippable frame */
|
||||
MEM_writeLE32(compressedBuffer, ZSTD_MAGIC_SKIPPABLE_START);
|
||||
MEM_writeLE32(((char*)compressedBuffer)+4, (U32)skippableFrameSize);
|
||||
@ -260,8 +307,6 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
|
||||
{ size_t const r = ZSTD_endStream(zc, &outBuff);
|
||||
if (r != 0) goto _output_error; } /* error, or some data not flushed */
|
||||
{ unsigned long long origSize = ZSTD_getDecompressedSize(outBuff.dst, outBuff.pos);
|
||||
DISPLAY("outBuff.pos : %u \n", (U32)outBuff.pos);
|
||||
DISPLAY("origSize = %u \n", (U32)origSize);
|
||||
if ((size_t)origSize != CNBufferSize) goto _output_error; } /* exact original size must be present */
|
||||
DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100);
|
||||
|
||||
@ -320,7 +365,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
|
||||
|
||||
/* CDict scenario */
|
||||
DISPLAYLEVEL(4, "test%3i : digested dictionary : ", testNb++);
|
||||
{ ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, 128 KB, 1);
|
||||
{ ZSTD_CDict* const cdict = ZSTD_createCDict(dictionary.start, dictionary.filled, 1);
|
||||
size_t const initError = ZSTD_initCStream_usingCDict(zc, cdict);
|
||||
if (ZSTD_isError(initError)) goto _output_error;
|
||||
cSize = 0;
|
||||
@ -346,9 +391,15 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
|
||||
DISPLAYLEVEL(4, "OK (%u bytes) \n", (U32)s);
|
||||
}
|
||||
|
||||
DISPLAYLEVEL(4, "test%3i : check Dictionary ID : ", testNb++);
|
||||
{ unsigned const dID = ZSTD_getDictID_fromFrame(compressedBuffer, cSize);
|
||||
if (dID != dictID) goto _output_error;
|
||||
DISPLAYLEVEL(4, "OK (%u) \n", dID);
|
||||
}
|
||||
|
||||
/* DDict scenario */
|
||||
DISPLAYLEVEL(4, "test%3i : decompress %u bytes with digested dictionary : ", testNb++, (U32)CNBufferSize);
|
||||
{ ZSTD_DDict* const ddict = ZSTD_createDDict(CNBuffer, 128 KB);
|
||||
{ ZSTD_DDict* const ddict = ZSTD_createDDict(dictionary.start, dictionary.filled);
|
||||
size_t const initError = ZSTD_initDStream_usingDDict(zd, ddict);
|
||||
if (ZSTD_isError(initError)) goto _output_error;
|
||||
inBuff.src = compressedBuffer;
|
||||
@ -388,6 +439,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
|
||||
|
||||
|
||||
_end:
|
||||
FUZ_freeDictionary(dictionary);
|
||||
ZSTD_freeCStream(zc);
|
||||
ZSTD_freeDStream(zd);
|
||||
free(CNBuffer);
|
||||
@ -492,7 +544,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres
|
||||
if (nbTests >= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); }
|
||||
else { DISPLAYUPDATE(2, "\r%6u ", testNb); }
|
||||
FUZ_rand(&coreSeed);
|
||||
lseed = coreSeed ^ prime1;
|
||||
lseed = coreSeed ^ prime32;
|
||||
|
||||
/* states full reset (deliberately not synchronized) */
|
||||
/* some issues can only happen when reusing states */
|
||||
|
Loading…
Reference in New Issue
Block a user