Merge remote-tracking branch 'refs/remotes/facebook/dev' into dev11

This commit is contained in:
Przemyslaw Skibinski 2017-01-17 13:02:29 +01:00
commit 8a0bc30a2d
24 changed files with 1374 additions and 37 deletions

View File

@ -88,7 +88,7 @@ travis-install:
$(MAKE) install PREFIX=~/install_test_dir
gpptest: clean
$(MAKE) -C programs all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
CC=g++ $(MAKE) -C programs all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
gcc5test: clean
gcc-5 -v

View File

@ -331,6 +331,10 @@
RelativePath="..\..\..\programs\datagen.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>

View File

@ -343,6 +343,10 @@
RelativePath="..\..\..\programs\dibio.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>

View File

@ -327,6 +327,10 @@
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>

View File

@ -332,6 +332,10 @@
RelativePath="..\..\..\programs\datagen.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>

View File

@ -344,6 +344,10 @@
RelativePath="..\..\..\programs\dibio.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>

View File

@ -328,6 +328,10 @@
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>

View File

@ -165,6 +165,7 @@
<ClCompile Include="..\..\..\lib\compress\zstd_compress.c" />
<ClCompile Include="..\..\..\lib\decompress\huf_decompress.c" />
<ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
<ClCompile Include="..\..\..\programs\datagen.c" />

View File

@ -32,6 +32,7 @@
<ClCompile Include="..\..\..\lib\deprecated\zbuff_common.c" />
<ClCompile Include="..\..\..\lib\deprecated\zbuff_compress.c" />
<ClCompile Include="..\..\..\lib\deprecated\zbuff_decompress.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
<ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />

View File

@ -32,6 +32,7 @@
<ClCompile Include="..\..\..\lib\deprecated\zbuff_common.c" />
<ClCompile Include="..\..\..\lib\deprecated\zbuff_compress.c" />
<ClCompile Include="..\..\..\lib\deprecated\zbuff_decompress.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
<ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />

View File

@ -29,6 +29,7 @@
<ClCompile Include="..\..\..\lib\compress\zstd_compress.c" />
<ClCompile Include="..\..\..\lib\decompress\huf_decompress.c" />
<ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
<ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />

View File

@ -67,6 +67,7 @@ SET(Sources
${LIBRARY_DIR}/compress/zstd_compress.c
${LIBRARY_DIR}/decompress/huf_decompress.c
${LIBRARY_DIR}/decompress/zstd_decompress.c
${LIBRARY_DIR}/dictBuilder/cover.c
${LIBRARY_DIR}/dictBuilder/divsufsort.c
${LIBRARY_DIR}/dictBuilder/zdict.c
${LIBRARY_DIR}/deprecated/zbuff_common.c

View File

@ -2970,6 +2970,7 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
size_t const initError = ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0);
zcs->cdict = cdict;
zcs->cctx->dictID = params.fParams.noDictIDFlag ? 0 : cdict->refContext->dictID;
return initError;
}

View File

@ -1444,7 +1444,7 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize);
#endif
ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
ZSTD_checkContinuity(dctx, dst);
return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
}

1023
lib/dictBuilder/cover.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -86,6 +86,57 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dict
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t parameters);
/*! COVER_params_t :
For all values 0 means default.
kMin and d are the only required parameters.
*/
typedef struct {
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (256) : Higher means more parameters checked */
unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
int compressionLevel; /* 0 means default; target a specific zstd compression level */
} COVER_params_t;
/*! COVER_trainFromBuffer() :
Train a dictionary from an array of samples using the COVER algorithm.
Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
The resulting dictionary will be saved into `dictBuffer`.
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
or an error code, which can be tested with ZDICT_isError().
Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
In general, it's recommended to provide a few thousands samples, but this can vary a lot.
It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
*/
ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
COVER_params_t parameters);
/*! COVER_optimizeTrainFromBuffer() :
The same requirements as above hold for all the parameters except `parameters`.
This function tries many parameter combinations and picks the best parameters.
`*parameters` is filled with the best parameters found, and the dictionary
constructed with those parameters is stored in `dictBuffer`.
All of the parameters d, k, steps are optional.
If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
if steps is zero it defaults to its default value.
If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
or an error code, which can be tested with ZDICT_isError().
On success `*parameters` contains the parameters selected.
Note : COVER_optimizeTrainFromBuffer() requires about 9 bytes of memory for each input byte.
*/
ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
COVER_params_t *parameters);
/*! ZDICT_finalizeDictionary() :

View File

@ -42,6 +42,7 @@
#define SAMPLESIZE_MAX (128 KB)
#define MEMMULT 11 /* rough estimation : memory cost to analyze 1 byte of sample */
#define COVER_MEMMULT 9 /* rough estimation : memory cost to analyze 1 byte of sample */
static const size_t maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
#define NOISELENGTH 32
@ -118,10 +119,36 @@ static unsigned DiB_loadFiles(void* buffer, size_t* bufferSizePtr,
fileSizes[n] = fileSize;
fclose(f);
} }
DISPLAYLEVEL(2, "\r%79s\r", "");
*bufferSizePtr = pos;
return n;
}
#define DiB_rotl32(x,r) ((x << r) | (x >> (32 - r)))
static U32 DiB_rand(U32* src)
{
static const U32 prime1 = 2654435761U;
static const U32 prime2 = 2246822519U;
U32 rand32 = *src;
rand32 *= prime1;
rand32 ^= prime2;
rand32 = DiB_rotl32(rand32, 13);
*src = rand32;
return rand32 >> 5;
}
static void DiB_shuffle(const char** fileNamesTable, unsigned nbFiles) {
/* Initialize the pseudorandom number generator */
U32 seed = 0xFD2FB528;
unsigned i;
for (i = nbFiles - 1; i > 0; --i) {
unsigned const j = DiB_rand(&seed) % (i + 1);
const char* tmp = fileNamesTable[j];
fileNamesTable[j] = fileNamesTable[i];
fileNamesTable[i] = tmp;
}
}
/*-********************************************************
* Dictionary training functions
@ -202,19 +229,23 @@ size_t ZDICT_trainFromBuffer_unsafe(void* dictBuffer, size_t dictBufferCapacity,
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
const char** fileNamesTable, unsigned nbFiles,
ZDICT_params_t params)
ZDICT_params_t *params, COVER_params_t *coverParams,
int optimizeCover)
{
void* const dictBuffer = malloc(maxDictSize);
size_t* const fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
unsigned long long const totalSizeToLoad = DiB_getTotalCappedFileSize(fileNamesTable, nbFiles);
size_t const maxMem = DiB_findMaxMem(totalSizeToLoad * MEMMULT) / MEMMULT;
size_t const memMult = params ? MEMMULT : COVER_MEMMULT;
size_t const maxMem = DiB_findMaxMem(totalSizeToLoad * memMult) / memMult;
size_t benchedSize = (size_t) MIN ((unsigned long long)maxMem, totalSizeToLoad);
void* const srcBuffer = malloc(benchedSize+NOISELENGTH);
int result = 0;
/* Checks */
if (params) g_displayLevel = params->notificationLevel;
else if (coverParams) g_displayLevel = coverParams->notificationLevel;
else EXM_THROW(13, "Neither dictionary algorith selected"); /* should not happen */
if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */
g_displayLevel = params.notificationLevel;
if (g_tooLargeSamples) {
DISPLAYLEVEL(2, "! Warning : some samples are very large \n");
DISPLAYLEVEL(2, "! Note that dictionary is only useful for small files or beginning of large files. \n");
@ -233,12 +264,29 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(benchedSize >> 20));
/* Load input buffer */
DISPLAYLEVEL(3, "Shuffling input files\n");
DiB_shuffle(fileNamesTable, nbFiles);
nbFiles = DiB_loadFiles(srcBuffer, &benchedSize, fileSizes, fileNamesTable, nbFiles);
DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH); /* guard band, for end of buffer condition */
{ size_t const dictSize = ZDICT_trainFromBuffer_unsafe(dictBuffer, maxDictSize,
{
size_t dictSize;
if (params) {
DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH); /* guard band, for end of buffer condition */
dictSize = ZDICT_trainFromBuffer_unsafe(dictBuffer, maxDictSize,
srcBuffer, fileSizes, nbFiles,
params);
*params);
} else if (optimizeCover) {
dictSize = COVER_optimizeTrainFromBuffer(
dictBuffer, maxDictSize, srcBuffer, fileSizes, nbFiles,
coverParams);
if (!ZDICT_isError(dictSize)) {
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\n", coverParams->k, coverParams->d, coverParams->steps);
}
} else {
dictSize = COVER_trainFromBuffer(dictBuffer, maxDictSize,
srcBuffer, fileSizes, nbFiles,
*coverParams);
}
if (ZDICT_isError(dictSize)) {
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
result = 1;

View File

@ -32,7 +32,7 @@
*/
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
const char** fileNamesTable, unsigned nbFiles,
ZDICT_params_t parameters);
ZDICT_params_t *params, COVER_params_t *coverParams,
int optimizeCover);
#endif

View File

@ -69,6 +69,8 @@ from standard input if it is a terminal.
.PP
Unless
.B \-\-stdout
or
.B \-o
is specified,
.I files
are written to a new file whose name is derived from the source
@ -159,7 +161,8 @@ No files are created or removed.
# compression level [1-19] (default:3)
.TP
.BR \--ultra
unlocks high compression levels 20+ (maximum 22), using a lot more memory
unlocks high compression levels 20+ (maximum 22), using a lot more memory.
Note that decompression will also require more memory when using these levels.
.TP
.B \-D file
use `file` as Dictionary to compress or decompress FILE(s)

View File

@ -127,6 +127,8 @@ static int usage_advanced(const char* programName)
DISPLAY( "\n");
DISPLAY( "Dictionary builder :\n");
DISPLAY( "--train ## : create a dictionary from a training set of files \n");
DISPLAY( "--cover=k=#,d=# : use the cover algorithm with parameters k and d \n");
DISPLAY( "--optimize-cover[=steps=#,k=#,d=#] : optimize cover parameters with optional parameters\n");
DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName);
DISPLAY( "--maxdict ## : limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize);
DISPLAY( " -s# : dictionary selectivity level (default: %u)\n", g_defaultSelectivityLevel);
@ -192,6 +194,27 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
}
#ifndef ZSTD_NODICT
/**
* parseCoverParameters() :
* reads cover parameters from *stringPtr (e.g. "--cover=smoothing=100,kmin=48,kstep=4,kmax=64,d=8") into *params
* @return 1 means that cover parameters were correct
* @return 0 in case of malformed parameters
*/
static unsigned parseCoverParameters(const char* stringPtr, COVER_params_t *params)
{
memset(params, 0, sizeof(*params));
for (; ;) {
if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
return 0;
}
if (stringPtr[0] != 0) return 0;
DISPLAYLEVEL(4, "k=%u\nd=%u\nsteps=%u\n", params->k, params->d, params->steps);
return 1;
}
#endif
/** parseCompressionParameters() :
* reads compression parameters from *stringPtr (e.g. "--zstd=wlog=23,clog=23,hlog=22,slog=6,slen=3,tlen=48,strat=6") into *params
* @return 1 means that compression parameters were correct
@ -254,6 +277,10 @@ int main(int argCount, const char* argv[])
char* fileNamesBuf = NULL;
unsigned fileNamesNb;
#endif
#ifndef ZSTD_NODICT
COVER_params_t coverParams;
int cover = 0;
#endif
/* init */
(void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */
@ -318,6 +345,20 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; }
/* long commands with arguments */
#ifndef ZSTD_NODICT
if (longCommandWArg(&argument, "--cover=")) {
cover=1; if (!parseCoverParameters(argument, &coverParams)) CLEAN_RETURN(badusage(programName));
continue;
}
if (longCommandWArg(&argument, "--optimize-cover")) {
cover=2;
/* Allow optional arguments following an = */
if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
else if (!parseCoverParameters(argument, &coverParams)) { CLEAN_RETURN(badusage(programName)); }
continue;
}
#endif
if (longCommandWArg(&argument, "--memlimit=")) { memLimit = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--memory=")) { memLimit = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--memlimit-decompress=")) { memLimit = readU32FromChar(&argument); continue; }
@ -520,13 +561,20 @@ int main(int argCount, const char* argv[])
/* Check if dictionary builder is selected */
if (operation==zom_train) {
#ifndef ZSTD_NODICT
if (cover) {
coverParams.compressionLevel = dictCLevel;
coverParams.notificationLevel = displayLevel;
coverParams.dictID = dictID;
DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, NULL, &coverParams, cover - 1);
} else {
ZDICT_params_t dictParams;
memset(&dictParams, 0, sizeof(dictParams));
dictParams.compressionLevel = dictCLevel;
dictParams.selectivityLevel = dictSelect;
dictParams.notificationLevel = displayLevel;
dictParams.dictID = dictID;
DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, dictParams);
DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, &dictParams, NULL, 0);
}
#endif
goto _end;
}

View File

@ -20,10 +20,6 @@
# zstreamtest32: Same as zstreamtest, but forced to compile in 32-bits mode
# ##########################################################################
DESTDIR?=
PREFIX ?= /usr/local
BINDIR = $(PREFIX)/bin
MANDIR = $(PREFIX)/share/man/man1
ZSTDDIR = ../lib
PRGDIR = ../programs
PYTHON ?= python3
@ -125,10 +121,10 @@ zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c zbufftest.c
$(MAKE) -C $(ZSTDDIR) libzstd
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@$(EXT)
zstreamtest : $(ZSTD_FILES) $(PRGDIR)/datagen.c zstreamtest.c
zstreamtest : $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c zstreamtest.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
zstreamtest32 : $(ZSTD_FILES) $(PRGDIR)/datagen.c zstreamtest.c
zstreamtest32 : $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c zstreamtest.c
$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
zstreamtest-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd

View File

@ -28,6 +28,7 @@
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressContinue, ZSTD_compressBlock */
#include "zstd.h" /* ZSTD_VERSION_STRING */
#include "zstd_errors.h" /* ZSTD_getErrorCode */
#define ZDICT_STATIC_LINKING_ONLY
#include "zdict.h" /* ZDICT_trainFromBuffer */
#include "datagen.h" /* RDG_genBuffer */
#include "mem.h"
@ -311,6 +312,70 @@ static int basicUnitTests(U32 seed, double compressibility)
if (r != CNBuffSize) goto _output_error);
DISPLAYLEVEL(4, "OK \n");
DISPLAYLEVEL(4, "test%3i : dictionary containing only header should return error : ", testNb++);
{
const size_t ret = ZSTD_decompress_usingDict(
dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize,
"\x37\xa4\x30\xec\x11\x22\x33\x44", 8);
if (ZSTD_getErrorCode(ret) != ZSTD_error_dictionary_corrupted) goto _output_error;
}
DISPLAYLEVEL(4, "OK \n");
ZSTD_freeCCtx(cctx);
ZSTD_freeDCtx(dctx);
free(dictBuffer);
free(samplesSizes);
}
/* COVER dictionary builder tests */
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
size_t dictSize = 16 KB;
size_t optDictSize = dictSize;
void* dictBuffer = malloc(dictSize);
size_t const totalSampleSize = 1 MB;
size_t const sampleUnitSize = 8 KB;
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
COVER_params_t params;
U32 dictID;
if (dictBuffer==NULL || samplesSizes==NULL) {
free(dictBuffer);
free(samplesSizes);
goto _output_error;
}
DISPLAYLEVEL(4, "test%3i : COVER_trainFromBuffer : ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
memset(&params, 0, sizeof(params));
params.d = 1 + (FUZ_rand(&seed) % 16);
params.k = params.d + (FUZ_rand(&seed) % 256);
dictSize = COVER_trainFromBuffer(dictBuffer, dictSize,
CNBuffer, samplesSizes, nbSamples,
params);
if (ZDICT_isError(dictSize)) goto _output_error;
DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)dictSize);
DISPLAYLEVEL(4, "test%3i : check dictID : ", testNb++);
dictID = ZDICT_getDictID(dictBuffer, dictSize);
if (dictID==0) goto _output_error;
DISPLAYLEVEL(4, "OK : %u \n", dictID);
DISPLAYLEVEL(4, "test%3i : COVER_optimizeTrainFromBuffer : ", testNb++);
memset(&params, 0, sizeof(params));
params.steps = 4;
optDictSize = COVER_optimizeTrainFromBuffer(dictBuffer, optDictSize,
CNBuffer, samplesSizes, nbSamples,
&params);
if (ZDICT_isError(optDictSize)) goto _output_error;
DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)optDictSize);
DISPLAYLEVEL(4, "test%3i : check dictID : ", testNb++);
dictID = ZDICT_getDictID(dictBuffer, optDictSize);
if (dictID==0) goto _output_error;
DISPLAYLEVEL(4, "OK : %u \n", dictID);
ZSTD_freeCCtx(cctx);
ZSTD_freeDCtx(dctx);
free(dictBuffer);

View File

@ -255,6 +255,27 @@ rm -rf dirTestDict
rm tmp*
$ECHO "\n**** cover dictionary tests **** "
TESTFILE=../programs/zstdcli.c
./datagen > tmpDict
$ECHO "- Create first dictionary"
$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c -o tmpDict
cp $TESTFILE tmp
$ZSTD -f tmp -D tmpDict
$ZSTD -d tmp.zst -D tmpDict -fo result
$DIFF $TESTFILE result
$ECHO "- Create second (different) dictionary"
$ZSTD --train --cover=k=56,d=8 *.c ../programs/*.c ../programs/*.h -o tmpDictC
$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
$ECHO "- Create dictionary with short dictID"
$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c --dictID 1 -o tmpDict1
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
$ECHO "- Create dictionary with size limit"
$ZSTD --train --optimize-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict 4K
rm tmp*
$ECHO "\n**** integrity tests **** "
$ECHO "test one file (tmp1.zst) "

View File

@ -26,9 +26,10 @@
#include <time.h> /* clock_t, clock() */
#include <string.h> /* strcmp */
#include "mem.h"
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_maxCLevel, ZSTD_customMem */
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_maxCLevel, ZSTD_customMem, ZSTD_getDictID_fromFrame */
#include "zstd.h" /* ZSTD_compressBound */
#include "zstd_errors.h" /* ZSTD_error_srcSize_wrong */
#include "zdict.h" /* ZDICT_trainFromBuffer */
#include "datagen.h" /* RDG_genBuffer */
#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
#include "xxhash.h" /* XXH64_* */
@ -44,8 +45,7 @@
static const U32 nbTestsDefault = 10000;
#define COMPRESSIBLE_NOISE_LENGTH (10 MB)
#define FUZ_COMPRESSIBILITY_DEFAULT 50
static const U32 prime1 = 2654435761U;
static const U32 prime2 = 2246822519U;
static const U32 prime32 = 2654435761U;
/*-************************************
@ -81,8 +81,9 @@ static clock_t FUZ_GetClockSpan(clock_t clockStart)
#define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
unsigned int FUZ_rand(unsigned int* seedPtr)
{
static const U32 prime2 = 2246822519U;
U32 rand32 = *seedPtr;
rand32 *= prime1;
rand32 *= prime32;
rand32 += prime2;
rand32 = FUZ_rotl32(rand32, 13);
*seedPtr = rand32;
@ -107,6 +108,41 @@ static void freeFunction(void* opaque, void* address)
* Basic Unit tests
======================================================*/
typedef struct {
void* start;
size_t size;
size_t filled;
} buffer_t;
static const buffer_t g_nullBuffer = { NULL, 0 , 0 };
static buffer_t FUZ_createDictionary(const void* src, size_t srcSize, size_t blockSize, size_t requestedDictSize)
{
buffer_t dict = { NULL, 0, 0 };
size_t const nbBlocks = (srcSize + (blockSize-1)) / blockSize;
size_t* const blockSizes = (size_t*) malloc(nbBlocks * sizeof(size_t));
if (!blockSizes) return dict;
dict.start = malloc(requestedDictSize);
if (!dict.start) { free(blockSizes); return dict; }
{ size_t nb;
for (nb=0; nb<nbBlocks-1; nb++) blockSizes[nb] = blockSize;
blockSizes[nbBlocks-1] = srcSize - (blockSize * (nbBlocks-1));
}
{ size_t const dictSize = ZDICT_trainFromBuffer(dict.start, requestedDictSize, src, blockSizes, (unsigned)nbBlocks);
free(blockSizes);
if (ZDICT_isError(dictSize)) { free(dict.start); return (buffer_t){ NULL, 0, 0 }; }
dict.size = requestedDictSize;
dict.filled = dictSize;
return dict; /* how to return dictSize ? */
}
}
static void FUZ_freeDictionary(buffer_t dict)
{
free(dict.start);
}
static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem customMem)
{
size_t const CNBufferSize = COMPRESSIBLE_NOISE_LENGTH;
@ -123,6 +159,8 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
ZSTD_DStream* zd = ZSTD_createDStream_advanced(customMem);
ZSTD_inBuffer inBuff, inBuff2;
ZSTD_outBuffer outBuff;
buffer_t dictionary = g_nullBuffer;
unsigned dictID = 0;
/* Create compressible test buffer */
if (!CNBuffer || !compressedBuffer || !decodedBuffer || !zc || !zd) {
@ -131,6 +169,15 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
}
RDG_genBuffer(CNBuffer, CNBufferSize, compressibility, 0., seed);
/* Create dictionary */
MEM_STATIC_ASSERT(COMPRESSIBLE_NOISE_LENGTH >= 4 MB);
dictionary = FUZ_createDictionary(CNBuffer, 4 MB, 4 KB, 40 KB);
if (!dictionary.start) {
DISPLAY("Error creating dictionary, aborting \n");
goto _output_error;
}
dictID = ZDICT_getDictID(dictionary.start, dictionary.filled);
/* generate skippable frame */
MEM_writeLE32(compressedBuffer, ZSTD_MAGIC_SKIPPABLE_START);
MEM_writeLE32(((char*)compressedBuffer)+4, (U32)skippableFrameSize);
@ -260,8 +307,6 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
{ size_t const r = ZSTD_endStream(zc, &outBuff);
if (r != 0) goto _output_error; } /* error, or some data not flushed */
{ unsigned long long origSize = ZSTD_getDecompressedSize(outBuff.dst, outBuff.pos);
DISPLAY("outBuff.pos : %u \n", (U32)outBuff.pos);
DISPLAY("origSize = %u \n", (U32)origSize);
if ((size_t)origSize != CNBufferSize) goto _output_error; } /* exact original size must be present */
DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100);
@ -320,7 +365,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
/* CDict scenario */
DISPLAYLEVEL(4, "test%3i : digested dictionary : ", testNb++);
{ ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, 128 KB, 1);
{ ZSTD_CDict* const cdict = ZSTD_createCDict(dictionary.start, dictionary.filled, 1);
size_t const initError = ZSTD_initCStream_usingCDict(zc, cdict);
if (ZSTD_isError(initError)) goto _output_error;
cSize = 0;
@ -346,9 +391,15 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
DISPLAYLEVEL(4, "OK (%u bytes) \n", (U32)s);
}
DISPLAYLEVEL(4, "test%3i : check Dictionary ID : ", testNb++);
{ unsigned const dID = ZSTD_getDictID_fromFrame(compressedBuffer, cSize);
if (dID != dictID) goto _output_error;
DISPLAYLEVEL(4, "OK (%u) \n", dID);
}
/* DDict scenario */
DISPLAYLEVEL(4, "test%3i : decompress %u bytes with digested dictionary : ", testNb++, (U32)CNBufferSize);
{ ZSTD_DDict* const ddict = ZSTD_createDDict(CNBuffer, 128 KB);
{ ZSTD_DDict* const ddict = ZSTD_createDDict(dictionary.start, dictionary.filled);
size_t const initError = ZSTD_initDStream_usingDDict(zd, ddict);
if (ZSTD_isError(initError)) goto _output_error;
inBuff.src = compressedBuffer;
@ -388,6 +439,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
_end:
FUZ_freeDictionary(dictionary);
ZSTD_freeCStream(zc);
ZSTD_freeDStream(zd);
free(CNBuffer);
@ -492,7 +544,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres
if (nbTests >= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); }
else { DISPLAYUPDATE(2, "\r%6u ", testNb); }
FUZ_rand(&coreSeed);
lseed = coreSeed ^ prime1;
lseed = coreSeed ^ prime32;
/* states full reset (deliberately not synchronized) */
/* some issues can only happen when reusing states */