Merge remote-tracking branch 'refs/remotes/facebook/dev' into dev11

This commit is contained in:
Przemyslaw Skibinski 2017-01-17 13:02:29 +01:00
commit 8a0bc30a2d
24 changed files with 1374 additions and 37 deletions

View File

@ -88,7 +88,7 @@ travis-install:
$(MAKE) install PREFIX=~/install_test_dir
gpptest: clean
$(MAKE) -C programs all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
CC=g++ $(MAKE) -C programs all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
gcc5test: clean
gcc-5 -v

View File

@ -331,6 +331,10 @@
RelativePath="..\..\..\programs\datagen.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>

View File

@ -343,6 +343,10 @@
RelativePath="..\..\..\programs\dibio.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>

View File

@ -327,6 +327,10 @@
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>

View File

@ -332,6 +332,10 @@
RelativePath="..\..\..\programs\datagen.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>

View File

@ -344,6 +344,10 @@
RelativePath="..\..\..\programs\dibio.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>

View File

@ -328,6 +328,10 @@
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\lib\dictBuilder\cover.c"
>
</File>
<File
RelativePath="..\..\..\lib\dictBuilder\divsufsort.c"
>

View File

@ -165,6 +165,7 @@
<ClCompile Include="..\..\..\lib\compress\zstd_compress.c" />
<ClCompile Include="..\..\..\lib\decompress\huf_decompress.c" />
<ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
<ClCompile Include="..\..\..\programs\datagen.c" />

View File

@ -32,6 +32,7 @@
<ClCompile Include="..\..\..\lib\deprecated\zbuff_common.c" />
<ClCompile Include="..\..\..\lib\deprecated\zbuff_compress.c" />
<ClCompile Include="..\..\..\lib\deprecated\zbuff_decompress.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
<ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />

View File

@ -32,6 +32,7 @@
<ClCompile Include="..\..\..\lib\deprecated\zbuff_common.c" />
<ClCompile Include="..\..\..\lib\deprecated\zbuff_compress.c" />
<ClCompile Include="..\..\..\lib\deprecated\zbuff_decompress.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
<ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />

View File

@ -29,6 +29,7 @@
<ClCompile Include="..\..\..\lib\compress\zstd_compress.c" />
<ClCompile Include="..\..\..\lib\decompress\huf_decompress.c" />
<ClCompile Include="..\..\..\lib\decompress\zstd_decompress.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\cover.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\divsufsort.c" />
<ClCompile Include="..\..\..\lib\dictBuilder\zdict.c" />
<ClCompile Include="..\..\..\lib\legacy\zstd_v01.c" />

View File

@ -67,6 +67,7 @@ SET(Sources
${LIBRARY_DIR}/compress/zstd_compress.c
${LIBRARY_DIR}/decompress/huf_decompress.c
${LIBRARY_DIR}/decompress/zstd_decompress.c
${LIBRARY_DIR}/dictBuilder/cover.c
${LIBRARY_DIR}/dictBuilder/divsufsort.c
${LIBRARY_DIR}/dictBuilder/zdict.c
${LIBRARY_DIR}/deprecated/zbuff_common.c

View File

@ -2970,6 +2970,7 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
size_t const initError = ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0);
zcs->cdict = cdict;
zcs->cctx->dictID = params.fParams.noDictIDFlag ? 0 : cdict->refContext->dictID;
return initError;
}

View File

@ -1444,7 +1444,7 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize);
#endif
ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
ZSTD_checkContinuity(dctx, dst);
return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
}

1023
lib/dictBuilder/cover.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -86,6 +86,57 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dict
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t parameters);
/*! COVER_params_t :
For all values 0 means default.
k and d are the only required parameters.
*/
typedef struct {
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (256) : Higher means more parameters checked */
unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
int compressionLevel; /* 0 means default; target a specific zstd compression level */
} COVER_params_t;
/*! COVER_trainFromBuffer() :
Train a dictionary from an array of samples using the COVER algorithm.
Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
The resulting dictionary will be saved into `dictBuffer`.
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
or an error code, which can be tested with ZDICT_isError().
Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
It's obviously possible to target smaller or larger ones, just by specifying a different `dictBufferCapacity`.
In general, it's recommended to provide a few thousand samples, but this can vary a lot.
It's recommended that the total size of all samples be about 100 times the target size of the dictionary.
*/
ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
COVER_params_t parameters);
/*! COVER_optimizeTrainFromBuffer() :
The same requirements as for COVER_trainFromBuffer() hold for all the parameters except `parameters`.
This function tries many parameter combinations and picks the best parameters.
`*parameters` is filled with the best parameters found, and the dictionary
constructed with those parameters is stored in `dictBuffer`.
All of the parameters d, k, steps are optional.
If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
If steps is zero, its default value is used.
If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [16, 2048].
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
or an error code, which can be tested with ZDICT_isError().
On success `*parameters` contains the parameters selected.
Note : COVER_optimizeTrainFromBuffer() requires about 9 bytes of memory for each input byte.
*/
ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
COVER_params_t *parameters);
/*! ZDICT_finalizeDictionary() :

View File

@ -42,6 +42,7 @@
#define SAMPLESIZE_MAX (128 KB)
#define MEMMULT 11 /* rough estimation : memory cost to analyze 1 byte of sample */
#define COVER_MEMMULT 9 /* rough estimation : memory cost to analyze 1 byte of sample */
static const size_t maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t));
#define NOISELENGTH 32
@ -118,10 +119,36 @@ static unsigned DiB_loadFiles(void* buffer, size_t* bufferSizePtr,
fileSizes[n] = fileSize;
fclose(f);
} }
DISPLAYLEVEL(2, "\r%79s\r", "");
*bufferSizePtr = pos;
return n;
}
/* DiB_rotl32() :
 * rotates the 32-bit value `x` left by `r` bits (expects 0 < r < 32).
 * Arguments are fully parenthesized so that expressions can be passed safely. */
#define DiB_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))

/* DiB_rand() :
 * small deterministic pseudo-random generator.
 * `*src` holds the generator state : it is read, scrambled
 * (multiply, xor, rotate) and written back on every call.
 * @return : the new state shifted right by 5 bits */
static U32 DiB_rand(U32* src)
{
    static const U32 prime1 = 2654435761U;
    static const U32 prime2 = 2246822519U;
    U32 rand32 = *src;
    rand32 *= prime1;
    rand32 ^= prime2;
    rand32  = DiB_rotl32(rand32, 13);
    *src = rand32;   /* save updated state for the next call */
    return rand32 >> 5;
}
/* DiB_shuffle() :
 * shuffles `fileNamesTable` in place (Fisher-Yates), walking from the last
 * entry down to the second one and swapping each with a randomly selected
 * entry at or before it.
 * The seed is a fixed constant, so a given table is always shuffled the same way.
 * Tables with fewer than 2 entries are left untouched. */
static void DiB_shuffle(const char** fileNamesTable, unsigned nbFiles) {
    /* Initialize the pseudorandom number generator */
    U32 seed = 0xFD2FB528;
    unsigned i;
    /* Nothing to shuffle; also prevents `nbFiles - 1` from wrapping when nbFiles == 0 */
    if (nbFiles < 2) return;
    for (i = nbFiles - 1; i > 0; --i) {
        unsigned const j = DiB_rand(&seed) % (i + 1);   /* 0 <= j <= i */
        const char* const tmp = fileNamesTable[j];
        fileNamesTable[j] = fileNamesTable[i];
        fileNamesTable[i] = tmp;
    }
}
/*-********************************************************
* Dictionary training functions
@ -202,19 +229,23 @@ size_t ZDICT_trainFromBuffer_unsafe(void* dictBuffer, size_t dictBufferCapacity,
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
const char** fileNamesTable, unsigned nbFiles,
ZDICT_params_t params)
ZDICT_params_t *params, COVER_params_t *coverParams,
int optimizeCover)
{
void* const dictBuffer = malloc(maxDictSize);
size_t* const fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
unsigned long long const totalSizeToLoad = DiB_getTotalCappedFileSize(fileNamesTable, nbFiles);
size_t const maxMem = DiB_findMaxMem(totalSizeToLoad * MEMMULT) / MEMMULT;
size_t const memMult = params ? MEMMULT : COVER_MEMMULT;
size_t const maxMem = DiB_findMaxMem(totalSizeToLoad * memMult) / memMult;
size_t benchedSize = (size_t) MIN ((unsigned long long)maxMem, totalSizeToLoad);
void* const srcBuffer = malloc(benchedSize+NOISELENGTH);
int result = 0;
/* Checks */
if (params) g_displayLevel = params->notificationLevel;
else if (coverParams) g_displayLevel = coverParams->notificationLevel;
else EXM_THROW(13, "Neither dictionary algorith selected"); /* should not happen */
if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */
g_displayLevel = params.notificationLevel;
if (g_tooLargeSamples) {
DISPLAYLEVEL(2, "! Warning : some samples are very large \n");
DISPLAYLEVEL(2, "! Note that dictionary is only useful for small files or beginning of large files. \n");
@ -233,12 +264,29 @@ int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
DISPLAYLEVEL(1, "Not enough memory; training on %u MB only...\n", (unsigned)(benchedSize >> 20));
/* Load input buffer */
DISPLAYLEVEL(3, "Shuffling input files\n");
DiB_shuffle(fileNamesTable, nbFiles);
nbFiles = DiB_loadFiles(srcBuffer, &benchedSize, fileSizes, fileNamesTable, nbFiles);
DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH); /* guard band, for end of buffer condition */
{ size_t const dictSize = ZDICT_trainFromBuffer_unsafe(dictBuffer, maxDictSize,
srcBuffer, fileSizes, nbFiles,
params);
{
size_t dictSize;
if (params) {
DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH); /* guard band, for end of buffer condition */
dictSize = ZDICT_trainFromBuffer_unsafe(dictBuffer, maxDictSize,
srcBuffer, fileSizes, nbFiles,
*params);
} else if (optimizeCover) {
dictSize = COVER_optimizeTrainFromBuffer(
dictBuffer, maxDictSize, srcBuffer, fileSizes, nbFiles,
coverParams);
if (!ZDICT_isError(dictSize)) {
DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\n", coverParams->k, coverParams->d, coverParams->steps);
}
} else {
dictSize = COVER_trainFromBuffer(dictBuffer, maxDictSize,
srcBuffer, fileSizes, nbFiles,
*coverParams);
}
if (ZDICT_isError(dictSize)) {
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
result = 1;

View File

@ -32,7 +32,7 @@
*/
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
const char** fileNamesTable, unsigned nbFiles,
ZDICT_params_t parameters);
ZDICT_params_t *params, COVER_params_t *coverParams,
int optimizeCover);
#endif

View File

@ -69,6 +69,8 @@ from standard input if it is a terminal.
.PP
Unless
.B \-\-stdout
or
.B \-o
is specified,
.I files
are written to a new file whose name is derived from the source
@ -159,7 +161,8 @@ No files are created or removed.
# compression level [1-19] (default:3)
.TP
.BR \--ultra
unlocks high compression levels 20+ (maximum 22), using a lot more memory
unlocks high compression levels 20+ (maximum 22), using a lot more memory.
Note that decompression will also require more memory when using these levels.
.TP
.B \-D file
use `file` as Dictionary to compress or decompress FILE(s)
@ -293,7 +296,7 @@ There are 8 strategies numbered from 0 to 7, from faster to stronger:
.PD
Specify the maximum number of bits for a match distance.
.IP ""
The higher number of bits increases the chance to find a match what usually improves compression ratio.
A higher number of bits increases the chance of finding a match, which usually improves the compression ratio.
It also increases memory requirements for compressor and decompressor.
.IP ""
The minimum \fIwlog\fR is 10 (1 KiB) and the maximum is 25 (32 MiB) for 32-bit compilation and 27 (128 MiB) for 64-bit compilation.
@ -319,7 +322,7 @@ The minimum \fIhlog\fR is 6 (64 B) and the maximum is 25 (32 MiB) for 32-bit com
.PD
Specify the maximum number of bits for a hash chain or a binary tree.
.IP ""
The higher number of bits increases the chance to find a match what usually improves compression ratio.
A higher number of bits increases the chance of finding a match, which usually improves the compression ratio.
It also slows down compression speed and increases memory requirements for compression.
This option is ignored for the ZSTD_fast strategy.
.IP ""

View File

@ -127,6 +127,8 @@ static int usage_advanced(const char* programName)
DISPLAY( "\n");
DISPLAY( "Dictionary builder :\n");
DISPLAY( "--train ## : create a dictionary from a training set of files \n");
DISPLAY( "--cover=k=#,d=# : use the cover algorithm with parameters k and d \n");
DISPLAY( "--optimize-cover[=steps=#,k=#,d=#] : optimize cover parameters with optional parameters\n");
DISPLAY( " -o file : `file` is dictionary name (default: %s) \n", g_defaultDictName);
DISPLAY( "--maxdict ## : limit dictionary to specified size (default : %u) \n", g_defaultMaxDictSize);
DISPLAY( " -s# : dictionary selectivity level (default: %u)\n", g_defaultSelectivityLevel);
@ -192,6 +194,27 @@ static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
}
#ifndef ZSTD_NODICT
/**
 * parseCoverParameters() :
 * reads cover parameters from stringPtr, a comma-separated list of
 * `k=#`, `d=#` and `steps=#` entries (e.g. "k=48,d=8,steps=32"), into *params.
 * *params is zeroed first, so any parameter not listed is left at 0.
 * @return 1 means that cover parameters were correct
 * @return 0 in case of malformed parameters
 */
static unsigned parseCoverParameters(const char* stringPtr, COVER_params_t *params)
{
    memset(params, 0, sizeof(*params));
    for (; ;) {
        if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); }
        else if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); }
        else if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); }
        else return 0;   /* unknown parameter name */
        /* a comma announces another parameter; anything else ends the list */
        if (stringPtr[0] != ',') break;
        stringPtr++;
    }
    if (stringPtr[0] != 0) return 0;   /* trailing characters after the last value */
    DISPLAYLEVEL(4, "k=%u\nd=%u\nsteps=%u\n", params->k, params->d, params->steps);
    return 1;
}
#endif
/** parseCompressionParameters() :
* reads compression parameters from *stringPtr (e.g. "--zstd=wlog=23,clog=23,hlog=22,slog=6,slen=3,tlen=48,strat=6") into *params
* @return 1 means that compression parameters were correct
@ -254,6 +277,10 @@ int main(int argCount, const char* argv[])
char* fileNamesBuf = NULL;
unsigned fileNamesNb;
#endif
#ifndef ZSTD_NODICT
COVER_params_t coverParams;
int cover = 0;
#endif
/* init */
(void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */
@ -318,6 +345,20 @@ int main(int argCount, const char* argv[])
if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(1); continue; }
/* long commands with arguments */
#ifndef ZSTD_NODICT
if (longCommandWArg(&argument, "--cover=")) {
cover=1; if (!parseCoverParameters(argument, &coverParams)) CLEAN_RETURN(badusage(programName));
continue;
}
if (longCommandWArg(&argument, "--optimize-cover")) {
cover=2;
/* Allow optional arguments following an = */
if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
else if (*argument++ != '=') { CLEAN_RETURN(badusage(programName)); }
else if (!parseCoverParameters(argument, &coverParams)) { CLEAN_RETURN(badusage(programName)); }
continue;
}
#endif
if (longCommandWArg(&argument, "--memlimit=")) { memLimit = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--memory=")) { memLimit = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "--memlimit-decompress=")) { memLimit = readU32FromChar(&argument); continue; }
@ -520,13 +561,20 @@ int main(int argCount, const char* argv[])
/* Check if dictionary builder is selected */
if (operation==zom_train) {
#ifndef ZSTD_NODICT
ZDICT_params_t dictParams;
memset(&dictParams, 0, sizeof(dictParams));
dictParams.compressionLevel = dictCLevel;
dictParams.selectivityLevel = dictSelect;
dictParams.notificationLevel = displayLevel;
dictParams.dictID = dictID;
DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, dictParams);
if (cover) {
coverParams.compressionLevel = dictCLevel;
coverParams.notificationLevel = displayLevel;
coverParams.dictID = dictID;
DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, NULL, &coverParams, cover - 1);
} else {
ZDICT_params_t dictParams;
memset(&dictParams, 0, sizeof(dictParams));
dictParams.compressionLevel = dictCLevel;
dictParams.selectivityLevel = dictSelect;
dictParams.notificationLevel = displayLevel;
dictParams.dictID = dictID;
DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, &dictParams, NULL, 0);
}
#endif
goto _end;
}

View File

@ -20,10 +20,6 @@
# zstreamtest32: Same as zstreamtest, but forced to compile in 32-bits mode
# ##########################################################################
DESTDIR?=
PREFIX ?= /usr/local
BINDIR = $(PREFIX)/bin
MANDIR = $(PREFIX)/share/man/man1
ZSTDDIR = ../lib
PRGDIR = ../programs
PYTHON ?= python3
@ -125,10 +121,10 @@ zbufftest-dll : $(ZSTDDIR)/common/xxhash.c $(PRGDIR)/datagen.c zbufftest.c
$(MAKE) -C $(ZSTDDIR) libzstd
$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@$(EXT)
zstreamtest : $(ZSTD_FILES) $(PRGDIR)/datagen.c zstreamtest.c
zstreamtest : $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c zstreamtest.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
zstreamtest32 : $(ZSTD_FILES) $(PRGDIR)/datagen.c zstreamtest.c
zstreamtest32 : $(ZSTD_FILES) $(ZDICT_FILES) $(PRGDIR)/datagen.c zstreamtest.c
$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
zstreamtest-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd

View File

@ -28,6 +28,7 @@
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressContinue, ZSTD_compressBlock */
#include "zstd.h" /* ZSTD_VERSION_STRING */
#include "zstd_errors.h" /* ZSTD_getErrorCode */
#define ZDICT_STATIC_LINKING_ONLY
#include "zdict.h" /* ZDICT_trainFromBuffer */
#include "datagen.h" /* RDG_genBuffer */
#include "mem.h"
@ -311,6 +312,70 @@ static int basicUnitTests(U32 seed, double compressibility)
if (r != CNBuffSize) goto _output_error);
DISPLAYLEVEL(4, "OK \n");
DISPLAYLEVEL(4, "test%3i : dictionary containing only header should return error : ", testNb++);
{
const size_t ret = ZSTD_decompress_usingDict(
dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize,
"\x37\xa4\x30\xec\x11\x22\x33\x44", 8);
if (ZSTD_getErrorCode(ret) != ZSTD_error_dictionary_corrupted) goto _output_error;
}
DISPLAYLEVEL(4, "OK \n");
ZSTD_freeCCtx(cctx);
ZSTD_freeDCtx(dctx);
free(dictBuffer);
free(samplesSizes);
}
/* COVER dictionary builder tests */
{ ZSTD_CCtx* const cctx = ZSTD_createCCtx();
ZSTD_DCtx* const dctx = ZSTD_createDCtx();
size_t dictSize = 16 KB;
size_t optDictSize = dictSize;
void* dictBuffer = malloc(dictSize);
size_t const totalSampleSize = 1 MB;
size_t const sampleUnitSize = 8 KB;
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
COVER_params_t params;
U32 dictID;
if (dictBuffer==NULL || samplesSizes==NULL) {
free(dictBuffer);
free(samplesSizes);
goto _output_error;
}
DISPLAYLEVEL(4, "test%3i : COVER_trainFromBuffer : ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
memset(&params, 0, sizeof(params));
params.d = 1 + (FUZ_rand(&seed) % 16);
params.k = params.d + (FUZ_rand(&seed) % 256);
dictSize = COVER_trainFromBuffer(dictBuffer, dictSize,
CNBuffer, samplesSizes, nbSamples,
params);
if (ZDICT_isError(dictSize)) goto _output_error;
DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)dictSize);
DISPLAYLEVEL(4, "test%3i : check dictID : ", testNb++);
dictID = ZDICT_getDictID(dictBuffer, dictSize);
if (dictID==0) goto _output_error;
DISPLAYLEVEL(4, "OK : %u \n", dictID);
DISPLAYLEVEL(4, "test%3i : COVER_optimizeTrainFromBuffer : ", testNb++);
memset(&params, 0, sizeof(params));
params.steps = 4;
optDictSize = COVER_optimizeTrainFromBuffer(dictBuffer, optDictSize,
CNBuffer, samplesSizes, nbSamples,
&params);
if (ZDICT_isError(optDictSize)) goto _output_error;
DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)optDictSize);
DISPLAYLEVEL(4, "test%3i : check dictID : ", testNb++);
dictID = ZDICT_getDictID(dictBuffer, optDictSize);
if (dictID==0) goto _output_error;
DISPLAYLEVEL(4, "OK : %u \n", dictID);
ZSTD_freeCCtx(cctx);
ZSTD_freeDCtx(dctx);
free(dictBuffer);

View File

@ -255,6 +255,27 @@ rm -rf dirTestDict
rm tmp*
$ECHO "\n**** cover dictionary tests **** "
TESTFILE=../programs/zstdcli.c
./datagen > tmpDict
$ECHO "- Create first dictionary"
$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c -o tmpDict
cp $TESTFILE tmp
$ZSTD -f tmp -D tmpDict
$ZSTD -d tmp.zst -D tmpDict -fo result
$DIFF $TESTFILE result
$ECHO "- Create second (different) dictionary"
$ZSTD --train --cover=k=56,d=8 *.c ../programs/*.c ../programs/*.h -o tmpDictC
$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
$ECHO "- Create dictionary with short dictID"
$ZSTD --train --cover=k=46,d=8 *.c ../programs/*.c --dictID 1 -o tmpDict1
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
$ECHO "- Create dictionary with size limit"
$ZSTD --train --optimize-cover=steps=8 *.c ../programs/*.c -o tmpDict2 --maxdict 4K
rm tmp*
$ECHO "\n**** integrity tests **** "
$ECHO "test one file (tmp1.zst) "

View File

@ -26,9 +26,10 @@
#include <time.h> /* clock_t, clock() */
#include <string.h> /* strcmp */
#include "mem.h"
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_maxCLevel, ZSTD_customMem */
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_maxCLevel, ZSTD_customMem, ZSTD_getDictID_fromFrame */
#include "zstd.h" /* ZSTD_compressBound */
#include "zstd_errors.h" /* ZSTD_error_srcSize_wrong */
#include "zdict.h" /* ZDICT_trainFromBuffer */
#include "datagen.h" /* RDG_genBuffer */
#define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
#include "xxhash.h" /* XXH64_* */
@ -44,8 +45,7 @@
static const U32 nbTestsDefault = 10000;
#define COMPRESSIBLE_NOISE_LENGTH (10 MB)
#define FUZ_COMPRESSIBILITY_DEFAULT 50
static const U32 prime1 = 2654435761U;
static const U32 prime2 = 2246822519U;
static const U32 prime32 = 2654435761U;
/*-************************************
@ -81,8 +81,9 @@ static clock_t FUZ_GetClockSpan(clock_t clockStart)
#define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
unsigned int FUZ_rand(unsigned int* seedPtr)
{
static const U32 prime2 = 2246822519U;
U32 rand32 = *seedPtr;
rand32 *= prime1;
rand32 *= prime32;
rand32 += prime2;
rand32 = FUZ_rotl32(rand32, 13);
*seedPtr = rand32;
@ -107,6 +108,41 @@ static void freeFunction(void* opaque, void* address)
* Basic Unit tests
======================================================*/
/* buffer_t :
 * describes a heap buffer, as produced by FUZ_createDictionary(). */
typedef struct {
void* start;    /* beginning of the allocated memory, NULL when there is no buffer */
size_t size;    /* allocated capacity, in bytes */
size_t filled;  /* number of bytes actually written into the buffer */
} buffer_t;
/* value used to represent "no buffer" */
static const buffer_t g_nullBuffer = { NULL, 0 , 0 };
static buffer_t FUZ_createDictionary(const void* src, size_t srcSize, size_t blockSize, size_t requestedDictSize)
{
buffer_t dict = { NULL, 0, 0 };
size_t const nbBlocks = (srcSize + (blockSize-1)) / blockSize;
size_t* const blockSizes = (size_t*) malloc(nbBlocks * sizeof(size_t));
if (!blockSizes) return dict;
dict.start = malloc(requestedDictSize);
if (!dict.start) { free(blockSizes); return dict; }
{ size_t nb;
for (nb=0; nb<nbBlocks-1; nb++) blockSizes[nb] = blockSize;
blockSizes[nbBlocks-1] = srcSize - (blockSize * (nbBlocks-1));
}
{ size_t const dictSize = ZDICT_trainFromBuffer(dict.start, requestedDictSize, src, blockSizes, (unsigned)nbBlocks);
free(blockSizes);
if (ZDICT_isError(dictSize)) { free(dict.start); return (buffer_t){ NULL, 0, 0 }; }
dict.size = requestedDictSize;
dict.filled = dictSize;
return dict; /* how to return dictSize ? */
}
}
/* FUZ_freeDictionary() :
 * releases the memory referenced by `dict.start`.
 * `dict` is received by value, so the caller's copy is not modified. */
static void FUZ_freeDictionary(buffer_t dict)
{
    void* const dictMemory = dict.start;
    free(dictMemory);
}
static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem customMem)
{
size_t const CNBufferSize = COMPRESSIBLE_NOISE_LENGTH;
@ -123,14 +159,25 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
ZSTD_DStream* zd = ZSTD_createDStream_advanced(customMem);
ZSTD_inBuffer inBuff, inBuff2;
ZSTD_outBuffer outBuff;
buffer_t dictionary = g_nullBuffer;
unsigned dictID = 0;
/* Create compressible test buffer */
if (!CNBuffer || !compressedBuffer || !decodedBuffer || !zc || !zd) {
DISPLAY("Not enough memory, aborting\n");
DISPLAY("Not enough memory, aborting \n");
goto _output_error;
}
RDG_genBuffer(CNBuffer, CNBufferSize, compressibility, 0., seed);
/* Create dictionary */
MEM_STATIC_ASSERT(COMPRESSIBLE_NOISE_LENGTH >= 4 MB);
dictionary = FUZ_createDictionary(CNBuffer, 4 MB, 4 KB, 40 KB);
if (!dictionary.start) {
DISPLAY("Error creating dictionary, aborting \n");
goto _output_error;
}
dictID = ZDICT_getDictID(dictionary.start, dictionary.filled);
/* generate skippable frame */
MEM_writeLE32(compressedBuffer, ZSTD_MAGIC_SKIPPABLE_START);
MEM_writeLE32(((char*)compressedBuffer)+4, (U32)skippableFrameSize);
@ -260,8 +307,6 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
{ size_t const r = ZSTD_endStream(zc, &outBuff);
if (r != 0) goto _output_error; } /* error, or some data not flushed */
{ unsigned long long origSize = ZSTD_getDecompressedSize(outBuff.dst, outBuff.pos);
DISPLAY("outBuff.pos : %u \n", (U32)outBuff.pos);
DISPLAY("origSize = %u \n", (U32)origSize);
if ((size_t)origSize != CNBufferSize) goto _output_error; } /* exact original size must be present */
DISPLAYLEVEL(4, "OK (%u bytes : %.2f%%)\n", (U32)cSize, (double)cSize/COMPRESSIBLE_NOISE_LENGTH*100);
@ -320,7 +365,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
/* CDict scenario */
DISPLAYLEVEL(4, "test%3i : digested dictionary : ", testNb++);
{ ZSTD_CDict* const cdict = ZSTD_createCDict(CNBuffer, 128 KB, 1);
{ ZSTD_CDict* const cdict = ZSTD_createCDict(dictionary.start, dictionary.filled, 1);
size_t const initError = ZSTD_initCStream_usingCDict(zc, cdict);
if (ZSTD_isError(initError)) goto _output_error;
cSize = 0;
@ -346,9 +391,15 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
DISPLAYLEVEL(4, "OK (%u bytes) \n", (U32)s);
}
DISPLAYLEVEL(4, "test%3i : check Dictionary ID : ", testNb++);
{ unsigned const dID = ZSTD_getDictID_fromFrame(compressedBuffer, cSize);
if (dID != dictID) goto _output_error;
DISPLAYLEVEL(4, "OK (%u) \n", dID);
}
/* DDict scenario */
DISPLAYLEVEL(4, "test%3i : decompress %u bytes with digested dictionary : ", testNb++, (U32)CNBufferSize);
{ ZSTD_DDict* const ddict = ZSTD_createDDict(CNBuffer, 128 KB);
{ ZSTD_DDict* const ddict = ZSTD_createDDict(dictionary.start, dictionary.filled);
size_t const initError = ZSTD_initDStream_usingDDict(zd, ddict);
if (ZSTD_isError(initError)) goto _output_error;
inBuff.src = compressedBuffer;
@ -388,6 +439,7 @@ static int basicUnitTests(U32 seed, double compressibility, ZSTD_customMem custo
_end:
FUZ_freeDictionary(dictionary);
ZSTD_freeCStream(zc);
ZSTD_freeDStream(zd);
free(CNBuffer);
@ -492,7 +544,7 @@ static int fuzzerTests(U32 seed, U32 nbTests, unsigned startTest, double compres
if (nbTests >= testNb) { DISPLAYUPDATE(2, "\r%6u/%6u ", testNb, nbTests); }
else { DISPLAYUPDATE(2, "\r%6u ", testNb); }
FUZ_rand(&coreSeed);
lseed = coreSeed ^ prime1;
lseed = coreSeed ^ prime32;
/* states full reset (deliberately not synchronized) */
/* some issues can only happen when reusing states */