Merge remote-tracking branch 'upstream/dev' into dev
This commit is contained in:
commit
0fcaa675e0
@ -392,6 +392,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
|
||||
bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
|
||||
return bounds;
|
||||
|
||||
case ZSTD_c_srcSizeHint:
|
||||
bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
|
||||
bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
|
||||
return bounds;
|
||||
|
||||
default:
|
||||
{ ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };
|
||||
return boundError;
|
||||
@ -448,6 +453,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
|
||||
case ZSTD_c_forceAttachDict:
|
||||
case ZSTD_c_literalCompressionMode:
|
||||
case ZSTD_c_targetCBlockSize:
|
||||
case ZSTD_c_srcSizeHint:
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
@ -494,6 +500,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
|
||||
case ZSTD_c_ldmMinMatch:
|
||||
case ZSTD_c_ldmBucketSizeLog:
|
||||
case ZSTD_c_targetCBlockSize:
|
||||
case ZSTD_c_srcSizeHint:
|
||||
break;
|
||||
|
||||
default: RETURN_ERROR(parameter_unsupported);
|
||||
@ -674,6 +681,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
|
||||
CCtxParams->targetCBlockSize = value;
|
||||
return CCtxParams->targetCBlockSize;
|
||||
|
||||
case ZSTD_c_srcSizeHint :
|
||||
if (value!=0) /* 0 ==> default */
|
||||
BOUNDCHECK(ZSTD_c_srcSizeHint, value);
|
||||
CCtxParams->srcSizeHint = value;
|
||||
return CCtxParams->srcSizeHint;
|
||||
|
||||
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
||||
}
|
||||
}
|
||||
@ -779,6 +792,9 @@ size_t ZSTD_CCtxParams_getParameter(
|
||||
case ZSTD_c_targetCBlockSize :
|
||||
*value = (int)CCtxParams->targetCBlockSize;
|
||||
break;
|
||||
case ZSTD_c_srcSizeHint :
|
||||
*value = (int)CCtxParams->srcSizeHint;
|
||||
break;
|
||||
default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
|
||||
}
|
||||
return 0;
|
||||
@ -1029,7 +1045,11 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
|
||||
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
|
||||
const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
|
||||
{
|
||||
ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
|
||||
ZSTD_compressionParameters cParams;
|
||||
if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
|
||||
srcSizeHint = CCtxParams->srcSizeHint;
|
||||
}
|
||||
cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
|
||||
if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
|
||||
if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
|
||||
if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
|
||||
|
@ -203,6 +203,9 @@ struct ZSTD_CCtx_params_s {
|
||||
size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize.
|
||||
* No target when targetCBlockSize == 0.
|
||||
* There is no guarantee on compressed block size */
|
||||
int srcSizeHint; /* User's best guess of source size.
|
||||
* Hint is not valid when srcSizeHint == 0.
|
||||
* There is no guarantee that hint is close to actual source size */
|
||||
|
||||
ZSTD_dictAttachPref_e attachDictPref;
|
||||
ZSTD_literalCompressionMode_e literalCompressionMode;
|
||||
|
@ -2889,6 +2889,7 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
|
||||
const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
|
||||
if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
|
||||
{
|
||||
if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
|
||||
if (litSize > srcSize-3) return ERROR(corruption_detected);
|
||||
memcpy(dctx->litBuffer, istart, litSize);
|
||||
dctx->litPtr = dctx->litBuffer;
|
||||
|
@ -2655,6 +2655,7 @@ static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
|
||||
const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
|
||||
if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
|
||||
{
|
||||
if (litSize > BLOCKSIZE) return ERROR(corruption_detected);
|
||||
if (litSize > srcSize-3) return ERROR(corruption_detected);
|
||||
memcpy(dctx->litBuffer, istart, litSize);
|
||||
dctx->litPtr = dctx->litBuffer;
|
||||
@ -3034,9 +3035,12 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
||||
{
|
||||
/* blockType == blockCompressed */
|
||||
const BYTE* ip = (const BYTE*)src;
|
||||
size_t litCSize;
|
||||
|
||||
if (srcSize > BLOCKSIZE) return ERROR(corruption_detected);
|
||||
|
||||
/* Decode literals sub-block */
|
||||
size_t litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
|
||||
litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
|
||||
if (ZSTD_isError(litCSize)) return litCSize;
|
||||
ip += litCSize;
|
||||
srcSize -= litCSize;
|
||||
|
11
lib/zstd.h
11
lib/zstd.h
@ -15,6 +15,7 @@ extern "C" {
|
||||
#define ZSTD_H_235446
|
||||
|
||||
/* ====== Dependency ======*/
|
||||
#include <limits.h> /* INT_MAX */
|
||||
#include <stddef.h> /* size_t */
|
||||
|
||||
|
||||
@ -386,6 +387,7 @@ typedef enum {
|
||||
* ZSTD_c_forceAttachDict
|
||||
* ZSTD_c_literalCompressionMode
|
||||
* ZSTD_c_targetCBlockSize
|
||||
* ZSTD_c_srcSizeHint
|
||||
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
|
||||
* note : never ever use experimentalParam? names directly;
|
||||
* also, the enums values themselves are unstable and can still change.
|
||||
@ -396,6 +398,7 @@ typedef enum {
|
||||
ZSTD_c_experimentalParam4=1001,
|
||||
ZSTD_c_experimentalParam5=1002,
|
||||
ZSTD_c_experimentalParam6=1003,
|
||||
ZSTD_c_experimentalParam7=1004,
|
||||
} ZSTD_cParameter;
|
||||
|
||||
typedef struct {
|
||||
@ -1063,6 +1066,8 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
|
||||
/* Advanced parameter bounds */
|
||||
#define ZSTD_TARGETCBLOCKSIZE_MIN 64
|
||||
#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
|
||||
#define ZSTD_SRCSIZEHINT_MIN 0
|
||||
#define ZSTD_SRCSIZEHINT_MAX INT_MAX
|
||||
|
||||
/* internal */
|
||||
#define ZSTD_HASHLOG3_MAX 17
|
||||
@ -1441,6 +1446,12 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
|
||||
* There is no guarantee on compressed block size (default:0) */
|
||||
#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
|
||||
|
||||
/* User's best guess of source size.
|
||||
* Hint is not valid when srcSizeHint == 0.
|
||||
* There is no guarantee that hint is close to actual source size,
|
||||
* but compression ratio may regress significantly if guess considerably underestimates */
|
||||
#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
|
||||
|
||||
/*! ZSTD_CCtx_getParameter() :
|
||||
* Get the requested compression parameter value, selected by enum ZSTD_cParameter,
|
||||
* and store it into int* value.
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include <string.h> /* strcmp, strlen */
|
||||
#include <assert.h>
|
||||
#include <errno.h> /* errno */
|
||||
#include <limits.h> /* INT_MAX */
|
||||
#include <signal.h>
|
||||
#include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */
|
||||
|
||||
@ -304,7 +305,9 @@ struct FIO_prefs_s {
|
||||
int ldmMinMatch;
|
||||
int ldmBucketSizeLog;
|
||||
int ldmHashRateLog;
|
||||
size_t streamSrcSize;
|
||||
size_t targetCBlockSize;
|
||||
int srcSizeHint;
|
||||
ZSTD_literalCompressionMode_e literalCompressionMode;
|
||||
|
||||
/* IO preferences */
|
||||
@ -349,7 +352,9 @@ FIO_prefs_t* FIO_createPreferences(void)
|
||||
ret->ldmMinMatch = 0;
|
||||
ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
|
||||
ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
|
||||
ret->streamSrcSize = 0;
|
||||
ret->targetCBlockSize = 0;
|
||||
ret->srcSizeHint = 0;
|
||||
ret->literalCompressionMode = ZSTD_lcm_auto;
|
||||
return ret;
|
||||
}
|
||||
@ -418,10 +423,18 @@ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
|
||||
prefs->rsyncable = rsyncable;
|
||||
}
|
||||
|
||||
void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) {
|
||||
prefs->streamSrcSize = streamSrcSize;
|
||||
}
|
||||
|
||||
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) {
|
||||
prefs->targetCBlockSize = targetCBlockSize;
|
||||
}
|
||||
|
||||
void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) {
|
||||
prefs->srcSizeHint = (int)MIN((size_t)INT_MAX, srcSizeHint);
|
||||
}
|
||||
|
||||
void FIO_setLiteralCompressionMode(
|
||||
FIO_prefs_t* const prefs,
|
||||
ZSTD_literalCompressionMode_e mode) {
|
||||
@ -633,7 +646,6 @@ typedef struct {
|
||||
|
||||
static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
|
||||
const char* dictFileName, int cLevel,
|
||||
U64 srcSize,
|
||||
ZSTD_compressionParameters comprParams) {
|
||||
cRess_t ress;
|
||||
memset(&ress, 0, sizeof(ress));
|
||||
@ -667,6 +679,8 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
|
||||
/* max compressed block size */
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
|
||||
/* source size hint */
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
|
||||
/* long distance matching */
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
|
||||
@ -698,10 +712,7 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
|
||||
CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
|
||||
#endif
|
||||
/* dictionary */
|
||||
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, srcSize) ); /* set the value temporarily for dictionary loading, to adapt compression parameters */
|
||||
CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, dictBuffer, dictBuffSize) );
|
||||
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, ZSTD_CONTENTSIZE_UNKNOWN) ); /* reset */
|
||||
|
||||
free(dictBuffer);
|
||||
}
|
||||
|
||||
@ -1003,6 +1014,9 @@ FIO_compressZstdFrame(FIO_prefs_t* const prefs,
|
||||
/* init */
|
||||
if (fileSize != UTIL_FILESIZE_UNKNOWN) {
|
||||
CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
|
||||
} else if (prefs->streamSrcSize > 0) {
|
||||
/* unknown source size; use the declared stream size */
|
||||
CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
|
||||
}
|
||||
(void)srcFileName;
|
||||
|
||||
@ -1361,10 +1375,7 @@ int FIO_compressFilename(FIO_prefs_t* const prefs,
|
||||
const char* dictFileName, int compressionLevel,
|
||||
ZSTD_compressionParameters comprParams)
|
||||
{
|
||||
U64 const fileSize = UTIL_getFileSize(srcFileName);
|
||||
U64 const srcSize = (fileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : fileSize;
|
||||
|
||||
cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams);
|
||||
cRess_t const ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
|
||||
int const result = FIO_compressFilename_srcFile(prefs, ress, dstFileName, srcFileName, compressionLevel);
|
||||
|
||||
|
||||
@ -1415,10 +1426,7 @@ int FIO_compressMultipleFilenames(FIO_prefs_t* const prefs,
|
||||
ZSTD_compressionParameters comprParams)
|
||||
{
|
||||
int error = 0;
|
||||
U64 const firstFileSize = UTIL_getFileSize(inFileNamesTable[0]);
|
||||
U64 const firstSrcSize = (firstFileSize == UTIL_FILESIZE_UNKNOWN) ? ZSTD_CONTENTSIZE_UNKNOWN : firstFileSize;
|
||||
U64 const srcSize = (nbFiles != 1) ? ZSTD_CONTENTSIZE_UNKNOWN : firstSrcSize ;
|
||||
cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, srcSize, comprParams);
|
||||
cRess_t ress = FIO_createCResources(prefs, dictFileName, compressionLevel, comprParams);
|
||||
|
||||
/* init */
|
||||
assert(outFileName != NULL || suffix != NULL);
|
||||
|
@ -71,7 +71,9 @@ void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog);
|
||||
void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag);
|
||||
void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */
|
||||
void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable);
|
||||
void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize);
|
||||
void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize);
|
||||
void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint);
|
||||
void FIO_setLiteralCompressionMode(
|
||||
FIO_prefs_t* const prefs,
|
||||
ZSTD_literalCompressionMode_e mode);
|
||||
|
@ -144,6 +144,18 @@ the last one takes effect.
|
||||
Due to the chaotic nature of dynamic adaptation, compressed result is not reproducible.
|
||||
_note_ : at the time of this writing, `--adapt` can remain stuck at low speed
|
||||
when combined with multiple worker threads (>=2).
|
||||
* `--stream-size=#` :
|
||||
Sets the pledged source size of input coming from a stream. This value must be exact, as it
|
||||
will be included in the produced frame header. Incorrect stream sizes will cause an error.
|
||||
This information will be used to better optimize compression parameters, resulting in
|
||||
better and potentially faster compression, especially for smaller source sizes.
|
||||
* `--size-hint=#`:
|
||||
When handling input from a stream, `zstd` must guess how large the source size
|
||||
will be when optimizing compression parameters. If the stream size is relatively
|
||||
small, this guess may be a poor one, resulting in a worse compression ratio than
|
||||
expected. This feature allows for controlling the guess when needed.
|
||||
Exact guesses result in better compression ratios. Overestimates result in slightly
|
||||
degraded compression ratios, while underestimates may result in significant degradation.
|
||||
* `--rsyncable` :
|
||||
`zstd` will periodically synchronize the compression state to make the
|
||||
compressed file more rsync-friendly. There is a negligible impact to
|
||||
|
@ -141,6 +141,8 @@ static int usage_advanced(const char* programName)
|
||||
DISPLAY( "--long[=#]: enable long distance matching with given window log (default: %u)\n", g_defaultMaxWindowLog);
|
||||
DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %u)\n", 1);
|
||||
DISPLAY( "--adapt : dynamically adapt compression level to I/O conditions \n");
|
||||
DISPLAY( "--stream-size=# : optimize compression parameters for streaming input of given number of bytes \n");
|
||||
/* keep the "option : description" layout used by the neighbouring help lines */
DISPLAY( "--size-hint=# : optimize compression parameters for streaming input of approximately this size\n");
|
||||
DISPLAY( "--target-compressed-block-size=# : make compressed block near targeted size \n");
|
||||
#ifdef ZSTD_MULTITHREAD
|
||||
DISPLAY( " -T# : spawns # compression threads (default: 1, 0==# cores) \n");
|
||||
@ -588,7 +590,9 @@ int main(int argCount, const char* argv[])
|
||||
const char* suffix = ZSTD_EXTENSION;
|
||||
unsigned maxDictSize = g_defaultMaxDictSize;
|
||||
unsigned dictID = 0;
|
||||
size_t streamSrcSize = 0;
|
||||
size_t targetCBlockSize = 0;
|
||||
size_t srcSizeHint = 0;
|
||||
int dictCLevel = g_defaultDictCLevel;
|
||||
unsigned dictSelect = g_defaultSelectivityLevel;
|
||||
#ifdef UTIL_HAS_CREATEFILELIST
|
||||
@ -745,7 +749,9 @@ int main(int argCount, const char* argv[])
|
||||
if (longCommandWArg(&argument, "--maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--dictID=")) { dictID = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) CLEAN_RETURN(badusage(programName)); continue; }
|
||||
if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readU32FromChar(&argument); continue; }
|
||||
if (longCommandWArg(&argument, "--long")) {
|
||||
unsigned ldmWindowLog = 0;
|
||||
ldmFlag = 1;
|
||||
@ -1150,7 +1156,9 @@ int main(int argCount, const char* argv[])
|
||||
FIO_setAdaptMin(prefs, adaptMin);
|
||||
FIO_setAdaptMax(prefs, adaptMax);
|
||||
FIO_setRsyncable(prefs, rsyncable);
|
||||
FIO_setStreamSrcSize(prefs, streamSrcSize);
|
||||
FIO_setTargetCBlockSize(prefs, targetCBlockSize);
|
||||
FIO_setSrcSizeHint(prefs, srcSizeHint);
|
||||
FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
|
||||
if (adaptMin > cLevel) cLevel = adaptMin;
|
||||
if (adaptMax < cLevel) cLevel = adaptMax;
|
||||
@ -1160,7 +1168,7 @@ int main(int argCount, const char* argv[])
|
||||
else
|
||||
operationResult = FIO_compressMultipleFilenames(prefs, filenameTable, filenameIdx, outFileName, suffix, dictFileName, cLevel, compressionParams);
|
||||
#else
|
||||
(void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; /* not used when ZSTD_NOCOMPRESS set */
|
||||
(void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; /* not used when ZSTD_NOCOMPRESS set */
|
||||
DISPLAY("Compression not supported \n");
|
||||
#endif
|
||||
} else { /* decompression or test */
|
||||
|
@ -113,15 +113,6 @@ zstd_frame_info: $(FUZZ_HEADERS) $(FUZZ_OBJ) zstd_frame_info.o
|
||||
libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c regression_driver.o
|
||||
$(AR) $(FUZZ_ARFLAGS) $@ regression_driver.o
|
||||
|
||||
# Install libfuzzer (not usable for MSAN testing)
|
||||
# Provided for convenience. To use this library run make libFuzzer and
|
||||
# set LDFLAGS=-L.
|
||||
.PHONY: libFuzzer
|
||||
libFuzzer:
|
||||
@$(RM) -rf Fuzzer
|
||||
@git clone https://chromium.googlesource.com/chromium/llvm-project/compiler-rt/lib/fuzzer Fuzzer
|
||||
@cd Fuzzer && ./build.sh
|
||||
|
||||
corpora/%_seed_corpus.zip:
|
||||
@mkdir -p corpora
|
||||
$(DOWNLOAD) $@ $(CORPORA_URL_PREFIX)$*_seed_corpus.zip
|
||||
|
@ -35,6 +35,8 @@ The environment variables can be overridden with the corresponding flags
|
||||
`--cc`, `--cflags`, etc.
|
||||
The specific fuzzing engine is selected with `LIB_FUZZING_ENGINE` or
|
||||
`--lib-fuzzing-engine`, the default is `libregression.a`.
|
||||
Alternatively, you can use Clang's built-in fuzzing engine with
|
||||
`--enable-fuzzer`.
|
||||
It has flags that can easily set up sanitizers `--enable-{a,ub,m}san`, and
|
||||
coverage instrumentation `--enable-coverage`.
|
||||
It sets sane defaults which can be overridden with flags `--debug`,
|
||||
@ -51,22 +53,25 @@ The command used to run the fuzzer is printed for debugging.
|
||||
## LibFuzzer
|
||||
|
||||
```
|
||||
# Build libfuzzer if necessary
|
||||
make libFuzzer
|
||||
# Build the fuzz targets
|
||||
./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan --lib-fuzzing-engine Fuzzer/libFuzzer.a --cc clang --cxx clang++
|
||||
./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan --cc clang --cxx clang++
|
||||
# OR equivalently
|
||||
CC=clang CXX=clang++ LIB_FUZZING_ENGINE=Fuzzer/libFuzzer.a ./fuzz.py build all --enable-coverage --enable-asan --enable-ubsan
|
||||
CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-asan --enable-ubsan
|
||||
# Run the fuzzer
|
||||
./fuzz.py libfuzzer TARGET -max_len=8192 -jobs=4
|
||||
./fuzz.py libfuzzer TARGET <libfuzzer args like -jobs=4>
|
||||
```
|
||||
|
||||
where `TARGET` could be `simple_decompress`, `stream_round_trip`, etc.
|
||||
|
||||
### MSAN
|
||||
|
||||
Fuzzing with `libFuzzer` and `MSAN` will require building a C++ standard library
|
||||
and libFuzzer with MSAN.
|
||||
Fuzzing with `libFuzzer` and `MSAN` is as easy as:
|
||||
|
||||
```
|
||||
CC=clang CXX=clang++ ./fuzz.py build all --enable-fuzzer --enable-msan
|
||||
./fuzz.py libfuzzer TARGET <libfuzzer args>
|
||||
```
|
||||
|
||||
`fuzz.py` respects the environment variables / flags `MSAN_EXTRA_CPPFLAGS`,
|
||||
`MSAN_EXTRA_CFLAGS`, `MSAN_EXTRA_CXXFLAGS`, `MSAN_EXTRA_LDFLAGS` to easily pass
|
||||
the extra parameters only for MSAN.
|
||||
|
@ -24,21 +24,38 @@ def abs_join(a, *p):
|
||||
return os.path.abspath(os.path.join(a, *p))
|
||||
|
||||
|
||||
class InputType(object):
|
||||
RAW_DATA = 1
|
||||
COMPRESSED_DATA = 2
|
||||
|
||||
|
||||
class FrameType(object):
|
||||
ZSTD = 1
|
||||
BLOCK = 2
|
||||
|
||||
|
||||
class TargetInfo(object):
|
||||
def __init__(self, input_type, frame_type=FrameType.ZSTD):
|
||||
self.input_type = input_type
|
||||
self.frame_type = frame_type
|
||||
|
||||
|
||||
# Constants
|
||||
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
|
||||
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
|
||||
TARGETS = [
|
||||
'simple_round_trip',
|
||||
'stream_round_trip',
|
||||
'block_round_trip',
|
||||
'simple_decompress',
|
||||
'stream_decompress',
|
||||
'block_decompress',
|
||||
'dictionary_round_trip',
|
||||
'dictionary_decompress',
|
||||
'zstd_frame_info',
|
||||
'simple_compress',
|
||||
]
|
||||
TARGET_INFO = {
|
||||
'simple_round_trip': TargetInfo(InputType.RAW_DATA),
|
||||
'stream_round_trip': TargetInfo(InputType.RAW_DATA),
|
||||
'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
|
||||
'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
|
||||
'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
|
||||
'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
|
||||
'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
|
||||
'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
|
||||
'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
|
||||
'simple_compress': TargetInfo(InputType.RAW_DATA),
|
||||
}
|
||||
TARGETS = list(TARGET_INFO.keys())
|
||||
ALL_TARGETS = TARGETS + ['all']
|
||||
FUZZ_RNG_SEED_SIZE = 4
|
||||
|
||||
@ -67,7 +84,7 @@ MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
|
||||
def create(r):
|
||||
d = os.path.abspath(r)
|
||||
if not os.path.isdir(d):
|
||||
os.mkdir(d)
|
||||
os.makedirs(d)
|
||||
return d
|
||||
|
||||
|
||||
@ -158,7 +175,7 @@ def compiler_version(cc, cxx):
|
||||
assert(b'clang' in cxx_version_bytes)
|
||||
compiler = 'clang'
|
||||
elif b'gcc' in cc_version_bytes:
|
||||
assert(b'gcc' in cxx_version_bytes)
|
||||
assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes)
|
||||
compiler = 'gcc'
|
||||
if compiler is not None:
|
||||
version_regex = b'([0-9])+\.([0-9])+\.([0-9])+'
|
||||
@ -699,7 +716,8 @@ def gen(args):
|
||||
'-o{}'.format(decompressed),
|
||||
]
|
||||
|
||||
if 'block_' in args.TARGET:
|
||||
info = TARGET_INFO[args.TARGET]
|
||||
if info.frame_type == FrameType.BLOCK:
|
||||
cmd += [
|
||||
'--gen-blocks',
|
||||
'--max-block-size-log={}'.format(args.max_size_log)
|
||||
@ -710,10 +728,11 @@ def gen(args):
|
||||
print(' '.join(cmd))
|
||||
subprocess.check_call(cmd)
|
||||
|
||||
if '_round_trip' in args.TARGET:
|
||||
if info.input_type == InputType.RAW_DATA:
|
||||
print('using decompressed data in {}'.format(decompressed))
|
||||
samples = decompressed
|
||||
elif '_decompress' in args.TARGET:
|
||||
else:
|
||||
assert info.input_type == InputType.COMPRESSED_DATA
|
||||
print('using compressed data in {}'.format(compressed))
|
||||
samples = compressed
|
||||
|
||||
|
@ -14,6 +14,7 @@
|
||||
#ifndef FUZZ_HELPERS_H
|
||||
#define FUZZ_HELPERS_H
|
||||
|
||||
#include "debug.h"
|
||||
#include "fuzz.h"
|
||||
#include "xxhash.h"
|
||||
#include "zstd.h"
|
||||
|
@ -36,6 +36,7 @@ int main(int argc, char const **argv) {
|
||||
fprintf(stderr, "WARNING: No files passed to %s\n", argv[0]);
|
||||
for (i = 0; i < numFiles; ++i) {
|
||||
char const *fileName = files[i];
|
||||
DEBUGLOG(3, "Running %s", fileName);
|
||||
size_t const fileSize = UTIL_getFileSize(fileName);
|
||||
size_t readSize;
|
||||
FILE *file;
|
||||
|
@ -90,6 +90,9 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
|
||||
setRand(cctx, ZSTD_c_forceMaxWindow, 0, 1, state);
|
||||
setRand(cctx, ZSTD_c_literalCompressionMode, 0, 2, state);
|
||||
setRand(cctx, ZSTD_c_forceAttachDict, 0, 2, state);
|
||||
if (FUZZ_rand32(state, 0, 1) == 0) {
|
||||
setRand(cctx, ZSTD_c_srcSizeHint, ZSTD_SRCSIZEHINT_MIN, 2 * srcSize, state);
|
||||
}
|
||||
}
|
||||
|
||||
FUZZ_dict_t FUZZ_train(void const* src, size_t srcSize, uint32_t *state)
|
||||
|
@ -108,7 +108,6 @@ else
|
||||
fi
|
||||
|
||||
|
||||
|
||||
println "\n===> simple tests "
|
||||
|
||||
./datagen > tmp
|
||||
@ -409,6 +408,53 @@ println "compress multiple files including a missing one (notHere) : "
|
||||
$ZSTD -f tmp1 notHere tmp2 && die "missing file not detected!"
|
||||
|
||||
|
||||
println "\n===> stream-size mode"
|
||||
|
||||
./datagen -g11000 > tmp
|
||||
println "test : basic file compression vs sized streaming compression"
|
||||
file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst)
|
||||
stream_size=$(cat tmp | $ZSTD -14 --stream-size=11000 | wc -c)
|
||||
if [ "$stream_size" -gt "$file_size" ]; then
|
||||
die "hinted compression larger than expected"
|
||||
fi
|
||||
println "test : sized streaming compression and decompression"
|
||||
# read the data from stdin (no input file argument) so --stream-size is what provides the source size
cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11000
|
||||
$ZSTD -df tmp.zst -o tmp_decompress
|
||||
cmp tmp tmp_decompress || die "difference between original and decompressed file"
|
||||
println "test : incorrect stream size"
|
||||
cat tmp | $ZSTD -14 -f -o tmp.zst --stream-size=11001 && die "should fail with incorrect stream size"
|
||||
|
||||
|
||||
println "\n===> size-hint mode"
|
||||
|
||||
./datagen -g11000 > tmp
|
||||
./datagen -g11000 > tmp2
|
||||
./datagen > tmpDict
|
||||
println "test : basic file compression vs hinted streaming compression"
|
||||
file_size=$($ZSTD -14 -f tmp -o tmp.zst && wc -c < tmp.zst)
|
||||
stream_size=$(cat tmp | $ZSTD -14 --size-hint=11000 | wc -c)
|
||||
if [ "$stream_size" -ge "$file_size" ]; then
|
||||
die "hinted compression larger than expected"
|
||||
fi
|
||||
println "test : hinted streaming compression and decompression"
|
||||
cat tmp | $ZSTD -14 -f -o tmp.zst --size-hint=11000
|
||||
$ZSTD -df tmp.zst -o tmp_decompress
|
||||
cmp tmp tmp_decompress || die "difference between original and decompressed file"
|
||||
println "test : hinted streaming compression with dictionary"
|
||||
cat tmp | $ZSTD -14 -f -D tmpDict --size-hint=11000 | $ZSTD -t -D tmpDict
|
||||
println "test : multiple file compression with hints and dictionary"
|
||||
$ZSTD -14 -f -D tmpDict --size-hint=11000 tmp tmp2
|
||||
$ZSTD -14 -f -o tmp1_.zst -D tmpDict --size-hint=11000 tmp
|
||||
$ZSTD -14 -f -o tmp2_.zst -D tmpDict --size-hint=11000 tmp2
|
||||
cmp tmp.zst tmp1_.zst || die "first file's output differs"
|
||||
cmp tmp2.zst tmp2_.zst || die "second file's output differs"
|
||||
println "test : incorrect hinted stream sizes"
|
||||
cat tmp | $ZSTD -14 -f --size-hint=11050 | $ZSTD -t # slightly too high
|
||||
cat tmp | $ZSTD -14 -f --size-hint=10950 | $ZSTD -t # slightly too low
|
||||
cat tmp | $ZSTD -14 -f --size-hint=22000 | $ZSTD -t # considerably too high
|
||||
cat tmp | $ZSTD -14 -f --size-hint=5500 | $ZSTD -t # considerably too low
|
||||
|
||||
|
||||
println "\n===> dictionary tests "
|
||||
|
||||
println "- test with raw dict (content only) "
|
||||
|
@ -2106,6 +2106,7 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest,
|
||||
if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmMinMatch, FUZ_randomClampedLength(&lseed, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX), opaqueAPI) );
|
||||
if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmBucketSizeLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_BUCKETSIZELOG_MIN, ZSTD_LDM_BUCKETSIZELOG_MAX), opaqueAPI) );
|
||||
if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_ldmHashRateLog, FUZ_randomClampedLength(&lseed, ZSTD_LDM_HASHRATELOG_MIN, ZSTD_LDM_HASHRATELOG_MAX), opaqueAPI) );
|
||||
if (FUZ_rand(&lseed) & 3) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_srcSizeHint, FUZ_randomClampedLength(&lseed, ZSTD_SRCSIZEHINT_MIN, ZSTD_SRCSIZEHINT_MAX), opaqueAPI) );
|
||||
}
|
||||
|
||||
/* mess with frame parameters */
|
||||
|
Loading…
Reference in New Issue
Block a user